From 31fd884442f6bceb7d4623b116b6998171c7227b Mon Sep 17 00:00:00 2001 From: Juan Pavez Date: Sat, 19 Mar 2016 21:00:16 +0100 Subject: [PATCH] Full clean up of ACAT branch, pep8 formatting, deleted unused code --- DecomposingTest_10D.py | 421 ++++----- DecomposingTest_1D.py | 177 ++-- decomposed_test.py | 1885 ++++++++++++++++++++-------------------- logistic_sgd.py | 90 +- make_data.py | 694 +++++++++------ mlp.py | 112 ++- pyMorphWrapper.py | 501 ----------- train_classifiers.py | 177 ++-- utils.py | 1174 ++++++++++++------------- xgboost_wrapper.py | 44 +- 10 files changed, 2422 insertions(+), 2853 deletions(-) delete mode 100644 pyMorphWrapper.py diff --git a/DecomposingTest_10D.py b/DecomposingTest_10D.py index 94a4cda..32c115b 100644 --- a/DecomposingTest_10D.py +++ b/DecomposingTest_10D.py @@ -1,8 +1,8 @@ #!/usr/bin/env python ''' - This python script can be used to reproduce the results on 10D distributions - on the article Experiments using machine learning to approximate likelihood ratios - for mixture models (ACAT 2016). + This python script can be used to reproduce the results on 10D distributions + on the article Experiments using machine learning to approximate likelihood ratios + for mixture models (ACAT 2016). ''' __author__ = "Pavez J. " @@ -11,279 +11,186 @@ import ROOT import numpy as np from sklearn import svm, linear_model -from sklearn.externals import joblib -from sklearn.metrics import roc_curve, auc from sklearn.ensemble import GradientBoostingClassifier import sys import os.path -import pdb - -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D - from mlp import MLPTrainer from make_data import makeData, makeModelND, makeModelPrivateND,\ - makeModel + makeModel from utils import printMultiFrame, printFrame, saveFig, loadData,\ - makeROC, makeSigBkg, makePlotName + makeROC, makeSigBkg, makePlotName from train_classifiers import trainClassifiers, predict from decomposed_test import DecomposedTest from xgboost_wrapper import XGBoostClassifier -''' - A simple example for the work on the section - 5.4 of the paper 'Approximating generalized +''' + A simple example for the work on the section + 5.4 of the paper 'Approximating generalized likelihood ratio test with calibrated discriminative classifiers' by Kyle Cranmer -''' - -def evalC1C2Likelihood(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics', - workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', - c1_g='',model_g='mlp',use_log=False,true_dist=False,vars_g=None,clf=None, - verbose_printing=False): - - f = ROOT.TFile('{0}/{1}'.format(dir,workspace)) - w = f.Get('w') - f.Close() - if true_dist == True: - vars = ROOT.TList() - for var in vars_g: - vars.Add(w.var(var)) - x = ROOT.RooArgSet(vars) - else: - x = None - - score = ROOT.RooArgSet(w.var('score')) - if use_log == True: - evaluateRatio = test.evaluateLogDecomposedRatio - post = 'log' - else: - evaluateRatio = test.evaluateDecomposedRatio - post = '' - - npoints = 25 - csarray = np.linspace(0.01,0.2,npoints) - cs2array = np.linspace(0.1,0.4,npoints) - testdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,model_g,c1_g,'test','F1')) - - decomposedLikelihood = np.zeros((npoints,npoints)) - trueLikelihood = np.zeros((npoints,npoints)) - c1s = np.zeros(c1.shape[0]) - c0s = np.zeros(c1.shape[0]) - pre_pdf = [] - pre_dist = [] - pre_pdf.extend([[],[]]) - pre_dist.extend([[],[]]) - for k,c0_ in enumerate(c0): - pre_pdf[0].append([]) - pre_pdf[1].append([]) - pre_dist[0].append([]) - pre_dist[1].append([]) - for 
j,c1_ in enumerate(c1): - if k <> j: - f0pdf = w.function('bkghistpdf_{0}_{1}'.format(k,j)) - f1pdf = w.function('sighistpdf_{0}_{1}'.format(k,j)) - outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g, - 'adaptive',k,j),testdata,model_g=model_g,clf=clf) - f0pdfdist = np.array([test.evalDist(score,f0pdf,[xs]) for xs in outputs]) - f1pdfdist = np.array([test.evalDist(score,f1pdf,[xs]) for xs in outputs]) - pre_pdf[0][k].append(f0pdfdist) - pre_pdf[1][k].append(f1pdfdist) - else: - pre_pdf[0][k].append(None) - pre_pdf[1][k].append(None) - if true_dist == True: - f0 = w.pdf('f{0}'.format(k)) - f1 = w.pdf('f{0}'.format(j)) - if len(testdata.shape) > 1: - f0dist = np.array([test.evalDist(x,f0,xs) for xs in testdata]) - f1dist = np.array([test.evalDist(x,f1,xs) for xs in testdata]) - else: - f0dist = np.array([test.evalDist(x,f0,[xs]) for xs in testdata]) - f1dist = np.array([test.evalDist(x,f1,[xs]) for xs in testdata]) - pre_dist[0][k].append(f0dist) - pre_dist[1][k].append(f1dist) - - # Evaluate Likelihood in different c1[0] and c1[1] values - for i,cs in enumerate(csarray): - for j, cs2 in enumerate(cs2array): - c1s[:] = c1[:] - c1s[0] = cs - c1s[1] = cs2 - c1s[2] = 1.-cs-cs2 - decomposedRatios,trueRatios = evaluateRatio(w,testdata, - x=x,plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist, - pre_evaluation=pre_pdf, - pre_dist=pre_dist) - - if use_log == False: - decomposedLikelihood[i,j] = np.log(decomposedRatios).sum() - trueLikelihood[i,j] = np.log(trueRatios).sum() - else: - decomposedLikelihood[i,j] = decomposedRatios.sum() - trueLikelihood[i,j] = trueRatios.sum() - - decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min() - X,Y = np.meshgrid(csarray, cs2array) - decMin = np.unravel_index(decomposedLikelihood.argmin(), decomposedLikelihood.shape) - min_value = [csarray[decMin[0]],cs2array[decMin[1]]] - if verbose_printing == True: - saveFig(X,[Y,decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type='multilikelihood'),labels=['composed','true'],contour=True,marker=True,dir=dir,marker_value=(c1[0],c1[1]),print_pdf=True,min_value=min_value) - if true_dist == True: - trueLikelihood = trueLikelihood - trueLikelihood.min() - trueMin = np.unravel_index(trueLikelihood.argmin(), trueLikelihood.shape) - return [[csarray[trueMin[0]],cs2array[trueMin[1]]], [csarray[decMin[0]],cs2array[decMin[1]]]] - else: - return [[0.,0.],[csarray[decMin[0]],cs2array[decMin[1]]]] - - -def fitCValues(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics', - c1_g='',model_g='mlp',true_dist=False,vars_g=None, - workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', - use_log=False, clf=None): - if use_log == True: - post = 'log' - else: - post = '' - n_hist_c = 200 - keys = ['true','dec'] - c1_ = dict((key,np.zeros(n_hist_c)) for key in keys) - c1_values = dict((key,np.zeros(n_hist_c)) for key in keys) - c2_values = dict((key,np.zeros(n_hist_c)) for key in keys) - - fil2 = open('{0}/fitting_values_c1c2{1}.txt'.format(dir,post),'w') - - for i in range(n_hist_c): +''' - makeData(vars_g, c0,c1, num_train=200000,num_test=500,no_train=True, - workspace=workspace,dir=dir,c1_g=c1_g) - if i == 0: - verbose_printing = True +def plotCValues( + test, + c0, + c1, + dir='/afs/cern.ch/user/j/jpavezse/systematics', + c1_g='', + model_g='mlp', + true_dist=False, + vars_g=None, + workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', + use_log=False, + n_hist_c=100): + if use_log: + post = 'log' else: - verbose_printing = False - - 
((c1_true,c2_true),(c1_dec,c2_dec)) = evalC1C2Likelihood(test,c0,c1,dir=dir, - c1_g=c1_g,model_g=model_g, true_dist=true_dist,vars_g=vars_g, - workspace=workspace,use_log=use_log,clf=clf, verbose_printing=verbose_printing) - - print '2: {0} {1} {2} {3}'.format(c1_true, c1_dec, c2_true, c2_dec) - - fil2.write('{0} {1} {2} {3}\n'.format(c1_true, c1_dec, c2_true, c2_dec)) - - fil2.close() - -def plotCValues(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics', - c1_g='',model_g='mlp',true_dist=False,vars_g=None, - workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', - use_log=False): - if use_log == True: - post = 'log' - else: - post = '' - - n_hist_c = 200 - keys = ['true','dec'] - c1_values = dict((key,np.zeros(n_hist_c)) for key in keys) - c2_values = dict((key,np.zeros(n_hist_c)) for key in keys) - c1_2 = np.loadtxt('{0}/fitting_values_c1c2{1}.txt'.format(dir,post)) - c1_values['true'] = c1_2[:,0] - c1_values['dec'] = c1_2[:,1] - c2_values['true'] = c1_2[:,2] - c2_values['dec'] = c1_2[:,3] - - saveFig([],[c1_values['true'],c1_values['dec']], - makePlotName('c1c2','train',type='c1_hist{0}'.format(post)),hist=True, - axis=['signal weight'],marker=True,marker_value=c1[0], - labels=['true','composed'],x_range=[0.,0.2],dir=dir, - model_g=model_g,title='Histogram for estimated values signal weight',print_pdf=True) - saveFig([],[c2_values['true'],c2_values['dec']], - makePlotName('c1c2','train',type='c2_hist{0}'.format(post)),hist=True, - axis=['bkg. weight'],marker=True,marker_value=c1[1], - labels=['true','composed'],x_range=[0.1,0.4],dir=dir, - model_g=model_g,title='Histogram for estimated values bkg. weight',print_pdf=True) + post = '' + + keys = ['true', 'dec'] + c1_values = dict((key, np.zeros(n_hist_c)) for key in keys) + c2_values = dict((key, np.zeros(n_hist_c)) for key in keys) + c1_2 = np.loadtxt('{0}/fitting_values_c1c2{1}.txt'.format(dir, post)) + c1_values['true'] = c1_2[:, 0] + c1_values['dec'] = c1_2[:, 1] + c2_values['true'] = c1_2[:, 2] + c2_values['dec'] = c1_2[:, 3] + + saveFig([], [c1_values['true'], c1_values['dec']], + makePlotName('c1c2', 'train', type='c1_hist{0}'.format(post)), type='hist', + axis=['signal weight'], marker=True, marker_value=c1[0], + labels=['Exact', 'Approx. Decomposed'], x_range=[0., 0.2], dir=dir, + model_g=model_g, title='Histogram for estimated values signal weight', print_pdf=True) + saveFig([], [c2_values['true'], c2_values['dec']], + makePlotName('c1c2', 'train', type='c2_hist{0}'.format(post)), type='hist', + axis=['bkg. weight'], marker=True, marker_value=c1[1], + labels=['Exact', 'Approx. Decomposed'], x_range=[0.1, 0.4], dir=dir, + model_g=model_g, title='Histogram for estimated values bkg. 
weight', print_pdf=True) if __name__ == '__main__': - # Setting the classifier to use - model_g = None - classifiers = {'svc':svm.NuSVC(probability=True),'svr':svm.NuSVR(), - 'logistic': linear_model.LogisticRegression(), - 'bdt':GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, - max_depth=5, random_state=0), - 'mlp':MLPTrainer(n_hidden=40, L2_reg=0.0001), - 'xgboost': XGBoostClassifier(num_class=2, nthread=4, silent=1, - num_boost_round=100, eta=0.5, max_depth=4)} - clf = None - if (len(sys.argv) > 1): - model_g = sys.argv[1] - clf = classifiers.get(sys.argv[1]) - if clf == None: - model_g = 'logistic' - clf = classifiers['logistic'] - print 'Not found classifier, Using logistic instead' - - # parameters of the mixture model - c0 = np.array([.0,.3, .7]) - c1 = np.array([.1,.3, .7]) - c1_g = '' - - c0 = c0/c0.sum() - c1[0] = sys.argv[2] - if c1[0] < 0.01: - c1_g = "%.3f"%c1[0] - else: - c1_g = "%.2f"%c1[0] - c1[0] = (c1[0]*(c1[1]+c1[2]))/(1.-c1[0]) - c1 = c1 / c1.sum() - - verbose_printing = True - dir = '/afs/cern.ch/user/j/jpavezse/systematics' - workspace_file = 'workspace_DecomposingTestOfMixtureModelsClassifiers.root' - - # features - vars_g = ['x0','x1','x2','x3','x4','x5','x6','x7','x8','x9'] - - ROOT.gROOT.SetBatch(ROOT.kTRUE) - ROOT.RooAbsPdf.defaultIntegratorConfig().setEpsRel(1E-15) - ROOT.RooAbsPdf.defaultIntegratorConfig().setEpsAbs(1E-15) - # Set this value to False if only final plots are needed - verbose_printing = True - - if (len(sys.argv) > 3): - print 'Setting seed: {0} '.format(sys.argv[3]) - ROOT.RooRandom.randomGenerator().SetSeed(int(sys.argv[3])) - np.random.seed(int(sys.argv[3])) - - # make private mixture model - makeModelPrivateND(vars_g=vars_g,c0=c0,c1=c1,workspace=workspace_file,dir=dir, - model_g=model_g,verbose_printing=verbose_printing,load_cov=True) - - # make sintetic data to train the classifiers - makeData(vars_g=vars_g,c0=c0,c1=c1,num_train=100000,num_test=50000, - workspace=workspace_file,dir=dir, c1_g=c1_g, model_g='mlp') - - # train the pairwise classifiers - trainClassifiers(clf,3,dir=dir, model_g=model_g, - c1_g=c1_g ,model_file='adaptive') - - # class which implement the decomposed method - test = DecomposedTest(c0,c1,dir=dir,c1_g=c1_g,model_g=model_g, - input_workspace=workspace_file, verbose_printing = verbose_printing, - dataset_names=['0','1','2'], clf=clf if model_g=='mlp' else None) - test.fit(data_file='test',importance_sampling=False, true_dist=True,vars_g=vars_g) - - test.computeRatios(true_dist=True,vars_g=vars_g,use_log=False) - - # compute likelihood for c0[0] and c0[1] values - fitCValues(test,c0,c1,dir=dir,c1_g=c1_g,model_g=model_g,true_dist=True,vars_g=vars_g, - workspace=workspace_file,use_log=False, clf=clf if model_g=='mlp' else None) - - plotCValues(test,c0,c1,dir=dir,c1_g=c1_g,model_g=model_g,true_dist=True,vars_g=vars_g, - workspace=workspace_file,use_log=False) - - + # Setting the classifier to use + model_g = None + classifiers = { + 'svc': svm.NuSVC( + probability=True), + 'svr': svm.NuSVR(), + 'logistic': linear_model.LogisticRegression(), + 'bdt': GradientBoostingClassifier( + n_estimators=100, + learning_rate=1.0, + max_depth=5, + random_state=0), + 'mlp': MLPTrainer( + n_hidden=40, + L2_reg=0.0001), + 'xgboost': XGBoostClassifier( + num_class=2, + nthread=4, + silent=1, + num_boost_round=100, + eta=0.5, + max_depth=4)} + clf = None + if (len(sys.argv) > 1): + model_g = sys.argv[1] + clf = classifiers.get(sys.argv[1]) + if clf is None: + model_g = 'logistic' + clf = classifiers['logistic'] + print 'Not 
found classifier, Using logistic instead' + + # parameters of the mixture model + c0 = np.array([.0, .3, .7]) + c1 = np.array([.1, .3, .7]) + c1_g = '' + + c0 = c0 / c0.sum() + c1[0] = sys.argv[2] + if c1[0] < 0.01: + c1_g = "%.3f" % c1[0] + else: + c1_g = "%.2f" % c1[0] + c1[0] = (c1[0] * (c1[1] + c1[2])) / (1. - c1[0]) + c1 = c1 / c1.sum() + + verbose_printing = True + dir = '/afs/cern.ch/user/j/jpavezse/systematics' + workspace_file = 'workspace_DecomposingTestOfMixtureModelsClassifiers.root' + + # features + vars_g = ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9'] + + ROOT.gROOT.SetBatch(ROOT.kTRUE) + ROOT.RooAbsPdf.defaultIntegratorConfig().setEpsRel(1E-15) + ROOT.RooAbsPdf.defaultIntegratorConfig().setEpsAbs(1E-15) + # Set this value to False if only final plots are needed + verbose_printing = True + + if (len(sys.argv) > 3): + print 'Setting seed: {0} '.format(sys.argv[3]) + ROOT.RooRandom.randomGenerator().SetSeed(int(sys.argv[3])) + np.random.seed(int(sys.argv[3])) + + # make private mixture model + makeModelPrivateND( + vars_g=vars_g, + c0=c0, + c1=c1, + workspace=workspace_file, + dir=dir, + model_g=model_g, + verbose_printing=verbose_printing, + load_cov=True) + + # make sintetic data to train the classifiers + makeData(vars_g=vars_g, c0=c0, c1=c1, num_train=100000, num_test=50000, + workspace=workspace_file, dir=dir, c1_g=c1_g, model_g='mlp') + + # train the pairwise classifiers + trainClassifiers(clf, 3, dir=dir, model_g=model_g, + c1_g=c1_g, model_file='adaptive') + + # class which implement the decomposed method + test = DecomposedTest( + c0, + c1, + dir=dir, + c1_g=c1_g, + model_g=model_g, + input_workspace=workspace_file, + verbose_printing=verbose_printing, + dataset_names=[ + '0', + '1', + '2'], + clf=clf if model_g == 'mlp' else None) + test.fit( + data_file='test', + true_dist=True, + vars_g=vars_g) + + test.computeRatios(true_dist=True, vars_g=vars_g, use_log=False) + + n_hist = 200 + # compute likelihood for c0[0] and c0[1] values + test.fitCValues( + c0, + c1, + dir=dir, + c1_g=c1_g, + model_g=model_g, + true_dist=True, + vars_g=vars_g, + workspace=workspace_file, + use_log=False, + clf=clf if model_g == 'mlp' else None, + n_hist_c=n_hist) + + plotCValues(test,c0,c1,dir=dir,c1_g=c1_g,model_g=model_g,true_dist=True,vars_g=vars_g, + workspace=workspace_file,use_log=False, n_hist_c=n_hist) diff --git a/DecomposingTest_1D.py b/DecomposingTest_1D.py index 5b92ea6..29a7684 100644 --- a/DecomposingTest_1D.py +++ b/DecomposingTest_1D.py @@ -1,19 +1,16 @@ #!/usr/bin/env python ''' - This python script can be used to reproduce the results on 1D distributions - on the article Experiments using machine learning to approximate likelihood ratios - for mixture models (ACAT 2016). + This python script can be used to reproduce the results on 1D distributions + on the article Experiments using machine learning to approximate likelihood ratios + for mixture models (ACAT 2016). ''' __author__ = "Pavez J. 
" - import ROOT import numpy as np from sklearn import svm, linear_model -from sklearn.externals import joblib -from sklearn.metrics import roc_curve, auc from sklearn.ensemble import GradientBoostingClassifier import sys @@ -23,9 +20,9 @@ from mlp import MLPTrainer from make_data import makeData, makeModelND, makeModelPrivateND,\ - makeModel + makeModel from utils import printMultiFrame, printFrame, saveFig, loadData,\ - makeROC, makeSigBkg, makePlotName + makeROC, makeSigBkg, makePlotName from train_classifiers import trainClassifiers, predict from decomposed_test import DecomposedTest @@ -34,75 +31,95 @@ if __name__ == '__main__': - # Setting the classifier to use - model_g = None - classifiers = {'svc':svm.NuSVC(probability=True),'svr':svm.NuSVR(), - 'logistic': linear_model.LogisticRegression(), - 'bdt':GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, - max_depth=5, random_state=0), - 'mlp':MLPTrainer(n_hidden=4, L2_reg=0), - 'xgboost': XGBoostClassifier(num_class=2, nthread=4, silent=1, - num_boost_round=100, eta=0.5, max_depth=4)} - clf = None - if (len(sys.argv) > 1): - model_g = sys.argv[1] - clf = classifiers.get(sys.argv[1]) - if clf == None: - model_g = 'logistic' - clf = classifiers['logistic'] - print 'Not found classifier, Using logistic instead' - - # parameters of the mixture model - c0 = np.array([.0,.3, .7]) - c1 = np.array([.1,.3, .7]) - c1_g = '' - - c0 = c0/c0.sum() - c1[0] = sys.argv[2] - if c1[0] < 0.01: - c1_g = "%.3f"%c1[0] - else: - c1_g = "%.2f"%c1[0] - c1[0] = (c1[0]*(c1[1]+c1[2]))/(1.-c1[0]) - c1 = c1 / c1.sum() - - verbose_printing = True - dir = '.' - workspace_file = 'workspace_DecomposingTestOfMixtureModelsClassifiers.root' - - # features - vars_g = ['x'] - - ROOT.gROOT.SetBatch(ROOT.kTRUE) - ROOT.RooAbsPdf.defaultIntegratorConfig().setEpsRel(1E-15) - - # Set this value to False if only final plots are needed - verbose_printing = True - - if (len(sys.argv) > 3): - print 'Setting seed: {0} '.format(sys.argv[3]) - ROOT.RooRandom.randomGenerator().SetSeed(int(sys.argv[3])) - np.random.seed(int(sys.argv[3])) - - # Create models to sample from - makeModel(c0=c0,c1=c1,workspace=workspace_file,dir=dir,verbose_printing= - verbose_printing) - - # make sintetic data to train the classifiers - makeData(vars_g=vars_g,c0=c0,c1=c1,num_train=100000,num_test=50000, - workspace=workspace_file,dir=dir, c1_g=c1_g, model_g='mlp') - - # train the pairwise classifiers - trainClassifiers(clf,3,dir=dir, model_g=model_g, - c1_g=c1_g ,model_file='adaptive') - - # class which implement the decomposed method - test = DecomposedTest(c0,c1,dir=dir,c1_g=c1_g,model_g=model_g, - input_workspace=workspace_file, verbose_printing = verbose_printing, - dataset_names=['0','1','2'],clf=clf if model_g=='mlp' else None) - - test.fit(data_file='test',importance_sampling=False, true_dist=True,vars_g=vars_g) - test.computeRatios(true_dist=True,vars_g=vars_g,use_log=False) - - - + # Setting the classifier to use + model_g = None + classifiers = { + 'svc': svm.NuSVC( + probability=True), + 'svr': svm.NuSVR(), + 'logistic': linear_model.LogisticRegression(), + 'bdt': GradientBoostingClassifier( + n_estimators=100, + learning_rate=1.0, + max_depth=5, + random_state=0), + 'mlp': MLPTrainer( + n_hidden=4, + L2_reg=0), + 'xgboost': XGBoostClassifier( + num_class=2, + nthread=4, + silent=1, + num_boost_round=100, + eta=0.5, + max_depth=4)} + clf = None + if (len(sys.argv) > 1): + model_g = sys.argv[1] + clf = classifiers.get(sys.argv[1]) + if clf is None: + model_g = 'logistic' + clf = 
classifiers['logistic'] + print 'Not found classifier, Using logistic instead' + + # parameters of the mixture model + c0 = np.array([.0, .3, .7]) + c1 = np.array([.1, .3, .7]) + c1_g = '' + + c0 = c0 / c0.sum() + c1[0] = sys.argv[2] + if c1[0] < 0.01: + c1_g = "%.3f" % c1[0] + else: + c1_g = "%.2f" % c1[0] + c1[0] = (c1[0] * (c1[1] + c1[2])) / (1. - c1[0]) + c1 = c1 / c1.sum() + + verbose_printing = True + dir = '.' + workspace_file = 'workspace_DecomposingTestOfMixtureModelsClassifiers.root' + + # features + vars_g = ['x'] + + ROOT.gROOT.SetBatch(ROOT.kTRUE) + ROOT.RooAbsPdf.defaultIntegratorConfig().setEpsRel(1E-15) + + # Set this value to False if only final plots are needed + verbose_printing = True + + if (len(sys.argv) > 3): + print 'Setting seed: {0} '.format(sys.argv[3]) + ROOT.RooRandom.randomGenerator().SetSeed(int(sys.argv[3])) + np.random.seed(int(sys.argv[3])) + + # Create models to sample from + makeModel(c0=c0,c1=c1,workspace=workspace_file,dir=dir,verbose_printing= + verbose_printing) + + # make sintetic data to train the classifiers + makeData(vars_g=vars_g,c0=c0,c1=c1,num_train=100000,num_test=50000, + workspace=workspace_file,dir=dir, c1_g=c1_g, model_g='mlp') + + # train the pairwise classifiers + trainClassifiers(clf,3,dir=dir, model_g=model_g, + c1_g=c1_g ,model_file='adaptive') + + # class which implement the decomposed method + test = DecomposedTest( + c0, + c1, + dir=dir, + c1_g=c1_g, + model_g=model_g, + input_workspace=workspace_file, + verbose_printing=verbose_printing, + dataset_names=[ + '0', + '1', + '2'], + clf=clf if model_g == 'mlp' else None) + + test.fit(data_file='test',true_dist=True,vars_g=vars_g) + test.computeRatios(true_dist=True, vars_g=vars_g, use_log=False) diff --git a/decomposed_test.py b/decomposed_test.py index 583db11..7d4ec4c 100644 --- a/decomposed_test.py +++ b/decomposed_test.py @@ -4,988 +4,975 @@ import ROOT import numpy as np -from sklearn import svm, linear_model -from sklearn.externals import joblib -from sklearn.metrics import roc_curve, auc -from sklearn.ensemble import GradientBoostingClassifier import sys import os.path -import pdb -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D - -from make_data import makeData, makeModelND from utils import printMultiFrame, printFrame, saveFig, loadData, printFrame, makePlotName,\ - loadData,makeSigBkg,makeROC, makeMultiROC,saveMultiFig,preProcessing + makeSigBkg, makeROC, makeMultiROC, saveMultiFig + +from make_data import makeData from train_classifiers import predict -from pyMorphWrapper import MorphingWrapper - class DecomposedTest: - ''' - Class which implement the decomposed test on - the data - ''' - def __init__(self,c0,c1,model_file='adaptive', + ''' + Class which implement the decomposed test on + the data + ''' + + def __init__( + self, + c0, + c1, + model_file='adaptive', input_workspace=None, output_workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', dir='/afs/cern.ch/user/j/jpavezse/systematics', - c1_g='',model_g='mlp', + c1_g='', + model_g='mlp', verbose_printing=False, dataset_names=None, - preprocessing=False, - scaler=None, seed=1234, F1_dist='F1', F0_dist='F0', - cross_section=None, all_couplings=None, F1_couplings=None, basis_indexes=None, clf=None): - self.input_workspace = input_workspace - self.workspace = output_workspace - self.c0 = c0 - self.c1 = c1 - self.model_file = model_file - self.dir = dir - self.c1_g = c1_g - self.model_g = model_g - self.verbose_printing=verbose_printing - self.preprocessing = preprocessing - 
self.scaler = scaler - self.seed=seed - self.F1_dist=F1_dist - self.F0_dist=F0_dist - self.cross_section=cross_section - self.dataset_names=dataset_names - self.basis_indexes = basis_indexes if basis_indexes <> None else range(len(dataset_names)) - self.F1_couplings=F1_couplings - self.all_couplings=all_couplings - self.nsamples = len(dataset_names) - self.clf = clf - - - def fit(self, data_file='test',importance_sampling=False, true_dist=True,vars_g=None): - ''' - Create pdfs for the classifier - score to be used later on the ratio - test, input workspace only needed in case - there exist true pdfs for the distributions - the models being used are ./model/{model_g}/{c1_g}/{model_file}_i_j.pkl - and the data files are ./data/{model_g}/{c1_g}/{data_file}_i_j.dat - ''' - - bins = 40 - low = 0. - high = 1. - - if self.input_workspace <> None: - #f = ROOT.TFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace)) - f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace)) - - w = f.Get('w') - # TODO test this when workspace is present - w = ROOT.RooWorkspace('w') if w == None else w - f.Close() - else: - w = ROOT.RooWorkspace('w') - w.Print() - - print 'Generating Score Histograms' - - w.factory('score[{0},{1}]'.format(low,high)) - s = w.var('score') - - if importance_sampling == True: - if true_dist == True: - vars = ROOT.TList() - for var in vars_g: - vars.Add(w.var(var)) - x = ROOT.RooArgSet(vars) - else: - x = None - - #This is because most of the data of the full model concentrate around 0 - bins_full = 40 - low_full = 0. - high_full = 1. - w.factory('scoref[{0},{1}]'.format(low_full, high_full)) - s_full = w.var('scoref') - histos = [] - histos_names = [] - inv_histos = [] - inv_histos_names = [] - sums_histos = [] - def saveHistos(w,outputs,s,bins,low,high,pos=None,importance_sampling=False,importance_data=None, - importance_outputs=None): - if pos <> None: - k,j = pos - else: - k,j = ('F0','F1') - print 'Estimating {0} {1}'.format(k,j) - for l,name in enumerate(['sig','bkg']): - data = ROOT.RooDataSet('{0}data_{1}_{2}'.format(name,k,j),"data", - ROOT.RooArgSet(s)) - hist = ROOT.TH1F('{0}hist_{1}_{2}'.format(name,k,j),'hist',bins,low,high) - values = outputs[l] - #values = values[self.findOutliers(values)] - for val in values: - hist.Fill(val) - s.setVal(val) - data.add(ROOT.RooArgSet(s)) - norm = 1./hist.Integral() - hist.Scale(norm) - - s.setBins(bins) - datahist = ROOT.RooDataHist('{0}datahist_{1}_{2}'.format(name,k,j),'hist', - ROOT.RooArgList(s),hist) - #histpdf = ROOT.RooHistPdf('{0}histpdf_{1}_{2}'.format(name,k,j),'hist', - # ROOT.RooArgSet(s), datahist, 1) - histpdf = ROOT.RooHistFunc('{0}histpdf_{1}_{2}'.format(name,k,j),'hist', - ROOT.RooArgSet(s), datahist, 1) - #histpdf.setUnitNorm(True) - #testvalues = np.array([self.evalDist(ROOT.RooArgSet(s), histpdf, [xs]) for xs in values]) - - #histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator') - - #print 'INTEGRAL' - #print histpdf.createIntegral(ROOT.RooArgSet(s)).getVal() - #print histpdf.Integral() - - #histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooAdaptiveGaussKronrodIntegrator1D') - - getattr(w,'import')(hist) - getattr(w,'import')(data) - getattr(w,'import')(datahist) # work around for morph = w.import(morph) - getattr(w,'import')(histpdf) # work around for morph = w.import(morph) - score_str = 'scoref' if pos == None else 'score' - # Calculate the density of the classifier output using kernel density - 
#w.factory('KeysPdf::{0}dist_{1}_{2}({3},{0}data_{1}_{2},RooKeysPdf::NoMirror,2)'.format(name,k,j,score_str)) - - # Print histograms pdfs and estimated densities - if self.verbose_printing == True and name == 'bkg' and k <> j: - full = 'full' if pos == None else 'dec' - if k < j and k <> 'F0': - histos.append([w.function('sighistpdf_{0}_{1}'.format(k,j)), w.function('bkghistpdf_{0}_{1}'.format(k,j))]) - histos_names.append(['f{0}-f{1}_f{1}(signal)'.format(k,j), 'f{0}-f{1}_f{0}(background)'.format(k,j)]) - if j < k and k <> 'F0': - inv_histos.append([w.function('sighistpdf_{0}_{1}'.format(k,j)), w.function('bkghistpdf_{0}_{1}'.format(k,j))]) - inv_histos_names.append(['f{0}-f{1}_f{1}(signal)'.format(k,j), 'f{0}-f{1}_f{0}(background)'.format(k,j)]) - - if self.scaler == None: - self.scaler = {} - - # change this - for k in range(self.nsamples): - for j in range(self.nsamples): - if k == j: - continue - #if k <> 2 and j <> 2: - # continue - if self.dataset_names <> None: - name_k, name_j = (self.dataset_names[k], self.dataset_names[j]) + self.input_workspace = input_workspace + self.workspace = output_workspace + self.c0 = c0 + self.c1 = c1 + self.model_file = model_file + self.dir = dir + self.c1_g = c1_g + self.model_g = model_g + self.verbose_printing = verbose_printing + self.seed = seed + self.F1_dist = F1_dist + self.F0_dist = F0_dist + self.dataset_names = dataset_names + self.basis_indexes = basis_indexes if basis_indexes is not None else range( + len(dataset_names)) + self.F1_couplings = F1_couplings + self.all_couplings = all_couplings + self.nsamples = len(dataset_names) + self.clf = clf + + def fit( + self, + data_file='test', + true_dist=True, + vars_g=None): + ''' + Create pdfs for the classifier + score to be used later on the ratio + test, input workspace only needed in case + there exist true pdfs for the distributions + the models being used are ./model/{model_g}/{c1_g}/{model_file}_i_j.pkl + and the data files are ./data/{model_g}/{c1_g}/{data_file}_i_j.dat + ''' + + bins = 40 + low = 0. + high = 1. 
+ + if self.input_workspace is not None: + f = ROOT.TFile('{0}/{1}'.format(self.dir, self.workspace)) + w = f.Get('w') + w = ROOT.RooWorkspace('w') if w is None else w + f.Close() else: - name_k, name_j = (k,j) - print 'Loading {0}:{1} {2}:{3}'.format(k,name_k, j,name_j) - traindata, targetdata = loadData(data_file,name_k,name_j,dir=self.dir,c1_g=self.c1_g, - preprocessing=self.preprocessing,scaler=self.scaler,persist=True) - - numtrain = traindata.shape[0] - size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1 - #output = [predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,k,j),traindata[targetdata == 1],model_g=self.model_g), - # predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,k,j),traindata[targetdata == 0],model_g=self.model_g)] - output = [predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata[targetdata==1],model_g=self.model_g,clf=self.clf), - predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata[targetdata==0],model_g=self.model_g,clf=self.clf)] - saveHistos(w,output,s,bins,low,high,(k,j)) - #w.writeToFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace)) - w.writeToFile('{0}/{1}'.format(self.dir,self.workspace)) - - if self.verbose_printing==True: - for ind in range(1,(len(histos)/3+1)): - print_histos = histos[(ind-1)*3:(ind-1)*3+3] - print_histos_names = histos_names[(ind-1)*3:(ind-1)*3+3] - printMultiFrame(w,['score']*len(print_histos),print_histos, makePlotName('dec{0}'.format(ind-1),'all',type='hist',dir=self.dir,c1_g=self.c1_g,model_g=self.model_g),print_histos_names, - dir=self.dir,model_g=self.model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions') - # Full model - traindata, targetdata = loadData(data_file,self.F0_dist,self.F1_dist,dir=self.dir,c1_g=self.c1_g, - preprocessing=self.preprocessing, scaler=self.scaler) - numtrain = traindata.shape[0] - size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1 - outputs = [predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),traindata[targetdata==1],model_g=self.model_g,clf=self.clf), - predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),traindata[targetdata==0],model_g=self.model_g,clf=self.clf)] - #outputs = [predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),traindata[targetdata==1],model_g=self.model_g), - # predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),traindata[targetdata==0],model_g=self.model_g)] - - saveHistos(w,outputs,s_full, bins_full, low_full, high_full,importance_sampling=False) - if self.verbose_printing == True: - printFrame(w,['scoref'],[w.function('sighistpdf_F0_F1'),w.function('bkghistpdf_F0_F1')], makePlotName('full','all',type='hist',dir=self.dir,c1_g=self.c1_g,model_g=self.model_g),['signal','bkg'], - dir=self.dir,model_g=self.model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions') - - #w.writeToFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace)) - w.writeToFile('{0}/{1}'.format(self.dir,self.workspace)) - - w.Print() - - # To calculate the ratio between single functions - def singleRatio(self,f0,f1): - ratio = f1 / f0 - ratio[np.abs(ratio) == np.inf] = 0 - ratio[np.isnan(ratio)] = 0 - return ratio - - def evalDist(self,x,f0,val): - iter = 
x.createIterator() - v = iter.Next() - i = 0 - while v: - v.setVal(val[i]) - v = iter.Next() - i = i+1 - return f0.getVal(x) - - # To calculate the ratio between single functions - def __regFunc(self,x,f0,f1,val): - iter = x.createIterator() - v = iter.Next() - i = 0 - while v: - v.setVal(val[i]) - v = iter.Next() - i = i+1 - if (f0.getVal(x) + f1.getVal(x)) == 0.: - return 0. - return f1.getVal(x) / (f0.getVal(x) + f1.getVal(x)) - - - def evaluateDecomposedRatio(self,w,evalData,x=None,plotting=True, roc=False,gridsize=None,c0arr=None, c1arr=None,true_dist=False,pre_evaluation=None,pre_dist=None,data_type='test',debug=False,cross_section=None,indexes=None): - ''' - Compute composed ratio for dataset 'evalData'. - Single ratios can be precomputed in pre_evaluation - ''' + w = ROOT.RooWorkspace('w') + w.Print() - # pair-wise ratios - # and decomposition computation - #f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace)) - #w = f.Get('w') - #f.Close() - - if indexes == None: - indexes = self.basis_indexes - - score = ROOT.RooArgSet(w.var('score')) - npoints = evalData.shape[0] - fullRatios = np.zeros(npoints) - fullRatiosReal = np.zeros(npoints) - c0arr = self.c0 if c0arr == None else c0arr - c1arr = self.c1 if c1arr == None else c1arr - - true_score = [] - train_score = [] - all_targets = [] - all_positions = [] - all_ratios = [] - for k,c in enumerate(c0arr): - innerRatios = np.zeros(npoints) - innerTrueRatios = np.zeros(npoints) - if c == 0: - continue - for j,c_ in enumerate(c1arr): - index_k, index_j = (indexes[k],indexes[j]) - f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j)) - f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j)) - if index_k<>index_j: - if pre_evaluation == None: - traindata = evalData - if self.preprocessing == True: - traindata = preProcessing(evalData,self.dataset_names[min(index_k,index_j)], - self.dataset_names[max(index_k,index_j)],self.scaler) - outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata,model_g=self.model_g,clf=self.clf) - #outputs = predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,index_k, - #index_j),traindata,model_g=self.model_g) - f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs]) - f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs]) - else: - f0pdfdist = pre_evaluation[0][index_k][index_j] - f1pdfdist = pre_evaluation[1][index_k][index_j] - if f0pdfdist == None or f1pdfdist == None: - pdb.set_trace() - pdfratios = self.singleRatio(f0pdfdist,f1pdfdist) - else: - pdfratios = np.ones(npoints) - all_ratios.append(pdfratios) - innerRatios += (c_/c) * pdfratios - if true_dist == True: - if pre_dist == None: - f0 = w.pdf('f{0}'.format(index_k)) - f1 = w.pdf('f{0}'.format(index_j)) - if len(evalData.shape) > 1: - f0dist = np.array([self.evalDist(x,f0,xs) for xs in evalData]) - f1dist = np.array([self.evalDist(x,f1,xs) for xs in evalData]) - else: - f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in evalData]) - f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in evalData]) - else: - f0dist = pre_dist[0][index_k][index_j] - f1dist = pre_dist[1][index_k][index_j] - ratios = self.singleRatio(f0dist, f1dist) - innerTrueRatios += (c_/c) * ratios - # ROC curves for pair-wise ratios - if (roc == True or plotting==True) and k < j: - all_positions.append((k,j)) - if roc == True: - if self.dataset_names <> None: - name_k, name_j = (self.dataset_names[index_k], 
self.dataset_names[index_j]) - else: - name_k, name_j = (index_k,index_j) - testdata, testtarget = loadData(data_type,name_k,name_j,dir=self.dir,c1_g=self.c1_g, - preprocessing=self.preprocessing, scaler=self.scaler) - else: - testdata = evalData - size2 = testdata.shape[1] if len(testdata.shape) > 1 else 1 - outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),testdata,model_g=self.model_g,clf=self.clf) - #outputs = predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,index_k, - # index_j),testdata.reshape(testdata.shape[0],size2),model_g=self.model_g) - f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs]) - f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs]) - clfRatios = self.singleRatio(f0pdfdist,f1pdfdist) - train_score.append(clfRatios) - if roc == True: - all_targets.append(testtarget) - #individual ROC - #makeROC(clfRatios, testtarget,makePlotName('dec','train',k,j,type='roc',dir=self.dir, - #model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g) - if true_dist == True: - if len(evalData.shape) > 1: - f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata]) - f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata]) + print 'Generating Score Histograms' + + w.factory('score[{0},{1}]'.format(low, high)) + s = w.var('score') + + bins_full = 40 + low_full = 0. + high_full = 1. + w.factory('scoref[{0},{1}]'.format(low_full, high_full)) + s_full = w.var('scoref') + + histos = [] + histos_names = [] + inv_histos = [] + inv_histos_names = [] + sums_histos = [] + + def saveHistos( + w, + outputs, + s, + bins, + low, + high, + pos=None): + if pos is not None: + k, j = pos else: - f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata]) - f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata]) - - trRatios = self.singleRatio(f0dist,f1dist) - - true_score.append(trRatios) - - # makeROC(trRatios, testtarget, makePlotName('dec','truth',k,j,type='roc', - # dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g) - - - innerRatios = 1./innerRatios - innerRatios[np.abs(innerRatios) == np.inf] = 0. - fullRatios += innerRatios - if true_dist == True: - innerTrueRatios = 1./innerTrueRatios - innerTrueRatios[np.abs(innerTrueRatios) == np.inf] = 0. 
- fullRatiosReal += innerTrueRatios - if roc == True: - for ind in range(1,(len(train_score)/3+1)): - print_scores = train_score[(ind-1)*3:(ind-1)*3+3] - print_targets = all_targets[(ind-1)*3:(ind-1)*3+3] - print_positions = all_positions[(ind-1)*3:(ind-1)*3+3] - if true_dist == True: - makeMultiROC(print_scores, print_targets,makePlotName('all{0}'.format(ind-1),'comparison',type='roc', - dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g, - true_score = true_score,print_pdf=True,title='ROC for pairwise trained classifier',pos=print_positions) - else: - makeMultiROC(print_scores, print_targets,makePlotName('all{0}'.format(ind-1),'comparison',type='roc', - dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g, - print_pdf=True,title='ROC for pairwise trained classifier',pos=print_positions) + k, j = ('F0', 'F1') + print 'Estimating {0} {1}'.format(k, j) + for l, name in enumerate(['sig', 'bkg']): + data = ROOT.RooDataSet( + '{0}data_{1}_{2}'.format( + name, k, j), "data", ROOT.RooArgSet(s)) + hist = ROOT.TH1F( + '{0}hist_{1}_{2}'.format( + name, k, j), 'hist', bins, low, high) + values = outputs[l] + for val in values: + hist.Fill(val) + s.setVal(val) + data.add(ROOT.RooArgSet(s)) + norm = 1. / hist.Integral() + hist.Scale(norm) + + s.setBins(bins) + datahist = ROOT.RooDataHist( + '{0}datahist_{1}_{2}'.format( + name, k, j), 'hist', ROOT.RooArgList(s), hist) + + histpdf = ROOT.RooHistFunc('{0}histpdf_{1}_{2}'.format( + name, k, j), 'hist', ROOT.RooArgSet(s), datahist, 1) + + getattr(w, 'import')(hist) + getattr(w, 'import')(data) + getattr(w, 'import')(datahist) + getattr(w, 'import')(histpdf) + score_str = 'scoref' if pos is None else 'score' + # Calculate the density of the classifier output using kernel density + # w.factory('KeysPdf::{0}dist_{1}_{2}({3},{0}data_{1}_{2},RooKeysPdf::NoMirror,2)'.format(name,k,j,score_str)) + + # Print histograms pdfs and estimated densities + if self.verbose_printing and name == 'bkg' and k != j: + full = 'full' if pos is None else 'dec' + if k < j and k != 'F0': + histos.append([w.function('sighistpdf_{0}_{1}'.format( + k, j)), w.function('bkghistpdf_{0}_{1}'.format(k, j))]) + histos_names.append( + ['f{0}-f{1}_f{1}(signal)'.format(k, j), 'f{0}-f{1}_f{0}(background)'.format(k, j)]) + if j < k and k != 'F0': + inv_histos.append([w.function('sighistpdf_{0}_{1}'.format( + k, j)), w.function('bkghistpdf_{0}_{1}'.format(k, j))]) + inv_histos_names.append( + ['f{0}-f{1}_f{1}(signal)'.format(k, j), 'f{0}-f{1}_f{0}(background)'.format(k, j)]) - if plotting == True: - saveMultiFig(evalData,[x for x in zip(train_score,true_score)], - makePlotName('all_dec','train',type='ratio'),labels=[['f0-f1(trained)','f0-f1(truth)'],['f0-f2(trained)','f0-f2(truth)'],['f1-f2(trained)','f1-f2(truth)']],title='Pairwise Ratios',print_pdf=True,dir=self.dir) + for k in range(self.nsamples): + for j in range(self.nsamples): + if k == j: + continue + if self.dataset_names is not None: + name_k, name_j = ( + self.dataset_names[k], self.dataset_names[j]) + else: + name_k, name_j = (k, j) + print 'Loading {0}:{1} {2}:{3}'.format(k, name_k, j, name_j) + traindata, targetdata = loadData(data_file, name_k, name_j, dir=self.dir, c1_g=self.c1_g) + + numtrain = traindata.shape[0] + size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1 + output = [ + predict( + '{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format( + self.dir, self.model_g, self.c1_g, self.model_file, k, j), traindata[ + targetdata == 1], model_g=self.model_g, 
clf=self.clf), predict( + '{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format( + self.dir, self.model_g, self.c1_g, self.model_file, k, j), traindata[ + targetdata == 0], model_g=self.model_g, clf=self.clf)] + saveHistos(w, output, s, bins, low, high, (k, j)) + w.writeToFile('{0}/{1}'.format(self.dir, self.workspace)) + + if self.verbose_printing: + for ind in range(1, (len(histos) / 3 + 1)): + print_histos = histos[(ind - 1) * 3:(ind - 1) * 3 + 3] + print_histos_names = histos_names[ + (ind - 1) * 3:(ind - 1) * 3 + 3] + printMultiFrame( + w, + ['score'] * len(print_histos), + print_histos, + makePlotName( + 'dec{0}'.format( + ind - 1), + 'all', + type='hist', + dir=self.dir, + c1_g=self.c1_g, + model_g=self.model_g), + print_histos_names, + dir=self.dir, + model_g=self.model_g, + y_text='score(x)', + print_pdf=True, + title='Pairwise score distributions') + # Full model + traindata, targetdata = loadData(data_file, self.F0_dist, self.F1_dist, dir=self.dir, c1_g=self.c1_g) + numtrain = traindata.shape[0] + size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1 + outputs = [predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir, + self.model_g, + self.c1_g, + self.model_file), + traindata[targetdata == 1], + model_g=self.model_g, + clf=self.clf), + predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir, + self.model_g, + self.c1_g, + self.model_file), + traindata[targetdata == 0], + model_g=self.model_g, + clf=self.clf)] + saveHistos( + w, + outputs, + s_full, + bins_full, + low_full, + high_full) + if self.verbose_printing: + printFrame(w, + ['scoref'], + [w.function('sighistpdf_F0_F1'), + w.function('bkghistpdf_F0_F1')], + makePlotName('full', + 'all', + type='hist', + dir=self.dir, + c1_g=self.c1_g, + model_g=self.model_g), + ['signal', + 'bkg'], + dir=self.dir, + model_g=self.model_g, + y_text='score(x)', + print_pdf=True, + title='Pairwise score distributions') + + w.writeToFile('{0}/{1}'.format(self.dir, self.workspace)) + + w.Print() + + # To calculate the ratio between single functions + def singleRatio(self, f0, f1): + ratio = f1 / f0 + ratio[~np.isfinite(ratio)] = 0 + return ratio + + def evalDist(self, x, f0, val): + iter = x.createIterator() + v = iter.Next() + i = 0 + while v: + v.setVal(val[i]) + v = iter.Next() + i = i + 1 + return f0.getVal(x) + + # To calculate the ratio between single functions + def __regFunc(self, x, f0, f1, val): + iter = x.createIterator() + v = iter.Next() + i = 0 + while v: + v.setVal(val[i]) + v = iter.Next() + i = i + 1 + if (f0.getVal(x) + f1.getVal(x)) == 0.: + return 0. + return f1.getVal(x) / (f0.getVal(x) + f1.getVal(x)) + + def evaluateDecomposedRatio( + self, + w, + evalData, + x=None, + roc=False, + c0arr=None, + c1arr=None, + true_dist=False, + pre_evaluation=None, + pre_dist=None, + data_type='test', + indexes=None, + plotting=None): + ''' + Compute composed ratio for dataset 'evalData'. 
+ Single ratios can be precomputed in pre_evaluation + ''' + + plotting = plotting if plotting <> None else self.verbose_printing + if indexes is None: + indexes = self.basis_indexes + + score = ROOT.RooArgSet(w.var('score')) + npoints = evalData.shape[0] + fullRatios = np.zeros(npoints) + fullRatiosReal = np.zeros(npoints) + c0arr = self.c0 if c0arr is None else c0arr + c1arr = self.c1 if c1arr is None else c1arr + + true_score = [] + train_score = [] + all_targets = [] + all_positions = [] + all_ratios = [] + for k, c in enumerate(c0arr): + innerRatios = np.zeros(npoints) + innerTrueRatios = np.zeros(npoints) + if c == 0: + continue + for j, c_ in enumerate(c1arr): + index_k, index_j = (indexes[k], indexes[j]) + f0pdf = w.function( + 'bkghistpdf_{0}_{1}'.format( + index_k, index_j)) + f1pdf = w.function( + 'sighistpdf_{0}_{1}'.format( + index_k, index_j)) + if index_k != index_j: + if pre_evaluation is None: + traindata = evalData + outputs = predict( + '{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format( + self.dir, + self.model_g, + self.c1_g, + self.model_file, + k, + j), + traindata, + model_g=self.model_g, + clf=self.clf) + + f0pdfdist = np.array( + [self.evalDist(score, f0pdf, [xs]) for xs in outputs]) + f1pdfdist = np.array( + [self.evalDist(score, f1pdf, [xs]) for xs in outputs]) + else: + f0pdfdist = pre_evaluation[0][index_k][index_j] + f1pdfdist = pre_evaluation[1][index_k][index_j] + pdfratios = self.singleRatio(f0pdfdist, f1pdfdist) + else: + pdfratios = np.ones(npoints) + all_ratios.append(pdfratios) + innerRatios += (c_ / c) * pdfratios + if true_dist: + if pre_dist is None: + f0 = w.pdf('f{0}'.format(index_k)) + f1 = w.pdf('f{0}'.format(index_j)) + if len(evalData.shape) > 1: + f0dist = np.array( + [self.evalDist(x, f0pdf, xs) for xs in evalData]) + f1dist = np.array( + [self.evalDist(x, f1pdf, xs) for xs in evalData]) + else: + f0dist = np.array( + [self.evalDist(x, f0pdf, [xs]) for xs in evalData]) + f1dist = np.array( + [self.evalDist(x, f1pdf, [xs]) for xs in evalData]) + else: + f0dist = pre_dist[0][index_k][index_j] + f1dist = pre_dist[1][index_k][index_j] + ratios = self.singleRatio(f0dist, f1dist) + innerTrueRatios += (c_ / c) * ratios + # ROC curves for pair-wise ratios + if (roc or plotting) and k < j: + all_positions.append((k, j)) + if roc: + if self.dataset_names is not None: + name_k, name_j = ( + self.dataset_names[index_k], self.dataset_names[index_j]) + else: + name_k, name_j = (index_k, index_j) + testdata, testtarget = loadData(data_type, name_k, name_j, dir=self.dir, c1_g=self.c1_g) + else: + testdata = evalData + size2 = testdata.shape[1] if len(testdata.shape) > 1 else 1 + outputs = predict( + '{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format( + self.dir, + self.model_g, + self.c1_g, + self.model_file, + k, + j), + testdata, + model_g=self.model_g, + clf=self.clf) + f0pdfdist = np.array( + [self.evalDist(score, f0pdf, [xs]) for xs in outputs]) + f1pdfdist = np.array( + [self.evalDist(score, f1pdf, [xs]) for xs in outputs]) + clfRatios = self.singleRatio(f0pdfdist, f1pdfdist) + train_score.append(clfRatios) + if roc: + all_targets.append(testtarget) + if true_dist: + if len(evalData.shape) > 1: + f0dist = np.array( + [self.evalDist(x, f0pdf, xs) for xs in testdata]) + f1dist = np.array( + [self.evalDist(x, f1pdf, xs) for xs in testdata]) + else: + f0dist = np.array( + [self.evalDist(x, f0pdf, [xs]) for xs in testdata]) + f1dist = np.array( + [self.evalDist(x, f1pdf, [xs]) for xs in testdata]) + + trRatios = self.singleRatio(f0dist, f1dist) + + 
true_score.append(trRatios) + + innerRatios = 1. / innerRatios + innerRatios[np.abs(innerRatios) == np.inf] = 0. + fullRatios += innerRatios + if true_dist: + innerTrueRatios = 1. / innerTrueRatios + innerTrueRatios[np.abs(innerTrueRatios) == np.inf] = 0. + fullRatiosReal += innerTrueRatios + if roc: + for ind in range(1, (len(train_score) / 3 + 1)): + print_scores = train_score[(ind - 1) * 3:(ind - 1) * 3 + 3] + print_targets = all_targets[(ind - 1) * 3:(ind - 1) * 3 + 3] + print_positions = all_positions[ + (ind - 1) * 3:(ind - 1) * 3 + 3] + if true_dist: + makeMultiROC( + print_scores, + print_targets, + makePlotName( + 'all{0}'.format( + ind - 1), + 'comparison', + type='roc', + dir=self.dir, + model_g=self.model_g, + c1_g=self.c1_g), + dir=self.dir, + model_g=self.model_g, + true_score=true_score, + print_pdf=True, + title='ROC for pairwise trained classifier', + pos=print_positions) + else: + makeMultiROC( + print_scores, + print_targets, + makePlotName( + 'all{0}'.format( + ind - 1), + 'comparison', + type='roc', + dir=self.dir, + model_g=self.model_g, + c1_g=self.c1_g), + dir=self.dir, + model_g=self.model_g, + print_pdf=True, + title='ROC for pairwise trained classifier', + pos=print_positions) + + return fullRatios, fullRatiosReal + + def findOutliers(self, x): + q5, q95 = np.percentile(x, [5, 95]) + iqr = 2.0 * (q95 - q5) + outliers = (x <= q95 + iqr) & (x >= q5 - iqr) + return outliers + + def computeRatios(self, true_dist=False, vars_g=None, + data_file='test', use_log=False): + ''' + Use the computed score densities to compute + the decomposed ratio test. + set true_dist to True if workspace have the true distributions to + make plots, in that case vars_g also must be provided + Final result is histogram for ratios and signal - bkf rejection curves + ''' + + f = ROOT.TFile('{0}/{1}'.format(self.dir, self.workspace)) + w = f.Get('w') + f.Close() + + c1 = self.c1 + c0 = self.c0 + c1 = c1 / c1.sum() + c0 = c0 / c0.sum() + + print 'Calculating ratios' + + npoints = 50 + + if true_dist: + vars = ROOT.TList() + for var in vars_g: + vars.Add(w.var(var)) + x = ROOT.RooArgSet(vars) + + if use_log: + evaluateRatio = self.evaluateLogDecomposedRatio + post = 'log' + else: + evaluateRatio = self.evaluateDecomposedRatio + post = '' - return fullRatios,fullRatiosReal + score = ROOT.RooArgSet(w.var('score')) + scoref = ROOT.RooArgSet(w.var('scoref')) - def findOutliers(self, x): - q5, q95 = np.percentile(x, [5,95]) - iqr = 2.0*(q95 - q5) - outliers = (x <= q95 + iqr) & (x >= q5 - iqr) - return outliers + if use_log: + getRatio = self.singleLogRatio + else: + getRatio = self.singleRatio + + # NN trained on complete model + F0pdf = w.function('bkghistpdf_F0_F1') + F1pdf = w.function('sighistpdf_F0_F1') + + # TODO Here assuming that signal is first dataset + testdata, testtarget = loadData( + data_file, self.F0_dist, 0, dir=self.dir, c1_g=self.c1_g) + if len(vars_g) == 1: + xarray = np.linspace(0, 5, npoints) + fullRatios, _ = evaluateRatio( + w, xarray, x=x, roc=False, true_dist=True) + + F1dist = np.array([self.evalDist(x, w.pdf('F1'), [xs]) + for xs in xarray]) + F0dist = np.array([self.evalDist(x, w.pdf('F0'), [xs]) + for xs in xarray]) + y2 = getRatio(F1dist, F0dist) + + # NN trained on complete model + outputs = predict( + '{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format( + self.dir, self.model_g, self.c1_g), xarray.reshape( + xarray.shape[0], 1), model_g=self.model_g, clf=self.clf) + F1fulldist = np.array( + [self.evalDist(scoref, F1pdf, [xs]) for xs in outputs]) + F0fulldist = np.array( + 
[self.evalDist(scoref, F0pdf, [xs]) for xs in outputs]) + + pdfratios = getRatio(F1fulldist, F0fulldist) + + saveFig(xarray, + [fullRatios, + y2, + pdfratios], + makePlotName('all', + 'train', + type='ratio' + post), + title='Likelihood Ratios', + labels=['Approx. Decomposed', + 'Exact', + 'Approx.'], + print_pdf=True, + dir=self.dir) + + if true_dist: + decomposedRatio, _ = evaluateRatio( + w, testdata, x=x, roc=self.verbose_printing, true_dist=True) + else: + decomposedRatio, _ = evaluateRatio( + w, testdata, c0arr=c0, c1arr=c1, roc=True, data_type=data_file) + if len(testdata.shape) > 1: + outputs = predict( + '{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format( + self.dir, + self.model_g, + self.c1_g, + self.model_file), + testdata, + model_g=self.model_g, + clf=self.clf) - def computeRatios(self,true_dist=False, vars_g=None, - data_file='test',use_log=False): - ''' - Use the computed score densities to compute - the decomposed ratio test. - set true_dist to True if workspace have the true distributions to - make plots, in that case vars_g also must be provided - Final result is histogram for ratios and signal - bkf rejection curves - ''' + else: + outputs = predict( + '{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format( + self.dir, self.model_g, self.c1_g, self.model_file), testdata.reshape( + testdata.shape[0], 1), model_g=self.model_g, clf=self.clf) + + F1fulldist = np.array( + [self.evalDist(scoref, F1pdf, [xs]) for xs in outputs]) + F0fulldist = np.array( + [self.evalDist(scoref, F0pdf, [xs]) for xs in outputs]) + + completeRatio = getRatio(F1fulldist, F0fulldist) + if true_dist: + if len(testdata.shape) > 1: + F1dist = np.array([self.evalDist(x, w.pdf('F1'), xs) + for xs in testdata]) + F0dist = np.array([self.evalDist(x, w.pdf('F0'), xs) + for xs in testdata]) + else: + F1dist = np.array( + [self.evalDist(x, w.pdf('F1'), [xs]) for xs in testdata]) + F0dist = np.array( + [self.evalDist(x, w.pdf('F0'), [xs]) for xs in testdata]) + + realRatio = getRatio(F1dist, F0dist) + + decomposed_target = testtarget + complete_target = testtarget + real_target = testtarget + + # Removing outliers + numtest = decomposedRatio.shape[0] + + if true_dist: + real_outliers = np.zeros(numtest, dtype=bool) + real_outliers = self.findOutliers(realRatio) + + + all_ratios_plots = [] + all_names_plots = [] + bins = 70 + low = 0.6 + high = 1.2 + if use_log: + low = -1.0 + high = 1.0 + low = [] + high = [] + low = [] + high = [] + ratios_vars = [] + for l, name in enumerate(['sig', 'bkg']): + if true_dist: + ratios_names = ['true', 'full', 'composed'] + ratios_vec = [realRatio, completeRatio, decomposedRatio] + target_vec = [real_target, complete_target, decomposed_target] + + minimum = min([realRatio[real_target == 1 - l].min(), + completeRatio[complete_target == 1 - l].min(), + decomposedRatio[decomposed_target == 1 - l].min()]) + maximum = max([realRatio[real_target == 1 - l].max(), + completeRatio[complete_target == 1 - l].max(), + decomposedRatio[decomposed_target == 1 - l].max()]) - f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace)) - w = f.Get('w') - f.Close() + else: + ratios_names = ['full', 'composed'] + ratios_vec = [completeRatio, decomposedRatio] + target_vec = [complete_target, decomposed_target] + minimum = min([completeRatio[complete_target == 1 - l].min(), + decomposedRatio[decomposed_target == 1 - l].min()]) + maximum = max([completeRatio[complete_target == 1 - l].max(), + decomposedRatio[decomposed_target == 1 - l].max()]) + + low.append(minimum - ((maximum - minimum) / bins) * 10) + high.append(maximum 
+ ((maximum - minimum) / bins) * 10) + w.factory('ratio{0}[{1},{2}]'.format(name, low[l], high[l])) + ratios_vars.append(w.var('ratio{0}'.format(name))) + for curr, curr_ratios, curr_targets in zip( + ratios_names, ratios_vec, target_vec): + numtest = curr_ratios.shape[0] + for l, name in enumerate(['sig', 'bkg']): + hist = ROOT.TH1F( + '{0}_{1}hist_F0_f0'.format( + curr, name), 'hist', bins, low[l], high[l]) + for val in curr_ratios[curr_targets == 1 - l]: + hist.Fill(val) + datahist = ROOT.RooDataHist( + '{0}_{1}datahist_F0_f0'.format( + curr, name), 'hist', ROOT.RooArgList( + ratios_vars[l]), hist) + ratios_vars[l].setBins(bins) + histpdf = ROOT.RooHistFunc( + '{0}_{1}histpdf_F0_f0'.format( + curr, name), 'hist', ROOT.RooArgSet( + ratios_vars[l]), datahist, 0) + + histpdf.specialIntegratorConfig( + ROOT.kTRUE).method1D().setLabel('RooBinIntegrator') + getattr(w, 'import')(hist) + getattr(w, 'import')(datahist) + getattr(w, 'import')(histpdf) + if name == 'bkg': + all_ratios_plots.append([w.function('{0}_sighistpdf_F0_f0'.format( + curr)), w.function('{0}_bkghistpdf_F0_f0'.format(curr))]) + all_names_plots.append( + ['sig_{0}'.format(curr), 'bkg_{0}'.format(curr)]) + + all_ratios_plots = [[all_ratios_plots[j][i] for j, _ in enumerate( + all_ratios_plots)] for i, _ in enumerate(all_ratios_plots[0])] + all_names_plots = [[all_names_plots[j][i] for j, _ in enumerate( + all_names_plots)] for i, _ in enumerate(all_names_plots[0])] + + printMultiFrame(w, + ['ratiosig', + 'ratiobkg'], + all_ratios_plots, + makePlotName('ratio', + 'comparison', + type='hist' + post, + dir=self.dir, + model_g=self.model_g, + c1_g=self.c1_g), + all_names_plots, + setLog=True, + dir=self.dir, + model_g=self.model_g, + y_text='Count', + title='Histograms for ratios', + x_text='ratio value', + print_pdf=True) + + + if use_log: + decomposedRatio = np.exp(decomposedRatio) + completeRatio = np.exp(completeRatio) + if true_dist: + realRatio = np.exp(realRatio) + if true_dist: + ratios_list = [decomposedRatio / decomposedRatio.max(), + completeRatio / completeRatio.max(), + realRatio / realRatio.max()] + targets_list = [decomposed_target, complete_target, real_target] + legends_list = ['Approx. 
Decomposed', 'Approx.', 'Exact'] + else: - - #TODO: This are Harcoded for now - c1 = self.c1 - c0 = self.c0 - #c1 = np.multiply(c1, self.cross_section) - c1 = c1/c1.sum() - c0 = c0/c0.sum() - - print 'Calculating ratios' - - npoints = 50 - - if true_dist == True: - vars = ROOT.TList() - for var in vars_g: - vars.Add(w.var(var)) - x = ROOT.RooArgSet(vars) - - if use_log == True: - evaluateRatio = self.evaluateLogDecomposedRatio - post = 'log' - else: - evaluateRatio = self.evaluateDecomposedRatio - post = '' - - score = ROOT.RooArgSet(w.var('score')) - scoref = ROOT.RooArgSet(w.var('scoref')) - - if use_log == True: - getRatio = self.singleLogRatio - else: - getRatio = self.singleRatio - - if self.preprocessing == True: - if self.scaler == None: - self.scaler = {} - for k in range(self.nsamples): - for j in range(self.nsamples): - if k < j: - self.scaler[(k,j)] = joblib.load('{0}/model/{1}/{2}/{3}_{4}_{5}.dat'.format(self.dir,'mlp',self.c1_g,'scaler',self.dataset_names[k],self.dataset_names[j])) - - - # NN trained on complete model - F0pdf = w.function('bkghistpdf_F0_F1') - F1pdf = w.function('sighistpdf_F0_F1') - - # TODO Here assuming that signal is first dataset - testdata, testtarget = loadData(data_file,self.F0_dist,0,dir=self.dir,c1_g=self.c1_g,preprocessing=False) - if len(vars_g) == 1: - xarray = np.linspace(0,5,npoints) - fullRatios,_ = evaluateRatio(w,xarray,x=x,plotting=True,roc=False,true_dist=True) - - F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in xarray]) - F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in xarray]) - y2 = getRatio(F1dist, F0dist) - - # NN trained on complete model - outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),xarray.reshape(xarray.shape[0],1),model_g=self.model_g,clf=self.clf) - F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs]) - F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs]) - - pdfratios = getRatio(F1fulldist, F0fulldist) - - saveFig(xarray, [fullRatios, y2, pdfratios], makePlotName('all','train',type='ratio'+post),title='Likelihood Ratios',labels=['Composed trained', 'True', 'Full trained'],print_pdf=True,dir=self.dir) - - if true_dist == True: - decomposedRatio,_ = evaluateRatio(w,testdata,x=x,plotting=False,roc=self.verbose_printing,true_dist=True) - else: - decomposedRatio,_ = evaluateRatio(w,testdata,c0arr=c0,c1arr=c1,plotting=True, - roc=True,data_type=data_file) - if len(testdata.shape) > 1: - outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata,model_g=self.model_g,clf=self.clf) - #outputs = predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),testdata,model_g=self.model_g) - - else: - outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata.reshape(testdata.shape[0],1),model_g=self.model_g,clf=self.clf) - - F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs]) - F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs]) - - completeRatio = getRatio(F1fulldist,F0fulldist) - if true_dist == True: - if len(testdata.shape) > 1: - F1dist = np.array([self.evalDist(x,w.pdf('F1'),xs) for xs in testdata]) - F0dist = np.array([self.evalDist(x,w.pdf('F0'),xs) for xs in testdata]) - else: - F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in testdata]) - F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in 
testdata]) - - realRatio = getRatio(F1dist,F0dist) - - decomposed_target = testtarget - complete_target = testtarget - real_target = testtarget - #Histogram F0-f0 for composed, full and true - - # Removing outliers - numtest = decomposedRatio.shape[0] - #decomposedRatio[decomposedRatio < 0.] = completeRatio[decomposedRatio < 0.] - - #decomposed_outliers = np.zeros(numtest,dtype=bool) - #complete_outliers = np.zeros(numtest,dtype=bool) - #decomposed_outliers = self.findOutliers(decomposedRatio) - #complete_outliers = self.findOutliers(completeRatio) - #decomposed_target = testtarget[decomposed_outliers] - #complete_target = testtarget[complete_outliers] - #decomposedRatio = decomposedRatio[decomposed_outliers] - #completeRatio = completeRatio[complete_outliers] - if true_dist == True: - real_outliers = np.zeros(numtest,dtype=bool) - real_outliers = self.findOutliers(realRatio) - #real_target = testtarget[real_outliers] - #realRatio = realRatio[real_outliers] - - all_ratios_plots = [] - all_names_plots = [] - bins = 70 - low = 0.6 - high = 1.2 - if use_log == True: - low = -1.0 - high = 1.0 - low = [] - high = [] - low = [] - high = [] - ratios_vars = [] - for l,name in enumerate(['sig','bkg']): - if true_dist == True: - ratios_names = ['truth','full','composed'] - ratios_vec = [realRatio, completeRatio, decomposedRatio] - target_vec = [real_target, complete_target, decomposed_target] - - minimum = min([realRatio[real_target == 1-l].min(), - completeRatio[complete_target == 1-l].min(), - decomposedRatio[decomposed_target == 1-l].min()]) - maximum = max([realRatio[real_target == 1-l].max(), - completeRatio[complete_target == 1-l].max(), - decomposedRatio[decomposed_target == 1-l].max()]) - - else: - ratios_names = ['full','composed'] - ratios_vec = [completeRatio, decomposedRatio] - target_vec = [complete_target, decomposed_target] - minimum = min([completeRatio[complete_target == 1-l].min(), - decomposedRatio[decomposed_target == 1-l].min()]) - maximum = max([completeRatio[complete_target == 1-l].max(), - decomposedRatio[decomposed_target == 1-l].max()]) - - low.append(minimum - ((maximum - minimum) / bins)*10) - high.append(maximum + ((maximum - minimum) / bins)*10) - w.factory('ratio{0}[{1},{2}]'.format(name, low[l], high[l])) - ratios_vars.append(w.var('ratio{0}'.format(name))) - for curr, curr_ratios, curr_targets in zip(ratios_names,ratios_vec,target_vec): - numtest = curr_ratios.shape[0] - for l,name in enumerate(['sig','bkg']): - hist = ROOT.TH1F('{0}_{1}hist_F0_f0'.format(curr,name),'hist',bins,low[l],high[l]) - for val in curr_ratios[curr_targets == 1-l]: - hist.Fill(val) - datahist = ROOT.RooDataHist('{0}_{1}datahist_F0_f0'.format(curr,name),'hist', - ROOT.RooArgList(ratios_vars[l]),hist) - ratios_vars[l].setBins(bins) - histpdf = ROOT.RooHistFunc('{0}_{1}histpdf_F0_f0'.format(curr,name),'hist', - ROOT.RooArgSet(ratios_vars[l]), datahist, 0) - - histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator') - getattr(w,'import')(hist) - getattr(w,'import')(datahist) # work around for morph = w.import(morph) - getattr(w,'import')(histpdf) # work around for morph = w.import(morph) - #print '{0} {1} {2}'.format(curr,name,hist.Integral()) - if name == 'bkg': - all_ratios_plots.append([w.function('{0}_sighistpdf_F0_f0'.format(curr)), - w.function('{0}_bkghistpdf_F0_f0'.format(curr))]) - all_names_plots.append(['sig_{0}'.format(curr),'bkg_{0}'.format(curr)]) - - all_ratios_plots = [[all_ratios_plots[j][i] for j,_ in enumerate(all_ratios_plots)] - for i,_ in 
enumerate(all_ratios_plots[0])] - all_names_plots = [[all_names_plots[j][i] for j,_ in enumerate(all_names_plots)] - for i,_ in enumerate(all_names_plots[0])] - - printMultiFrame(w,['ratiosig','ratiobkg'],all_ratios_plots, makePlotName('ratio','comparison',type='hist'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),all_names_plots,setLog=True,dir=self.dir,model_g=self.model_g,y_text='Count',title='Histograms for ratios',x_text='ratio value',print_pdf=True) - - # scatter plot true ratio - composed - full ratio - - #if self.verbose_printing == True and true_dist == True: - # saveFig(completeRatio,[realRatio], makePlotName('full','train',type='scat'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),scatter=True,axis=['full trained ratio','true ratio'],dir=self.dir,model_g=self.model_g) - # saveFig(decomposedRatio,[realRatio], makePlotName('comp','train',type='scat'+post,dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['composed trained ratio','true ratio'],dir=self.dir, model_g=self.model_g) - # signal - bkg rejection plots - if use_log == True: - decomposedRatio = np.exp(decomposedRatio) - completeRatio = np.exp(completeRatio) - if true_dist == True: - realRatio = np.exp(realRatio) - if true_dist == True: - - ratios_list = [decomposedRatio/decomposedRatio.max(), - completeRatio/completeRatio.max(), - realRatio/realRatio.max()] - targets_list = [decomposed_target, complete_target, real_target] - legends_list = ['composed', 'full', 'true'] - else: - - indices = (decomposedRatio > 0.) - decomposedRatio = decomposedRatio[indices] - decomposed_target = decomposed_target[indices] - indices = (completeRatio > 0.) - completeRatio = completeRatio[indices] - complete_target = complete_target[indices] - - completeRatio = np.log(completeRatio) - decomposedRatio = np.log(decomposedRatio) - decomposedRatio = decomposedRatio + np.abs(decomposedRatio.min()) - completeRatio = completeRatio + np.abs(completeRatio.min()) - ratios_list = [decomposedRatio/decomposedRatio.max(), - completeRatio/completeRatio.max()] - targets_list = [decomposed_target, complete_target] - legends_list = ['composed','full'] - makeSigBkg(ratios_list,targets_list,makePlotName('comp','all',type='sigbkg'+post,dir=self.dir, - model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,print_pdf=True,legends=legends_list,title='Signal-Background rejection curves') - - # Scatter plot to compare regression function and classifier score - if self.verbose_printing == True and true_dist == True: - testdata, testtarget = loadData('test',self.F0_dist,self.F1_dist,dir=self.dir,c1_g=self.c1_g) - if len(testdata.shape) > 1: - reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),xs) for xs in testdata]) - else: - reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),[xs]) for xs in testdata]) - if len(testdata.shape) > 1: - outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],testdata.shape[1]),model_g=self.model_g, clf=self.clf) - else: - outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],1),model_g=self.model_g, clf=self.clf) - #saveFig(outputs,[reg], makePlotName('full','train',type='scat',dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),scatter=True,axis=['score','regression'],dir=self.dir,model_g=self.model_g) - - - - def evalC1Likelihood(self,w,testdata,c0,c1,c_eval=0,c_min=0.01,c_max=0.2,use_log=False,true_dist=False, vars_g=None, 
npoints=50,samples_ids=None,weights_func=None,coef_index=0): - - if true_dist == True: - vars = ROOT.TList() - for var in vars_g: - vars.Add(w.var(var)) - x = ROOT.RooArgSet(vars) - else: - x = None - - score = ROOT.RooArgSet(w.var('score')) - if use_log == True: - evaluateRatio = self.evaluateLogDecomposedRatio - post = 'log' - else: - evaluateRatio = self.evaluateDecomposedRatio - post = '' - - csarray = np.linspace(c_min,c_max,npoints) - decomposedLikelihood = np.zeros(npoints) - trueLikelihood = np.zeros(npoints) - c1s = np.zeros(c0.shape[0]) - pre_pdf = [] - pre_dist = [] - pre_pdf.extend([[],[]]) - pre_dist.extend([[],[]]) - # change this enumerates - for k in enumerate(self.nsamples): - pre_pdf[0].append([]) - pre_pdf[1].append([]) - pre_dist[0].append([]) - pre_dist[1].append([]) - for j in enumerate(self.nsamples): - index_k,index_j = (self.basis_indexes[k],self.basis_indexes[j]) - if k <> j: - f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j)) - f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j)) - data = testdata - if self.preprocessing == True: - data = preProcessing(testdata,self.dataset_names[min(index_k,index_j)], - self.dataset_names[max(index_k,index_j)],self.scaler) - outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g, - self.c1_g,self.model_file,index_k,index_j),data,model_g=self.model_g, clf=self.clf) - f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs]) - f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs]) - pre_pdf[0][k].append(f0pdfdist) - pre_pdf[1][k].append(f1pdfdist) + indices = (decomposedRatio > 0.) + decomposedRatio = decomposedRatio[indices] + decomposed_target = decomposed_target[indices] + indices = (completeRatio > 0.) + completeRatio = completeRatio[indices] + complete_target = complete_target[indices] + + completeRatio = np.log(completeRatio) + decomposedRatio = np.log(decomposedRatio) + decomposedRatio = decomposedRatio + np.abs(decomposedRatio.min()) + completeRatio = completeRatio + np.abs(completeRatio.min()) + ratios_list = [decomposedRatio / decomposedRatio.max(), + completeRatio / completeRatio.max()] + targets_list = [decomposed_target, complete_target] + legends_list = ['Approx. 
Decomposed', 'Approx.'] + makeSigBkg( + ratios_list, + targets_list, + makePlotName( + 'comp', + 'all', + type='sigbkg' + post, + dir=self.dir, + model_g=self.model_g, + c1_g=self.c1_g), + dir=self.dir, + model_g=self.model_g, + print_pdf=True, + legends=legends_list, + title='Signal-Background rejection curves') + + + def evalC1C2Likelihood( + self, + c0, + c1, + dir='/afs/cern.ch/user/j/jpavezse/systematics', + workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', + c1_g='', + model_g='mlp', + use_log=False, + true_dist=False, + vars_g=None, + clf=None, + verbose_printing=False): + + f = ROOT.TFile('{0}/{1}'.format(dir, workspace)) + w = f.Get('w') + f.Close() + if true_dist: + vars = ROOT.TList() + for var in vars_g: + vars.Add(w.var(var)) + x = ROOT.RooArgSet(vars) else: - pre_pdf[0][k].append(None) - pre_pdf[1][k].append(None) - if true_dist == True: - f0 = w.pdf('f{0}'.format(index_k)) - f1 = w.pdf('f{0}'.format(index_j)) - if len(testdata.shape) > 1: - f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata]) - f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata]) - else: - f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata]) - f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata]) - pre_dist[0][k].append(f0dist) - pre_dist[1][k].append(f1dist) - indices = np.ones(testdata.shape[0], dtype=bool) - ratiosList = [] - samples = [] - # This is needed for calibration of full ratios - #for i,sample in enumerate(self.dataset_names): - # samples.append(np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(self.dir,'mlp',self.c1_g,'data',sample))) - - #cross_section = self.cross_section / np.sum(self.cross_section) - n_eff_ratio = np.zeros(csarray.shape[0]) - n_zeros = np.zeros(csarray.shape[0]) - cross_section = None - for i,cs in enumerate(csarray): - if weights_func <> None: - c1s = weights_func(cs,c1[1]) if coef_index == 0 else weights_func(c1[0],cs) - print '{0} {1}'.format(cs, c1[1]) if coef_index == 0 else '{0} {1}'.format(c1[0],cs) - print c1s - else: - c1s[:] = c1[:] - c1s[c_eval] = cs - if self.cross_section <> None: - c1s = np.multiply(c1s,self.cross_section) - #c1s = np.abs(c1s) - n_eff = c1s.sum() - n_tot = np.abs(c1s).sum() - print 'n_eff: {0}, n_tot: {1}, n_eff/n_tot: {2}'.format(n_eff, n_tot, n_eff/n_tot) - c1s = c1s/c1s.sum() - decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x, - plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_dist=pre_dist, - pre_evaluation=pre_pdf,cross_section=cross_section) - decomposedRatios = 1./decomposedRatios - n_eff_ratio[i] = n_eff/n_tot - n_zeros[i] = decomposedRatios[decomposedRatios < 0.].shape[0] - print decomposedRatios[decomposedRatios < 0.].shape - #calibratedRatios = self.calibrateFullRatios(w, decomposedRatios, - # c0,c1s,debug=debug,samples_data=samples,index=i) - #saveFig(decomposedRatios2, [calibratedRatios], makePlotName('calibrated_{0}'.format(i),'ratio',type='scat', - #dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['composed ratio', - #'composed calibrated'], dir=self.dir, model_g=self.model_g) - ratiosList.append(decomposedRatios) - #indices = np.logical_and(indices, decomposedRatios > 0.) 
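# A minimal numpy sketch of the coupling-value scan performed around this point.
# `ratio_fn` is a hypothetical stand-in for the decomposed-ratio evaluation
# (already inverted, 1/r, as in the code above); the remaining steps mirror the
# ones used here: fix one coefficient, renormalize, clip negative ratios to 1,
# sum -log over events, and take the argmin over the grid.
import numpy as np

def scan_coupling(ratio_fn, testdata, c1, c_eval, c_min, c_max, npoints=50):
    csarray = np.linspace(c_min, c_max, npoints)
    nll = np.zeros(npoints)
    for i, cs in enumerate(csarray):
        c1s = np.array(c1, dtype=float)
        c1s[c_eval] = cs
        c1s = c1s / c1s.sum()           # renormalize mixture coefficients
        ratios = ratio_fn(c1s, testdata)
        ratios[ratios < 0.] = 1.0       # clip unphysical values, as above
        nll[i] = -np.log(ratios).sum()  # -ln L at this coupling value
    nll -= nll.min()
    return csarray[nll.argmin()], csarray, nll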
- for i,cs in enumerate(csarray): - decomposedRatios = ratiosList[i] - if use_log == False: - if samples_ids <> None: - ratios = decomposedRatios - ids = samples_ids - decomposedLikelihood[i] = (np.dot(np.log(ratios), - np.array([c1[x] for x in ids]))).sum() + x = None + + score = ROOT.RooArgSet(w.var('score')) + if use_log: + evaluateRatio = self.evaluateLogDecomposedRatio + post = 'log' else: - decomposedRatios[decomposedRatios < 0.] = 1.0 - decomposedLikelihood[i] = -np.log(decomposedRatios).sum() - print decomposedLikelihood[i] - - trueLikelihood[i] = -np.log(trueRatios).sum() - else: - decomposedLikelihood[i] = decomposedRatios.sum() - trueLikelihood[i] = trueRatios.sum() - decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min() - # print n_eff/n_zero relation - #saveFig(csarray,[n_eff_ratio, n_zeros/n_zeros.max()],makePlotName('eff_ratio','zeros',type=post+'plot_g2'),labels=['n_eff/n_tot','zeros/{0}'.format(n_zeros.max())],axis=['g2','values'],marker=True,dir=self.dir,marker_value=c1[0],title='#zeros and n_eff/n_tot given g2',print_pdf=True,model_g=self.model_g) - #saveFig(n_eff_ratio, [n_zeros/n_zeros.max()], makePlotName('eff_ratio','zeros',type='scat', - #dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['n_eff/n_tot', - #'#zeros/{0}'.format(n_zeros.max())], dir=self.dir, model_g=self.model_g,title='# zeros given n_eff/n_tot ratio') - - if true_dist == True: - trueLikelihood = trueLikelihood - trueLikelihood.min() - saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood_{0}'.format(n_sample)),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[0],title='c1[0] Fitting',print_pdf=True) - return (csarray[trueLikelihood.argmin()], csarray[decomposedLikelihood.argmin()]) - else: - saveFig(csarray,[decomposedLikelihood],makePlotName('comp','train',type='likelihood_g2'),labels=['decomposed'],axis=['g2','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[c_eval],title='g2 Fitting',print_pdf=True,model_g=self.model_g) - pdb.set_trace() - return (0.,csarray[decomposedLikelihood.argmin()]) - - def evalC1C2Likelihood(self,w,testdata,c0,c1,c_eval=0,c_min=0.01,c_max=0.2,use_log=False,true_dist=False, vars_g=None, npoints=50,samples_ids=None,weights_func=None): - - if true_dist == True: - vars = ROOT.TList() - for var in vars_g: - vars.Add(w.var(var)) - x = ROOT.RooArgSet(vars) - else: - x = None - - score = ROOT.RooArgSet(w.var('score')) - if use_log == True: - evaluateRatio = self.evaluateLogDecomposedRatio - post = 'log' - else: - evaluateRatio = self.evaluateDecomposedRatio - post = '' - - csarray = np.linspace(c_min[0],c_max[0],npoints) - csarray2 = np.linspace(c_min[1], c_max[1], npoints) - decomposedLikelihood = np.zeros((npoints,npoints)) - trueLikelihood = np.zeros((npoints,npoints)) - c1s = np.zeros(c0.shape[0]) - pre_pdf = [] - pre_dist = [] - pre_pdf.extend([[],[]]) - pre_dist.extend([[],[]]) - # change this enumerates - for k,c0_ in enumerate(c0): - pre_pdf[0].append([]) - pre_pdf[1].append([]) - pre_dist[0].append([]) - pre_dist[1].append([]) - for j,c1_ in enumerate(c0): - index_k,index_j = (self.basis_indexes[k],self.basis_indexes[j]) - if k <> j: - f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j)) - f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j)) - data = testdata - if self.preprocessing == True: - data = preProcessing(testdata,self.dataset_names[min(index_k,index_j)], - 
self.dataset_names[max(index_k,index_j)],self.scaler) - outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g, - self.c1_g,self.model_file,index_k,index_j),data,model_g=self.model_g, clf=self.clf) - f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs]) - f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs]) - pre_pdf[0][k].append(f0pdfdist) - pre_pdf[1][k].append(f1pdfdist) + evaluateRatio = self.evaluateDecomposedRatio + post = '' + + npoints = 25 + csarray = np.linspace(0.01, 0.2, npoints) + cs2array = np.linspace(0.1, 0.4, npoints) + testdata = np.loadtxt( + '{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir, model_g, c1_g, 'test', 'F1')) + + decomposedLikelihood = np.zeros((npoints, npoints)) + trueLikelihood = np.zeros((npoints, npoints)) + c1s = np.zeros(c1.shape[0]) + c0s = np.zeros(c1.shape[0]) + pre_pdf = [] + pre_dist = [] + pre_pdf.extend([[], []]) + pre_dist.extend([[], []]) + for k, c0_ in enumerate(c0): + pre_pdf[0].append([]) + pre_pdf[1].append([]) + pre_dist[0].append([]) + pre_dist[1].append([]) + for j, c1_ in enumerate(c1): + if k != j: + f0pdf = w.function('bkghistpdf_{0}_{1}'.format(k, j)) + f1pdf = w.function('sighistpdf_{0}_{1}'.format(k, j)) + outputs = predict( + '{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format( + dir, + model_g, + c1_g, + 'adaptive', + k, + j), + testdata, + model_g=model_g, + clf=clf) + f0pdfdist = np.array( + [self.evalDist(score, f0pdf, [xs]) for xs in outputs]) + f1pdfdist = np.array( + [self.evalDist(score, f1pdf, [xs]) for xs in outputs]) + pre_pdf[0][k].append(f0pdfdist) + pre_pdf[1][k].append(f1pdfdist) + else: + pre_pdf[0][k].append(None) + pre_pdf[1][k].append(None) + if true_dist: + f0 = w.pdf('f{0}'.format(k)) + f1 = w.pdf('f{0}'.format(j)) + if len(testdata.shape) > 1: + f0dist = np.array([self.evalDist(x, f0, xs) + for xs in testdata]) + f1dist = np.array([self.evalDist(x, f1, xs) + for xs in testdata]) + else: + f0dist = np.array([self.evalDist(x, f0, [xs]) + for xs in testdata]) + f1dist = np.array([self.evalDist(x, f1, [xs]) + for xs in testdata]) + pre_dist[0][k].append(f0dist) + pre_dist[1][k].append(f1dist) + + # Evaluate Likelihood in different c1[0] and c1[1] values + self + for i, cs in enumerate(csarray): + for j, cs2 in enumerate(cs2array): + c1s[:] = c1[:] + c1s[0] = cs + c1s[1] = cs2 + c1s[2] = 1. - cs - cs2 + decomposedRatios, trueRatios = evaluateRatio(w, testdata, + x=x, roc=False, plotting=False, + c0arr=c0, c1arr=c1s, true_dist=true_dist, + pre_evaluation=pre_pdf, + pre_dist=pre_dist) + + if not use_log: + decomposedLikelihood[i, j] = np.log(decomposedRatios).sum() + trueLikelihood[i, j] = np.log(trueRatios).sum() + else: + decomposedLikelihood[i, j] = decomposedRatios.sum() + trueLikelihood[i, j] = trueRatios.sum() + decomposedLikelihood = 2. * decomposedLikelihood + if true_dist: + trueLikelihood = 2. * trueLikelihood + decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min() + X, Y = np.meshgrid(csarray, cs2array) + decMin = np.unravel_index( + decomposedLikelihood.argmin(), + decomposedLikelihood.shape) + min_value = [csarray[decMin[0]], cs2array[decMin[1]]] + if true_dist: + trueLikelihood = trueLikelihood - trueLikelihood.min() + trueMin = np.unravel_index( + trueLikelihood.argmin(), + trueLikelihood.shape) + if verbose_printing: + saveFig(X, + [Y, + decomposedLikelihood, + trueLikelihood], + makePlotName('comp', + 'train', + type='multilikelihood'), + labels=['Approx. 
Decomposed', + 'Exact'], + type='contour2', + marker=True, + dir=dir, + marker_value=(c1[0], + c1[1]), + print_pdf=True, + min_value=min_value) + return [[csarray[trueMin[0]], cs2array[trueMin[1]]], + [csarray[decMin[0]], cs2array[decMin[1]]]] else: - pre_pdf[0][k].append(None) - pre_pdf[1][k].append(None) - if true_dist == True: - f0 = w.pdf('f{0}'.format(k)) - f1 = w.pdf('f{0}'.format(j)) - if len(testdata.shape) > 1: - f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata]) - f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata]) - else: - f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata]) - f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata]) - pre_dist[0][k].append(f0dist) - pre_dist[1][k].append(f1dist) - indices = np.ones(testdata.shape[0], dtype=bool) - ratiosList = [] - samples = [] - # This is needed for calibration of full ratios - #for i,sample in enumerate(self.dataset_names): - # samples.append(np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(self.dir,'mlp',self.c1_g,'data',sample))) - n_eff_ratio = np.zeros((csarray.shape[0], csarray2.shape[0])) - for i,cs in enumerate(csarray): - ratiosList.append([]) - for j, cs2 in enumerate(csarray2): - if weights_func <> None: - c1s = weights_func(cs,cs2) - #print '{0} {1}'.format(cs,cs2) - #print c1s + return [[0., 0.], [csarray[decMin[0]], cs2array[decMin[1]]]] + + + def fitCValues( + self, + c0, + c1, + dir='/afs/cern.ch/user/j/jpavezse/systematics', + c1_g='', + model_g='mlp', + true_dist=False, + vars_g=None, + workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', + use_log=False, + clf=None, + n_hist_c=100): + if use_log: + post = 'log' else: - c1s[:] = c1[:] - c1s[c_eval] = cs - if self.cross_section <> None: - c1s = np.multiply(c1s,self.cross_section) - n_eff = c1s.sum() - n_tot = np.abs(c1s).sum() - n_eff_ratio[i,j] = n_eff/n_tot - #print '{0} {1}'.format(i,j) - #print 'n_eff: {0}, n_tot: {1}, n_eff/n_tot: {2}'.format(n_eff, n_tot, n_eff/n_tot) - c1s = c1s/c1s.sum() - #print c1s - decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x, - plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_dist=pre_dist, - pre_evaluation=pre_pdf) - decomposedRatios = 1./decomposedRatios - #calibratedRatios = self.calibrateFullRatios(w, decomposedRatios, - # c0,c1s,debug=debug,samples_data=samples,index=i) - #saveFig(decomposedRatios2, [calibratedRatios], makePlotName('calibrated_{0}'.format(i),'ratio',type='scat', - #dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['composed ratio', - #'composed calibrated'], dir=self.dir, model_g=self.model_g) - ratiosList[i].append(decomposedRatios) - #print('{0} {1} '.format(i,j)), - #print decomposedRatios[decomposedRatios < 0.].shape - #print c1s - #indices = np.logical_and(indices, decomposedRatios > 0.) - for i,cs in enumerate(csarray): - for j, cs2 in enumerate(csarray2): - decomposedRatios = ratiosList[i][j] - if use_log == False: - if samples_ids <> None: - ratios = decomposedRatios - ids = samples_ids - decomposedLikelihood[i,j] = (np.dot(np.log(ratios), - np.array([c1[x] for x in ids]))).sum() - else: - #decomposedRatios[decomposedRatios < 0.] = 0.9 - decomposedRatios[decomposedRatios < 0.] 
= 1.0 - #decomposedRatios = decomposedRatios[self.findOutliers(decomposedRatios)] - if n_eff_ratio[i,j] <= 0.5: - #TODO: Harcoded number - decomposedLikelihood[i,j] = 20000 + post = '' + keys = ['true', 'dec'] + c1_ = dict((key, np.zeros(n_hist_c)) for key in keys) + c1_values = dict((key, np.zeros(n_hist_c)) for key in keys) + c2_values = dict((key, np.zeros(n_hist_c)) for key in keys) + + fil2 = open('{0}/fitting_values_c1c2{1}.txt'.format(dir, post), 'w') + + for i in range(n_hist_c): + + makeData(vars_g, c0, c1, num_train=200000, num_test=500, no_train=True, + workspace=workspace, dir=dir, c1_g=c1_g) + + if i == 0: + verbose_printing = True else: - decomposedLikelihood[i,j] = -np.log(decomposedRatios).sum() - #print decomposedLikelihood[i,j] - #print '{0} {1} {2}'.format(i,j,decomposedLikelihood[i,j]) - trueLikelihood[i,j] = -np.log(trueRatios).sum() - else: - decomposedLikelihood[i,j] = decomposedRatios.sum() - trueLikelihood[i,j] = trueRatios.sum() - #print '\n {0}'.format(i) - decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min() - decMin = np.unravel_index(decomposedLikelihood.argmin(), decomposedLikelihood.shape) - # pixel plots - #saveFig(csarray,[csarray2,decomposedLikelihood],makePlotName('comp','train',type='likelihood_g1g2'),labels=['composed'],pixel=True,marker=True,dir=self.dir,model_g=self.model_g,marker_value=(c1[0],c1[1]),print_pdf=True,contour=True,title='Likelihood fit for g1,g2') - - #decMin = [np.sum(decomposedLikelihood,1).argmin(),np.sum(decomposedLikelihood,0).argmin()] - X,Y = np.meshgrid(csarray, csarray2) - - saveFig(X,[Y,decomposedLikelihood],makePlotName('comp','train',type='multilikelihood'),labels=['composed'],contour=True,marker=True,dir=self.dir,model_g=self.model_g,marker_value=(c1[0],c1[1]),print_pdf=True,min_value=(csarray[decMin[0]],csarray2[decMin[1]])) - #print decMin - print [csarray[decMin[0]],csarray2[decMin[1]]] - pdb.set_trace() - if true_dist == True: - trueLikelihood = trueLikelihood - trueLikelihood.min() - trueMin = np.unravel_index(trueLikelihood.argmin(), trueLikelihood.shape) - saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood_{0}'.format(n_sample)),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[0],title='c1[0] Fitting',print_pdf=True) - return [[csarray[trueMin[0]],csarray2[trueMin[1]]], - [csarray2[decMin[0],csarray2[decMin[1]]]]] - else: - return [[0.,0.],[csarray[decMin[0]],csarray2[decMin[1]]]] - - def fitCValues(self,c0,c1,data_file = 'test',true_dist=False,vars_g=None,use_log=False,n_hist=150,num_pseudodata=1000,weights_func=None): - if use_log == True: - post = 'log' - else: - post = '' - npoints = 15 - c_eval = 1 - c_min = [0.6,0.1] - c_max = [1.5,0.9] - c_min = [-1.1,-1.1] - c_max = [-0.1,-0.1] - - #c_min = 0.6 - #c_max = 1.4 - - f = ROOT.TFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private/',self.workspace)) - w = f.Get('w') - f.Close() - assert w - - print '{0} {1}'.format(c_min,c_max) - rng = np.random.RandomState(self.seed) - # Needed in case of working of NN with scaled features - if self.preprocessing == True: - if self.scaler == None: - self.scaler = {} - for k in range(self.nsamples): - for j in enumerate(self.nsamples): - if k < j: - self.scaler[(k,j)] = joblib.load('{0}/model/{1}/{2}/{3}_{4}_{5}.dat'.format(self.dir,'mlp',self.c1_g,'scaler',self.dataset_names[k],self.dataset_names[j])) - - testdata = 
np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(self.dir,'mlp',self.c1_g,data_file,self.F1_dist))[:,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,16,20,24,25,26,27,28,29,30,31,36,40,42]] - print self.F1_dist - print testdata.shape - - fil1 = open('{0}/fitting_values_c1.txt'.format(self.dir),'a') - for i in range(n_hist): - indexes = rng.choice(testdata.shape[0], num_pseudodata) - dataset = testdata[indexes] - #(c1_true_1, c1_dec_1) = self.evalSingleC1Likelihood(w,dataset, c0,c1,c_eval=c_eval,c_min=c_min, - #c_max=c_max,true_dist=true_dist,vars_g=vars_g,weights_func=weights_func, - # npoints=npoints,use_log=use_log) - ((c1_true,c2_true),(c1_dec,c2_dec)) = self.evalDoubleC1C2Likelihood(w,dataset, c0,c1,c_eval=c_eval,c_min=c_min, - c_max=c_max,true_dist=true_dist,vars_g=vars_g,weights_func=weights_func, - npoints=npoints,use_log=use_log) - print '2: {0} {1} {2} {3}'.format(c1_true, c1_dec, c2_true, c2_dec) - fil1.write('{0} {1} {2} {3}\n'.format(c1_true, c1_dec, c2_true, c2_dec)) - #print '1: {0} {1}'.format(c1_true_1, c1_dec_1) - #fil1.write('{0} {1}\n'.format(c1_true_1, c1_dec_1)) - fil1.close() + verbose_printing = False + + ((c1_true, + c2_true), + (c1_dec, + c2_dec)) = self.evalC1C2Likelihood(c0, + c1, + dir=dir, + c1_g=c1_g, + model_g=model_g, + true_dist=true_dist, + vars_g=vars_g, + workspace=workspace, + use_log=use_log, + clf=clf, + verbose_printing=verbose_printing) + print '2: {0} {1} {2} {3}'.format(c1_true, c1_dec, c2_true, c2_dec) + + fil2.write( + '{0} {1} {2} {3}\n'.format( + c1_true, + c1_dec, + c2_true, + c2_dec)) + + fil2.close() \ No newline at end of file diff --git a/logistic_sgd.py b/logistic_sgd.py index ed1ef58..d8c55de 100644 --- a/logistic_sgd.py +++ b/logistic_sgd.py @@ -46,6 +46,7 @@ import theano import theano.tensor as T + class LogisticRegression(object): """Multi-class Logistic Regression Class @@ -55,7 +56,6 @@ class LogisticRegression(object): determine a class membership probability. """ - def __init__(self, input, n_in, n_out): """ Initialize the parameters of the logistic regression @@ -76,13 +76,12 @@ def __init__(self, input, n_in, n_out): # initialize with 0 the weights W as a matrix of shape (n_in, n_out) self.W = theano.shared(value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX), - name='W', borrow=True) + name='W', borrow=True) # initialize the baises b as a vector of n_out 0s self.b = theano.shared(value=numpy.zeros((n_out,), dtype=theano.config.floatX), name='b', borrow=True) - out = T.dot(input, self.W) + self.b # compute vector of class-membership probabilities in symbolic form self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b) @@ -122,8 +121,8 @@ def negative_log_likelihood(self, y): # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is # the mean (across minibatch examples) of the elements in v, # i.e., the mean log-likelihood across the minibatch. 
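# An illustrative, plain-numpy view of the indexing used in the return below:
# LP[T.arange(y.shape[0]), y] selects, for each row n, the log-probability the
# model assigns to the true class y[n]; the loss is the negative mean of those
# entries (the probabilities here are made up for the example).
import numpy
LP = numpy.log(numpy.array([[0.7, 0.3],
                            [0.2, 0.8],
                            [0.9, 0.1]]))      # log class probabilities, 3 examples
y = numpy.array([0, 1, 0])                     # true labels
per_example = LP[numpy.arange(y.shape[0]), y]  # log p(y_n | x_n) per example
nll = -per_example.mean()                      # mean negative log-likelihood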
- #return -(T.mean(T.log(self.p_y_given_x[y==1])) + T.mean(T.log(1.-self.p_y_given_x[y==0]))) - #return T.mean(T.nnet.binary_crossentropy(self.p_y_given_x, y)) + # return -(T.mean(T.log(self.p_y_given_x[y==1])) + T.mean(T.log(1.-self.p_y_given_x[y==0]))) + # return T.mean(T.nnet.binary_crossentropy(self.p_y_given_x, y)) return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]) def errors(self, y): @@ -139,7 +138,7 @@ def errors(self, y): # check if y has same dimension of y_pred if y.ndim != self.y_pred.ndim: raise TypeError('y should have the same shape as self.y_pred', - ('y', target.type, 'y_pred', self.y_pred.type)) + ('y', target.type, 'y_pred', self.y_pred.type)) # check if y is of the correct datatype if y.dtype.startswith('int'): # the T.neq operator returns a vector of 0s and 1s, where 1 @@ -164,7 +163,8 @@ def load_data(dataset): data_dir, data_file = os.path.split(dataset) if data_dir == "" and not os.path.isfile(dataset): # Check if dataset is in the data directory. - new_path = os.path.join(os.path.split(__file__)[0], "..", "data", dataset) + new_path = os.path.join( + os.path.split(__file__)[0], "..", "data", dataset) if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz': dataset = new_path @@ -180,12 +180,12 @@ def load_data(dataset): f = gzip.open(dataset, 'rb') train_set, valid_set, test_set = cPickle.load(f) f.close() - #train_set, valid_set, test_set format: tuple(input, target) - #input is an numpy.ndarray of 2 dimensions (a matrix) - #witch row's correspond to an example. target is a - #numpy.ndarray of 1 dimensions (vector)) that have the same length as - #the number of rows in the input. It should give the target - #target to the example with the same index in the input. + # train_set, valid_set, test_set format: tuple(input, target) + # input is an numpy.ndarray of 2 dimensions (a matrix) + # witch row's correspond to an example. target is a + # numpy.ndarray of 1 dimensions (vector)) that have the same length as + # the number of rows in the input. It should give the target + # target to the example with the same index in the input. 
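# A small, hypothetical example of the (input, target) layout described above:
# each of train/valid/test is a tuple whose first entry is a 2-D float array
# (one row per example) and whose second entry is a 1-D integer vector of
# matching length giving the target of the row with the same index.
import numpy
n_examples, n_features, n_classes = 100, 784, 10
inputs = numpy.random.rand(n_examples, n_features).astype('float32')
targets = numpy.random.randint(0, n_classes, size=n_examples)
train_set = (inputs, targets)
assert train_set[0].shape[0] == train_set[1].shape[0]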
def shared_dataset(data_xy, borrow=True): """ Function that loads the dataset into shared variables @@ -221,9 +221,11 @@ def shared_dataset(data_xy, borrow=True): return rval -def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, - dataset='/user/j/jgpavez/dbnTheano/code/mnist.pkl.gz', - batch_size=600): +def sgd_optimization_mnist( + learning_rate=0.13, + n_epochs=1000, + dataset='/user/j/jgpavez/dbnTheano/code/mnist.pkl.gz', + batch_size=600): """ Demonstrate stochastic gradient descent optimization of a log-linear model @@ -262,7 +264,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of - # [int] labels + # [int] labels # construct the logistic regression class # Each MNIST image has size 28*28 @@ -275,16 +277,16 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch test_model = theano.function(inputs=[index], - outputs=classifier.errors(y), - givens={ - x: test_set_x[index * batch_size: (index + 1) * batch_size], - y: test_set_y[index * batch_size: (index + 1) * batch_size]}) + outputs=classifier.errors(y), + givens={ + x: test_set_x[index * batch_size: (index + 1) * batch_size], + y: test_set_y[index * batch_size: (index + 1) * batch_size]}) validate_model = theano.function(inputs=[index], - outputs=classifier.errors(y), - givens={ - x: valid_set_x[index * batch_size:(index + 1) * batch_size], - y: valid_set_y[index * batch_size:(index + 1) * batch_size]}) + outputs=classifier.errors(y), + givens={ + x: valid_set_x[index * batch_size:(index + 1) * batch_size], + y: valid_set_y[index * batch_size:(index + 1) * batch_size]}) # compute the gradient of cost with respect to theta = (W,b) g_W = T.grad(cost=cost, wrt=classifier.W) @@ -299,11 +301,11 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, # the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function(inputs=[index], - outputs=cost, - updates=updates, - givens={ - x: train_set_x[index * batch_size:(index + 1) * batch_size], - y: train_set_y[index * batch_size:(index + 1) * batch_size]}) + outputs=cost, + updates=updates, + givens={ + x: train_set_x[index * batch_size:(index + 1) * batch_size], + y: train_set_y[index * batch_size:(index + 1) * batch_size]}) ############### # TRAIN MODEL # @@ -312,14 +314,14 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, # early-stopping parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is - # found + # found improvement_threshold = 0.995 # a relative improvement of this much is - # considered significant + # considered significant validation_frequency = min(n_train_batches, patience / 2) - # go through this many - # minibatche before checking the network - # on the validation set; in this case we - # check every epoch + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch best_params = None best_validation_loss = numpy.inf @@ -342,13 +344,13 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) - print('epoch %i, minibatch %i/%i, validation error %f %%' % \ - 
(epoch, minibatch_index + 1, n_train_batches, - this_validation_loss * 100.)) + print('epoch %i, minibatch %i/%i, validation error %f %%' % + (epoch, minibatch_index + 1, n_train_batches, + this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: - #improve patience if loss improvement is good enough + # improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) @@ -361,9 +363,9 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of best' - ' model %f %%') % - (epoch, minibatch_index + 1, n_train_batches, - test_score * 100.)) + ' model %f %%') % + (epoch, minibatch_index + 1, n_train_batches, + test_score * 100.)) if patience <= iter: done_looping = True @@ -372,7 +374,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % - (best_validation_loss * 100., test_score * 100.)) + (best_validation_loss * 100., test_score * 100.)) print 'The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + diff --git a/make_data.py b/make_data.py index 41e019c..cb48ed1 100644 --- a/make_data.py +++ b/make_data.py @@ -13,285 +13,449 @@ from mpl_toolkits.mplot3d import Axes3D from utils import printMultiFrame, printFrame, saveFig, loadData,\ - makeROC, makeSigBkg, makePlotName + makeROC, makeSigBkg, makePlotName ''' - Functions to make model and data for the decomposed training + Functions to make model and data for the decomposed training method ''' # Default model parameters # private coefficients -coeffs_g = [[ 0.28174199,0.46707738,0.25118062],[0.18294893,0.33386682,0.48318425],[ 0.25763285,0.28015834,0.46220881]] +coeffs_g = [[0.28174199, 0.46707738, 0.25118062], [0.18294893, + 0.33386682, 0.48318425], [0.25763285, 0.28015834, 0.46220881]] # gaussians parameters mu_g = [] cov_g = [] -mu_g.append([5.,5.,4.,3.,5.,5.,4.5,2.5,4.,3.5]) -mu_g.append([2.,4.5,0.6,5.,6.,4.5,4.2,0.2,4.1,3.3]) -mu_g.append([1.,0.5,0.3,0.5,0.6,0.4,0.1,0.2,0.1,0.3]) -meansum = [[7.6,10.,-9.],[5.,-6.,7.5],[8.2,12.2,-4.3]] - -cov_g.append([[3.,0.,5.,0.,0.,0.,0.,1.,0.,5.], - [0.,2.,0.,0.,0.,0.,0.,0.,0.,0.], - [0.,0.,14.,0.,0.,0.,4.2,0.,5.,0.], - [0.,0.,0.,6.,0.,0.,0.,3.,0.,0.], - [0.,0.,0.,0.,17.,0.,0.,2.,0.,0.], - [0.,0.,0.,0.,0.,10.,0.,0.,0.,0.], - [0.,0.,0.,0.,0.,0.,5.,0.,0.,0.], - [0.,0.,0.,0.,0.,0.,0.,1.3,1.,0.], - [0.,0.,0.,0.,0.,0.,0.,0.,1.,0.], - [0.,0.,0.,0.,0.,0.,0.,0.,0.,9.3]]) -cov_g.append([[3.5,0.,0.,4.,0.,0.,0.,0.,5.,0.], - [0.,3.5,0.,0.,0.,0.,0.,0.,0.,0.], - [0.,0.,9.5,0.,0.,2.,0.,0.5,0.,0.], - [0.,0.,0.,7.2,0.,0.,0.,0.,2.,0.], - [0.,0.,0.,0.,4.5,0.,0.,0.,0.,0.], - [0.,0.,0.,0.,0.,4.5,0.,0.,0.,0.], - [0.,0.,0.,0.,0.,0.,8.2,0.,0.,0.2], - [0.,0.,0.,0.,0.,0.,0.,9.5,3.,0.], - [0.,0.,0.,0.,0.,0.,0.,0.,3.5,0.], - [0.,0.,0.,0.,0.,0.,0.,0.,0.,4.5]]) -cov_g.append([[13.,0.,0.,0.,0.,0.,0.,0.,0.,0.], - [0.,12.,0.,0.,0.,0.2,0.,4.,0.,0.], - [0.,0.,14.,0.,0.5,0.,0.,0.,0.,3.], - [0.,0.,0.,6.,0.,0.,0.,0.,0.,0.], - [0.,0.,0.,0.,1.,2.,0.,0.,0.,0.], - [0.,0.,0.,0.,0.,10.,0.,3.,0.,0.], - [0.,0.,0.,0.,0.,0.,15.,0.,0.,4.], - [0.,0.,0.,0.,0.,0.,0.,6.3,0.,0.], - [0.,0.,0.,0.,0.,0.,0.,0.,11.,0.], - [0.,0.,0.,0.,0.,0.,0.,0.,0.,1.3]]) - -def 
makeModelPrivateND(vars_g,c0, c1, n_private=3, coeffs=coeffs_g,cov_l=cov_g, mu_l=mu_g, - workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', - dir='/afs/cern.ch/user/j/jpavezse/systematics',model_g='mlp', - c1_g='',verbose_printing=False,load_cov=False): - ''' - RooFit statistical model for the data - - ''' - # Statistical model - w = ROOT.RooWorkspace('w') - - print 'Generating initial distributions' - cov_m = [] - mu_m = [] - mu_str = [] - cov_root = [] - vec = [] - argus = ROOT.RooArgList() - - # features - for i,var in enumerate(vars_g): - w.factory('{0}[{1},{2}]'.format(var,-25,30)) - argus.add(w.var(var)) - n = len(cov_l[0][0]) - for glob in range(3): - for priv in range(n_private): - if load_cov == False: - cov_i = np.random.random((n,n)) - cov_i = cov_i + cov_i.transpose() - cov_i = cov_i + n*np.eye(n) - np.savetxt('{0}/covariance_{1}_{2}.txt'.format(dir,glob,priv), - cov_i,fmt='%f') - else: - cov_i = np.matrix(np.loadtxt('{0}/data/covariance_{1}_{2}.txt'.format( - dir,glob,priv))) - print cov_i - # generate covriance matrix - cov_m.append(cov_i) - cov_root.append(ROOT.TMatrixDSym(len(vars_g))) - for i,var1 in enumerate(vars_g): - for j,var2 in enumerate(vars_g): - if i <= j: - cov_root[-1][i][j] = cov_m[-1][i,j] - else: - cov_root[-1][i][j] = cov_m[-1][j,i] - getattr(w,'import')(cov_root[-1],'cov{0}'.format(glob*3 + priv)) - # generate mu vectors - mu_m.append(np.array(mu_l[glob]) + meansum[glob][priv]) - vec.append(ROOT.TVectorD(len(vars_g))) - for i, mu in enumerate(mu_m[-1]): - vec[-1][i] = mu - mu_str.append(','.join([str(mu) for mu in mu_m[-1]])) - # create multivariate gaussian - gaussian = ROOT.RooMultiVarGaussian('f{0}_{1}'.format(glob,priv), - 'f{0}_{1}'.format(glob,priv),argus,vec[-1],cov_root[-1]) - getattr(w,'import')(gaussian) - # create private mixture model - priv_coeffs = np.array(coeffs[glob]) - #print 'priv coef {0} {1}'.format(priv_coeffs, priv_coeffs.sum()) - sum_str = ','.join(['c_{0}_{1}[{2}]*f{0}_{1}'.format(glob,j,priv_coeffs[j]) for j in range(n_private)]) - w.factory('SUM::f{0}({1})'.format(glob,sum_str)) - #mixture model - w.factory("SUM::F0(c00[{0}]*f0,c01[{1}]*f1,f2)".format(c0[0],c0[1])) - w.factory("SUM::F1(c10[{0}]*f0,c11[{1}]*f1,f2)".format(c1[0],c1[1])) - - # Check Model - w.Print() - - w.writeToFile('{0}/{1}'.format(dir,workspace)) - if verbose_printing == True: - printFrame(w,['x0','x1','x2'],[w.pdf('f0'),w.pdf('f1'),w.pdf('f2')],'decomposed_model',['f0','f1','f2'] - ,dir=dir,model_g=model_g,range=[-15,20],title='Single distributions',x_text='x0',y_text='p(x)') - printFrame(w,['x0','x1','x2'],[w.pdf('F0'),w.pdf('F1')],'full_model',['Bkg','Bkg+Signal'], - dir=dir,model_g=model_g,range=[-15,20],title='Composed model',x_text='x0',y_text='p(x)') - printFrame(w,['x0','x1','x2'],[w.pdf('F1'),'f0'],'full_signal', ['Bkg','Signal'], - dir=dir,model_g=model_g,range=[-15,20],title='Background and signal',x_text='x0',y_text='p(x)') - - return w - -def makeModelND(vars_g,c0,c1,cov_l=cov_g,mu_l=mu_g, - workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', - dir='/afs/cern.ch/user/j/jpavezse/systematics',model_g='mlp', - c1_g='',verbose_printing=False): - ''' - RooFit statistical model for the data - - ''' - # Statistical model - w = ROOT.RooWorkspace('w') - - print 'Generating initial distributions' - cov_m = [] - mu_m = [] - mu_str = [] - cov_root = [] - vec = [] - argus = ROOT.RooArgList() - #features - for i,var in enumerate(vars_g): - w.factory('{0}[{1},{2}]'.format(var,-25,30)) - argus.add(w.var(var)) - - for glob in 
range(3): - # generate covariance matrix - cov_m.append(np.matrix(cov_l[glob])) - cov_root.append(ROOT.TMatrixDSym(len(vars_g))) - for i,var1 in enumerate(vars_g): - for j,var2 in enumerate(vars_g): - cov_root[-1][i][j] = cov_m[-1][i,j] - getattr(w,'import')(cov_root[-1],'cov{0}'.format(glob)) - # generate mu vector - mu_m.append(np.array(mu_l[glob])) - vec.append(ROOT.TVectorD(len(vars_g))) - for i, mu in enumerate(mu_m[-1]): - vec[-1][i] = mu - mu_str.append(','.join([str(mu) for mu in mu_m[-1]])) - # multivariate gaussian - gaussian = ROOT.RooMultiVarGaussian('f{0}'.format(glob), - 'f{0}'.format(glob),argus,vec[-1],cov_root[-1]) - getattr(w,'import')(gaussian) - # mixture models - w.factory("SUM::F0(c00[{0}]*f0,c01[{1}]*f1,f2)".format(c0[0],c0[1])) - w.factory("SUM::F1(c10[{0}]*f0,c11[{1}]*f1,f2)".format(c1[0],c1[1])) - - # Check Model - w.Print() - - w.writeToFile('{0}/{1}'.format(dir,workspace)) - if verbose_printing == True: - printFrame(w,['x0','x1','x2'],[w.pdf('f0'),w.pdf('f1'),w.pdf('f2')],'decomposed_model',['f0','f1','f2'] - ,dir=dir,model_g=model_g,range=[-15,20],title='Single distributions',x_text='x0',y_text='p(x)',print_pdf=True) - printFrame(w,['x0','x1','x2'],[w.pdf('F0'),w.pdf('F1')],'full_model',['Bkg','Bkg+Signal'], - dir=dir,model_g=model_g,range=[-15,20],title='Composed model',x_text='x0',y_text='p(x)',print_pdf=True) - printFrame(w,['x0','x1','x2'],[w.pdf('F1'),'f0'],'full_signal', ['Bkg','Signal'], - dir=dir,model_g=model_g,range=[-15,20],title='Background and signal',x_text='x0',y_text='p(x)',print_pdf=True) - - return w - - -def makeModel(c0,c1,cov_l=cov_g,mu_l=mu_g, - workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', - dir='/afs/cern.ch/user/j/jpavezse/systematics',model_g='mlp', - c1_g='',verbose_printing=False): - ''' - RooFit statistical model for the data - - ''' - # Statistical model - w = ROOT.RooWorkspace('w') - #w.factory("EXPR::f1('cos(x)**2 + .01',x)") - w.factory("EXPR::f2('exp(x*-1)',x[0,5])") - w.factory("EXPR::f1('0.3 + exp(-(x-5)**2/5.)',x)") - w.factory("EXPR::f0('exp(-(x-2.5)**2/1.)',x)") - #w.factory("EXPR::f2('exp(-(x-2)**2/2)',x)") - w.factory("SUM::F0(c00[{0}]*f0,c01[{1}]*f1,f2)".format(c0[0],c0[1])) - w.factory("SUM::F1(c10[{0}]*f0,c11[{1}]*f1,f2)".format(c1[0],c1[1])) - - # Check Model - w.Print() - w.writeToFile('{0}/workspace_DecomposingTestOfMixtureModelsClassifiers.root'.format(dir)) - if verbose_printing == True: - printFrame(w,['x'],[w.pdf('f0'),w.pdf('f1'),w.pdf('f2')],'decomposed_model',['f0','f1','f2'] - ,dir=dir,model_g=model_g,range=[-15,20],title='Single distributions',x_text='x0',y_text='p(x)', - print_pdf=True) - printFrame(w,['x'],[w.pdf('F0'),w.pdf('F1')],'full_model',['Bkg','Bkg+Signal'], - dir=dir,model_g=model_g,range=[-15,20],title='Composed model',x_text='x0',y_text='p(x)',print_pdf=True) - printFrame(w,['x'],[w.pdf('F1'),'f0'],'full_signal', ['Bkg','Signal'], - dir=dir,model_g=model_g,range=[-15,20],title='Background and signal',x_text='x0',y_text='p(x)', - print_pdf=True) - - -def makeData(vars_g,c0,c1, num_train=500,num_test=100,no_train=False, - workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', - dir='/afs/cern.ch/user/j/jpavezse/systematics', - c1_g='',model_g='mlp'): - # Start generating data - ''' - Each function will be discriminated pair-wise - so n*n datasets are needed (maybe they can be reused?) 
- ''' +mu_g.append([5., 5., 4., 3., 5., 5., 4.5, 2.5, 4., 3.5]) +mu_g.append([2., 4.5, 0.6, 5., 6., 4.5, 4.2, 0.2, 4.1, 3.3]) +mu_g.append([1., 0.5, 0.3, 0.5, 0.6, 0.4, 0.1, 0.2, 0.1, 0.3]) +meansum = [[7.6, 10., -9.], [5., -6., 7.5], [8.2, 12.2, -4.3]] + +cov_g.append([[3., 0., 5., 0., 0., 0., 0., 1., 0., 5.], + [0., 2., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 14., 0., 0., 0., 4.2, 0., 5., 0.], + [0., 0., 0., 6., 0., 0., 0., 3., 0., 0.], + [0., 0., 0., 0., 17., 0., 0., 2., 0., 0.], + [0., 0., 0., 0., 0., 10., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 5., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 1.3, 1., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 9.3]]) +cov_g.append([[3.5, 0., 0., 4., 0., 0., 0., 0., 5., 0.], + [0., 3.5, 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 9.5, 0., 0., 2., 0., 0.5, 0., 0.], + [0., 0., 0., 7.2, 0., 0., 0., 0., 2., 0.], + [0., 0., 0., 0., 4.5, 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 4.5, 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 8.2, 0., 0., 0.2], + [0., 0., 0., 0., 0., 0., 0., 9.5, 3., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 3.5, 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 4.5]]) +cov_g.append([[13., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 12., 0., 0., 0., 0.2, 0., 4., 0., 0.], + [0., 0., 14., 0., 0.5, 0., 0., 0., 0., 3.], + [0., 0., 0., 6., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 1., 2., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 10., 0., 3., 0., 0.], + [0., 0., 0., 0., 0., 0., 15., 0., 0., 4.], + [0., 0., 0., 0., 0., 0., 0., 6.3, 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 11., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.3]]) + + +def makeModelPrivateND( + vars_g, + c0, + c1, + n_private=3, + coeffs=coeffs_g, + cov_l=cov_g, + mu_l=mu_g, + workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', + dir='/afs/cern.ch/user/j/jpavezse/systematics', + model_g='mlp', + c1_g='', + verbose_printing=False, + load_cov=False): + ''' + RooFit statistical model for the data + + ''' + # Statistical model + w = ROOT.RooWorkspace('w') + + print 'Generating initial distributions' + cov_m = [] + mu_m = [] + mu_str = [] + cov_root = [] + vec = [] + argus = ROOT.RooArgList() + + # features + for i, var in enumerate(vars_g): + w.factory('{0}[{1},{2}]'.format(var, -25, 30)) + argus.add(w.var(var)) + n = len(cov_l[0][0]) + for glob in range(3): + for priv in range(n_private): + if not load_cov: + cov_i = np.random.random((n, n)) + cov_i = cov_i + cov_i.transpose() + cov_i = cov_i + n * np.eye(n) + np.savetxt('{0}/covariance_{1}_{2}.txt'.format(dir, + glob, priv), cov_i, fmt='%f') + else: + cov_i = np.matrix(np.loadtxt( + '{0}/data/covariance_{1}_{2}.txt'.format(dir, glob, priv))) + print cov_i + # generate covriance matrix + cov_m.append(cov_i) + cov_root.append(ROOT.TMatrixDSym(len(vars_g))) + for i, var1 in enumerate(vars_g): + for j, var2 in enumerate(vars_g): + if i <= j: + cov_root[-1][i][j] = cov_m[-1][i, j] + else: + cov_root[-1][i][j] = cov_m[-1][j, i] + getattr(w, 'import')(cov_root[-1], + 'cov{0}'.format(glob * 3 + priv)) + # generate mu vectors + mu_m.append(np.array(mu_l[glob]) + meansum[glob][priv]) + vec.append(ROOT.TVectorD(len(vars_g))) + for i, mu in enumerate(mu_m[-1]): + vec[-1][i] = mu + mu_str.append(','.join([str(mu) for mu in mu_m[-1]])) + # create multivariate gaussian + gaussian = ROOT.RooMultiVarGaussian('f{0}_{1}'.format( + glob, priv), 'f{0}_{1}'.format(glob, priv), argus, vec[-1], cov_root[-1]) + getattr(w, 'import')(gaussian) + # create private mixture model + priv_coeffs = 
np.array(coeffs[glob]) + # print 'priv coef {0} {1}'.format(priv_coeffs, priv_coeffs.sum()) + sum_str = ','.join( + ['c_{0}_{1}[{2}]*f{0}_{1}'.format(glob, j, priv_coeffs[j]) for j in range(n_private)]) + w.factory('SUM::f{0}({1})'.format(glob, sum_str)) + # mixture model + w.factory("SUM::F0(c00[{0}]*f0,c01[{1}]*f1,f2)".format(c0[0], c0[1])) + w.factory("SUM::F1(c10[{0}]*f0,c11[{1}]*f1,f2)".format(c1[0], c1[1])) + + # Check Model + w.Print() + + w.writeToFile('{0}/{1}'.format(dir, workspace)) + if verbose_printing: + printFrame(w, + ['x0', + 'x1', + 'x2'], + [w.pdf('f0'), + w.pdf('f1'), + w.pdf('f2')], + 'decomposed_model', + ['f0', + 'f1', + 'f2'], + dir=dir, + model_g=model_g, + range=[-15, + 20], + title='Single distributions', + x_text='x0', + y_text='p(x)') + printFrame(w, + ['x0', + 'x1', + 'x2'], + [w.pdf('F0'), + w.pdf('F1')], + 'full_model', + ['Bkg', + 'Bkg+Signal'], + dir=dir, + model_g=model_g, + range=[-15, + 20], + title='Composed model', + x_text='x0', + y_text='p(x)') + printFrame(w, + ['x0', + 'x1', + 'x2'], + [w.pdf('F1'), + 'f0'], + 'full_signal', + ['Bkg', + 'Signal'], + dir=dir, + model_g=model_g, + range=[-15, + 20], + title='Background and signal', + x_text='x0', + y_text='p(x)') + + return w + + +def makeModelND( + vars_g, + c0, + c1, + cov_l=cov_g, + mu_l=mu_g, + workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', + dir='/afs/cern.ch/user/j/jpavezse/systematics', + model_g='mlp', + c1_g='', + verbose_printing=False): + ''' + RooFit statistical model for the data - f = ROOT.TFile('{0}/{1}'.format(dir,workspace)) - w = f.Get('w') - f.Close() + ''' + # Statistical model + w = ROOT.RooWorkspace('w') - print 'Making Data' - # Start generating data - ''' + print 'Generating initial distributions' + cov_m = [] + mu_m = [] + mu_str = [] + cov_root = [] + vec = [] + argus = ROOT.RooArgList() + # features + for i, var in enumerate(vars_g): + w.factory('{0}[{1},{2}]'.format(var, -25, 30)) + argus.add(w.var(var)) + + for glob in range(3): + # generate covariance matrix + cov_m.append(np.matrix(cov_l[glob])) + cov_root.append(ROOT.TMatrixDSym(len(vars_g))) + for i, var1 in enumerate(vars_g): + for j, var2 in enumerate(vars_g): + cov_root[-1][i][j] = cov_m[-1][i, j] + getattr(w, 'import')(cov_root[-1], 'cov{0}'.format(glob)) + # generate mu vector + mu_m.append(np.array(mu_l[glob])) + vec.append(ROOT.TVectorD(len(vars_g))) + for i, mu in enumerate(mu_m[-1]): + vec[-1][i] = mu + mu_str.append(','.join([str(mu) for mu in mu_m[-1]])) + # multivariate gaussian + gaussian = ROOT.RooMultiVarGaussian('f{0}'.format( + glob), 'f{0}'.format(glob), argus, vec[-1], cov_root[-1]) + getattr(w, 'import')(gaussian) + # mixture models + w.factory("SUM::F0(c00[{0}]*f0,c01[{1}]*f1,f2)".format(c0[0], c0[1])) + w.factory("SUM::F1(c10[{0}]*f0,c11[{1}]*f1,f2)".format(c1[0], c1[1])) + + # Check Model + w.Print() + + w.writeToFile('{0}/{1}'.format(dir, workspace)) + if verbose_printing: + printFrame(w, + ['x0', + 'x1', + 'x2'], + [w.pdf('f0'), + w.pdf('f1'), + w.pdf('f2')], + 'decomposed_model', + ['f0', + 'f1', + 'f2'], + dir=dir, + model_g=model_g, + range=[-15, + 20], + title='Single distributions', + x_text='x0', + y_text='p(x)', + print_pdf=True) + printFrame(w, + ['x0', + 'x1', + 'x2'], + [w.pdf('F0'), + w.pdf('F1')], + 'full_model', + ['Bkg', + 'Bkg+Signal'], + dir=dir, + model_g=model_g, + range=[-15, + 20], + title='Composed model', + x_text='x0', + y_text='p(x)', + print_pdf=True) + printFrame(w, + ['x0', + 'x1', + 'x2'], + [w.pdf('F1'), + 'f0'], + 'full_signal', + ['Bkg', + 
'Signal'], + dir=dir, + model_g=model_g, + range=[-15, + 20], + title='Background and signal', + x_text='x0', + y_text='p(x)', + print_pdf=True) + + return w + + +def makeModel( + c0, + c1, + cov_l=cov_g, + mu_l=mu_g, + workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', + dir='/afs/cern.ch/user/j/jpavezse/systematics', + model_g='mlp', + c1_g='', + verbose_printing=False): + ''' + RooFit statistical model for the data + + ''' + # Statistical model + w = ROOT.RooWorkspace('w') + #w.factory("EXPR::f1('cos(x)**2 + .01',x)") + w.factory("EXPR::f2('exp(x*-1)',x[0,5])") + w.factory("EXPR::f1('0.3 + exp(-(x-5)**2/5.)',x)") + w.factory("EXPR::f0('exp(-(x-2.5)**2/1.)',x)") + # w.factory("EXPR::f2('exp(-(x-2)**2/2)',x)") + w.factory("SUM::F0(c00[{0}]*f0,c01[{1}]*f1,f2)".format(c0[0], c0[1])) + w.factory("SUM::F1(c10[{0}]*f0,c11[{1}]*f1,f2)".format(c1[0], c1[1])) + + # Check Model + w.Print() + w.writeToFile( + '{0}/workspace_DecomposingTestOfMixtureModelsClassifiers.root'.format(dir)) + if verbose_printing: + printFrame(w, + ['x'], + [w.pdf('f0'), + w.pdf('f1'), + w.pdf('f2')], + 'decomposed_model', + ['f0', + 'f1', + 'f2'], + dir=dir, + model_g=model_g, + range=[-15, + 20], + title='Single distributions', + x_text='x0', + y_text='p(x)', + print_pdf=True) + printFrame(w, + ['x'], + [w.pdf('F0'), + w.pdf('F1')], + 'full_model', + ['Bkg', + 'Bkg+Signal'], + dir=dir, + model_g=model_g, + range=[-15, + 20], + title='Composed model', + x_text='x0', + y_text='p(x)', + print_pdf=True) + printFrame(w, + ['x'], + [w.pdf('F1'), + 'f0'], + 'full_signal', + ['Bkg', + 'Signal'], + dir=dir, + model_g=model_g, + range=[-15, + 20], + title='Background and signal', + x_text='x0', + y_text='p(x)', + print_pdf=True) + + +def makeData( + vars_g, + c0, + c1, + num_train=500, + num_test=100, + no_train=False, + workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', + dir='/afs/cern.ch/user/j/jpavezse/systematics', + c1_g='', + model_g='mlp'): + # Start generating data + ''' + Each function will be discriminated pair-wise + so n*n datasets are needed (maybe they can be reused?) + ''' + + f = ROOT.TFile('{0}/{1}'.format(dir, workspace)) + w = f.Get('w') + f.Close() + + print 'Making Data' + # Start generating data + ''' Each function will be discriminated pair-wise so n*n datasets are needed (maybe they can be reused?) 
- ''' - - # make data from root pdf - def makeDataFi(x, pdf, num): - traindata = np.zeros((num,len(vars_g))) - data = pdf.generate(x,num) - traindata[:] = [[data.get(i).getRealValue(var) for var in vars_g] - for i in range(num)] - return traindata - - # features - vars = ROOT.TList() - for var in vars_g: - vars.Add(w.var(var)) - x = ROOT.RooArgSet(vars) - - # make data from pdf and save to .dat in folder - # ./data/{model}/{c1} - for k,c in enumerate(c0): - print 'Making {0}'.format(k) - if not no_train: - traindata = makeDataFi(x,w.pdf('f{0}'.format(k)), num_train) - np.savetxt('{0}/data/{1}/{2}/train_{3}.dat'.format(dir,model_g,c1_g,k), - traindata,fmt='%f') - testdata = makeDataFi(x, w.pdf('f{0}'.format(k)), num_test) - np.savetxt('{0}/data/{1}/{2}/test_{3}.dat'.format(dir,model_g,c1_g,k), - testdata,fmt='%f') - if not no_train: - traindata = makeDataFi(x,w.pdf('F0'), num_train) - np.savetxt('{0}/data/{1}/{2}/train_F0.dat'.format(dir,model_g,c1_g), - traindata,fmt='%f') - traindata = makeDataFi(x,w.pdf('F1'), num_train) - np.savetxt('{0}/data/{1}/{2}/train_F1.dat'.format(dir,model_g,c1_g), - traindata,fmt='%f') - testdata = makeDataFi(x, w.pdf('F0'), num_test) - np.savetxt('{0}/data/{1}/{2}/test_F0.dat'.format(dir,model_g,c1_g), - testdata,fmt='%f') - testdata = makeDataFi(x, w.pdf('F1'), num_test) - np.savetxt('{0}/data/{1}/{2}/test_F1.dat'.format(dir,model_g,c1_g), - testdata,fmt='%f') + ''' + # make data from root pdf + def makeDataFi(x, pdf, num): + traindata = np.zeros((num, len(vars_g))) + data = pdf.generate(x, num) + traindata[:] = [[data.get(i).getRealValue(var) for var in vars_g] + for i in range(num)] + return traindata + # features + vars = ROOT.TList() + for var in vars_g: + vars.Add(w.var(var)) + x = ROOT.RooArgSet(vars) + + # make data from pdf and save to .dat in folder + # ./data/{model}/{c1} + for k, c in enumerate(c0): + print 'Making {0}'.format(k) + if not no_train: + traindata = makeDataFi(x, w.pdf('f{0}'.format(k)), num_train) + np.savetxt('{0}/data/{1}/{2}/train_{3}.dat'.format(dir, + model_g, c1_g, k), traindata, fmt='%f') + testdata = makeDataFi(x, w.pdf('f{0}'.format(k)), num_test) + np.savetxt('{0}/data/{1}/{2}/test_{3}.dat'.format(dir, + model_g, c1_g, k), testdata, fmt='%f') + if not no_train: + traindata = makeDataFi(x, w.pdf('F0'), num_train) + np.savetxt('{0}/data/{1}/{2}/train_F0.dat'.format(dir, model_g, c1_g), + traindata, fmt='%f') + traindata = makeDataFi(x, w.pdf('F1'), num_train) + np.savetxt('{0}/data/{1}/{2}/train_F1.dat'.format(dir, model_g, c1_g), + traindata, fmt='%f') + testdata = makeDataFi(x, w.pdf('F0'), num_test) + np.savetxt('{0}/data/{1}/{2}/test_F0.dat'.format(dir, model_g, c1_g), + testdata, fmt='%f') + testdata = makeDataFi(x, w.pdf('F1'), num_test) + np.savetxt('{0}/data/{1}/{2}/test_F1.dat'.format(dir, model_g, c1_g), + testdata, fmt='%f') diff --git a/mlp.py b/mlp.py index fb95651..02fafdb 100644 --- a/mlp.py +++ b/mlp.py @@ -17,7 +17,9 @@ from logistic_sgd import LogisticRegression + class HiddenLayer(object): + def __init__(self, rng, input, n_in, n_out, W=None, b=None, activation=T.tanh): """ @@ -61,9 +63,9 @@ def __init__(self, rng, input, n_in, n_out, W=None, b=None, # tanh. if W is None: W_values = numpy.asarray(rng.uniform( - low=-numpy.sqrt(6. / (n_in + n_out)), - high=numpy.sqrt(6. / (n_in + n_out)), - size=(n_in, n_out)), dtype=theano.config.floatX) + low=-numpy.sqrt(6. / (n_in + n_out)), + high=numpy.sqrt(6. 
/ (n_in + n_out)), + size=(n_in, n_out)), dtype=theano.config.floatX) if activation == theano.tensor.nnet.sigmoid: W_values *= 4 @@ -135,12 +137,12 @@ def __init__(self, rng, input, n_in, n_hidden, n_out): # L1 norm ; one regularization option is to enforce L1 norm to # be small self.L1 = abs(self.hiddenLayer.W).sum() \ - + abs(self.logRegressionLayer.W).sum() + + abs(self.logRegressionLayer.W).sum() # square of L2 norm ; one regularization option is to enforce # square of L2 norm to be small self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \ - + (self.logRegressionLayer.W ** 2).sum() + + (self.logRegressionLayer.W ** 2).sum() # negative log likelihood of the MLP is given by the negative # log likelihood of the output of the model, computed in the @@ -161,27 +163,28 @@ def save_model(self, best_params, filename="params.pkl"): ''' Save parameters of the model ''' print '...saving model' - #if not os.path.isdir(save_dir): + # if not os.path.isdir(save_dir): # os.makedirs(save_dir) #save_file= open(os.path.join(save_dir, filename),'wb') - save_file= open(filename,'wb') + save_file = open(filename, 'wb') cPickle.dump(best_params, save_file, protocol=cPickle.HIGHEST_PROTOCOL) save_file.close() def load_model(self, filename="params.pkl"): ''' Save parameters of the model ''' - #print '...loading model' + # print '...loading model' save_file = open(filename, 'r') params = cPickle.load(save_file) save_file.close() - + self.hiddenLayer.W.set_value(params[0].get_value(), borrow=True) self.hiddenLayer.b.set_value(params[1].get_value(), borrow=True) self.logRegressionLayer.W.set_value(params[2].get_value(), borrow=True) self.logRegressionLayer.b.set_value(params[3].get_value(), borrow=True) + def shared_dataset(data_xy, borrow=True): """ Function that loads the dataset into shared variables @@ -193,7 +196,7 @@ def shared_dataset(data_xy, borrow=True): """ data_x, data_y = data_xy shared_x = theano.shared(numpy.asmatrix(data_x, - dtype=theano.config.floatX), + dtype=theano.config.floatX), borrow=borrow) shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), @@ -207,31 +210,39 @@ def shared_dataset(data_xy, borrow=True): # lets ous get around this issue return shared_x, T.cast(shared_y, 'int32') -def make_predictions(dataset, learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=10, - batch_size=20, n_hidden=40,in_size=1,out_size=2, - model_file='model/mlp/adaptive_0_1.pkl'): + +def make_predictions( + dataset, + learning_rate=0.01, + L1_reg=0.00, + L2_reg=0.0001, + n_epochs=10, + batch_size=20, + n_hidden=40, + in_size=1, + out_size=2, + model_file='model/mlp/adaptive_0_1.pkl'): test_set_x = dataset in_size = test_set_x.shape[1] if len(test_set_x.shape) > 1 else 1 if in_size == 1: - test_set_x = test_set_x.reshape(test_set_x.shape[0],1) + test_set_x = test_set_x.reshape(test_set_x.shape[0], 1) else: - test_set_x = test_set_x.reshape(test_set_x.shape[0],in_size) + test_set_x = test_set_x.reshape(test_set_x.shape[0], in_size) # quick fix to avoid more change of code, have to change it test_set_y = numpy.ones(test_set_x.shape[0]) test_set_x, test_set_y = shared_dataset((test_set_x, test_set_y)) - ###################### # BUILD ACTUAL MODEL # ###################### - #print '... building the model' + # print '... 
building the model' # allocate symbolic variables for the data x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of - # [int] labels + # [int] labels rng = numpy.random.RandomState(1234) @@ -239,19 +250,28 @@ def make_predictions(dataset, learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_ classifier = MLP(rng=rng, input=x, n_in=in_size, n_hidden=n_hidden, n_out=out_size) - classifier.load_model(filename=model_file) - test_model = theano.function([], [classifier.predictions,classifier.y_out], - givens={x: test_set_x}) + test_model = theano.function( + [], [ + classifier.predictions, classifier.y_out], givens={ + x: test_set_x}) predictions, probs = test_model() return probs -def train_mlp(datasets,learning_rate=0.01, L1_reg=0.000, L2_reg=0.0001, n_epochs=100, - batch_size=50, n_hidden=40,in_size=1,out_size=2, - save_file='model/mlp/adaptive_0_1.pkl'): +def train_mlp( + datasets, + learning_rate=0.01, + L1_reg=0.000, + L2_reg=0.0001, + n_epochs=100, + batch_size=50, + n_hidden=40, + in_size=1, + out_size=2, + save_file='model/mlp/adaptive_0_1.pkl'): train_set_x, train_set_y = datasets in_size = train_set_x.shape[1] if len(train_set_x.shape) > 1 else 1 @@ -260,10 +280,10 @@ def train_mlp(datasets,learning_rate=0.01, L1_reg=0.000, L2_reg=0.0001, n_epochs train_set_x = train_set_x[indices] train_set_y = train_set_y[indices] if in_size == 1: - train_set_x = train_set_x.reshape(train_set_x.shape[0],1) + train_set_x = train_set_x.reshape(train_set_x.shape[0], 1) else: - train_set_x = train_set_x.reshape(train_set_x.shape[0],in_size) - train_set_x , train_set_y = shared_dataset((train_set_x, train_set_y)) + train_set_x = train_set_x.reshape(train_set_x.shape[0], in_size) + train_set_x, train_set_y = shared_dataset((train_set_x, train_set_y)) # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size @@ -277,8 +297,7 @@ def train_mlp(datasets,learning_rate=0.01, L1_reg=0.000, L2_reg=0.0001, n_epochs index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of - # [int] labels - + # [int] labels # construct the MLP class classifier = MLP(rng=rng, input=x, n_in=in_size, @@ -288,8 +307,8 @@ def train_mlp(datasets,learning_rate=0.01, L1_reg=0.000, L2_reg=0.0001, n_epochs # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = classifier.negative_log_likelihood(y) \ - + L1_reg * classifier.L1 \ - + L2_reg * classifier.L2_sqr + + L1_reg * classifier.L1 \ + + L2_reg * classifier.L2_sqr # compute the gradient of cost with respect to theta (sotred in params) # the resulting gradients will be stored in a list gparams @@ -312,10 +331,10 @@ def train_mlp(datasets,learning_rate=0.01, L1_reg=0.000, L2_reg=0.0001, n_epochs # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function(inputs=[index], outputs=cost, - updates=updates, - givens={ - x: train_set_x[index * batch_size:(index + 1) * batch_size], - y: train_set_y[index * batch_size:(index + 1) * batch_size]}) + updates=updates, + givens={ + x: train_set_x[index * batch_size:(index + 1) * batch_size], + y: train_set_y[index * batch_size:(index + 1) * batch_size]}) ############### # TRAIN MODEL # @@ -330,17 +349,17 @@ def train_mlp(datasets,learning_rate=0.01, L1_reg=0.000, L2_reg=0.0001, 
n_epochs epoch = 0 done_looping = False - + while (epoch < n_epochs) and (not done_looping): minibatch_avg_cost = 0. epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): - minibatch_avg_cost += train_model(minibatch_index)/batch_size + minibatch_avg_cost += train_model(minibatch_index) / batch_size # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index print 'Epoch: {0}, cost: {1}'.format( - epoch, minibatch_avg_cost) + epoch, minibatch_avg_cost) best_params = copy.deepcopy(classifier.params) classifier.save_model(best_params, save_file) @@ -348,15 +367,18 @@ def train_mlp(datasets,learning_rate=0.01, L1_reg=0.000, L2_reg=0.0001, n_epochs # This is a wrapper in order to make use on decomposing test easier class MLPTrainer(): - def __init__(self,n_hidden=40, L2_reg=0.001): + + def __init__(self, n_hidden=40, L2_reg=0.001): self.n_hidden = n_hidden self.L2_reg = L2_reg - def fit(self, X, y,save_file=''): - train_mlp((X,y), + def fit(self, X, y, save_file=''): + train_mlp((X, y), save_file=save_file, - n_hidden = self.n_hidden, L2_reg = self.L2_reg) - def predict_proba(self, X, model_file='model'): - return make_predictions(dataset=X, model_file=model_file, n_hidden = self.n_hidden) + n_hidden=self.n_hidden, L2_reg=self.L2_reg) - \ No newline at end of file + def predict_proba(self, X, model_file='model'): + return make_predictions( + dataset=X, + model_file=model_file, + n_hidden=self.n_hidden) diff --git a/pyMorphWrapper.py b/pyMorphWrapper.py deleted file mode 100644 index 2920fdd..0000000 --- a/pyMorphWrapper.py +++ /dev/null @@ -1,501 +0,0 @@ -''' -Simple python Wrapper for EFT Morphing code -author: jpavezse -''' - - -from ctypes import * -import numpy as np -import pdb -import time -from itertools import combinations -import copy - -from deap import base -from deap import tools -from deap import creator -from deap import algorithms -from scoop import futures -import multiprocessing - - -#reading C++ library -lib = cdll.LoadLibrary('./RandomEFT/RandomEFT/libcMorphWrapper.so') -#lib.getWeights.restype = c_char_p -lib.getCrossSections.restype = c_char_p - - -def initInd(icls, size, samples): - pop1 = np.zeros(len(samples)) - idx1 = np.random.choice(len(samples),size,replace=False) - pop1[idx1] = 1 - pop2 = np.zeros(len(samples)) - idx2 = list(set(range(len(samples))) - set(idx1)) - idx2 = np.random.choice(idx2, size, replace=False) - pop2[idx2] = 1 - return icls(np.append(pop1,pop2).astype(int)) - - -def mutateInd(individual): - # External mutation - size = len(individual) / 2 - ind_arr = np.array(individual) - i = np.random.choice(np.where(np.logical_and(ind_arr[:size] == 0, ind_arr[size:] == 0))[0]) - base = np.random.choice((0,1)) - individual[np.random.choice(np.where(np.array(individual)[base*size:(base+1)*size] == 1)[0]) + base*size] = 0 - individual[i + size*base] = 1 - # Internal Mutation - ind_arr = np.array(individual) - i = np.random.choice(np.where(np.logical_and(ind_arr[:size] == base, ind_arr[size:] == 1-base))[0]) - j = np.random.choice(np.where(np.logical_and(ind_arr[:size] == 1-base, ind_arr[size:] == base))[0]) - individual[i],individual[i+size] = individual[i+size],individual[i] - individual[j],individual[j+size] = individual[j+size],individual[j] - return individual, - -def cxOver(ind1, ind2, section, base_size): - size = len(ind1) / 2 - assert size > section - base = np.random.choice((0,1)) - choice = np.random.choice(size-section) - mask = np.zeros(len(ind1)/2, np.bool) - mask[choice:choice+section] = 1 - - ind1_base = list() - 
ind2_base = list() - - for base in (0,1): - ind1_base.append(np.array(ind1[base*size:(base+1)*size])) - ind2_base.append(np.array(ind2[base*size:(base+1)*size])) - tmp = np.copy(ind2_base[-1][mask]) - ind2_base[-1][mask] = ind1_base[-1][mask] - ind1_base[-1][mask] = tmp - - - for inds in (ind1_base,ind2_base): - for ind in inds: - if sum(ind) > base_size: # Case in which we add 1s - ind[np.random.choice(np.where(ind == 1)[0], sum(ind) - base_size,replace=False)] = 0 - if sum(ind) < base_size: # Case in which we add 0s - ind[np.random.choice(np.where(np.logical_and(inds[0] == 0, inds[1] == 0))[0], - base_size - sum(ind),replace=False)] = 1 - - ind1[:] = list(np.append(ind1_base[0],ind1_base[1])) - ind2[:] = list(np.append(ind2_base[0],ind2_base[1])) - - return ind1, ind2 - - -class MorphingWrapper: - def __init__(self): - # Initialize the C object - lib.MorphingWrapperNew.restype = POINTER(c_char) - self.obj = lib.MorphingWrapperNew() - self.mem_values = dict() - - def setStringData(self,data): - # Set samples data in the form of string: - #'Nsamples Ncouplings types[P|D|S]*Ncouplings target sample1 sample2 ...' - # where target are the N couplings of the sample to morph and sample* - # N couplings of each sample - self.string_data = data - s_data = create_string_buffer(data) - lib.setSampleData(self.obj,s_data) - - def getStringData(self): - string_data = '{0} {1} '.format(self.nsamples,self.ncouplings) - string_data = string_data + ' '.join(str(x) for x in self.types) + ' ' - string_data = string_data + ' '.join(str(x) for x in self. morphed) + ' ' - string_data = string_data + ' '.join(str(x) for sample in self.basis for x in sample) - return string_data - - def setSampleData(self,nsamples,ncouplings,types,morphed=[1.,1.,1.],samples=None,ncomb=20,used_samples=30): - lib.getWeights.restype = POINTER(c_float*(nsamples+2)) - self.nsamples = nsamples - self.ncouplings = ncouplings - self.types = types - self.morphed = morphed - self.samples = samples - self.basis = samples[:self.nsamples] - self.ncomb = ncomb - string_data = self.getStringData() - self.setStringData(string_data) - self.used_samples = used_samples - - def resetBasis(self, sample): - self.basis = sample - string_data = self.getStringData() - self.setStringData(string_data) - - def resetTarget(self, target): - self.morphed = target - string_data = self.getStringData() - self.setStringData(string_data) - - def getWeights(self): - # Return computed weights for each sample - s = lib.getWeights(self.obj) - results = [float(x) for x in s.contents] - weights = results[:-2] - self.det = results[-2] - self.cond = results[-1] - return weights - #return [float(x) for x in s.split()] - - def getCrossSections(self): - # Return cross section for each one of the samples - # The C++ version is not working - - #s = lib.getCrossSections(obj) - return self.__computeCrossSections() - - def __computeCrossSections(self): - # Compute Cross Section using analytical formula, only for VBF 2nSM for now - # Defining constants - ca = 0.70710678 - sa = 0.70710678 - c_ = [0.1220,0.05669,0.003246,0.00006837,0.04388,0.002685,0.00007918,0.00002289] - osm = 0.1151 - - def formula(ksm,khzz,kazz): - return 4*osm*((ca**4)*((ksm**4) + c_[0]*(ksm**3)*khzz + c_[1]*(ksm**2)*(khzz**2) + c_[2]*ksm*(khzz**3) + c_[3]*(khzz**4)) +\ - (ca**2)*(sa**2)*(c_[4]*(ksm**2)*(kazz**2) + c_[5]*ksm*khzz*(kazz**2) + c_[6]*(khzz**2)*(kazz**2)) + (sa**4)*c_[7]*(kazz**4)) - - cross_sections = [] - for sample in self.basis: - ksm = sample[0] - khzz = 16.247*sample[1] - kazz = 
16.247*sample[2] - cross_sections.append(formula(ksm,khzz,kazz)) - - return cross_sections - - def computeNeff(self,basis,samples): - # TODO : Harcoded! - #self.resetTarget(self.morphed) - self.resetTarget(self.morphed) - basis = [samples[b] for b in basis] - self.resetBasis(basis) - weights = np.array(self.getWeights()) - #print weights - cross_sections = np.array(self.getCrossSections()) - n_tot = (np.abs(np.multiply(weights,cross_sections))).sum() - n_eff = (np.multiply(weights,cross_sections)).sum() - return np.abs(n_eff / n_tot) - - def evalMean(self,x,samples,cvalues_1,cvalues_2,verb=False): - val = 0. - target = self.morphed[:] - norm = len(cvalues_1) * len(cvalues_2) - result = 0. - for val1 in cvalues_1: - for val2 in cvalues_2: - target[1] = val1 - target[2] = val2 - self.resetTarget(target) - res,det,cond = self.computeStats(x,samples) - result += (1./(val1**2 + val2**2)) * res - r1,det1,cond1 = self.computeStats(x,samples) - val = result/norm - if cond1 > 100000.: - val = 0. - #else: - # print val - return val - - def computeStats(self,basis,samples): - basis = [samples[b] for b in basis] - self.resetBasis(basis) - weights = np.array(self.getWeights()) - #print weights - cross_sections = np.array(self.getCrossSections()) - n_tot = (np.abs(np.multiply(weights,cross_sections))).sum() - n_eff = (np.multiply(weights,cross_sections)).sum() - return (np.abs(n_eff / n_tot),self.det,self.cond) - - def evalMaxPair(self,x,samples,cvalues_1,cvalues_2,verb=False): - val = 0. - target = self.morphed[:] - norm = len(cvalues_1) * len(cvalues_2) - result = 0. - for val1 in cvalues_1: - for val2 in cvalues_2: - target[1] = val1 - target[2] = val2 - self.resetTarget(target) - r1 = self.computeNeff(x[0],samples) - r2 = self.computeNeff(x[1],samples) - #result += np.max((r1, r2)) - result += r1 + r2 - #result = (r1 + r2)/2. - r1,det1,cond1 = self.computeStats(x[0],samples) - r2,det2,cond2 = self.computeStats(x[1],samples) - #self.resetTarget(self.morphed) - #r1 = self.computeNeff(x[0],samples) - #r2 = self.computeNeff(x[1],samples) - #result += 2.*(r1 + r2) - val = result/(norm) - if cond1 > 100000. or cond2 > 100000.: - val = 0. 
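- # The scan below scores every candidate basis pair with evalMaxPair
- # (the two bases' n_eff summed and averaged over the grid of coupling
- # values, set to 0 when the condition number is too large), keeps the
- # best pair seen so far, and stops early once a score above 0.8 is
- # found or the user interrupts with Ctrl-C.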
- #else: - # print val - return val - - def dynamicMorphing(self,target=None,cvalues_1=None,cvalues_2=None): - # This function will work in case you have more samples that needed - rng = np.random.RandomState(1234) - #rng = np.random.RandomState(1111) - samples = self.samples[:] - indexes = np.array([samples.index(x) for x in samples if x <> self.morphed]) - # This is the sample to fit (not necessarily the morphed when fitting) - if target <> None: - indexes = np.array([samples.index(x) for x in samples if x <> target]) - - samples_indexes = indexes - #samples_indexes = indexes[rng.choice(len(indexes), self.used_samples, replace=False)] - #samples_indexes = sorted(indexes,key=lambda x: np.sqrt(np.sum((np.array(samples[x])-self.morphed)**2))) - #samples_indexes = np.array((samples_indexes[:self.used_samples])) - print len(samples_indexes) - ncomb_indexes = samples_indexes[rng.choice(len(samples_indexes),self.ncomb,replace=False)] - - print 'Starting combinations' - #Considering condition number - comb = list(combinations(ncomb_indexes,self.nsamples)) - #comb = [comb[i] for i in rng.choice(len(comb),int(len(comb)/2),replace=False)] - start = time.time() - print 'Start sorting' - comb = sorted(comb,key=lambda x: self.computeNeff(x,samples),reverse=True)[:int(len(comb)*0.1)] - #comb = sorted(comb,key=lambda x: self.computeNeff(x,samples),reverse=True) - end = time.time() - print 'Elapsed time combinations: {0}'.format(end-start) - print 'Number of combinations: {0}'.format(len(comb)) - pairs = [] - for c in comb: - print '.', - #l1 = [i for i in samples_indexes if i not in c] - l1 = np.array(list(set(samples_indexes) - set(c))) - l1 = l1[rng.choice(len(l1),self.ncomb,replace=False)] - #comb2_len = self.nsamples*2 - len(indexes) - #comb2 = list(combinations(c,self.nsamples*2 - len(indexes))) - #comb2 = np.array(comb2)[rng.choice(len(comb2),int(len(comb2)*0.05),replace=False)] - #for c2 in comb2: - # pairs.append([c,tuple(l1 + list(c2))]) - if len(l1) <> self.nsamples: - comb2 = list(combinations(l1, self.nsamples)) - comb2 = np.array(comb2)[rng.choice(len(comb2), int(len(comb2) * 0.03),replace=False)] - for c2 in comb2: - pairs.append([c,tuple(list(c2))]) - else: - pairs.append([c,tuple(l1)]) - print '' - pairs = [pairs[i] for i in rng.choice(len(pairs),int(len(pairs)*0.3),replace=False)] - print 'Number of pairs: {0}'.format(len(pairs)) - start = time.time() - best_result = 0. 
- for p in pairs: - try: - result = self.evalMaxPair(p,samples,cvalues_1,cvalues_2) - if result > best_result: - best_result = result - best = p - if result > 0.8: - best = p - best_result = result - break - except KeyboardInterrupt: - print 'KeyboardInterrupt caught' - break - end = time.time() - print 'Elapsed time 2nd basis: {0}'.format(end-start) - print 'Best Result : {0}'.format(best_result) - print 'Results on target for each basis: ' - self.mem_values.clear() - self.resetTarget(self.morphed) - result,det,cond = self.computeStats(best[0],samples) - print 'Result: {0} ,Det: {1}, Cond: {2}'.format(result, det, cond) - result,det,cond = self.computeStats(best[1],samples) - print 'Result: {0} ,Det: {1}, Cond: {2}'.format(result, det, cond) - - return best - - def dynamicMorphing_(self,target=None,cvalues_1=None,cvalues_2=None): - # This function will work in case you have more samples that needed - rng = np.random.RandomState(1234) - #rng = np.random.RandomState(1111) - samples = self.samples[:] - indexes = np.array([samples.index(x) for x in samples if x <> self.morphed]) - # This is the sample to fit (not necessarily the morphed when fitting) - if target <> None: - indexes = np.array([samples.index(x) for x in samples if x <> target]) - - samples_indexes = indexes - print len(samples_indexes) - ncomb_indexes = samples_indexes[rng.choice(len(samples_indexes),self.ncomb,replace=False)] - - print 'Starting combinations' - #Considering condition number - comb = list(combinations(ncomb_indexes,self.nsamples)) - #comb = [comb[i] for i in rng.choice(len(comb),int(len(comb)/2),replace=False)] - start = time.time() - print 'Start sorting' - comb = sorted(comb,key=lambda x: self.computeNeff(x,samples),reverse=True)[:int(len(comb)*0.1)] - #comb = sorted(comb,key=lambda x: self.computeNeff(x,samples),reverse=True) - end = time.time() - print 'Elapsed time combinations: {0}'.format(end-start) - print 'Number of combinations: {0}'.format(len(comb)) - pairs = [] - for c in comb: - print '.', - l1 = [i for i in samples_indexes if i not in c] - comb2_len = self.nsamples*2 - len(indexes) - comb2 = list(combinations(c,self.nsamples*2 - len(indexes))) - comb2 = np.array(comb2)[rng.choice(len(comb2),int(len(comb2)*0.05),replace=False)] - for c2 in comb2: - pairs.append([c,tuple(l1 + list(c2))]) - print '' - pairs = [pairs[i] for i in rng.choice(len(pairs),int(len(pairs)*0.3),replace=False)] - print 'Number of pairs: {0}'.format(len(pairs)) - start = time.time() - best_result = 0. 
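- # The scan below scores each single-basis candidate with evalMean
- # (n_eff weighted by 1/(val1^2 + val2^2) and averaged over the coupling
- # grid), or with evalMeanSingle when only one range of coupling values
- # is given, and keeps the highest-scoring basis.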
- for p in pairs: - try: - result = self.evalMaxPair(p,samples,cvalues_1,cvalues_2) - if result > best_result: - best_result = result - best = p - if result > 0.8: - best = p - best_result = result - break - except KeyboardInterrupt: - print 'KeyboardInterrupt caught' - break - end = time.time() - print 'Elapsed time 2nd basis: {0}'.format(end-start) - print 'Best Result : {0}'.format(best_result) - print 'Results on target for each basis: ' - self.mem_values.clear() - self.resetTarget(self.morphed) - result,det,cond = self.computeStats(best[0],samples) - print 'Result: {0} ,Det: {1}, Cond: {2}'.format(result, det, cond) - result,det,cond = self.computeStats(best[1],samples) - print 'Result: {0} ,Det: {1}, Cond: {2}'.format(result, det, cond) - - return best - - - def simpleMorphing(self,target=None,cvalues_1=None,cvalues_2=None,c_eval=0): - # TODO: Have to check the condition number and det - # This function will work in case you have more samples that needed - rng = np.random.RandomState(1234) - samples = self.samples[:] - indexes = np.array([samples.index(x) for x in samples if x <> self.morphed]) - # This is the sample to fit (not necessarily the morphed when fitting) - if target <> None: - indexes = np.array([samples.index(x) for x in samples if x <> target]) - #indexes = sorted(indexes,key=lambda x: np.sqrt(np.sum((np.array(samples[x])-self.morphed)**2))) - #ncomb_indexes = indexes[:self.ncomb] - ncomb_indexes = indexes[rng.choice(len(indexes),self.ncomb,replace=False)] - #Considering condition number - comb = list(combinations(ncomb_indexes,self.nsamples)) - comb = [comb[i] for i in rng.choice(len(comb),int(len(comb)/2),replace=False)] - start = time.time() - comb = sorted(comb,key=lambda x: self.computeNeff(x,samples),reverse=True)[:int(len(comb)*0.3)] - end = time.time() - print 'elapsed time : {0}'.format(end-start) - print len(comb) - start = time.time() - comb = [comb[i] for i in rng.choice(len(comb),int(len(comb)*0.1),replace=False)] - best_result = 0. 
- for p in comb: - result = self.evalMean(p,samples,cvalues_1,cvalues_2) if cvalues_2 <> None else\ - self.evalMeanSingle(p,samples,cvalues_1,c_eval) - if result > best_result: - best_result = result - best = p - end = time.time() - self.resetTarget(self.morphed) - result,det,cond = self.computeStats(best,samples) - print 'Result: {0} ,Det: {1}, Cond: {2}'.format(result, det, cond) - print 'elapsed time : {0}'.format(end-start) - #print 'n_eff: {0}'.format(self.computeNeff(best,samples)) - #self.resetBasis([samples[b] for b in best]) - - return best - - def evaluateInd(self,individual,cvalues_1, cvalues_2): - size = len(individual) / 2 - ind1,ind2 = np.array(individual[:size]),np.array(individual[size:]) - # Check that individual is a valid sample - assert sum(ind1) == 15 - assert sum(ind2) == 15 - assert sum(np.logical_and(ind1,ind2)) == 0 - p = map(list,(np.where(ind1 == 1)[0], np.where(ind2 == 1)[0])) - result = self.evalMaxPair(p,self.samples,cvalues_1,cvalues_2) - return result, - - # Workaround for parallel processing - def __call__(self, individual, cvalues_1, cvalues_2): - return self.evaluateInd(individual, cvalues_1, cvalues_2) - - def evolMorphing(self,target=None,cvalues_1=None,cvalues_2=None,c_eval=0): - - res = initInd(np.array, 15, self.samples) - - toolbox = base.Toolbox() - creator.create("FitnessMax", base.Fitness, weights=(1.0,), typecode='d') - creator.create("Individual", list, fitness=creator.FitnessMax) - toolbox.register("individual", initInd, creator.Individual, size=15, samples=self.samples) - ind1 = toolbox.individual() - ind2 = toolbox.individual() - - #toolbox.register('evaluate', self.evaluateInd, cvalues_1=cvalues_1, cvalues_2=cvalues_2) - toolbox.register('evaluate', self, cvalues_1=cvalues_1, cvalues_2=cvalues_2) - - #mutateInd(ind) - toolbox.register('mutate', mutateInd) - - toolbox.register('mate', cxOver, section=3, base_size=15) - toolbox.register("select", tools.selTournament, tournsize=3) - #toolbox.register("select", tools.selRoulette) - toolbox.register("population", tools.initRepeat, list, toolbox.individual) - #toolbox.register("map", futures.map) - pool = multiprocessing.Pool() - toolbox.register("map", pool.map) - - halloffame = tools.HallOfFame(maxsize=10) - - mu_es, lambda_es = 3,21 - - pop_stats = tools.Statistics(key=lambda ind: ind.fitness.values) - #pop_stats.register('pop', copy.deepcopy) - pop_stats.register('avg',np.mean) - pop_stats.register('max',np.max) - #toolbox.mate(ind1,ind2) - pop = toolbox.population(n=50) - start = time.time() - pop, logbook = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.5, ngen=500, stats=pop_stats, halloffame=halloffame) - #pop, logbook = algorithms.eaMuCommaLambda(pop, toolbox, mu=mu_es, lambda_=lambda_es, - # cxpb=0.6, mutpb=0.3, ngen=50, stats=pop_stats, halloffame=halloffame) - end = time.time() - halloffame.update(pop) - - individual = halloffame[0] - size = len(individual) / 2 - ind1,ind2 = np.array(individual[:size]),np.array(individual[size:]) - # Check that individual is a valid sample - assert sum(ind1) == 15 - assert sum(ind2) == 15 - assert sum(np.logical_and(ind1,ind2)) == 0 - p = map(list,(np.where(ind1 == 1)[0], np.where(ind2 == 1)[0])) - print self.evalMaxPair(p,self.samples,cvalues_1,cvalues_2) - - best = p - self.mem_values.clear() - self.resetTarget(self.morphed) - result,det,cond = self.computeStats(best[0],self.samples) - print 'Result: {0} ,Det: {1}, Cond: {2}'.format(result, det, cond) - result,det,cond = self.computeStats(best[1],self.samples) - print 'Result: {0} ,Det: {1}, 
Cond: {2}'.format(result, det, cond) - print 'elapsed time : {0}'.format(end-start) - return p - diff --git a/train_classifiers.py b/train_classifiers.py index 473dc03..862dbbf 100644 --- a/train_classifiers.py +++ b/train_classifiers.py @@ -6,7 +6,6 @@ import numpy as np from sklearn import svm, linear_model from sklearn.externals import joblib -from sklearn.metrics import roc_curve, auc from sklearn.ensemble import GradientBoostingClassifier from sklearn import cross_validation from xgboost_wrapper import XGBoostClassifier @@ -16,102 +15,108 @@ from os import listdir from os.path import isfile, join import os.path -import pdb from utils import printMultiFrame, printFrame, saveFig, loadData -''' +''' Train each one of the classifiers on the data ./data/c1/{train|test}_i_j.dat each model is saved on the file ./model/c1/{model_file}_i_j.pkl ''' -def trainClassifiers(clf,nsamples, - model_g='mlp',c1_g='', - dir='/afs/cern.ch/user/j/jpavezse/systematics', - model_file='adaptive', - dataset_names = None, - full_names = None, - data_file='train', - preprocessing=False, - seed=1234, - index = None, - vars_names = None - ): - ''' - Train classifiers pair-wise on - datasets - ''' - print 'Training classifier' - scaler=None - if preprocessing == True: - scaler = {} - for k in range(nsamples): - for j in range(nsamples): - if k==j or k > j: - continue - if dataset_names <> None: - name_k, name_j = (dataset_names[k], dataset_names[j]) - else: - name_k, name_j = (k,j) - print " Training Classifier on {0}/{1}".format(name_k,name_j) - traindata,targetdata = loadData(data_file,name_k,name_j,dir=dir,c1_g=c1_g, - preprocessing=preprocessing, scaler=scaler) - if model_g == 'mlp': - clf.fit(traindata, targetdata, save_file='{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,model_file,k,j)) - else: + +def trainClassifiers(clf, nsamples, + model_g='mlp', c1_g='', + dir='/afs/cern.ch/user/j/jpavezse/systematics', + model_file='adaptive', + dataset_names=None, + full_names=None, + data_file='train', + seed=1234, + index=None, + vars_names=None + ): + ''' + Train classifiers pair-wise on + datasets + ''' + print 'Training classifier' + for k in range(nsamples): + for j in range(nsamples): + if k == j or k > j: + continue + if dataset_names is not None: + name_k, name_j = (dataset_names[k], dataset_names[j]) + else: + name_k, name_j = (k, j) + print " Training Classifier on {0}/{1}".format(name_k, name_j) + traindata, targetdata = loadData(data_file, name_k, name_j, dir=dir, c1_g=c1_g) + if model_g == 'mlp': + clf.fit(traindata, + targetdata, + save_file='{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir, + model_g, + c1_g, + model_file, + k, + j)) + else: + rng = np.random.RandomState(seed) + indices = rng.permutation(traindata.shape[0]) + traindata = traindata[indices] + targetdata = targetdata[indices] + scores = cross_validation.cross_val_score(clf, traindata.reshape( + traindata.shape[0], traindata.shape[1]), targetdata) + print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2) + clf.fit( + traindata.reshape( + traindata.shape[0], + traindata.shape[1]), + targetdata) + joblib.dump(clf, + '{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir, + model_g, + c1_g, + model_file, + k, + j)) + print " Training Classifier on F0/F1" + traindata, targetdata = loadData(data_file, 'F0' if full_names is None else full_names[0], + 'F1' if full_names is None else full_names[1], dir=dir, c1_g=c1_g) + if model_g == 'mlp': + clf.fit(traindata, + targetdata, + 
save_file='{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(dir, + model_g, + c1_g, + model_file)) + else: rng = np.random.RandomState(seed) indices = rng.permutation(traindata.shape[0]) traindata = traindata[indices] targetdata = targetdata[indices] - #scores = cross_validation.cross_val_score(clf, traindata.reshape(traindata.shape[0], - #traindata.shape[1]), targetdata) - #print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2) - clf.fit(traindata.reshape(traindata.shape[0],traindata.shape[1]) - ,targetdata) - #joblib.dump(clf, '/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(model_file,k,j)) - joblib.dump(clf, '{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,model_file,k,j)) - print " Training Classifier on F0/F1" - traindata,targetdata = loadData(data_file,'F0' if full_names == None else full_names[0], - 'F1' if full_names == None else full_names[1],dir=dir,c1_g=c1_g) - if model_g == 'mlp': - clf.fit(traindata, targetdata, save_file='{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(dir,model_g,c1_g,model_file)) - else: - rng = np.random.RandomState(seed) - indices = rng.permutation(traindata.shape[0]) - traindata = traindata[indices] - targetdata = targetdata[indices] - #clf = svm.NuSVC(probability=True) #Why use a SVR?? - scores = cross_validation.cross_val_score(clf, traindata, targetdata) - print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2) - clf.fit(traindata,targetdata) - #clf.plot_importance_matrix(vars_names) - #joblib.dump(clf, '/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(model_file)) - joblib.dump(clf, '{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(dir,model_g,c1_g,model_file)) - - - return scaler + scores = cross_validation.cross_val_score(clf, traindata, targetdata) + print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2) + clf.fit(traindata, targetdata) + joblib.dump( + clf, '{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(dir, model_g, c1_g, model_file)) -def predict(filename, traindata,model_g='mlp', sig=1, clf=None): - sfilename,k,j = filename.split('/')[-1].split('_') - sfilename = '/'.join(filename.split('/')[:-1]) + '/' + sfilename - j = j.split('.')[0] - sig = 1 - if k <> 'F0': - k = int(k) - j = int(j) - sig = 1 if k < j else 0 - filename = '{0}_{1}_{2}.pkl'.format(sfilename,min(k,j),max(k,j)) - if clf <> None: - return clf.predict_proba(traindata, model_file=filename)[:,sig] - else: - clf = joblib.load(filename) - if clf.__class__.__name__ == 'NuSVR': - output = clf.predict(traindata) - return np.clip(output,0.,1.) +def predict(filename, traindata, model_g='mlp', sig=1, clf=None): + sfilename, k, j = filename.split('/')[-1].split('_') + sfilename = '/'.join(filename.split('/')[:-1]) + '/' + sfilename + j = j.split('.')[0] + sig = 1 + if k != 'F0': + k = int(k) + j = int(j) + sig = 1 if k < j else 0 + filename = '{0}_{1}_{2}.pkl'.format(sfilename, min(k, j), max(k, j)) + if clf is not None: + return clf.predict_proba(traindata, model_file=filename)[:, sig] else: - return clf.predict_proba(traindata)[:,sig] - - - - + clf = joblib.load(filename) + if clf.__class__.__name__ == 'NuSVR': + output = clf.predict(traindata) + return np.clip(output, 0., 1.) 
+ else: + return clf.predict_proba(traindata)[:, sig] diff --git a/utils.py b/utils.py index abcd7ab..f4ce6e9 100644 --- a/utils.py +++ b/utils.py @@ -12,650 +12,604 @@ import os.path import pdb -from matplotlib.colors import LogNorm import matplotlib.pyplot as plt import matplotlib.cm as cm -from mpl_toolkits.mplot3d import Axes3D from sklearn import preprocessing -''' +''' Some usefull functions to print and load data ''' -def makePlotName(full, truth, f0 = None, f1 = None, type=None, - dir='/afs/cern.ch/user/j/jpavezse/systematics', - c1_g='',model_g='mlp'): - if full == 'dec': - return '{0}_{1}_f{2}_f{3}_{4}_{5}'.format(full, truth, f0, f1, model_g,type) - else: - return '{0}_{1}_{2}_{3}'.format(full, truth, model_g,type) - -def preProcessing(traindata,k,j, scaler): - assert scaler <> None - if (k,j) not in scaler: - scaler[(k,j)] = preprocessing.StandardScaler().fit(traindata) - traindata = scaler[(k,j)].transform(traindata) - return traindata - -def loadData(type,k,j,folder=None,dir='',c1_g='',preprocessing=False,scaler=None, persist=False, - model_g='mlp'): - if folder <> None: - fk = np.loadtxt('{0}/{1}_{2}.dat'.format(folder,type,k)) - fj = np.loadtxt('{0}/{1}_{2}.dat'.format(folder,type,j)) - else: - fk = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,model_g,c1_g,type,k)) - fj = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,model_g,c1_g,type,j)) - rng = np.random.RandomState(1111) - # HARCODED RIGHT NOW - - #data_num = 19900 - #data_num = 5000 - #indices = rng.choice(fk.shape[0],data_num,replace=False) - #fk = fk[indices] - #indices = rng.choice(fj.shape[0],data_num,replace=False) - #fj = fj[indices] - - #fk = fk[:,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,16,20,24,25,26,27,28,29,30,31,36,40,42]] - #fj = fj[:,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,16,20,24,25,26,27,28,29,30,31,36,40,42]] - - num1 = fj.shape[0] - num0 = fk.shape[0] - traindata = np.zeros((num0+num1,fk.shape[1])) if len(fk.shape) > 1 else \ - np.zeros(num0+num1) - targetdata = np.zeros(num0+num1) - traindata[:num1] = fj[:] - traindata[num1:] = fk[:] - targetdata[:num1].fill(1) - targetdata[num1:].fill(0) - if preprocessing == True: - traindata = preProcessing(traindata,k,j, scaler) - # this should be in the preProcessing? - if persist == True: - joblib.dump(scaler[(k,j)],'{0}/model/{1}/{2}/{3}_{4}_{5}.dat'.format(dir,'mlp',c1_g,'scaler',k,j)) - return (traindata, targetdata) - - -def printMultiFrame(w,obs,all_pdfs,name,legends, - dir='/afs/cern.ch/user/j/jpavezse/systematics', - model_g='mlp',setLog=False,y_text='',print_pdf=False,title='', - x_text='x'): - ''' - This just print a bunch of pdfs - in a Canvas - ''' - - # Preliminaries - ROOT.gROOT.SetStyle('Plain') - #ROOT.gStyle.SetOptTitle(0) - ROOT.gStyle.SetOptStat(0) - ROOT.gStyle.SetOptFit(1) - ROOT.gStyle.SetPalette(1) - - ROOT.gStyle.SetTitleX(0.5) - ROOT.gStyle.SetTitleAlign(23) - ROOT.gStyle.SetTitleBorderSize(0) - - # Hope I don't need more colors ... 
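- # Line colors and styles below are assigned by the position of each pdf
- # within the current subplot (blue, red, black, green, yellow; the third
- # and fourth entries drawn dashed).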
- colors = [ROOT.kBlue,ROOT.kRed,ROOT.kBlack,ROOT.kGreen, - ROOT.kYellow] - style = [ROOT.kSolid,ROOT.kSolid,ROOT.kDashed,ROOT.kDashed] - can = ROOT.TCanvas('c1') - can.Divide(1,len(all_pdfs)) - legs = [] - frames = [] - for curr,pdf in enumerate(all_pdfs): - x = w.var(obs[curr]) - can.cd(curr+1) - if curr <> len(pdf) - 1: - #ROOT.gPad.SetBottomMargin(0.01) - if curr == 0: - ROOT.gPad.SetTopMargin(0.09) - else: - ROOT.gPad.SetTopMargin(0.01) - ROOT.gPad.SetRightMargin(0.01) + +def makePlotName(full, truth, f0=None, f1=None, type=None, + dir='/afs/cern.ch/user/j/jpavezse/systematics', + c1_g='', model_g='mlp'): + if full == 'dec': + return '{0}_{1}_f{2}_f{3}_{4}_{5}'.format( + full, truth, f0, f1, model_g, type) else: - ROOT.gPad.SetTopMargin(0.01) - ROOT.gPad.SetRightMargin(0.01) - ROOT.gPad.SetLeftMargin(0.04) - if setLog == True: - ROOT.gPad.SetLogy(1) + return '{0}_{1}_{2}_{3}'.format(full, truth, model_g, type) + + +def loadData(type, k, j, folder=None, dir='', c1_g='', model_g='mlp'): + if folder is not None: + fk = np.loadtxt('{0}/{1}_{2}.dat'.format(folder, type, k)) + fj = np.loadtxt('{0}/{1}_{2}.dat'.format(folder, type, j)) + else: + fk = np.loadtxt( + '{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir, model_g, c1_g, type, k)) + fj = np.loadtxt( + '{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir, model_g, c1_g, type, j)) + rng = np.random.RandomState(1111) + + num1 = fj.shape[0] + num0 = fk.shape[0] + traindata = np.zeros( + (num0 + num1, + fk.shape[1])) if len( + fk.shape) > 1 else np.zeros( + num0 + num1) + targetdata = np.zeros(num0 + num1) + traindata[:num1] = fj[:] + traindata[num1:] = fk[:] + targetdata[:num1].fill(1) + targetdata[num1:].fill(0) + + return (traindata, targetdata) + +def makePreliminaries(): + # Preliminaries + ROOT.gROOT.SetStyle('Plain') + ROOT.gStyle.SetOptStat(0) + ROOT.gStyle.SetOptFit(1) + ROOT.gStyle.SetPalette(1) + + ROOT.gStyle.SetTitleX(0.5) + ROOT.gStyle.SetTitleAlign(23) + ROOT.gStyle.SetTitleBorderSize(0) + +def printMultiFrame( + w, + obs, + all_pdfs, + name, + legends, + dir='/afs/cern.ch/user/j/jpavezse/systematics', + model_g='mlp', + setLog=False, + y_text='', + print_pdf=False, + title='', + x_text='x'): + ''' + This just print a bunch of pdfs + in a Canvas + ''' + + makePreliminaries() + colors = [ROOT.kBlue, ROOT.kRed, ROOT.kBlack, ROOT.kGreen, + ROOT.kYellow] + style = [ROOT.kSolid, ROOT.kSolid, ROOT.kDashed, ROOT.kDashed] + can = ROOT.TCanvas('c1') + can.Divide(1, len(all_pdfs)) + legs = [] + frames = [] + for curr, pdf in enumerate(all_pdfs): + x = w.var(obs[curr]) + can.cd(curr + 1) + if curr != len(pdf) - 1: + if curr == 0: + ROOT.gPad.SetTopMargin(0.09) + else: + ROOT.gPad.SetTopMargin(0.01) + ROOT.gPad.SetRightMargin(0.01) + else: + ROOT.gPad.SetTopMargin(0.01) + ROOT.gPad.SetRightMargin(0.01) + ROOT.gPad.SetLeftMargin(0.04) + if setLog: + ROOT.gPad.SetLogy(1) + funcs = [] + line_colors = [] + line_styles = [] + for i, p in enumerate(pdf): + funcs.append(p) + line_colors.append(ROOT.RooFit.LineColor(colors[i])) + line_styles.append(ROOT.RooFit.LineStyle(style[i])) + frames.append(x.frame(ROOT.RooFit.Name(legends[curr][0]), ROOT.RooFit. 
+ Title(legends[curr][0].split('_')[0]))) + for i, f in enumerate(funcs): + if isinstance(f, str): + funcs[0].plotOn(frames[-1], + ROOT.RooFit.Components(f), + ROOT.RooFit.Name(legends[curr][i]), + line_colors[i], + line_styles[i]) + else: + f.plotOn(frames[-1], + ROOT.RooFit.Name(legends[curr][i]), + line_colors[i], + line_styles[i]) + legs.append(ROOT.TLegend(0.79, 0.73, 0.90, 0.87)) + for i, l in enumerate(legends[curr]): + if i == 0: + legs[-1].AddEntry(frames[-1].findObject(legends[curr] + [i]), l.split('_')[1], 'l') + else: + legs[-1].AddEntry(frames[-1].findObject(legends[curr] + [i]), l.split('_')[1], 'l') + legs[-1].SetFillColor(0) + legs[-1].SetBorderSize(0) + legs[-1].SetTextSize(0.06) + legs[-1].SetFillColor(0) + legs[-1].SetBorderSize(0) + frames[-1].SetTitleSize(0.06, "Y") + frames[-1].SetTitleSize(0.06, "X") + frames[-1].GetYaxis().CenterTitle(1) + frames[-1].GetYaxis().SetTitleOffset(0.35) + if curr == 0: + frames[-1].SetTitle("{0};;{1}".format(title, y_text)) + elif curr == len(all_pdfs) - 1: + frames[-1].SetTitle(";{0};{1}".format(x_text, y_text)) + frames[-1].GetXaxis().SetTitleOffset(0.25) + else: + frames[-1].SetTitle(";;{0}".format(y_text)) + + frames[-1].Draw() + legs[-1].Draw() + ROOT.gPad.Update() + can.Modified() + can.Update() + can.SaveAs('{0}/plots/{1}/{2}.png'.format(dir, model_g, name)) + if print_pdf: + can.SaveAs('{0}/plots/{1}/{2}.pdf'.format(dir, model_g, name)) + + +def printFrame(w, obs, pdf, name, legends, + dir='/afs/cern.ch/user/j/jpavezse/systematics', model_g='mlp', + title='', y_text='', x_text='', range=None, print_pdf=False + ): + ''' + This just print a bunch of pdfs + in a Canvas + ''' + # Preliminaries + makePreliminaries() + if len(obs) > 1: + ROOT.gStyle.SetOptTitle(0) + + colors = [ROOT.kBlue, ROOT.kRed, ROOT.kGreen, ROOT.kBlack, + ROOT.kYellow] + x = [] + for var in obs: + x.append(w.var(var)) funcs = [] line_colors = [] - line_styles = [] - for i,p in enumerate(pdf): - funcs.append(p) - line_colors.append(ROOT.RooFit.LineColor(colors[i])) - line_styles.append(ROOT.RooFit.LineStyle(style[i])) - frames.append(x.frame(ROOT.RooFit.Name(legends[curr][0]),ROOT.RooFit. 
- Title(legends[curr][0].split('_')[0]))) - for i,f in enumerate(funcs): - if isinstance(f,str): - funcs[0].plotOn(frames[-1], ROOT.RooFit.Components(f),ROOT.RooFit.Name(legends[curr][i]), line_colors[i], - line_styles[i]) - else: - f.plotOn(frames[-1],ROOT.RooFit.Name(legends[curr][i]),line_colors[i],line_styles[i]) - legs.append(ROOT.TLegend(0.79, 0.73, 0.90, 0.87)) - #leg.SetFillColor(ROOT.kWhite) - #leg.SetLineColor(ROOT.kWhite) - # TODO This is just a quick fix because is now working how it should - for i,l in enumerate(legends[curr]): - if i == 0: - legs[-1].AddEntry(frames[-1].findObject(legends[curr][i]), l.split('_')[1], 'l') - else: - legs[-1].AddEntry(frames[-1].findObject(legends[curr][i]), l.split('_')[1], 'l') - legs[-1].SetFillColor(0) - legs[-1].SetBorderSize(0) - legs[-1].SetTextSize(0.06) - legs[-1].SetFillColor(0) - legs[-1].SetBorderSize(0) - frames[-1].SetTitleSize(0.06,"Y") - frames[-1].SetTitleSize(0.06,"X") - frames[-1].GetYaxis().CenterTitle(1) - frames[-1].GetYaxis().SetTitleOffset(0.35) - if curr == 0: - frames[-1].SetTitle("{0};;{1}".format(title,y_text)) - elif curr == len(all_pdfs)-1: - frames[-1].SetTitle(";{0};{1}".format(x_text,y_text)) - frames[-1].GetXaxis().SetTitleOffset(0.25) - else: - frames[-1].SetTitle(";;{0}".format(y_text)) - - frames[-1].Draw() - legs[-1].Draw() - ROOT.gPad.Update() - can.Modified() - can.Update() - can.SaveAs('{0}/plots/{1}/{2}.png'.format(dir,model_g,name)) - if print_pdf == True: - can.SaveAs('{0}/plots/{1}/{2}.pdf'.format(dir,model_g,name)) - -def printFrame(w,obs,pdf,name,legends, - dir='/afs/cern.ch/user/j/jpavezse/systematics',model_g='mlp', - title='',y_text='',x_text='',range=None,print_pdf=False - ): - ''' - This just print a bunch of pdfs - in a Canvas - ''' - # Preliminaries - ROOT.gROOT.SetStyle('Plain') - if len(obs) > 1: - ROOT.gStyle.SetOptTitle(0) - ROOT.gStyle.SetOptStat(0) - ROOT.gStyle.SetOptFit(1) - ROOT.gStyle.SetPalette(1) - - ROOT.gStyle.SetTitleX(0.5) - ROOT.gStyle.SetTitleAlign(23) - ROOT.gStyle.SetTitleBorderSize(0) - - # Hope I don't need more colors ... 
- colors = [ROOT.kBlue,ROOT.kRed,ROOT.kGreen,ROOT.kBlack, - ROOT.kYellow] - x = [] - for var in obs: - x.append(w.var(var)) - funcs = [] - line_colors = [] - for i,p in enumerate(pdf): - funcs.append(p) - line_colors.append(ROOT.RooFit.LineColor(colors[i])) - - can = ROOT.TCanvas('c1') - can.Divide(1,len(obs)) - frame = [] - for var in x: - frame.append(var.frame()) - legs = [] - for j,fra in enumerate(frame): - can.cd(j+1) - if len(obs) == 1: - ROOT.gPad.SetRightMargin(0.01) - else: - if j <> len(obs) - 1: - #ROOT.gPad.SetBottomMargin(0.01) - if j == 0: - ROOT.gPad.SetTopMargin(0.09) + for i, p in enumerate(pdf): + funcs.append(p) + line_colors.append(ROOT.RooFit.LineColor(colors[i])) + + can = ROOT.TCanvas('c1') + can.Divide(1, len(obs)) + frame = [] + for var in x: + frame.append(var.frame()) + legs = [] + for j, fra in enumerate(frame): + can.cd(j + 1) + if len(obs) == 1: + ROOT.gPad.SetRightMargin(0.01) else: - ROOT.gPad.SetTopMargin(0.01) - ROOT.gPad.SetRightMargin(0.01) - else: - ROOT.gPad.SetTopMargin(0.01) - ROOT.gPad.SetRightMargin(0.01) - ROOT.gPad.SetLeftMargin(0.04) - for i,f in enumerate(funcs): - if isinstance(f,str): - funcs[0].plotOn(fra, ROOT.RooFit.Components(f),ROOT.RooFit.Name(legends[i]), line_colors[i]) + if j != len(obs) - 1: + # ROOT.gPad.SetBottomMargin(0.01) + if j == 0: + ROOT.gPad.SetTopMargin(0.09) + else: + ROOT.gPad.SetTopMargin(0.01) + ROOT.gPad.SetRightMargin(0.01) + else: + ROOT.gPad.SetTopMargin(0.01) + ROOT.gPad.SetRightMargin(0.01) + ROOT.gPad.SetLeftMargin(0.04) + for i, f in enumerate(funcs): + if isinstance(f, str): + funcs[0].plotOn( + fra, ROOT.RooFit.Components(f), ROOT.RooFit.Name( + legends[i]), line_colors[i]) + else: + f.plotOn(fra, ROOT.RooFit.Name(legends[i]), line_colors[i]) + legs.append(ROOT.TLegend(0.69, 0.73, 0.89, 0.87)) + + for i, l in enumerate(legends): + if i == 0: + legs[-1].AddEntry(fra.findObject(legends[i]), l, 'l') + else: + legs[-1].AddEntry(fra.findObject(legends[i]), l, 'l') + legs[-1].SetFillColor(0) + legs[-1].SetBorderSize(0) + legs[-1].SetFillColor(0) + legs[-1].SetBorderSize(0) + + fra.GetYaxis().CenterTitle(1) + + fra.SetTitle(" ") + if range is not None: + fra.GetXaxis().SetRangeUser(range[0], range[1]) + fra.Draw() + legs[-1].Draw() + can.SaveAs('{0}/plots/{1}/{2}.png'.format(dir, model_g, name)) + if print_pdf: + can.SaveAs('{0}/plots/{1}/{2}.pdf'.format(dir, model_g, name)) + + +def saveFig( + x, + y, + file, + type='', + labels=None, + axis=None, + dir='/afs/cern.ch/user/j/jpavezse/systematics', + model_g='mlp', + marker=False, + marker_value=None, + x_range=None, + title='', + multi=False, + print_pdf=False, + min_value=None): + ''' + Helper to print a lof of different plots + ''' + + plt.ioff() + fig, ax = plt.subplots() + colors = ['b-', 'r-', 'k-'] + colors_rgb = ['blue', 'red', 'black'] + if type=='contour2': + cs1 = plt.contour( + x, y[0], y[1].transpose(), [ + 0., 0.1, 0.5, 1., 5., 10., 20., 50.], label=labels[0]) + cs2 = plt.contour( + x, y[0], y[2].transpose(), [ + 0., 0.1, 0.5, 1., 5., 10., 20., 50.], linestyles="dashed", label=labels[1]) + plt.clabel(cs1, inline=1, fontsize=10) + lines = [cs1.collections[0], cs2.collections[0]] + plt.legend(lines, labels, frameon=False, fontsize=11) + + ax.set_xlabel('signal weight', fontsize=11) + ax.set_ylabel('background weight', fontsize=11) + ax.annotate( + 'min=({0:.2f},{1:.2f})'.format( + min_value[0], + min_value[1]), + xy=( + min_value[0] + 0.001, + min_value[1] + 0.001), + xytext=( + min_value[0] + 0.03, + min_value[1] + 0.03), + arrowprops=dict( + 
facecolor='red')) + if marker: + plt.axvline(marker_value[0], color='black') + plt.axhline(marker_value[1], color='black') + plt.legend(loc='upper left', frameon=False, numpoints=1) + elif type=='contour': + levels = [2.3,6.] + cs1 = plt.contour( + x, + y[0], + y[1].transpose(), + levels, + origin='lower', + alpha=.5, + colors=( + '#ff0000', + '#ff9900', + '#999900', + 'w', + '#009999', + '#0099ff', + '#0000ff', + '#00ffff')) + plt.clabel(cs1, inline=1, fontsize=10) + lines = [cs1.collections[0]] + ax.set_title('-2ln(Q), target=({0:.2f},{1:.2f})'. + format(marker_value[0],marker_value[1])) + ax.set_xlabel('Kazz',fontsize=11) + ax.set_ylabel('Khazz',fontsize=11) + ax.set_xlabel('signal weight', fontsize=11) + ax.set_ylabel('background weight', fontsize=11) + ax.annotate( + 'min=({0:.2f},{1:.2f})'.format( + min_value[0], + min_value[1]), + xy=( + min_value[0] + 0.001, + min_value[1] + 0.001), + xytext=( + min_value[0] + 0.03, + min_value[1] + 0.03), + arrowprops=dict( + facecolor='red')) + if marker: + plt.axvline(marker_value[0], color='black') + plt.axhline(marker_value[1], color='black') + + labels = ['95% C.L','68.27% C.L'] + for i in range(len(labels)): + cs1.collections[i].set_label(labels[i]) + plt.legend(loc='upper left', frameon=False, numpoints=1) + elif type=='pixel': + vals = np.flipud(y[1]) + im = plt.imshow( + vals, + extent=( + y[0].min(), + y[0].max(), + x.min(), + x.max()), + interpolation='nearest', + cmap=cm.gist_rainbow_r) + + CB = plt.colorbar(im, shrink=0.8, extend='both') + ax.set_title(title) + ax.set_xlabel('g2', fontsize=11) + ax.set_ylabel('g1', fontsize=11) + elif type=='scatter': + if len(y) == 1: + ax.scatter(x, y[0], marker='*', color='g') + ax.set_xlabel(axis[0]) + ax.set_ylabel(axis[1]) else: - f.plotOn(fra,ROOT.RooFit.Name(legends[i]),line_colors[i]) - legs.append(ROOT.TLegend(0.69, 0.73, 0.89, 0.87)) - #leg.SetFillColor(ROOT.kWhite) - #leg.SetLineColor(ROOT.kWhite) - for i,l in enumerate(legends): - if i == 0: - legs[-1].AddEntry(fra.findObject(legends[i]), l, 'l') - else: - legs[-1].AddEntry(fra.findObject(legends[i]), l, 'l') - legs[-1].SetFillColor(0) - legs[-1].SetBorderSize(0) - #legs[-1].SetTextSize(0.06) - legs[-1].SetFillColor(0) - legs[-1].SetBorderSize(0) - #fra.SetTitleSize(0.06,"Y") - #fra.SetTitleSize(0.06,"X") - fra.GetYaxis().CenterTitle(1) - #fra.GetYaxis().SetTitleOffset(0.35) - if len(obs) == 1: - fra.SetTitle("{0};{1};{2}".format(title,x_text,y_text)) - else: - fra.SetTitle(";;{0}".format(y_text)) - if range <> None: - fra.GetXaxis().SetRangeUser(range[0],range[1]) - fra.Draw() - legs[-1].Draw() - can.SaveAs('{0}/plots/{1}/{2}.png'.format(dir,model_g,name)) - if print_pdf == True: - can.SaveAs('{0}/plots/{1}/{2}.pdf'.format(dir,model_g,name)) - -def saveFig(x,y,file,labels=None,scatter=False,contour=False,axis=None, - dir='/afs/cern.ch/user/j/jpavezse/systematics', - model_g='mlp',marker=False, hist=False, marker_value=None, x_range=None,title='',multi=False,print_pdf=False,hist2D=False,pixel=False,min_value=None): - fig,ax = plt.subplots() - colors = ['b-','r-','k-'] - colors_rgb = ['blue','red','black'] - if contour == True: - if pixel == True: - vals = np.flipud(y[1]) - #vals = y[1] - im = plt.imshow(vals, extent=(y[0].min(), y[0].max(), x.min(),x.max()), - interpolation='nearest', cmap=cm.gist_rainbow_r) - - CB = plt.colorbar(im, shrink=0.8, extend='both') - #plt.legend(lines,labels,frameon=False,fontsize=11) - ax.set_title(title) - ax.set_xlabel('g2',fontsize=11) - ax.set_ylabel('g1',fontsize=11) - else: - print min_value - #levels = 
[2.,10.,40.,60.,80.,100.,200.,400.,600.] - #levels = [2.3,6.] - #levels = [-800.,-600.,-400.,-200.,0.,200,1000.,10000.,50000.] - #im = plt.imshow(y[1], interpolation='bilinear', origin='lower', - # cmap=cm.gray, extent=(-3,3,-2,2)) - cs1 = plt.contour(x,y[0],y[1].transpose(),[0.,0.1,0.5,1.,5.,10.,50.,100.]) - #cs1 = plt.contour(x,y[0],y[1].transpose(),levels,origin='lower',alpha=.5, colors=('#ff0000', '#ff9900', '#999900', 'w', '#009999', '#0099ff', '#0000ff','#00ffff')) - cs2 = plt.contour(x,y[0],y[2].transpose(),[0.,0.1,0.5,1.,5.,10.,50.,100.],linestyles="dashed") - plt.clabel(cs1, inline=1, fontsize=10) - #plt.xlim(0.8,1.2) - #plt.ylim(0.3,0.7) - #CB = plt.colorbar(cs1, shrink=0.8, extend='both') - lines = [cs1.collections[0],cs2.collections[0]] - #lines = [cs1.collections[0]] - plt.legend(lines,labels,frameon=False,fontsize=11) - ax.set_title('-ln($\lambda$), target=({0:.2f},{1:.2f})'. - format(marker_value[0],marker_value[1])) - #ax.set_title('-2ln(Q), target=({0:.2f},{1:.2f})'. - # format(marker_value[0],marker_value[1])) - #ax.set_xlabel('Kazz',fontsize=11) - #ax.set_ylabel('Khazz',fontsize=11) - ax.set_xlabel('signal weight',fontsize=11) - ax.set_ylabel('background weight',fontsize=11) - ax.annotate('min=({0:.2f},{1:.2f})'.format(min_value[0],min_value[1]), xy=(min_value[0]+0.001 - ,min_value[1]+0.001),xytext=(min_value[0]+0.03,min_value[1]+0.03),arrowprops=dict(facecolor='red')) - if marker == True: - plt.axvline(marker_value[0], color='black') - plt.axhline(marker_value[1], color='black') - - #labels = ['95% C.L','68.27% C.L'] - #for i in range(len(labels)): - # cs1.collections[i].set_label(labels[i]) - #ax.plot([marker_value[0]],[marker_value[1]],'+',markersize=15,label='True') - #ax.plot([min_value[0]],[min_value[1]],'+',color='red',markersize=15,label='Fit') - plt.legend(loc='upper left',frameon=False,numpoints=1) - #ax.annotate('min',xy=(c1[0],c1[1]),xytext=(0.,0.)) - else: - if scatter == True: - if len(y) == 1: - ax.scatter(x,y[0],marker='*',color='g') + sc1 = ax.scatter(x, y[0], color='black') + sc2 = ax.scatter(x, y[1], color='red') + ax.legend((sc1, sc2), (labels[0], labels[1])) + ax.set_xlabel('x') + ax.set_ylabel('regression(score)') + elif type=='hist2D': + H, xedges, yedges, img = plt.hist2d( + y[0][:450], y[1][:450], bins=12) + extent = [xedges[0], xedges[-1], yedges[-1], yedges[0]] + fig = plt.figure() + plt.xlim([xedges[0], xedges[-1]]) + plt.ylim([1.2, yedges[-1]]) + ax = fig.add_subplot(1, 1, 1) ax.set_xlabel(axis[0]) ax.set_ylabel(axis[1]) - else: - sc1 = ax.scatter(x,y[0],color='black') - sc2 = ax.scatter(x,y[1],color='red') - ax.legend((sc1,sc2),(labels[0],labels[1])) - ax.set_xlabel('x') - ax.set_ylabel('regression(score)') - else: - if hist == True: - if hist2D == True: - H, xedges, yedges, img = plt.hist2d(y[0][:450], y[1][:450],bins=12) - pdb.set_trace() - extent = [xedges[0], xedges[-1], yedges[-1], yedges[0]] - fig = plt.figure() - plt.xlim([xedges[0],xedges[-1]]) - plt.ylim([1.2,yedges[-1]]) - ax = fig.add_subplot(1, 1, 1) - ax.set_xlabel(axis[0]) - ax.set_ylabel(axis[1]) - im = ax.imshow(H, cmap=plt.cm.jet,interpolation='nearest',extent=extent) - mean1,mean2 = (y[0][:450].mean(),y[1][:450].mean()) - std1, std2 = (y[0][:450].std(),y[1][:450].std()) - ax.annotate('mean=[{0:.2f},{1:.2f}]\nstd=[{2:.2f},{3:.2f}]'.format(mean1,mean2,std1,std2) - ,xy=(mean1,mean2),xytext=(mean1-0.01,mean2-0.1),arrowprops=dict(facecolor='red'),color='white') - fig.colorbar(im, ax=ax) - if marker == True: + im = ax.imshow( + H, + cmap=plt.cm.jet, + interpolation='nearest', + 
extent=extent) + mean1, mean2 = (y[0][:450].mean(), y[1][:450].mean()) + std1, std2 = (y[0][:450].std(), y[1][:450].std()) + ax.annotate( + 'mean=[{0:.2f},{1:.2f}]\nstd=[{2:.2f},{3:.2f}]'.format( + mean1, mean2, std1, std2), xy=( + mean1, mean2), xytext=( + mean1 - 0.01, mean2 - 0.1), arrowprops=dict( + facecolor='red'), color='white') + fig.colorbar(im, ax=ax) + if marker: plt.axvline(marker_value[0], color='black') plt.axhline(marker_value[1], color='black') - else: - if len(y) == 1: - if x_range <> None: - ax.hist(y[0],color='red', label=labels[0],bins=16, range=[x_range[0], x_range[1]], - histtype='step', alpha=0.5) + elif type=='hist': + if len(y) == 1: + if x_range is not None: + ax.hist( + y[0], color='red', label=labels[0], bins=16, range=[ + x_range[0], x_range[1]], histtype='step', alpha=0.5) else: - ax.hist(y[0],color='red', label=labels[0],bins=20, - histtype='step', alpha=0.5) - else: - #Just supporting two plots for now - if x_range <> None: - for i,ys in enumerate(y): - ax.hist(ys,color=colors_rgb[i],label=labels[i],bins=15, range=[x_range[0],x_range[1]],histtype='step',normed=1, alpha=0.5) + ax.hist( + y[0], + color='red', + label=labels[0], + bins=20, + histtype='step', + alpha=0.5) + else: + # Just supporting two plots for now + if x_range is not None: + for i, ys in enumerate(y): + ax.hist(ys, color=colors_rgb[i], label=labels[i], bins=15, range=[ + x_range[0], x_range[1]], histtype='step', normed=1, alpha=0.5) else: - for i,ys in enumerate(y): - ax.hist(ys,color=colors_rgb[i],label=labels[i],bins=15,histtype='step',normed=1, alpha=0.5) - ax.legend(frameon=False,fontsize=11) - if axis <> None: - ax.set_xlabel(axis[0]) - else: + for i, ys in enumerate(y): + ax.hist( + ys, + color=colors_rgb[i], + label=labels[i], + bins=15, + histtype='step', + normed=1, + alpha=0.5) + ax.legend(frameon=False, fontsize=11) + if axis is not None: + ax.set_xlabel(axis[0]) + else: ax.set_xlabel('x') - ax.set_ylabel('Count') - if marker == True: + ax.set_ylabel('count') + if marker: plt.axvline(marker_value, color='black') - else: + elif type=='': if len(y) == 1: - ax.plot(x,y[0],'b') - ax.annotate('fit min 1.5285', xy=(1.52857,0),xytext=(1.8,200.),arrowprops=dict(facecolor='red')) + ax.plot(x, y[0], 'b') + ax.annotate( + 'fit min 1.5285', xy=( + 1.52857, 0), xytext=( + 1.8, 200.), arrowprops=dict( + facecolor='red')) else: - #Just supporting two plots for now - linestyles=['--','--'] - markers = ['+','x'] - for k,ys in enumerate(y): - #ax.plot(x,ys,colors[k],label=labels[k],linestyle=linestyles[k],marker=markers[k]) - ax.plot(x,ys,colors[k],label=labels[k]) - ax.legend(frameon=False,fontsize=11) - if axis <> None: - ax.set_ylabel(axis[1]) - ax.set_xlabel(axis[0]) + # Just supporting two plots for now + linestyles = ['--', '--'] + markers = ['+', 'x'] + for k, ys in enumerate(y): + ax.plot(x, ys, colors[k], label=labels[k]) + ax.legend(frameon=False, fontsize=11) + if axis is not None: + ax.set_ylabel(axis[1]) + ax.set_xlabel(axis[0]) else: - ax.set_ylabel('LR') - ax.set_xlabel('x') - if marker == True: - plt.axvline(marker_value, color='black') - ax.set_title(title) - #if (len(y) > 1): - # This breaks the naming convention for plots, I will solve - # it later - # for i,l in enumerate(labels): - # np.savetxt('{0}/plots/{1}/results/{2}_{3}.txt'.format(dir,model_g,file,l),y[i]) - #else: - # np.savetxt('{0}/plots/{1}/results/{2}.txt'.format(dir,model_g,file),y[0]) - if print_pdf == True: - fig.savefig('{0}/plots/{1}/{2}.pdf'.format(dir,model_g,file)) - 
fig.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,file)) - plt.close(fig) - plt.clf() + ax.set_ylabel('LR') + ax.set_xlabel('x') + if marker: + plt.axvline(marker_value, color='black') + + plt.tight_layout() + if print_pdf: + fig.savefig('{0}/plots/{1}/{2}.pdf'.format(dir, model_g, file)) + fig.savefig('{0}/plots/{1}/{2}.png'.format(dir, model_g, file)) + plt.close(fig) + plt.clf() def saveMultiFig(x,y,file,labels=None, dir='/afs/cern.ch/user/j/jpavezse/systematics', model_g='mlp',title='',print_pdf=False): - plot_num = 311 - for k,ys in enumerate(y): - # Fix later - plt.subplot(plot_num) - plt.plot(x,ys[0],'b-',label=labels[k][0]) - plt.plot(x,ys[1],'r-',label=labels[k][1]) - if k == len(y)-1: - plt.xlabel('x',fontsize=11) - plt.ylabel('Ratio',fontsize=11) - plt.tick_params(axis='both', labelsize=10) - plt.legend(frameon=False, fontsize=11) - if plot_num == 311: - plt.title('{0}'.format(title)) - plot_num = plot_num + 1 - if print_pdf == True: - plt.savefig('{0}/plots/{1}/{2}.pdf'.format(dir,model_g,file)) - plt.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,file)) - plt.close() - plt.clf() - - -def saveFig3D(x,y,z,file,labels=None,scatter=False, - dir='/afs/cern.ch/user/j/jpavezse/systematics', - model_g='mlp',axis=None): - fig = plt.figure() - ax = fig.add_subplot(111, projection='3d') - if scatter == True: - if len(z) == 1: - ax.scatter(x,y,z[0],s=2) - ax.set_xlabel(axis[0]) - ax.set_ylabel(axis[1]) - ax.set_zlabel(axis[2]) - else: - sc1 = ax.scatter(x,y,z[0],color='black') - sc2 = ax.scatter(x,y,z[1],color='red') - ax.legend((sc1,sc2),(labels[0],labels[1])) - ax.set_xlabel('x') - ax.set_ylabel('y') - ax.set_zlabel('regression(score)') - else: - if len(z) == 1: - ax.plot_wireframe(x,y,z[0],color='red') - else: - #Just supporting two plots for now - ax.plot_wireframe(x,y,z[0],color='red',label=labels[0]) - ax.plot_wireframe(x,y,z[1],color='blue',label=labels[1]) - ax.legend() - ax.set_zlabel('LR') - ax.set_ylabel('y') - ax.set_xlabel('x') - ax.set_title(file) - if (len(z) > 1): - # This breaks the naming convention for plots, I will solve - # it later - for i,l in enumerate(labels): - np.savetxt('{0}/plots/{1}/{2}_{3}.txt'.format(dir,model_g,file,l),z[i]) - else: - np.savetxt('{0}/plots/{1}/{2}.txt'.format(dir,model_g,file),z[0]) - fig.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,file)) - plt.close(fig) - plt.clf() - - -def makeMultiROC(all_outputs, targets, label, - dir='/afs/cern.ch/user/j/jpavezse/systematics',model_g='mlp', - true_score=None,print_pdf=False,title='',pos=[(0,1),(0,2),(1,2)]): - ''' - make plots for ROC curve of classifier and - test data. 
- ''' - plot_n = 311 - fprs = [] - tprs = [] - fpr_trues = [] - tpr_trues = [] - roc_aucs = [] - roc_auc_trues = [] - for k,(outputs,target) in enumerate(zip(all_outputs,targets)): - fpr, tpr, _ = roc_curve(target.ravel(),outputs.ravel()) - fprs.append(fpr) - tprs.append(tpr) + plot_num = 311 + for k,ys in enumerate(y): + plt.subplot(plot_num) + plt.plot(x,ys[0],'b-',label=labels[k][0]) + plt.plot(x,ys[1],'r-',label=labels[k][1]) + if k == len(y)-1: + plt.xlabel('x',fontsize=11) + plt.ylabel('Ratio',fontsize=11) + plt.tick_params(axis='both', labelsize=10) + plt.legend(frameon=False, fontsize=11) + if plot_num == 311: + plt.title('{0}'.format(title)) + plot_num = plot_num + 1 + if print_pdf == True: + plt.savefig('{0}/plots/{1}/{2}.pdf'.format(dir,model_g,file)) + plt.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,file)) + plt.close() + plt.clf() + +def makeMultiROC( + all_outputs, + targets, + label, + dir='/afs/cern.ch/user/j/jpavezse/systematics', + model_g='mlp', + true_score=None, + print_pdf=False, + title='', + pos=[ + (0, + 1), + (0, + 2), + (1, + 2)]): + ''' + make plots for ROC curve of classifier and + test data. + ''' + plot_n = 311 + fprs = [] + tprs = [] + fpr_trues = [] + tpr_trues = [] + roc_aucs = [] + roc_auc_trues = [] + for k, (outputs, target) in enumerate(zip(all_outputs, targets)): + fpr, tpr, _ = roc_curve(target.ravel(), outputs.ravel()) + fprs.append(fpr) + tprs.append(tpr) + roc_auc = auc(fpr, tpr) + roc_aucs.append(roc_auc) + if true_score is not None: + fpr_true, tpr_true, _ = roc_curve( + target.ravel(), true_score[k].ravel()) + fpr_trues.append(fpr_true) + tpr_trues.append(tpr_true) + roc_auc_true = auc(fpr_true, tpr_true) + roc_auc_trues.append(roc_auc_true) + plt.subplot(plot_n) + plt.plot(fprs[-1], tprs[-1], 'b-', + label='ROC curve trained (area = %0.2f)' % roc_aucs[-1]) + if true_score is not None: + plt.plot(fpr_trues[-1], tpr_trues[-1], 'r-', + label='ROC curve true (area = %0.2f)' % roc_auc_trues[-1]) + plt.plot([0, 1], [0, 1], 'k--') + plt.xlim([0.0, 1.0]) + plt.ylim([0.0, 1.05]) + if k == len(targets) - 1: + plt.xlabel('Sensitivity', fontsize=11) + plt.ylabel('1-Specificity', fontsize=11) + plt.tick_params(axis='both', labelsize=10) + if plot_n == 311: + plt.title('{0}'.format(title)) + plt.legend(loc="lower right", frameon=False, fontsize=11) + plt.text(0.62, 0.42, 'f{0}-f{1}'.format(pos[k][0], pos[k][1])) + plot_n = plot_n + 1 + plt.savefig('{0}/plots/{1}/{2}.png'.format(dir, model_g, label)) + if print_pdf: + plt.savefig('{0}/plots/{1}/{2}.pdf'.format(dir, model_g, label)) + plt.close() + plt.clf() + + +def makeROC(outputs, target, label, + dir='/afs/cern.ch/user/j/jpavezse/systematics', model_g='mlp'): + ''' + make plots for ROC curve of classifier and + test data. 
+ ''' + fpr, tpr, _ = roc_curve(target.ravel(), outputs.ravel()) roc_auc = auc(fpr, tpr) - roc_aucs.append(roc_auc) - if true_score <> None: - fpr_true, tpr_true, _ = roc_curve(target.ravel(),true_score[k].ravel()) - fpr_trues.append(fpr_true) - tpr_trues.append(tpr_true) - roc_auc_true = auc(fpr_true, tpr_true) - roc_auc_trues.append(roc_auc_true) - plt.subplot(plot_n) - plt.plot(fprs[-1], tprs[-1],'b-', label='ROC curve trained (area = %0.2f)' % roc_aucs[-1]) - if true_score <> None: - plt.plot(fpr_trues[-1], tpr_trues[-1],'r-', label='ROC curve true (area = %0.2f)' % roc_auc_trues[-1]) + fig = plt.figure() + plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc) plt.plot([0, 1], [0, 1], 'k--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) - if k == len(targets)-1: - plt.xlabel('Sensitivity',fontsize=11) - plt.ylabel('1-Specificity',fontsize=11) - plt.tick_params(axis='both', labelsize=10) - if plot_n == 311: - plt.title('{0}'.format(title)) - plt.legend(loc="lower right",frameon=False,fontsize=11) - plt.text(0.62,0.42,'f{0}-f{1}'.format(pos[k][0],pos[k][1])) - plot_n = plot_n + 1 - #np.savetxt('{0}/plots/{1}/results/{2}.txt'.format(dir,model_g,label),np.column_stack((fpr,tpr))) - plt.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,label)) - if print_pdf == True: - plt.savefig('{0}/plots/{1}/{2}.pdf'.format(dir,model_g,label)) - plt.close() - plt.clf() - -def makeROC(outputs, target, label, - dir='/afs/cern.ch/user/j/jpavezse/systematics',model_g='mlp'): - ''' - make plots for ROC curve of classifier and - test data. - ''' - fpr, tpr, _ = roc_curve(target.ravel(),outputs.ravel()) - roc_auc = auc(fpr, tpr) - fig = plt.figure() - plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc) - plt.plot([0, 1], [0, 1], 'k--') - plt.xlim([0.0, 1.0]) - plt.ylim([0.0, 1.05]) - plt.xlabel('False Positive Rate') - plt.ylabel('True Positive Rate') - plt.title('{0}'.format(label)) - plt.legend(loc="lower right") - plt.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,label)) - plt.plot() - #plt.close(fig) - #plt.clf() + plt.xlabel('False Positive Rate') + plt.ylabel('True Positive Rate') + plt.title('{0}'.format(label)) + plt.legend(loc="lower right") + plt.savefig('{0}/plots/{1}/{2}.png'.format(dir, model_g, label)) + plt.plot() def makeSigBkg(all_outputs, targets, label, - dir='/afs/cern.ch/user/j/jpavezse/systematics',model_g='mlp', - print_pdf=False,legends=None, title=''): - ''' - make plots for ROC curve of classifier and - test data. 
- ''' - print 'Signal-Background plots' - tprs = [] - fnrs = [] - aucs = [] - thresholds = np.linspace(0,1.0,150) - fig = plt.figure() - for k,(outputs,target) in enumerate(zip(all_outputs,targets)): - fnrs.append(np.array([float(np.sum((outputs > tr) * (target == 0)))/float(np.sum(target == 0)) for tr in thresholds])) - fnrs[-1] = fnrs[-1].ravel() - tprs.append(np.array([float(np.sum((outputs < tr) * (target == 1)))/float(np.sum(target == 1)) for tr in thresholds])) - tprs[-1] = tprs[-1].ravel() - aucs.append(auc(tprs[-1],fnrs[-1])) - plt.plot(tprs[-1], fnrs[-1], label='{0} (area = {1:.2f})'.format( - legends[k],aucs[-1])) - plt.xlim([0.0, 1.0]) - plt.ylim([0.0, 1.05]) - plt.xlabel('Signal Efficiency',fontsize=11) - plt.ylabel('Background Rejection',fontsize=11) - plt.tick_params(axis='both', labelsize=10) - plt.title('{0}'.format(title)) - plt.legend(loc="lower left",frameon=False, fontsize=11) - #np.savetxt('{0}/plots/{1}/results/{2}.txt'.format(dir,model_g,label),np.column_stack((tpr,fnr))) - plt.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,label)) - if print_pdf == True: - plt.savefig('{0}/plots/{1}/{2}.pdf'.format(dir,model_g,label)) - plt.close(fig) - plt.clf() - - -def plotCValues(c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics', - c1_g='',model_g='mlp',true_dist=False,vars_g=None, - workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root', - use_log=False, n_hist=150,c_eval=0, range_min=-1.0,range_max=0.): - - ''' - Plot histogram of fitted values - ''' - if use_log == True: - post = 'log' - else: - post = '' - - keys = ['true','dec'] - c1_ = dict((key,np.zeros(n_hist)) for key in keys) - c1_1 = np.loadtxt('{0}/fitting_values_c1.txt'.format(dir)) - c1_['true'] = c1_1[:,0] - c1_['dec'] = c1_1[:,1] - if true_dist == True: - vals = [c1_['true'],c1_['dec']] - labels = ['true','dec'] - else: - vals = c1_['dec'] - vals1 = c1_1[:,3] - labels = ['dec'] - - size = min(vals.shape[0],vals1.shape[0]) - # 1D - #saveFig([],[vals1], - # makePlotName('g2','train',type='hist_g1g2'),hist=True, - # axis=['g2'],marker=True,marker_value=c1[c_eval], - # labels=labels,x_range=[range_min,range_max],dir=dir, - # model_g=model_g,title='Histogram for fitted g2', print_pdf=True) - # 2D - saveFig([],[vals,vals1], - makePlotName('g1g2','train',type='hist'),hist=True,hist2D=True, - axis=['g1','g2'],marker=True,marker_value=c1, - labels=labels,dir=dir,model_g=model_g,title='2D Histogram for fitted g1,g2', print_pdf=True, - x_range=[[0.5,1.4],[1.1,1.9]]) - - - -def getWeights(g_1=1.,g_2=1.5): - #Set the basis - basis_g1 = np.array((1.,1.,1.,1.,0.)) - basis_g2 = np.array((0.,2.,1.,3.,1.)) - - #basis_g1 = np.array((0.,1.,1.,1.,1.)) - #basis_g2 = np.array((1.,0.,1.,2.,3.)) - - #define the formulae - g1_t4 = lambda x,y: x*x*x*x + 0*y - g1_t2_g2_t2 = lambda x,y: x*x*y*y - g1_t3_g2 = lambda x,y: x*x*x*y - g1_g2_t3 = lambda x,y: y*y*y*x - g2_t4 = lambda x,y: y*y*y*y + 0*x - - a = np.zeros((5,5)) - - for i in range(5): - a[0,i] = g1_t4(basis_g1[i],basis_g2[i]) - a[2,i] = g1_t2_g2_t2(basis_g1[i],basis_g2[i]) - a[1,i] = g1_t3_g2(basis_g1[i],basis_g2[i]) - a[3,i] = g1_g2_t3(basis_g1[i],basis_g2[i]) - a[4,i] = g2_t4(basis_g1[i],basis_g2[i]) - - #print a - - b = np.linalg.inv(a) - #print b - - weight = np.zeros(5) - #identify the weight - - for j in range(5): - weight[j] = b[j,0]*g1_t4(g_1,g_2) + b[j,1]*g1_t3_g2(g_1,g_2) + b[j,2]*g1_t2_g2_t2(g_1,g_2) + b[j,3]*g1_g2_t3(g_1,g_2) + b[j,4]*g2_t4(g_1,g_2) - - return weight + dir='/afs/cern.ch/user/j/jpavezse/systematics', model_g='mlp', + print_pdf=False, 
legends=None, title=''): + ''' + make plots for ROC curve of classifier and + test data. + ''' + print 'Signal-Background plots' + tprs = [] + fnrs = [] + aucs = [] + thresholds = np.linspace(0, 1.0, 150) + fig = plt.figure() + for k, (outputs, target) in enumerate(zip(all_outputs, targets)): + fnrs.append(np.array([float(np.sum((outputs > tr) * + (target == 0))) / + float(np.sum(target == 0)) for tr in thresholds])) + fnrs[-1] = fnrs[-1].ravel() + tprs.append(np.array([float(np.sum((outputs < tr) * + (target == 1))) / + float(np.sum(target == 1)) for tr in thresholds])) + tprs[-1] = tprs[-1].ravel() + aucs.append(auc(tprs[-1], fnrs[-1])) + plt.plot(tprs[-1], fnrs[-1], label='{0} (area = {1:.2f})'.format( + legends[k], aucs[-1])) + plt.xlim([0.0, 1.0]) + plt.ylim([0.0, 1.05]) + plt.xlabel('signal efficiency', fontsize=11) + plt.ylabel('background rejection', fontsize=11) + plt.tick_params(axis='both', labelsize=10) + plt.legend(loc="lower left", frameon=False, fontsize=11) + plt.tight_layout() + plt.savefig('{0}/plots/{1}/{2}.png'.format(dir, model_g, label)) + if print_pdf: + plt.savefig('{0}/plots/{1}/{2}.pdf'.format(dir, model_g, label)) + plt.close(fig) + plt.clf() + diff --git a/xgboost_wrapper.py b/xgboost_wrapper.py index f5a7a11..9e6028c 100644 --- a/xgboost_wrapper.py +++ b/xgboost_wrapper.py @@ -1,7 +1,7 @@ - + import sys import math - + import numpy as np @@ -11,38 +11,47 @@ import xgboost as xgb import pdb - + + def logloss(y_true, Y_pred): - return -1 * sum(math.log(y[int(label)]) if y[int(label)] > 0 else -np.inf for y, label in zip(Y_pred, y_true)) / len(Y_pred) + return -1 * sum(math.log(y[int(label)]) if y[int(label)] > 0 else - + np.inf for y, label in zip(Y_pred, y_true)) / len(Y_pred) + class XGBoostClassifier(): - def __init__(self, num_boost_round=10, **params): + + def __init__(self, num_boost_round=10, missing=0., **params): self.clf = None + self.missing = missing self.num_boost_round = num_boost_round self.params = params self.params.update({'objective': 'multi:softprob'}) - - def fit(self, X, y, num_boost_round=None): + + def fit(self, X, y, num_boost_round=None, weight=None): num_boost_round = num_boost_round or self.num_boost_round - dtrain = xgb.DMatrix(X, label=y) - self.clf = xgb.train(params=self.params, dtrain=dtrain, num_boost_round=num_boost_round) - + dtrain = xgb.DMatrix(X, label=y, missing=self.missing, weight=weight) + self.clf = xgb.train( + params=self.params, + dtrain=dtrain, + num_boost_round=num_boost_round, + verbose_eval=True) + def predict(self, X): Y = self.predict_proba(X) y = np.argmax(Y, axis=1) return np.array(y) - + def predict_proba(self, X): - dtest = xgb.DMatrix(X) + dtest = xgb.DMatrix(X, missing=self.missing) return self.clf.predict(dtest) - + def score(self, X, y): Y = self.predict_proba(X) return 1 / logloss(y, Y) - + def get_params(self, deep=True): return self.params - + def set_params(self, **params): if 'num_boost_round' in params: self.num_boost_round = params.pop('num_boost_round') @@ -50,4 +59,7 @@ def set_params(self, **params): del params['objective'] self.params.update(params) return self - + + def plot_importance_matrix(self, vars_names): + pdb.set_trace() + xgb.plot_importance(self.clf)
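
Usage note (not part of the patch): a minimal sketch of how the XGBoostClassifier
wrapper above is expected to be driven after this change. The toy data, the
per-event weights and the booster parameters below are illustrative assumptions,
not values taken from the repository; 'num_class' is passed because the wrapper
forces the 'multi:softprob' objective, which requires it.

    import numpy as np
    from xgboost_wrapper import XGBoostClassifier

    X = np.random.rand(1000, 10)           # toy 10D feature vectors
    y = np.random.randint(0, 2, 1000)      # binary targets (0 = background, 1 = signal)
    w = np.ones(1000)                      # optional per-event weights, new in fit()

    clf = XGBoostClassifier(num_boost_round=50, missing=-999.,
                            eta=0.1, max_depth=4, num_class=2)
    clf.fit(X, y, weight=w)
    proba = clf.predict_proba(X)[:, 1]     # multi:softprob returns one column per class
    labels = clf.predict(X)                # argmax over the class probabilities

The 'missing' value set in the constructor is forwarded to both the training and
the prediction DMatrix, so the same sentinel should be used when the feature
arrays are produced.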