diff --git a/Dockerfile.py3 b/Dockerfile.py3
new file mode 100644
index 000000000..7e6eaa9af
--- /dev/null
+++ b/Dockerfile.py3
@@ -0,0 +1,119 @@
+FROM metabrainz/python:3.7 AS acousticbrainz-sklearn
+
+# Dockerize (the downloaded tarball is deleted after extraction so that it is not kept in the image layer)
+ENV DOCKERIZE_VERSION v0.6.1
+RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
+    && tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz && rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz
+
+# Install system dependencies. NOTE(review): python-dev and python-numpy* look like Python 2 packages while the base image is tagged python:3.7 -- confirm they are needed
+# Hadolint DL4006: run the RUN commands with pipefail so that a failure on the left side of "wget | bash" fails the build
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+# NodeSource 12.x repository setup and every apt package share one layer; the apt lists are removed at the end of it
+RUN wget -q -O - https://deb.nodesource.com/setup_12.x | bash - && apt-get update \
+    && apt-get install -y --no-install-recommends \
+                       build-essential \
+                       ca-certificates \
+                       git \
+                       ipython \
+                       libavcodec-dev \
+                       libavformat-dev \
+                       libavutil-dev \
+                       libavresample-dev \
+                       libffi-dev \
+                       libfftw3-dev \
+                       libpq-dev \
+                       libsamplerate0-dev \
+                       libqt4-dev \
+                       libssl-dev \
+                       libtag1-dev \
+                       libxml2-dev \
+                       libxslt1-dev \
+                       libyaml-dev \
+                       nodejs \
+                       pkg-config \
+                       pxz \
+                       python-dev \
+                       python-numpy-dev \
+                       python-numpy \
+                       swig2.0 \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN mkdir /code
+RUN mkdir /data
+WORKDIR /code
+# Application user and group, both created with the fixed id 901; /code is handed over to that user below
+RUN groupadd --gid 901 acousticbrainz
+RUN useradd --create-home --shell /bin/bash --uid 901 --gid 901 acousticbrainz
+
+RUN chown acousticbrainz:acousticbrainz /code
+
+# Python dependencies: only the requirement files are copied here (docs/requirements.txt is presumably referenced by requirements.txt -- TODO confirm)
+RUN mkdir /code/docs/ && chown acousticbrainz:acousticbrainz /code/docs/
+COPY --chown=acousticbrainz:acousticbrainz docs/requirements.txt /code/docs/requirements.txt
+COPY --chown=acousticbrainz:acousticbrainz requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Python dependencies for sklearn, installed from the requirements file of the sklearn models package
+COPY --chown=acousticbrainz:acousticbrainz acousticbrainz/models/sklearn/requirements.txt /code/acousticbrainz/models/sklearn/requirements.txt
+RUN pip install --no-cache-dir -r /code/acousticbrainz/models/sklearn/requirements.txt
+
+# Development image: the sklearn image plus the development-only Python requirements
+FROM acousticbrainz-sklearn AS acousticbrainz-dev
+
+COPY --chown=acousticbrainz:acousticbrainz requirements_development.txt /code/requirements_development.txt
+RUN pip install --no-cache-dir -r requirements_development.txt
+
+
+# The code is not copied to the dev image because it is provided by a volume mount
+# during development. The tests need it inside the image, so the test stage adds it here.
+FROM acousticbrainz-dev AS acousticbrainz-test
+
+COPY . /code
+
+# Production image: the sklearn image plus uWSGI, the consul-template configuration, runit services, cron jobs and the npm production build
+FROM acousticbrainz-sklearn AS acousticbrainz-prod
+USER root
+
+RUN pip install --no-cache-dir uWSGI==2.0.17.1
+
+RUN mkdir /cache_namespaces && chown -R acousticbrainz:acousticbrainz /cache_namespaces
+
+# Consul template service is already set up (presumably by the base image -- TODO confirm), only its configuration is copied
+COPY ./docker/consul-template.conf /etc/consul-template.conf
+
+# runit service files
+# All services are created with a `down` file, which prevents them from starting.
+# rc.local removes the `down` file of the specific service that should run in a container.
+# http://smarden.org/runit/runsv.8.html
+
+# uwsgi service files
+COPY ./docker/uwsgi/uwsgi.service /etc/service/uwsgi/run
+COPY ./docker/uwsgi/uwsgi.ini /etc/uwsgi/uwsgi.ini
+RUN touch /etc/service/uwsgi/down
+
+# hl_extractor service files
+COPY ./docker/hl_extractor/hl_extractor.service /etc/service/hl_extractor/run
+RUN touch /etc/service/hl_extractor/down
+
+# dataset evaluator service files
+COPY ./docker/dataset_eval/dataset_eval.service /etc/service/dataset_eval/run
+RUN touch /etc/service/dataset_eval/down
+
+# Add cron jobs (the cron service gets a `down` file as well, like the services above)
+COPY docker/crontab /etc/cron.d/acousticbrainz
+RUN chmod 0644 /etc/cron.d/acousticbrainz
+RUN touch /etc/service/cron/down
+
+COPY ./docker/rc.local /etc/rc.local
+# package.json is copied before the rest of the code, so the `npm install` layer only depends on that file
+COPY --chown=acousticbrainz:acousticbrainz package.json /code
+
+USER acousticbrainz
+RUN npm install
+
+COPY --chown=acousticbrainz:acousticbrainz . /code
+
+RUN npm run build:prod
+
+# Our entrypoint runs as root, so switch back from the application user
+USER root
diff --git a/acousticbrainz/__init__.py b/acousticbrainz/__init__.py
new file mode 100644
index 000000000..40a96afc6
--- /dev/null
+++ b/acousticbrainz/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/acousticbrainz/models/__init__.py b/acousticbrainz/models/__init__.py
new file mode 100644
index 000000000..40a96afc6
--- /dev/null
+++ b/acousticbrainz/models/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/acousticbrainz/models/sklearn/README.md b/acousticbrainz/models/sklearn/README.md
new file mode 100644
index 000000000..3287d075c
--- /dev/null
+++ b/acousticbrainz/models/sklearn/README.md
@@ -0,0 +1,243 @@
+# Machine Learning Infrastructure with scikit-learn (GSoC 2020)
+
+This folder contains the tool that is built for training SVM models of 
+AcousticBrainz's datasets, as well as predicting where a single AcousticBrainz 
+track instance can be classified based on the trained models. It is part of the 
+*Google Summer of Code 2020* in collaboration with the **MetaBrainz** Open-Source 
+organization.
+
+Given a dataset, a Grid Search algorithm using n-fold cross-validation is executed 
+for an exhaustive search over specified parameter values for an estimator.
+
+A final model is trained with all the data (without a validation set) featuring 
+the best parameter combination in terms of accuracy.
+
+Finally, a prediction functionality is part of the tool, which gives the user the 
+capability of predicting where a track instance is classified based on a trained model.
+
+
+## Functionalities
+
+### Train
+The main model training function is the `create_classification_project` which is located in
+the `model.classification_project.py` Python script. It can be imported as a module. 
+It requires a path to the dataset directory that contains sub-folders 
+composed of the groundtruth yaml file/s (tracks, tracks paths, labels, target class), and
+the features (low-level data) in JSON format.
+
+```
+create_classification_project()
+
+Generates a model trained using descriptor files specified in the groundtruth yaml file.
+
+positional parameters:
+groundtruth             Path of the main dataset directory containing the 
+                        groundtruth yaml file/s. (required)
+
+file                    Name under which the project configuration file (.yaml) will be stored. 
+                        If not specified, it is automatically named <project_CLASS_NAME>.
+
+exportsdir              Name of the exports directory in which the project's results 
+                        will be stored (best model, grid models, transformation 
+                        pipelines, folded and shuffled dataset).
+
+path                    Path where the project results will be stored. If empty,
+                        the results will be saved in the main app directory.
+
+optional parameters:
+
+c_values                The C values parameter (list) for the SVM Grid Search 
+                        (e.g. [-2, 3, 5, 10]). If None, the values specified in the
+                        configuration template are used.
+
+gamma_values            The gamma values parameter (list) for the SVM Grid Search 
+                        (e.g. [ 3, 1, -1, -3]). If None, the values specified in the
+                        configuration template are used.
+
+preprocessing_values    The preprocessing values parameter (list) for the 
+                        SVM Grid Search. They must be one or more of the following list: 
+                        ["basic", "lowlevel", "nobands", "normalized", "gaussianized"]
+                        If None, the values specified in the
+                        configuration template are used.
+
+logging                 The logging level (int) that will be printed (0: DEBUG, 1: INFO, 
+                        2: WARNING, 3: ERROR, 4: CRITICAL). Can be set only in the
+                        prescribed integer values (0, 1, 2, 3, 4)
+
+seed                    Seed (int) is used to generate the random shuffled dataset 
+                        applied later to folding. If no seed is specified, the seed
+                        will be automatically set to current clock value.
+
+jobs                    Parallel jobs (int). Set a value of cores to be used.
+                        The default is -1, which means that all the available cores
+                        will be used.
+  
+verbose                 Controls the verbosity (int) of the Grid Search print messages
+                        on the console: the higher, the more messages.
+```
+
+For example, a dataset path directory structure could be like this one:
+
+    dataset (e.g. danceability)
+    |- features
+    |  |-happy
+    |  |  |- 1.json
+    |  |  |- 2.json
+    |  |  |- 3.json
+    |  |  |- 4.json
+    |  |-sad
+    |  |  |- 1.json
+    |  |  |- 2.json
+    |  |  |- 3.json
+    |- metadata
+    |  |- groundtruth.yaml
+    
+The tool will train a model with 2 classes (happy, sad), with 4 and 3 files in each class, respectively.
+
+The tool generates a `.yaml` project file in the path and exports directory that are either specified 
+by the arguments or chosen automatically by the tool itself. This project file contains information about the 
+preprocessing steps that are followed through the training process, as well as the path and directory
+where the results of the model training will be stored.
+
+
+### How the Training mode works
+
+There are several steps which are followed in the training phase. First of all, the project 
+configuration template file is loaded. Then, based on the arguments that are passed to the 
+`create_classification_project` function call, the `ListGroundTruthFiles` class searches for 
+the available `.yaml` file/s which contain the target class and the *groundtruth* data. These files 
+are inside the specified dataset directory.
+
+Afterwards, for each target class, the following actions take place inside the 
+`train_class` function:
+
+1. It starts with the `GroundTruthLoad` class that loads the *groundtruth* data from the related `.yaml` file. By
+   using its included methods, the shuffled tracks with their labels (in tuples) and the target class are 
+   exported with `export_gt_tracks()` and `export_train_class()`, respectively. The shuffled 
+   dataset is also exported and saved locally in `.csv` format. A logger object is also set up and the logging
+   results are exported into the relevant `.log` file.
+
+2. It creates a project configuration file based on the specified paths for the exported results, as well as
+   a relevant directory that these results will be stored to. The training model results comprise:
+
+3. The `DatasetExporter` class is used then to load the tracks' features and exports them in a `pandas DataFrame`. 
+   The tracks and the labels are also exported in separate `NumPy arrays` too.
+
+4. The `ClassificationTaskManager` class is invoked which is used for extracting the different classification tasks
+   that are specified in the configuration file. This is done by calling the `TrainingProcesses` class, which reads 
+   the configuration file, and extracts the available training processes in a list. Each item of the list is 
+   composed of a Python dictionary that comprises the evaluation that will take place with: a) the classifier used, 
+   b) the preprocess steps (features selection, scaling type, etc.), c) the k-fold cross-validation (number of folds), 
+   and finally, d) the combination parameters that a Grid Search algorithm will use to find the best model that will 
+   be assigned to the classifier.
+   
+5. For each evaluation, the `ClassificationTask` class is used. The class loads the list of process dictionaries, with 
+   their corresponding training steps as described above that contain also the features with their labels, as well as 
+   the specified in the configuration file classifier that will be used for training the model.
+   
+6. The whole specified classification task (i.e. the preprocessing, the training of the model for the selected 
+   features, and the evaluation) takes place inside the `ClassificationTask` class. The `TrainGridClassifier` is
+   responsible for the classifier training by using a Grid Search algorithm which, in our case loads a 
+   `Support Vector Machines` Machine Learning model from sklearn with a grid of parameters. 
+   
+7. For each preprocessing step, the `Transform` class is responsible for doing the appropriate preprocess, like the
+   data cleaning, the features selection, the enumeration, and the scaling, when it is available. For each 
+   preprocessing step, the corresponding transformation pipeline (in `.pkl` format) is extracted and saved locally 
+   for later use in the predictions mode. 
+   
+8. The transformed features data is loaded then to the `train_grid` function where the training of the model takes place. 
+   The results of the training phase are extracted by using the `save_grid_results` function. Such results are the best 
+   parameters that did best in each training phase (i.e. in each training step), as well as the best model from this 
+   training step which is saved locally in `.pkl` format. Finally, the best extracted
+   models from each training process are compared and the best one is chosen. The information about the best model 
+   parameters, with the preprocess step that was followed are exported and saved in a `.json` file locally, and 
+   include:
+   * Best model's score, the parameters, the preprocess (data cleaning, features selection, enumeration, scaling), 
+   and the number of folds that the dataset was split into through the cross-validation training procedure.
+   
+9. The `evaluation` function is used to evaluate the best model and the relevant reports are
+   exported. The best model and the corresponding preprocessing step pipeline are loaded, and a k-fold 
+   cross-validation training takes place. The results from this process are:
+   * A `.yaml` file that contains the track instances and the fold in which each of them was classified.
+   * A `.csv` file that includes the tracks, the prediction that took place in the relevant fold, the true label, 
+   and the probability that the classifier's decision function gave to each class prediction. 
+   * The plot that depicts the accuracy score delivered from each fold training.
+   * A `.txt` file that contains detailed information about each fold's training score, the *mean* of all the 
+   accuracies exported from each fold, as well as the *standard deviation* of these accuracies.
+   * The `.txt` files that contain the confusion matrix and the classification report of the cross-validation
+   training.
+
+10. Finally, the `evaluation` function executes a training to the whole dataset by using the best model that is 
+    extracted from the grid search algorithm. After applying predictions to the whole dataset, the related `.txt` 
+    files with the confusion matrix and the classification report are exported and saved locally to the disk. The 
+    trained model, after this training phase is saved locally in `.pkl` format for later use from the 
+    predictions mode of the tool.
+   
+
+
+### Predict
+
+The `model.predict.py` script contains the `prediction` function. This function can be used by 
+importing it in a separate script and calling it with its corresponding parameters. The 
+project `.yaml` file with the project's configuration metadata is a required field in the function's
+parameters, as well as the **MBID** of the track whose class is to be predicted by the trained
+model. The MBID is the MusicBrainz ID, which is the unique ID of the track
+stored in the MusicBrainz and AcousticBrainz database. For example, the following link:
+* https://acousticbrainz.org/232b8e6e-0aa5-4310-8df3-583047af3126
+has the MBID: `232b8e6e-0aa5-4310-8df3-583047af3126`
+
+This is the only necessary information for the related argument of the `prediction` function to
+make the relevant classification.
+
+```
+$ python predict.py --help
+usage: predict.py [-h] [--path] [--file] [--track] [--logging]
+
+positional arguments:
+path                    Path where the project file (.yaml) is stored (required).
+
+file                    Name of the project configuration file (.yaml) that 
+                        is to be loaded. (required)
+                        The .yaml at the end of the file is not necessary. 
+                        Just put the name of the file.
+
+track                   MBID of the low-level data from the AcousticBrainz API.
+                        (required)
+
+optional arguments:
+
+logging                 The logging level (int) that will be printed (0: DEBUG, 1: INFO, 
+                        2: WARNING, 3: ERROR, 4: CRITICAL). Can be set only in the
+                        prescribed integer values (0, 1, 2, 3, 4)
+```
+
+### How the Predictions mode works
+
+The function and the class that are used in this phase are `prediction` and `Predict`, respectively. The steps 
+that are followed in this mode are:
+
+1. The `prediction` function loads the project configuration file that was created by the training of the 
+   corresponding model. This `.yaml` file includes all the relevant information about the paths that the 
+   trained model and the preprocessing pipelines were saved to (in `.pkl` format).
+
+2. Then, by using the MBID that was inserted as an argument, it downloads the low-level data from AcousticBrainz API, 
+   using the `requests` library.
+
+3. The data, which are in JSON format are then loaded to the `Predict` class, with the built model's configuration 
+   data (training results' location, etc.).
+   
+4. The `Predict` class loads the best model's JSON file that was saved from the training mode, and checks the 
+   preprocessing step that resulted in the best model.
+   
+5. After checking which was the preprocessing step that was specified inside the best model's metadata, the
+   `TransformPredictions` class is invoked and does the necessary data transformation by loading the corresponding 
+   preprocessing pipeline that was saved in `.pkl` format during the training mode.
+   
+6. After that, it loads the best trained model that was saved in `.pkl` format.
+
+7. It does the prediction.
+
+8. It returns a dictionary that includes:
+   * the predicted class
+   * the score of the predicted class
+   * the probabilities of each class, which the model used to decide the class of the track.
diff --git a/acousticbrainz/models/sklearn/__init__.py b/acousticbrainz/models/sklearn/__init__.py
new file mode 100644
index 000000000..40a96afc6
--- /dev/null
+++ b/acousticbrainz/models/sklearn/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/acousticbrainz/models/sklearn/classification/__init__.py b/acousticbrainz/models/sklearn/classification/__init__.py
new file mode 100644
index 000000000..40a96afc6
--- /dev/null
+++ b/acousticbrainz/models/sklearn/classification/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/acousticbrainz/models/sklearn/classification/classification_task.py b/acousticbrainz/models/sklearn/classification/classification_task.py
new file mode 100644
index 000000000..773281bf0
--- /dev/null
+++ b/acousticbrainz/models/sklearn/classification/classification_task.py
@@ -0,0 +1,79 @@
+import os
+import json
+from ..classification.classifier_grid import TrainGridClassifier
+from ..classification.evaluation import evaluation
+
+
+class ClassificationTask:
+    """
+    Core of the model classification: it trains one classifier for one target class and then
+    evaluates the best model that the training exported.
+
+    The training is delegated to the trainer that matches the "train_kind" configuration key
+    (only "grid" is available); the evaluation reads its settings from the best-model JSON file.
+    """
+    def __init__(self, config, classifier, train_class, training_processes, X, y, exports_path, tracks, logger):
+        """
+        Args:
+            config: The configuration data that contain the settings from the configuration
+                template with the parsed arguments in classification project.
+            classifier: The classifier name (e.g. svm) that is declared in the classifiers
+                list of the configuration data.
+            train_class: The class name that is defined in the groundtruth yaml file. It is
+                actually the model that will be trained.
+            training_processes: The training processes (list) where each item of the list
+                contains the set of parameters that will be used in the classifier:
+                (Evaluation, classifier, preprocess, kernel, C, gamma, balanceClasses, n_fold)
+            X: The features (pandas DataFrame) of the exported data from the DatasetExporter class
+            y: The labels (NumPy array) of the target class
+            exports_path: Path to where the classification project's results will be stored to.
+            tracks: The tracks (numpy.ndarray) that are exported from the Groundtruth file.
+            logger: The logger object that receives the messages of the task.
+        """
+        self.config = config
+        self.classifier = classifier
+        self.train_class = train_class
+
+        self.X = X
+        self.y = y
+        self.training_processes = training_processes
+        self.exports_path = exports_path
+        self.tracks = tracks
+        self.logger = logger
+
+
+    def run(self):
+        """
+        Train the classifier, load the exported best model and evaluate it.
+
+        Raises:
+            ValueError: If config["train_kind"] is not "grid". Stopping here avoids reporting a
+                successful training and evaluating a stale or missing best-model file.
+        """
+        train_kind = self.config["train_kind"]
+        if train_kind != "grid":
+            self.logger.error("Not a valid train_kind: {}".format(train_kind))
+            raise ValueError("The train_kind must be valid.")
+
+        # grid search train
+        self.logger.info("Train Classifier: Classifier with GridSearchCV")
+        grid_svm_train = TrainGridClassifier(config=self.config, classifier=self.classifier,
+                                             class_name=self.train_class, X=self.X, y=self.y,
+                                             tr_processes=self.training_processes,
+                                             exports_path=self.exports_path, logger=self.logger)
+        grid_svm_train.train_grid_search_clf()
+        grid_svm_train.export_best_classifier()
+        self.logger.info("Training the classifier is completed successfully.")
+
+        # load best model to check its parameters
+        self.logger.debug("Loading the Best Model..")
+        best_model_name = "best_model_{}.json".format(self.train_class)
+        with open(os.path.join(self.exports_path, best_model_name)) as best_model_file:
+            best_model = json.load(best_model_file)
+        self.logger.debug("BEST MODEL: {}".format(best_model))
+
+        # evaluation, with the fold count and the preprocessing step of the best model
+        evaluation(config=self.config, n_fold=best_model["n_fold"], X=self.X, y=self.y,
+                   class_name=self.train_class, tracks=self.tracks,
+                   process=best_model["preprocessing"], exports_path=self.exports_path,
+                   logger=self.logger)
diff --git a/acousticbrainz/models/sklearn/classification/classification_task_manager.py b/acousticbrainz/models/sklearn/classification/classification_task_manager.py
new file mode 100644
index 000000000..0248c621b
--- /dev/null
+++ b/acousticbrainz/models/sklearn/classification/classification_task_manager.py
@@ -0,0 +1,128 @@
+import os
+from time import time
+from termcolor import colored
+from datetime import datetime
+
+from ..helper_functions.utils import create_directory, extract_training_processes
+from ..classification.classification_task import ClassificationTask
+
+
+validClassifiers = ["svm", "NN"]  # classifier names accepted by config_file_analysis(); NOTE(review): TrainGridClassifier only builds "svm" -- confirm "NN"
+validEvaluations = ["nfoldcrossvalidation"]  # evaluation names accepted by config_file_analysis()
+
+
+class ClassificationTaskManager:
+    """
+    Manages the classification tasks that the configuration describes.
+
+    On construction it makes sure the export sub-directories exist and checks the
+    classifier and evaluation names of the configuration. `apply_processing` then extracts
+    the training processes (evaluation steps with their preprocessing and classifier
+    parameters) and runs one classification task per configured classifier.
+    """
+    def __init__(self, config, train_class, X, y, tracks, exports_path, logger):
+        """
+        Args:
+            config: The configuration data. It is accessed by key and can receive a default
+                "evaluations" entry from `config_file_analysis`.
+            train_class: The class that will be trained.
+            X: The already shuffled data that contain the features.
+            y: The already shuffled data that contain the labels.
+            tracks: The tracks, passed on unchanged to every classification task.
+            exports_path: The path under which the export sub-directories are created.
+            logger: The logger object that receives the info and error messages.
+        """
+        self.config, self.train_class = config, train_class
+        self.X, self.y, self.tracks = X, y, tracks
+        self.exports_path, self.logger = exports_path, logger
+
+        # placeholders until files_existence() stores the real paths
+        self.results_path = self.logs_path = self.tracks_path = self.dataset_path = ""
+        self.models_path = self.images_path = self.reports_path = ""
+
+        self.files_existence()
+        self.config_file_analysis()
+
+    def files_existence(self):
+        """
+        Ensure that all the export folders exist before the training process starts and keep
+        the value returned by `create_directory` for each of them on the instance.
+        """
+        export_folders = (
+            ("results_path", "results"),  # train results exports
+            ("logs_path", "logs"),
+            ("tracks_path", "tracks_csv_format"),
+            ("dataset_path", "dataset"),
+            ("models_path", "models"),
+            ("images_path", "images"),
+            ("reports_path", "reports"),
+        )
+        for attribute_name, folder_name in export_folders:
+            setattr(self, attribute_name, create_directory(self.exports_path, folder_name))
+
+    def config_file_analysis(self):
+        """
+        Check whether the keys of the configuration are set up correctly.
+
+        A missing "processing" key is only logged; a missing "evaluations" key is logged and
+        replaced in the configuration by a 10-fold cross-validation default.
+
+        Raises:
+            ValueError: If a configured classifier or evaluation name is not a valid one.
+        """
+        report = self.logger
+        report.info("---- CHECK FOR INAPPROPRIATE CONFIG FILE FORMAT ----")
+        if "processing" not in self.config:
+            report.error("No preprocessing defined in config.")
+
+        if "evaluations" not in self.config:
+            report.error("No evaluations defined in config.")
+            report.error("Setting default evaluation to 10-fold cross-validation")
+            self.config["evaluations"] = {"nfoldcrossvalidation": [{"nfold": [10]}]}
+
+        # (config section, accepted names, log message, exception message)
+        name_checks = (
+            ("classifiers", validClassifiers, "Not a valid classifier: {}", "The classifier name must be valid."),
+            ("evaluations", validEvaluations, "Not a valid evaluation: {}", "The evaluation must be valid."),
+        )
+        for section, accepted_names, log_message, error_message in name_checks:
+            for name in self.config[section].keys():
+                if name not in accepted_names:
+                    report.error(log_message.format(name))
+                    raise ValueError(error_message)
+        report.info("No errors in config file format found.")
+
+    def apply_processing(self):
+        """
+        Extract the training processes and run a classification task for each classifier.
+
+        A task that raises is reported through the logger and the console, and the loop
+        goes on with the next classifier.
+
+        Returns:
+            The duration of the whole processing in minutes, rounded to two decimals.
+        """
+        started_at = time()
+        training_processes = extract_training_processes(self.config)
+        self.logger.info("Classifiers detected: {}".format(self.config["classifiers"].keys()))
+        for classifier in self.config["classifiers"].keys():
+            print("Before Classification task: ", classifier)
+            task = ClassificationTask(config=self.config, classifier=classifier,
+                                      train_class=self.train_class,
+                                      training_processes=training_processes,
+                                      X=self.X, y=self.y, exports_path=self.exports_path,
+                                      tracks=self.tracks, logger=self.logger)
+            try:
+                task.run()
+            except Exception as e:
+                failure_message = 'Running task failed: {}'.format(e)
+                self.logger.error(failure_message)
+                print(colored(failure_message, "red"))
+        finished_at = time()
+
+        print()
+        print(colored("Last evaluation took place at: {}".format(datetime.now()), "magenta"))
+        self.logger.info("Last evaluation took place at: {}".format(datetime.now()))
+
+        # test duration in minutes
+        return round((finished_at - started_at) / 60, 2)
diff --git a/acousticbrainz/models/sklearn/classification/classifier_grid.py b/acousticbrainz/models/sklearn/classification/classifier_grid.py
new file mode 100644
index 000000000..5c36ded43
--- /dev/null
+++ b/acousticbrainz/models/sklearn/classification/classifier_grid.py
@@ -0,0 +1,161 @@
+import os
+import json
+from termcolor import colored
+import joblib
+from sklearn.model_selection import GridSearchCV
+from sklearn.svm import SVC
+from sklearn.model_selection import KFold
+
+from ..transformation.transform import Transform
+
+
+class TrainGridClassifier:
+    """Grid-search trainer that runs every training process and exports the best result.
+
+    The constructor only stores its arguments; call train_grid_search_clf() and then
+    export_best_classifier(). Only the "svm" classifier name is implemented.
+    """
+
+    def __init__(self, config, classifier, class_name, X, y, tr_processes, exports_path, logger):
+        self.config = config
+        self.classifier = classifier
+        self.class_name = class_name
+        self.X = X
+        self.y = y
+        self.tr_processes = tr_processes
+        self.exports_path = exports_path
+
+        self.logger = logger
+        # one result dict (as returned by save_grid_results) per finished training process
+        self.best_models_list = []
+
+    def train_grid_search_clf(self):
+        """Fit a grid search for each training process and collect the per-process results.
+
+        For every entry of self.tr_processes the features are transformed with that entry's
+        "preprocess" step, train_grid() is fitted, and save_grid_results() writes the outcome
+        under <exports_path>/results and <exports_path>/models. The returned result dict is
+        appended to self.best_models_list.
+
+        Raises:
+            ValueError: if self.classifier is not "svm".
+        """
+        results_path = os.path.join(self.exports_path, "results")
+        models_path = os.path.join(self.exports_path, "models")
+        for process_counter, tr_process in enumerate(self.tr_processes, start=1):
+            print(colored("Train process {} - {}".format(process_counter, tr_process), "green"))
+            self.logger.info("(Grid) - Train process {} - {}".format(process_counter, tr_process))
+            # initiate SVM classifier object
+            if self.classifier == "svm":
+                grid_clf = SVC(gamma="auto", probability=True)
+            # TODO: different classifier object (e.g. random forests, knn, etc) can be initiated here
+            else:
+                raise ValueError('The classifier name must be valid.')
+
+            print("CLASSIFIER", tr_process["classifier"])
+            # transformation of the data
+            features_prepared = Transform(config=self.config, df_feats=self.X,
+                                          process=tr_process["preprocess"], train_class=self.class_name,
+                                          exports_path=self.exports_path, logger=self.logger).post_processing()
+
+            # train the grid classifier and return the trained model
+            gsvc = train_grid(tr_process=tr_process, grid_clf=grid_clf,
+                              features_prepared=features_prepared, y=self.y,
+                              config=self.config, logger=self.logger)
+
+            # save the results of this train process; the returned dictionary contains
+            # the best score, the best params, the number of folds, and the preprocessing step
+            best_process_model_path = os.path.join(models_path, "model_grid_{}.pkl".format(tr_process["preprocess"]))
+            results_dict = save_grid_results(gsvc=gsvc, class_name=self.class_name, tr_process=tr_process,
+                                             results_path=results_path,
+                                             best_process_model_path=best_process_model_path,
+                                             logger=self.logger)
+            self.best_models_list.append(results_dict)
+
+            print(colored("Next train process..", "yellow"))
+            print()
+            print()
+        print(colored("Finishing training processes..", "blue"))
+        print()
+
+    def export_best_classifier(self):
+        """Write the top-scoring result dict(s) to best_model_<class_name>.json under exports_path.
+        Raises ValueError if self.best_models_list is empty (no training process was recorded)."""
+        if not self.best_models_list:
+            raise ValueError("No trained models available: run train_grid_search_clf() first.")
+        max_score = max(x["score"] for x in self.best_models_list)  # computed once, not per model
+        self.logger.info("This is the max score of all the training processes: {}".format(max_score))
+        best_model_path = os.path.join(self.exports_path, "best_model_{}.json".format(self.class_name))
+        # on a tie every top model is written in turn, so the last one is what stays on disk
+        for model in (m for m in self.best_models_list if m["score"] == max_score):
+            self.logger.info("Best {} model parameters:".format(self.class_name))
+            self.logger.info("{}".format(model))
+            with open(best_model_path, "w") as best_model:
+                json.dump(model, best_model, indent=4)
+            self.logger.info("Best {} model parameters saved successfully to disk.".format(self.class_name))
+
+
+def train_grid(tr_process, grid_clf, features_prepared, y, config, logger):
+    """Run a K-Fold cross-validated grid search for one training process.
+
+    The grid comes from the "kernel", "C", "gamma" and "balance_classes" entries of
+    tr_process; the folds are seeded with config["seed"] only when
+    config["k_fold_shuffle"] is True. Returns the fitted GridSearchCV object.
+    """
+    # hyper-parameter space explored by the grid search
+    search_space = {
+        'kernel': tr_process["kernel"],
+        'C': tr_process["C"],
+        'gamma': tr_process["gamma"],
+        'class_weight': tr_process["balance_classes"],
+    }
+
+    # seed the folds only when shuffling is switched on
+    do_shuffle = config["k_fold_shuffle"]
+    fold_seed = config["seed"] if do_shuffle is True else None
+    logger.info("Fitting the data to the classifier with K-Fold cross-validation..")
+    folds = KFold(n_splits=tr_process["n_fold"], shuffle=do_shuffle, random_state=fold_seed)
+
+    searcher = GridSearchCV(estimator=grid_clf,
+                            param_grid=search_space,
+                            cv=folds,
+                            n_jobs=config["parallel_jobs"],
+                            verbose=config["verbose"])
+
+    logger.debug("Shape of X before train: {}".format(features_prepared.shape))
+    logger.info("Fitting the data to the model..")
+    searcher.fit(features_prepared, y)
+
+    logger.info("Results from each best preprocess training:")
+    logger.info("a) Best score: {}".format(searcher.best_score_))
+    logger.info("b) Best estimator: {}".format(searcher.best_estimator_))
+    logger.info("c) Best parameters: {}".format(searcher.best_params_))
+    evaluated = len(searcher.cv_results_["params"])
+    logger.info("Counted evaluations in this GridSearch process: {}".format(evaluated))
+
+    return searcher
+
+
+def save_grid_results(gsvc, class_name, tr_process, results_path, best_process_model_path, logger):
+    """Persist the outcome of one grid search and return its summary dictionary.
+
+    Two JSON files go into results_path (best score/params summary and the evaluated
+    parameter sets) and gsvc.best_estimator_ is dumped to best_process_model_path.
+    """
+    preprocess = tr_process["preprocess"]
+    summary = {"score": gsvc.best_score_, "params": gsvc.best_params_,
+               "n_fold": tr_process['n_fold'], "preprocessing": preprocess}
+
+    summary_file = "result_{}_{}_best_{}.json".format(class_name, preprocess, gsvc.best_score_)
+    with open(os.path.join(results_path, summary_file), 'w') as summary_fp:
+        json.dump(summary, summary_fp, indent=4)
+
+    # every parameter combination the grid search evaluated in this training step
+    params_file = "result_{}_{}_params_{}.json".format(class_name, preprocess, gsvc.best_score_)
+    with open(os.path.join(results_path, params_file), 'w') as params_fp:
+        json.dump(gsvc.cv_results_["params"], params_fp, indent=0)
+
+    joblib.dump(gsvc.best_estimator_, best_process_model_path)
+    logger.info("Grid Best model for the {} process saved.".format(preprocess))
+
+    return summary
diff --git a/acousticbrainz/models/sklearn/classification/evaluation.py b/acousticbrainz/models/sklearn/classification/evaluation.py
new file mode 100644
index 000000000..a26f5bf88
--- /dev/null
+++ b/acousticbrainz/models/sklearn/classification/evaluation.py
@@ -0,0 +1,428 @@
+import os
+import json
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+from termcolor import colored
+import yaml
+from sklearn.model_selection import KFold
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import confusion_matrix, classification_report
+import joblib
+
+from ..transformation.transform import Transform
+from ..classification.report_files_export import export_report
+from ..classification.matrix_creation import matrix_creation, simplified_matrix_export
+
+
+def evaluation(config, n_fold, X, y, class_name, tracks, process, exports_path, logger):
+    """Evaluate the best grid model with K-Fold predictions, then refit it on the whole dataset.
+
+    Loads models/model_grid_<process>.pkl from exports_path, exports the fold predictions,
+    accuracies, confusion matrices and reports below exports_path, and finally saves the
+    classifier fitted on all the data as best_clf_model.pkl. Nothing is returned.
+    """
+    print(colored("------ EVALUATION and FOLDING ------", "yellow"))
+
+    logger.info("---- Folded evaluation of the model in the dataset ----")
+    logger.info("number of folds set to config: {}".format(n_fold))
+    logger.debug("Sample of shuffled tracks tracks:")
+    logger.debug("{}".format(tracks[:5]))
+    logger.debug("Tracks list length: {}".format(len(tracks)))
+
+    # save paths below the exports directory
+    dataset_path = os.path.join(exports_path, "dataset")
+    models_path = os.path.join(exports_path, "models")
+    images_path = os.path.join(exports_path, "images")
+
+    # load best model params and score data
+    load_best_model_params_score_path = os.path.join(exports_path, "best_model_{}.json".format(class_name))
+    with open(load_best_model_params_score_path) as model_params_score_file:
+        best_params_score_data = json.load(model_params_score_file)
+
+    logger.info("Best model preprocessing step: {}".format(process))
+    # load the saved classifier
+    clf = joblib.load(os.path.join(models_path, "model_grid_{}.pkl".format(process)))
+    logger.info("Best model loaded.")
+
+    # inner K-Fold cross-validation declaration; the seed is used only when shuffling is on
+    shuffle = config["k_fold_shuffle"]
+    random_seed = config["seed"] if shuffle is True else None
+    logger.info("Fitting the data to the classifier with K-Fold cross-validation..")
+    inner_cv = KFold(n_splits=n_fold,
+                     shuffle=shuffle,
+                     random_state=random_seed)
+
+    # transformation of the data to proper features based on the preprocess step
+    features_prepared = Transform(config=config,
+                                  df_feats=X,
+                                  process=process,
+                                  train_class=class_name,
+                                  exports_path=exports_path,
+                                  logger=logger).post_processing()
+    logger.debug("Features prepared shape: {}".format(features_prepared.shape))
+
+    # Starting Training, Predictions for each fold
+    logger.info("Starting fold-evaluation..")
+    predictions_df_list, accuracy_model, tracks_fold_indexing_dict = predictions_fold(clf=clf,
+                                                                                      inner_cv=inner_cv,
+                                                                                      feats_prepared=features_prepared,
+                                                                                      y=y,
+                                                                                      tracks=tracks,
+                                                                                      class_name=class_name,
+                                                                                      logger=logger)
+
+    # concatenate the folded predictions DFs
+    df_predictions = create_dataset_predictions(list_df_predictions=predictions_df_list,
+                                                class_name=class_name,
+                                                dataset_path=dataset_path,
+                                                logger=logger)
+
+    logger.debug("PRINT THE WHOLE GESTURES DF:\n{}".format(df_predictions))
+
+    # list of each column from the dataframe for the folded indexed tracks, y, and predictions
+    tracks_folded_list = df_predictions["track"].to_list()
+    y_folded_list = df_predictions[class_name].to_list()
+    pred_folded_list = df_predictions["predictions"].to_list()
+
+    # export the matrix dictionary from the folded dataset
+    folded_matrix_dict = matrix_creation(classes=clf.classes_,
+                                         tracks=tracks_folded_list,
+                                         y_actual=y_folded_list,
+                                         y_hat=pred_folded_list,
+                                         logger=logger,
+                                         export_save_path=exports_path,
+                                         export_name="folded_dataset_results_matrix.json")
+
+    # ACCURACIES for each fold
+    export_accuracies(accuracy_model=accuracy_model,
+                      config=config,
+                      class_name=class_name,
+                      exports_path=exports_path,
+                      images_path=images_path,
+                      logger=logger)
+
+    # Folded Tracks Dictionary --> export also the Folded instances dictionary
+    folded_instances_dict = export_folded_instances(tracks_fold_indexing_dict=tracks_fold_indexing_dict,
+                                                    class_name=class_name,
+                                                    dataset_path=dataset_path,
+                                                    logger=logger)
+
+    concat_save_model_instances_matrix_json(instances_dict=folded_instances_dict,
+                                            cm_dict=folded_matrix_dict,
+                                            exports_path=exports_path,
+                                            logger=logger,
+                                            export_name="folded_dataset_instances_cm.json")
+
+    simplified_cm = simplified_matrix_export(best_result_file="folded_dataset_results_matrix.json",
+                                             logger=logger,
+                                             export_save_path=exports_path,
+                                             export_name="folded_simplified_matrix.json",
+                                             write_mode=True)
+
+    logger.info("Simplified CM of the evaluated folded dataset:\n{}".format(simplified_cm))
+
+    # Evaluation to the folded Dataset
+    export_evaluation_results(config=config,
+                              set_name="Folded",
+                              y_true_values=df_predictions[class_name],
+                              predictions=df_predictions["predictions"],
+                              class_name=class_name,
+                              exports_path=exports_path,
+                              logger=logger)
+
+    # ---------- TRAIN TO THE WHOLE DATASET WITH THE BEST CLASSIFIER ----------
+    logger.info("Train the classifier with the whole dataset..")
+    clf.fit(features_prepared, y)
+    # prediction for the whole dataset
+    predictions_all = clf.predict(features_prepared)
+    # save the model that is trained to the whole dataset
+    best_model_path = os.path.join(exports_path, "best_clf_model.pkl")
+    joblib.dump(clf, best_model_path)
+    logger.info("Best model saved.")
+
+    # export the matrix dictionary from the whole dataset
+    # y is the ground truth and predictions_all the estimate, same orientation as the folded call
+    whole_matrix_dict = matrix_creation(classes=clf.classes_,
+                                        tracks=tracks,
+                                        y_actual=y,
+                                        y_hat=predictions_all,
+                                        logger=logger,
+                                        export_save_path=exports_path,
+                                        export_name="whole_dataset_results_matrix.json")
+
+    simplified_cm_whole = simplified_matrix_export(best_result_file="whole_dataset_results_matrix.json",
+                                                   logger=logger,
+                                                   export_save_path=exports_path,
+                                                   export_name="whole_dataset_cm_dict.json",
+                                                   write_mode=True)
+
+    logger.info("Simplified CM of the evaluated whole dataset:\n{}".format(simplified_cm_whole))
+
+    concat_save_model_instances_matrix_json(instances_dict=None,
+                                            cm_dict=whole_matrix_dict,
+                                            exports_path=exports_path,
+                                            logger=logger,
+                                            export_name="whole_dataset_instances_cm.json")
+
+    # Evaluation to the whole Dataset
+    export_evaluation_results(config=config,
+                              set_name="Whole",
+                              y_true_values=y,
+                              predictions=predictions_all,
+                              class_name=class_name,
+                              exports_path=exports_path,
+                              logger=logger)
+
+
+def concat_save_model_instances_matrix_json(instances_dict, cm_dict, exports_path, logger, export_name):
+    """
+    Save the best model's folded instances and confusion matrix dictionary merged into one dictionary
+
+    Args:
+        instances_dict: dictionary with the folded instances; when falsy (e.g. None) only cm_dict is saved
+        cm_dict: confusion matrix dictionary; on equal keys its values override those of instances_dict
+        exports_path: directory in which the JSON file is written
+        logger: logger that receives the final info message
+        export_name: name of the JSON file created inside exports_path
+
+    Returns:
+        None, the merged dictionary is only written to disk.
+    """
+    if instances_dict:
+        # in case of the folded dataset where folds exist
+        best_folds_cm_merge_dict = {**instances_dict, **cm_dict}
+    else:
+        # in case of the whole dataset where no folds exist
+        best_folds_cm_merge_dict = cm_dict
+
+    # Serializing json
+    json_object_folds_cm = json.dumps(best_folds_cm_merge_dict, indent=4)
+    # Writing to json
+    load_file_path = os.path.join(exports_path, export_name)
+    with open(load_file_path, "w") as outfile:
+        outfile.write(json_object_folds_cm)
+    logger.info("Whole folded instaces and matrix dictionary stored successfully.")
+
+
+def predictions_fold(clf, inner_cv, feats_prepared, y, tracks, class_name, logger):
+    """Fit clf on the training split of every fold and predict the matching test split.
+
+    Args:
+        clf: the classifier model object
+        inner_cv: the KFold object
+        feats_prepared: the features; indexed with the index arrays produced by inner_cv.split()
+        y: the true values; indexed with the same train/test index arrays
+        tracks: the track identifiers; tracks[i] belongs to sample i of feats_prepared
+        class_name: column name given to the true values in the per-fold DataFrames
+        logger: logger for the info/debug messages
+
+    Returns:
+        predictions_df_list: list with one predictions DataFrame per fold
+        accuracy_model: list with the accuracy of each fold, in percent
+        tracks_fold_indexing_dict: maps each track to the number of the fold it was tested in
+    """
+    tracks_fold_indexing_dict = {}
+    accuracy_model = []
+    predictions_df_list = []
+    fold_number = 0
+    for train_index, test_index in inner_cv.split(feats_prepared):
+        logger.info("FOLD {} - Analyzing, Fitting, Predicting".format(fold_number))
+        logger.debug("first test index element: {} - last test index element: {}".format(test_index[0], test_index[-1]))
+        logger.debug("TEST INDEX: {}".format(test_index))
+        logger.debug("Length of the test index array: {}".format(len(test_index)))
+
+        # tracks indexing list for each fold
+        tracks_count = 0
+        tracks_list = []
+        for index in test_index:
+            tracks_fold_indexing_dict[tracks[index]] = fold_number
+            tracks_list.append(tracks[index])
+            tracks_count += 1
+        logger.debug("Tracks indexed to the specific fold: {}".format(tracks_count))
+        X_train, X_test = feats_prepared[train_index], feats_prepared[test_index]
+        y_train, y_test = y[train_index], y[test_index]
+        # Train the model (the same clf object is re-fitted in every fold)
+        clf.fit(X_train, y_train)
+        logger.debug("Classifier classes: {}".format(clf.classes_))
+        # create a df for this fold with the predictions
+        df_pred_general = create_fold_predictions(clf=clf,
+                                                  class_name=class_name,
+                                                  X_test=X_test,
+                                                  test_index=test_index,
+                                                  tracks_list=tracks_list,
+                                                  y_test=y_test,
+                                                  logger=logger)
+        # Append the folded dataset to a list that will contain all the folded datasets
+        predictions_df_list.append(df_pred_general)
+        # Append the accuracy of this fold (as a percentage) to the list of all fold accuracies
+        accuracy_model.append(accuracy_score(y_test, clf.predict(X_test), normalize=True) * 100)
+        fold_number += 1
+
+    return predictions_df_list, accuracy_model, tracks_fold_indexing_dict
+
+
+def create_fold_predictions(clf, class_name, X_test, test_index, tracks_list, y_test, logger):
+    """
+    Creates a pandas DataFrame from each fold with the predictions in
+    order later to extract the shuffled dataset with the tracks, the percentage
+    of the prediction probability for each class, the prediction, and the true
+    value.
+
+    Args:
+        clf: fitted classifier; its predict(), predict_proba() and classes_ are used
+        class_name: column name given to the true values
+        X_test: features of the test samples of this fold
+        test_index: positions of the test samples; used as the index of every DataFrame built here
+        tracks_list: track identifiers of the test samples
+        y_test: true values of the test samples
+        logger: logger for the debug messages
+
+    Returns:
+        A pandas DataFrame with the predictions at each fold.
+    """
+    # predictions for the features test
+    pred = clf.predict(X_test)
+    # predictions numpy array transformation to pandas DF
+    df_pred = pd.DataFrame(data=pred, index=test_index, columns=["predictions"])
+    # predictions' probabilities
+    pred_prob = clf.predict_proba(X_test)
+    # predictions' probabilities transformation to pandas DF, one column per class in clf.classes_
+    df_pred_prob = pd.DataFrame(data=pred_prob, index=test_index, columns=clf.classes_)
+    # tracks list transformation to pandas DF
+    df_tracks = pd.DataFrame(data=tracks_list, index=test_index, columns=["track"])
+    logger.debug("\n{}".format(df_tracks.head()))
+    # y_test transformation to a single-column pandas DF named after class_name
+    y_test_series = pd.DataFrame(data=y_test, index=test_index, columns=[class_name])
+    # concatenate the 4 DFs above to 1 for saving the resulted dataset
+    # (tracks, predictions' probabilities, predictions, true)
+    logger.debug("Concatenating DF..")
+    df_pred_general = pd.concat([df_tracks, df_pred_prob, df_pred, y_test_series], axis=1, ignore_index=False)
+
+    return df_pred_general
+
+
+def export_accuracies(accuracy_model, config, class_name, exports_path, images_path, logger):
+    """Log the per-fold accuracies with their mean and standard deviation, then report and plot them.
+
+    Args:
+        accuracy_model: the accuracy values, one per fold
+        config: passed through to export_report()
+        class_name: passed to export_report() as train_class
+        exports_path: passed through to export_report()
+        images_path: passed to create_accuracies_dist_plot(), which saves the plot image there
+        logger: logger for the info messages
+
+    Returns:
+        None
+    """
+    logger.info("Accuracies in each fold: {}".format(accuracy_model))
+    logger.info("Mean of accuracies: {}".format(np.mean(accuracy_model)))
+    logger.info("Standard Deviation of accuracies: {}".format(np.std(accuracy_model)))
+    accuracies_export = "Accuracies in each fold: {} \nMean of accuracies: {} \nStandard Deviation of accuracies: {}" \
+        .format(accuracy_model, np.mean(accuracy_model), np.std(accuracy_model))
+    export_report(config=config,
+                  name="Accuracies results",
+                  report=accuracies_export,
+                  filename="accuracies_results_fold",
+                  train_class=class_name,
+                  exports_path=exports_path)
+
+    # Visualize accuracy for each iteration in a distribution plot
+    create_accuracies_dist_plot(accuracies_list=accuracy_model,
+                                images_path=images_path,
+                                logger=logger)
+
+
+def create_dataset_predictions(list_df_predictions, class_name, dataset_path, logger):
+    """Concatenate the per-fold prediction DataFrames and save them as one CSV file.
+
+    list_df_predictions is the list of per-fold DataFrames; the CSV is written to
+    <dataset_path>/predictions_<class_name>.csv. The DataFrame summary is sent to the
+    debug log. Returns the concatenated pandas DataFrame.
+    """
+    import io  # local import: only needed to capture the DataFrame.info() text
+    logger.info("Make Predictions DataFrame for all the folded instances together.")
+    df_concat_predictions = pd.concat(list_df_predictions)
+    logger.debug("\n{}".format(df_concat_predictions.head()))
+    # DataFrame.info() prints to its buffer and returns None, so capture the text explicitly
+    info_buffer = io.StringIO()
+    df_concat_predictions.info(buf=info_buffer)
+    logger.debug("Info:")
+    logger.debug("\n{}".format(info_buffer.getvalue()))
+    # save predictions df
+    logger.info("Saving the unified predictions DataFrame locally.")
+    df_concat_predictions.to_csv(os.path.join(dataset_path, "predictions_{}.csv".format(class_name)))
+
+    return df_concat_predictions
+
+
+def create_accuracies_dist_plot(accuracies_list, images_path, logger):
+    """Save a bar plot of the per-fold accuracies as accuracies_distribution.png in images_path."""
+    logger.info("Visualize accuracy for each iteration.")
+    # x-axis labels: Fold0, Fold1, ... one per accuracy value
+    fold_labels = ["Fold{}".format(position) for position, _ in enumerate(accuracies_list)]
+    logger.debug("Exporting accuracies distribution to plot file..")
+    scores_df = pd.DataFrame(accuracies_list, columns=['Scores'])
+    sns.set(style="white", rc={"lines.linewidth": 3})
+    sns.barplot(x=fold_labels, y="Scores", data=scores_df)
+    plot_file = os.path.join(images_path, "accuracies_distribution.png")
+    plt.savefig(plot_file)
+    # call sns.set() again without arguments, then close the current figure
+    sns.set()
+    plt.close()
+    logger.info("Plot saved successfully.")
+
+
+def export_folded_instances(tracks_fold_indexing_dict, class_name, dataset_path, logger):
+    """Store the fold dictionary as <class_name>.yaml and <class_name>.json in
+    dataset_path and return the written dictionary, {"fold": tracks_fold_indexing_dict}."""
+    logger.info("Writing Folded Tracks Dictionary locally to check where each track is folded..")
+    logger.debug("length of keys: {}".format(len(tracks_fold_indexing_dict.keys())))
+    fold_dict = {"fold": tracks_fold_indexing_dict}
+
+    # YAML copy
+    yaml_path = os.path.join(dataset_path, "{}.yaml".format(class_name))
+    with open(yaml_path, 'w') as yaml_file:
+        yaml.dump(fold_dict, yaml_file)
+
+    # JSON copy: serialized first, then written in one go
+    json_text = json.dumps(fold_dict, indent=4)
+    with open(os.path.join(dataset_path, "{}.json".format(class_name)), "w") as json_file:
+        json_file.write(json_text)
+
+    logger.info("Folded dataset written successfully to disk both in yaml and json format.")
+
+    return fold_dict
+
+
+def export_evaluation_results(config, set_name, y_true_values, predictions, class_name, exports_path, logger):
+    """Export the confusion matrix (raw and row-normalized) and the classification report.
+
+    Both reports go through export_report(); set_name labels the evaluated dataset in the
+    log messages, the report titles and the report file names.
+    """
+    logger.info("---- Evaluation to the {} dataset ----".format(set_name))
+    # Confusion Matrix
+    logger.info("Exporting Confusion Matrix applied to the {} dataset..".format(set_name))
+    cm = confusion_matrix(y_true=y_true_values, y_pred=predictions)
+    logger.info("\n{}".format(cm))
+    # Confusion Matrix Normalized: every row (true class) in percent of that row's total.
+    # keepdims=True keeps the row sums as a column vector, so row i is divided by row sum i.
+    logger.info("Exporting Normalized Confusion Matrix applied to the {} dataset..".format(set_name))
+    cm_normalized = cm.astype(float) / cm.sum(axis=1, keepdims=True) * 100
+    logger.info("\n{}".format(cm_normalized))
+    cm_all = "Actual instances\n{}\n\nNormalized\n{}".format(cm, cm_normalized)
+    # export the confusion matrix report
+    export_report(config=config, name="{} Data Confusion Matrix".format(set_name),
+                  report=cm_all, filename="confusion_matrix_{}".format(set_name),
+                  train_class=class_name, exports_path=exports_path)
+    # Classification Report
+    logger.info("Exporting Classification Report applied to the {} dataset..".format(set_name))
+    cr = classification_report(y_true=y_true_values, y_pred=predictions)
+    # export the classification report
+    export_report(config=config, name="{} Data Classification Report".format(set_name),
+                  report=cr, filename="classification_report_{}".format(set_name),
+                  train_class=class_name, exports_path=exports_path)
+    logger.info("The {} dataset has been evaluated successfully.".format(set_name))
diff --git a/acousticbrainz/models/sklearn/classification/matrix_creation.py b/acousticbrainz/models/sklearn/classification/matrix_creation.py
new file mode 100644
index 000000000..2c6f1e71f
--- /dev/null
+++ b/acousticbrainz/models/sklearn/classification/matrix_creation.py
@@ -0,0 +1,73 @@
+import os
+import json
+import numpy as np
+
+
+def matrix_creation(classes, tracks, y_actual, y_hat, logger, export_save_path, export_name):
+    """Group the tracks by actual class and predicted class and save the result as JSON.
+
+    Returns {"matrix": {class: {predicted: [tracks, ...]}}}; the same dictionary is written
+    to <export_save_path>/<export_name>.
+    """
+    logger.info("MATRIX DICTIONARY CREATION")
+    # classes numpy array to list conversion
+    logger.info("CLASSES BEFORE CONVERSION {}".format(type(classes)))
+    classes = classes.tolist()
+    logger.info("CLASSES AFTER CONVERSION: {}".format(type(classes)))
+    logger.info("CLASSES: {}".format(classes))
+
+    matrix_dict = {}
+    for pred_class in classes:
+        logger.info("Class process: {}".format(pred_class))
+        tracks_by_prediction = {}
+        for track, actual, pred in zip(tracks, y_actual, y_hat):
+            # plain ints instead of numpy integers for the comparisons and the dict keys
+            if isinstance(actual, (int, np.int64)):
+                actual = int(actual)
+            if isinstance(pred, (int, np.int64)):
+                pred = int(pred)
+            if not pred_class == actual:
+                continue
+            # a correct prediction is filed under the actual value, a wrong one under the prediction
+            bucket_key = actual if actual == pred else pred
+            tracks_by_prediction.setdefault(bucket_key, []).append(track)
+        matrix_dict[pred_class] = tracks_by_prediction
+    logger.info("Matrix classified..")
+    matrix_general_dict = {"matrix": matrix_dict}
+    logger.debug("The whole matrix dictionary:\n{}".format(matrix_general_dict))
+
+    # Serializing json
+    json_object = json.dumps(matrix_general_dict, indent=4)
+    # Writing the JSON file
+    load_file_path = os.path.join(export_save_path, export_name)
+    with open(load_file_path, "w") as outfile:
+        outfile.write(json_object)
+    logger.info("Best results matrix stored successfully.")
+
+    return matrix_general_dict
+
+
+def simplified_matrix_export(best_result_file, logger, export_save_path, export_name, write_mode=False):
+    """Reduce a saved matrix JSON file to the number of tracks per (class, predicted) cell.
+
+    Reads <export_save_path>/<best_result_file>; the counts are written to
+    <export_save_path>/<export_name> only when write_mode is True. Returns the counts dict.
+    """
+    source_path = os.path.join(export_save_path, best_result_file)
+    logger.info("load best model results from JSON format file")
+    with open(source_path) as source_file:
+        loaded = json.load(source_file)
+    full_matrix = loaded['matrix']
+    logger.info("Best model results loaded..")
+
+    simplified_cm = {
+        actual_key: {predicted_key: len(track_list) for predicted_key, track_list in row.items()}
+        for actual_key, row in full_matrix.items()
+    }
+    # export simplified matrix to JSON file
+    if write_mode is True:
+        json_text = json.dumps(simplified_cm, indent=4)
+        with open(os.path.join(export_save_path, export_name), "w") as target_file:
+            target_file.write(json_text)
+        logger.info("Best simplified matrix stored successfully.")
+    return simplified_cm
diff --git a/acousticbrainz/models/sklearn/classification/report_files_export.py b/acousticbrainz/models/sklearn/classification/report_files_export.py
new file mode 100644
index 000000000..9fef07aad
--- /dev/null
+++ b/acousticbrainz/models/sklearn/classification/report_files_export.py
@@ -0,0 +1,19 @@
+import os
+from datetime import datetime
+from termcolor import colored
+
+
+def export_report(config, name, report, filename, train_class, exports_path):
+    """Write a timestamped text report to <exports_path>/reports/<filename>.txt.
+
+    The file holds the execution date, the report name and the report body, separated
+    by blank lines. config is accepted but not used here.
+    """
+    report_file = os.path.join(exports_path, "reports", "{}.txt".format(filename))
+    # current datetime in ISO format, taken before the file is opened
+    timestamp = datetime.now().isoformat()
+    print("Creating report file..")
+    sections = ["Date of execution: {}".format(timestamp), str(name), str(report)]
+    with open(report_file, 'w+') as fp:
+        fp.write("\n\n".join(sections))
+    print(colored("{} file for class {} is created successfully.".format(name, train_class), "cyan"))
diff --git a/acousticbrainz/models/sklearn/classification/train_class.py b/acousticbrainz/models/sklearn/classification/train_class.py
new file mode 100644
index 000000000..15296f221
--- /dev/null
+++ b/acousticbrainz/models/sklearn/classification/train_class.py
@@ -0,0 +1,90 @@
+import os
+from termcolor import colored
+import yaml
+
+from ..helper_functions.logging_tool import setup_logger
+from ..transformation.load_ground_truth import load_local_ground_truth, export_gt_tracks, create_df_tracks
+from ..classification.classification_task_manager import ClassificationTaskManager
+
+
+def train_class(config, gt_file, c_values, gamma_values, preprocessing_values, log_level):
+    """Train the classifier for the class described by a ground-truth file.
+
+    Loads and shuffles the ground-truth tracks, merges the user-supplied SVM
+    grid overrides into ``config``, saves the resulting project file as YAML
+    under ``config["exports_path"]``, builds the features/labels/tracks and
+    runs ``ClassificationTaskManager.apply_processing()``.
+
+    Args:
+        config: Project configuration dict; ``class_name`` is set on it here.
+        gt_file: Path to the ground-truth YAML file.
+        c_values: Override for the SVM ``C`` grid; falsy keeps the configured one.
+        gamma_values: Override for the SVM ``gamma`` grid; falsy keeps the configured one.
+        preprocessing_values: Override for the preprocessing list; falsy keeps the configured one.
+        log_level: Logging level handed to ``setup_logger``.
+    """
+    exports_path = config["exports_path"]
+    ground_truth_data = load_local_ground_truth(gt_file)
+    # tracks shuffled and exported
+    tracks_listed_shuffled = export_gt_tracks(ground_truth_data, config.get("seed"))
+
+    # class to train
+    class_name = ground_truth_data["className"]
+    config["class_name"] = class_name
+    print("EXPORT CLASS NAME: {}".format(class_name))
+
+    config = update_parameters(config=config, c_values=c_values, gamma_values=gamma_values,
+                               preprocessing_values=preprocessing_values)
+
+    logger = setup_logger(exports_path=exports_path, name="train_model_{}".format(class_name),
+                          mode="w", level=log_level)
+    logger.info("---- TRAINING FOR THE {} MODEL HAS JUST STARTED ----".format(class_name))
+    logger.debug("Type of exported GT data exported: {}".format(type(tracks_listed_shuffled)))
+
+    # name the project file: "project_<class>" unless the caller chose a name
+    if config["project_file"] is None:
+        project_file_name_save = "project_{}.yaml".format(class_name)
+    else:
+        project_file_name_save = "{}.yaml".format(config["project_file"])
+    logger.info("Project yaml file name: {}".format(project_file_name_save))
+    # save the project file (yaml.dump writes to the stream; its return value is not needed)
+    with open(os.path.join(exports_path, project_file_name_save), "w") as template_file:
+        yaml.dump(config, template_file)
+
+    print("First N sample of shuffled tracks: \n{}".format(tracks_listed_shuffled[:4]))
+
+    # create the exports with the features DF, labels, and tracks together
+    features, labels, tracks = create_df_tracks(config=config, tracks_list=tracks_listed_shuffled,
+                                               train_class=class_name, exports_path=exports_path,
+                                               logger=logger)
+    logger.debug("Types of exported files from GT:")
+    logger.debug("Type of features: {}".format(type(features)))
+    logger.debug("Type of labels: {}".format(type(labels)))
+    logger.debug("Type of Tracks: {}".format(type(tracks)))
+
+    model_manage = ClassificationTaskManager(config=config, train_class=class_name,
+                                             X=features, y=labels, tracks=tracks,
+                                             exports_path=exports_path, logger=logger)
+    classification_time = model_manage.apply_processing()
+    print(colored("Classification ended successfully in {} minutes.".format(classification_time), "green"))
+    logger.info("Classification ended successfully in {} minutes.".format(classification_time))
+
+
+def update_parameters(config, c_values, gamma_values, preprocessing_values):
+    """Overlay the user-provided grid preferences on every SVM classifier entry.
+
+    Args:
+        config: The config data; its ``classifiers.svm`` entries are modified in place.
+        c_values: Replacement for the ``C`` entry, ignored when falsy.
+        gamma_values: Replacement for the ``gamma`` entry, ignored when falsy.
+        preprocessing_values: Replacement for the ``preprocessing`` entry, ignored when falsy.
+
+    Returns:
+        The same ``config`` object that was passed in.
+    """
+    candidates = (("C", c_values), ("gamma", gamma_values), ("preprocessing", preprocessing_values))
+    # keep only the overrides the user actually supplied
+    overrides = {key: value for key, value in candidates if value}
+    for svm_entry in config['classifiers']['svm']:
+        svm_entry.update(overrides)
+    return config
diff --git a/acousticbrainz/models/sklearn/helper_functions/__init__.py b/acousticbrainz/models/sklearn/helper_functions/__init__.py
new file mode 100644
index 000000000..40a96afc6
--- /dev/null
+++ b/acousticbrainz/models/sklearn/helper_functions/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/acousticbrainz/models/sklearn/helper_functions/logging_tool.py b/acousticbrainz/models/sklearn/helper_functions/logging_tool.py
new file mode 100644
index 000000000..0c764de0e
--- /dev/null
+++ b/acousticbrainz/models/sklearn/helper_functions/logging_tool.py
@@ -0,0 +1,57 @@
+"""
+This file consists of the setup_logger method that is used for logging. The setup_logger()
+method sets up a new logger object with the related configurations.
+
+Typical usage example:
+    logger = setup_logger(exports_path, logger_name, mode, level_of_logging)
+"""
+import logging
+import os
+
+from acousticbrainz.models.sklearn.helper_functions.utils import create_directory
+
+
+def setup_logger(exports_path, name, mode, level=logging.INFO):
+    """
+    Set up (or reconfigure) the logger called `name`. It logs to the console and to
+    the file <exports_path>/logs/<name>.log, creating the logs directory when needed.
+
+    Args:
+        exports_path: The path (str) the logging exports will be exported.
+        name: The name (str) of the logger.
+        mode: The mode (str) translated in write, append. Valid values ("w", "a")
+        level: The level (int or str) of the logging. Defaults to logging.INFO; None also means INFO.
+
+    Returns:
+        The logger object.
+    """
+    logs_path = create_directory(exports_path, "logs")
+
+    # Create a custom logger
+    logger = logging.getLogger(name)
+
+    # Handlers left over from an earlier call for the same name are closed and detached first:
+    # clearing the list alone would leave the previously opened log file open.
+    for old_handler in list(logger.handlers):
+        logger.removeHandler(old_handler)
+        old_handler.close()
+
+    # Create handlers
+    c_handler = logging.StreamHandler()
+    f_handler = logging.FileHandler(os.path.join(logs_path, "{}.log".format(name)), mode=mode)
+
+    # Create formatters and add it to handlers
+    c_handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s'))
+    f_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+
+    # Add handlers to the logger
+    logger.addHandler(c_handler)
+    logger.addHandler(f_handler)
+
+    # an explicit None falls back to INFO
+    if level is None:
+        logger.setLevel(logging.INFO)
+    else:
+        logger.setLevel(level)
+
+    return logger
diff --git a/acousticbrainz/models/sklearn/helper_functions/utils.py b/acousticbrainz/models/sklearn/helper_functions/utils.py
new file mode 100644
index 000000000..af08a7f86
--- /dev/null
+++ b/acousticbrainz/models/sklearn/helper_functions/utils.py
@@ -0,0 +1,90 @@
+import os
+import yaml
+
+
+def load_yaml(path_to_file, file):
+    """Load a YAML file and return its content when the top level is a mapping.
+
+    Args:
+        path_to_file: Directory (str) that contains the file.
+        file: Name (str) of the YAML file inside that directory.
+
+    Returns:
+        The loaded dict, or None when the top level is not a mapping (e.g. an empty file).
+
+    Raises:
+        OSError: if the file cannot be opened.
+        yaml.YAMLError: if the content is not valid YAML.
+    """
+    # NOTE(review): FullLoader is meant for trusted files; prefer yaml.safe_load for external input.
+    with open(os.path.join(path_to_file, file)) as fp:
+        config_data = yaml.load(fp, Loader=yaml.FullLoader)
+    return config_data if isinstance(config_data, dict) else None
+
+
+def create_directory(exports_path, directory):
+    """Return `directory` joined onto `exports_path`, creating it (and any parents) if missing."""
+    target_dir = os.path.join(exports_path, directory)
+    if not os.path.isdir(target_dir):
+        os.makedirs(target_dir, exist_ok=True)
+    return target_dir
+
+
+def change_weights_val(i):
+    """
+    Translate one entry of the configuration's balance_classes list into the
+    value used in the classifier's parameter grid:
+        * True --> "balanced"
+        * False --> None
+    Args:
+        i: The value inserted
+    Returns:
+        "balanced" if the value is exactly True, None if it is exactly False,
+        and the value itself, unchanged, for anything else.
+    """
+    # identity checks on purpose: 1 and 0 compare equal to True/False but must pass through
+    if i is True or i is False:
+        return "balanced" if i else None
+    return i
+
+
+def extract_training_processes(config):
+    """Expand the configuration into the flat list of trainings to run.
+
+    One entry is produced for every combination of evaluation, n-fold value,
+    SVM classifier entry, pre-processing name and classifier type; only the
+    "C-SVC" type is expanded, other types are skipped.
+
+    Args:
+        config: The project configuration (dict); reads
+            config["evaluations"]["nfoldcrossvalidation"] and config["classifiers"]["svm"].
+
+    Returns:
+        A list of dicts, each with the keys "evaluation" (index of the evaluation),
+        "classifier", "preprocess", "kernel" (lowercased), "C" and "gamma"
+        (2 ** configured exponent), "balance_classes" and "n_fold".
+    """
+    evaluations = config["evaluations"]["nfoldcrossvalidation"]
+    print("Evaluations countered: {}".format(len(evaluations)))
+    processes = []
+    for evaluation_index, evaluation in enumerate(evaluations):
+        for nfold_number in evaluation["nfold"]:
+            for classifier in config["classifiers"]["svm"]:
+                for pre_processing in classifier["preprocessing"]:
+                    # every supported type of this entry gets its own training
+                    for clf_type in classifier["type"]:
+                        if clf_type != "C-SVC":
+                            continue
+                        processes.append({
+                            "evaluation": evaluation_index,
+                            "classifier": clf_type,
+                            "preprocess": pre_processing,
+                            "kernel": [kernel.lower() for kernel in classifier["kernel"]],
+                            "C": [2 ** exponent for exponent in classifier["C"]],
+                            "gamma": [2 ** exponent for exponent in classifier["gamma"]],
+                            "balance_classes": [change_weights_val(flag) for flag in classifier["balance_classes"]],
+                            "n_fold": nfold_number,
+                        })
+
+    print("Trainings to be applied: {}".format(len(processes)))
+    return processes
diff --git a/acousticbrainz/models/sklearn/model/__init__.py b/acousticbrainz/models/sklearn/model/__init__.py
new file mode 100644
index 000000000..40a96afc6
--- /dev/null
+++ b/acousticbrainz/models/sklearn/model/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/acousticbrainz/models/sklearn/model/classification_project.py b/acousticbrainz/models/sklearn/model/classification_project.py
new file mode 100644
index 000000000..6ac5bb887
--- /dev/null
+++ b/acousticbrainz/models/sklearn/model/classification_project.py
@@ -0,0 +1,63 @@
+import os
+from ..helper_functions.utils import load_yaml
+import time
+from ..classification.train_class import train_class
+
+
+def create_classification_project(ground_truth_file, dataset_dir, project_file=None, exports_path=None,
+                                  c_values=None, gamma_values=None, preprocessing_values=None,
+                                  seed=None, jobs=-1, verbose=1, logging="INFO"):
+    """
+    Build the project configuration from the template and train the class of the ground-truth file.
+
+    Args:
+        ground_truth_file: The path (str) to the groundtruth yaml file of the dataset. It is required.
+        dataset_dir: The path to main datasets_dir containing the .json files.
+        project_file: The name (str) of the project configuration yaml file that will be created.
+            Default: None. If None, the project file is named "project_CLASS_NAME",
+            where CLASS_NAME is the target class as referred to the groundtruth data.
+        exports_path: The path (str) where the results of the classification project will be saved to.
+            Default: None. If None, the current working directory is used.
+        c_values, gamma_values, preprocessing_values: Optional replacements for the "C", "gamma" and
+            "preprocessing" entries of the SVM classifiers in the template. Default: None (keep the template's).
+        seed: The seed of the random shuffle generator. Default: None. If None, the clock value is used.
+        jobs: The cores (int) used during the training phase; stored as "parallel_jobs". Default: -1.
+        verbose: The verbosity (int) of the printed messages; stored as "verbose". Default: 1.
+        logging: The level (str) of the logging prints. Default: "INFO".
+            Available values: DEBUG, INFO, WARNING, ERROR, CRITICAL.
+    """
+    try:
+        path_template = os.path.dirname(os.path.realpath(__file__))
+        project_template = load_yaml(path_template, "configuration_template.yaml")
+    except Exception as e:
+        print('Unable to open project configuration template:', e)
+        raise
+
+    print("-------------------------------------------------------")
+    print()
+    if seed is None:
+        seed = time.time()
+
+    print("Seed argument: {}".format(seed))
+
+    project_template["ground_truth_file"] = ground_truth_file
+    project_template["dataset_dir"] = dataset_dir
+    project_template["project_file"] = project_file
+    project_template["logging_level"] = logging
+    project_template["seed"] = seed
+    project_template["parallel_jobs"] = jobs
+    project_template["verbose"] = verbose
+
+    # if empty, the exports go to the current working directory
+    if exports_path is None:
+        exports_path = os.getcwd()
+
+    print("Exports path: {}".format(exports_path))
+    project_template["exports_path"] = exports_path
+
+    print()
+    print()
+    print("-------------------------------------------------------")
+
+    print("Loading GroundTruth yaml file:", ground_truth_file)
+    train_class(project_template, ground_truth_file, c_values, gamma_values, preprocessing_values, logging)
diff --git a/acousticbrainz/models/sklearn/model/configuration_template.yaml b/acousticbrainz/models/sklearn/model/configuration_template.yaml
new file mode 100644
index 000000000..adce65740
--- /dev/null
+++ b/acousticbrainz/models/sklearn/model/configuration_template.yaml
@@ -0,0 +1,110 @@
+# READ GROUND TRUTH
+# the ground truth data directory
+ground_truth_directory:
+exports_path:
+# classes with features locally: danceability, gender, genre_rosamerica, moods_claurier, moods_mirex, timbre_bright_dark
+# classes with features locally: tonal_atonal, voice_instrumental
+# classes with features online: genre_dortmund, genre_electronic, genre_tzanetakis, ismir04_rhythm,
+class_name:
+exports_directory:
+logging_level:  # logging level
+seed:  # set null to get the seed from the clock value, otherwise specify a number
+
+# PRE-PROCESSING
+# List of parameters that have to be excluded before applying the transformation steps
+excludedDescriptors: [ 'metadata.tags*' ]
+# List of preprocessed datasets to build
+processing:
+    # it is possible to not apply any processing, although this is
+    # of little value in real-life tests and evaluations
+    raw: []
+
+    basic:
+      - transfo: remove
+        params: { descriptorNames: &unusedDescs [ 'metadata.*', '*dmean*', '*dvar*',
+                                                  '*.min', '*.max', '*cov',
+                                                  'tonal.thpcp', # because of division by zero
+                                                  'lowlevel.spectral_energyband_high.*', # 0 for low samplerate
+                                                  'lowlevel.silence_rate*' # funky behavior in general
+                                                  ] }
+      - transfo: enumerate
+        params: { descriptorNames: &stringDescs [ # 'rhythm.perceptual_tempo', # removed from new extractor
+                                                  'tonal.chords_key', 'tonal.chords_scale',
+                                                  'tonal.key_key', 'tonal.key_scale' ] }
+
+    lowlevel:
+      # note that the order of the transformations is important!
+      - transfo: remove
+        params: { descriptorNames: *unusedDescs }
+      - transfo: enumerate
+        params: { descriptorNames: *stringDescs }
+      - transfo: select
+        params: { descriptorNames: ['lowlevel*'] }
+
+    nobands:
+      - transfo: remove
+        params: { descriptorNames: *unusedDescs }
+      - transfo: enumerate
+        params: { descriptorNames: *stringDescs }
+      - transfo: remove
+        params: { descriptorNames: [ 'barkbands*', '*energyband*', 'melbands*', 'erbbands*' ] }
+
+    normalized:
+        - transfo: remove
+          params: { descriptorNames: *unusedDescs }
+        - transfo: enumerate
+          params: { descriptorNames: *stringDescs }
+        - transfo: normalize  # MinMax Scale
+
+    gaussianized:
+      - transfo: remove
+        params: { descriptorNames: *unusedDescs }
+      - transfo: enumerate
+        params: { descriptorNames: *stringDescs }
+      - transfo: normalize  # MinMax Scale
+      - transfo: gaussianize  # QuantileTransformer
+        params: { descriptorNames: ['lowlevel.*'] }
+
+#    mfcc:
+#      # an MFCC only baseline
+#      - transfo: remove
+#        params: { descriptorNames: *unusedDescs }
+#      - transfo: enumerate
+#        params: { descriptorNames: *stringDescs }
+#      - transfo: select
+#        params: { descriptorNames: ['lowlevel.mfcc*'] }
+
+## ML SETTINGS
+# train kind: grid, svm, deep_learning, supervised_lb
+train_kind: grid
+k_fold_shuffle: False
+
+# GRID ML SETTINGS
+# PCA number of best components
+pca_n_components: .95
+parallel_jobs:   # set to -1 if to exploit all processors. Set to null to exploit only 1 processor
+verbose:   # 0: no verbose, 1: simple information about the tasks completed, 2: full information of all the tasks
+
+# NEURAL NETWORK SETTINGS
+#
+
+# List of classifiers to be trained
+classifiers:
+    svm:
+        # first svm test combinations
+#        - preprocessing: [ 'basic', 'lowlevel', 'nobands', 'normalized', 'gaussianized', 'mfcc' ]
+        - preprocessing: [ 'basic', 'lowlevel', 'nobands', 'normalized', 'gaussianized']
+          type: [ 'C-SVC' ]
+          kernel: [ 'poly', 'RBF' ]
+          C:     [ -5, -3, -1, 1, 3, 5, 7, 9, 11 ] # will actually be 2**x
+          gamma: [ 3, 1, -1, -3, -5, -7, -9, -11 ] # will actually be 2**x
+          # if True, weight classes based on the number of elements
+          balance_classes: [False, True]
+        #  descriptorNames: [ ['*.mean', '*.var'] ]
+        # more svm params combinations
+        # ...
+
+# List of evaluations to be performed
+evaluations:
+    nfoldcrossvalidation:
+        - nfold: [ 5 ]
\ No newline at end of file
diff --git a/acousticbrainz/models/sklearn/model/predict.py b/acousticbrainz/models/sklearn/model/predict.py
new file mode 100644
index 000000000..0e9d568fe
--- /dev/null
+++ b/acousticbrainz/models/sklearn/model/predict.py
@@ -0,0 +1,132 @@
+import os
+import requests
+import argparse
+from pprint import pprint
+import joblib
+import json
+import pandas as pd
+from ..helper_functions.utils import load_yaml
+from ..transformation.utils_preprocessing import flatten_dict_full
+from ..transformation.transform_predictions import TransformPredictions
+from ..helper_functions.logging_tool import setup_logger
+
+
+class Predict:
+    """Predict the class of one track with the best model exported by a training run.
+
+    Reads "best_model_<class_name>.json" and "best_clf_model.pkl" from config["exports_path"].
+    """
+    def __init__(self, config, track_low_level, log_level):
+        self.config = config
+        self.track_low_level = track_low_level
+        self.log_level = log_level
+
+        self.class_name = ""
+        self.exports_path = ""
+        self.best_model = ""
+        self.track_feats = dict()
+
+        self.load_best_model()
+        self.logger = ""  # replaced by a real logger in preprocessing()
+        self.df_track = pd.DataFrame()
+        self.list_track = []
+
+    def load_best_model(self):
+        """Read class name and exports path from the config and load the best model's JSON description."""
+        self.class_name = self.config["class_name"]
+        self.exports_path = self.config["exports_path"]
+
+        best_model_path = os.path.join(self.exports_path,
+                                       "best_model_{}.json".format(self.class_name))
+        with open(best_model_path) as json_file:
+            self.best_model = json.load(json_file)
+
+    def preprocessing(self):
+        """Flatten and transform the track's low-level data, then classify it.
+
+        Returns:
+            A list with one dict per predicted row, holding the predicted value under the
+            class name, "score" (the highest probability) and "probabilities" (per class).
+        """
+        self.logger = setup_logger(
+            exports_path=self.exports_path,
+            name="predict_{}".format(self.class_name),
+            mode="w",
+            level=self.log_level
+        )
+
+        self.logger.info("Best model:")
+        self.logger.info(self.best_model)
+
+        self.logger.info("FLATTENING:")
+        try:
+            if 'beats_position' in self.track_low_level['rhythm']:
+                del self.track_low_level['rhythm']['beats_position']
+        except (KeyError, TypeError) as e:
+            # the exception needs its own %s placeholder; a stray extra argument makes logging fail to format
+            self.logger.warning("There is no 'rhythm' key in the low level data. Exception: %s", e)
+
+        # data dictionary transformed to a fully flattened dictionary
+        self.track_feats = dict(flatten_dict_full(self.track_low_level))
+        list_track = [self.track_feats]
+        self.logger.debug("DICT TO DATAFRAME:")
+        self.df_track = pd.DataFrame(data=list_track, columns=list_track[0].keys())
+        self.logger.debug("TYPE of track structure: {}".format(type(self.df_track)))
+
+        self.logger.info("PROCESSING:")
+        features_prepared = TransformPredictions(config=self.config,
+                                                 df_feats=self.df_track,
+                                                 process=self.best_model["preprocessing"],
+                                                 train_class=self.class_name,
+                                                 exports_path=self.exports_path,
+                                                 logger=self.logger
+                                                 ).post_processing()
+        self.logger.debug("Features shape after preparation: {}".format(features_prepared.shape))
+
+        # load the exported classifier (NOTE(review): presumably fit on the whole dataset - confirm)
+        best_model_path = os.path.join(self.exports_path, "best_clf_model.pkl")
+        clf_loaded = joblib.load(best_model_path)
+        predicted = clf_loaded.predict(features_prepared)
+        predicted_prob = clf_loaded.predict_proba(features_prepared)
+        self.logger.info("Prediction: {}".format(predicted))
+        self.logger.info("Classes: {}".format(clf_loaded.classes_))
+        self.logger.info("Prediction probabilities: {}".format(predicted_prob))
+        predict_list = []
+        for pred, pred_probability in zip(predicted, predicted_prob):
+            predict_dict = dict()
+            predict_dict[self.class_name] = pred
+            predict_dict["score"] = max(pred_probability)
+            predict_dict["probabilities"] = dict(zip(clf_loaded.classes_, pred_probability))
+
+            predict_list.append(predict_dict)
+
+        self.logger.info("Predictions for the track:")
+        self.logger.info("{}".format(predict_list))
+        self.logger.debug("Output (Return) predict_list")
+
+        return predict_list
+
+
+def prediction(exports_path, project_file, mbid, log_level="INFO"):
+    """Fetch a recording's low-level data from AcousticBrainz and predict its class.
+
+    Returns the list of prediction dicts produced by Predict.preprocessing().
+    """
+    try:
+        project_data = load_yaml(exports_path, "{}.yaml".format(project_file))
+    except Exception as e:
+        print('Unable to open project configuration file:', e)
+        raise
+
+    url_api = "https://acousticbrainz.org/api/v1/{}/low-level".format(mbid)
+    response = requests.get(url=url_api)
+    response.raise_for_status()  # fail on an HTTP error instead of treating its body as low-level data
+    track_low_level_data = response.json()
+    # a missing or empty tag must not abort the prediction: print only the ones present
+    tags = track_low_level_data.get("metadata", {}).get("tags", {})
+    for label, tag in (("Artist:", "artist"), ("Album:", "album"), ("Title:", "title")):
+        if tags.get(tag) and tags[tag][0]:
+            print(label, tags[tag][0])
+
+    prediction_track = Predict(config=project_data, track_low_level=track_low_level_data, log_level=log_level)
+    return prediction_track.preprocessing()
diff --git a/acousticbrainz/models/sklearn/requirements.txt b/acousticbrainz/models/sklearn/requirements.txt
new file mode 100644
index 000000000..62242336b
--- /dev/null
+++ b/acousticbrainz/models/sklearn/requirements.txt
@@ -0,0 +1,13 @@
+matplotlib==3.1.3
+numpy==1.18.1
+pandas==1.0.3
+PyYAML==5.3
+scikit-learn==0.23.1
+scipy==1.4.1
+seaborn==0.10.0
+dask==2.11.0
+dotty-dict==1.2.1
+termcolor==1.1.0
+joblib==0.15.1
+six==1.15.0
+requests==2.23.0
\ No newline at end of file
diff --git a/acousticbrainz/models/sklearn/transformation/__init__.py b/acousticbrainz/models/sklearn/transformation/__init__.py
new file mode 100644
index 000000000..40a96afc6
--- /dev/null
+++ b/acousticbrainz/models/sklearn/transformation/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/acousticbrainz/models/sklearn/transformation/load_ground_truth.py b/acousticbrainz/models/sklearn/transformation/load_ground_truth.py
new file mode 100644
index 000000000..79b798b81
--- /dev/null
+++ b/acousticbrainz/models/sklearn/transformation/load_ground_truth.py
@@ -0,0 +1,112 @@
+import os
+import yaml
+import pandas as pd
+from termcolor import colored
+import random
+from ..helper_functions.utils import create_directory
+from ..transformation.load_low_level import create_low_level_features_df
+
+
+def load_local_ground_truth(gt_filename):
+    """Load the ground truth YAML file (the tracks and the labels they belong to).
+
+    Returns the parsed content of the file, or None when the file is not valid
+    YAML (the parser's error is printed in that case).
+    """
+    with open(gt_filename, "r") as stream:
+        try:
+            ground_truth_data = yaml.safe_load(stream)
+        except yaml.YAMLError as exc:
+            print("Error in loading the ground truth file.")
+            print(exc)
+            return None
+        print("Ground truth file loaded.")
+        return ground_truth_data
+
+
+def export_gt_tracks(ground_truth_data, seed):
+    """
+    Turn the groundtruth's track -> label dictionary into a list of (track, label) tuples,
+    seed the module-level random generator with `seed`, and shuffle the list with it.
+
+    Args:
+        ground_truth_data: The loaded ground truth (dict); its "groundTruth" entry maps track to label.
+        seed: The value handed to random.seed() before shuffling.
+
+    Returns:
+        A list of tuples with the tracks and their corresponding labels.
+    """
+    tracks_list = list(ground_truth_data["groundTruth"].items())
+    print(colored("SEED is set to: {}".format(seed), "cyan"))
+    random.seed(a=seed)
+    random.shuffle(tracks_list)
+    print("Listed tracks in GT file: {}".format(len(tracks_list)))
+    return tracks_list
+
+
+def create_df_tracks(config, tracks_list, train_class, exports_path, logger):
+    """
+    Match the ground-truth tracks with the low-level JSON files found under config["dataset_dir"]
+    (a track matches a file when its name is contained in the file path), export the matched
+    tracks to <exports_path>/tracks_csv_format/ as CSV and load the features of the matched files.
+
+    Returns:
+        A tuple (features DataFrame, labels array, tracks array), row-aligned with each other,
+        or (None, None, None) when there is no low-level file or no track matches one.
+    """
+    logger.info("---- EXPORTING FEATURES - LABELS - TRACKS ----")
+    dataset_dir = config.get("dataset_dir")
+    print('DATASET-DIR', dataset_dir)
+    low_level_list = list()
+    for (dirpath, dirnames, filenames) in os.walk(os.path.join(os.getcwd(), dataset_dir)):
+        low_level_list += [os.path.join(dirpath, file) for file in filenames if file.endswith(".json")]
+    if len(low_level_list) == 0:
+        logger.error("No low-level data found.")
+        return None, None, None
+
+    logger.info("Low-level features for the tracks found.")
+    # pair each GT track with every low-level file whose path contains the track name; both lists
+    # below come from the same pairs, so index k always refers to the same track in both
+    matches = [(e, i) for e in tracks_list for i in low_level_list if e[0] in i]
+    if not matches:
+        # random.randrange(0) below would raise ValueError; report the real problem instead
+        logger.error("No low-level data found for the tracks of the ground truth.")
+        return None, None, None
+    tracks_existing_list = [e for e, i in matches]
+    tracks_existing_path_list = [i for e, i in matches]
+    logger.debug("tracks existed found: {}".format(len(tracks_existing_list)))
+    logger.debug("tracks_path existed found: {}".format(len(tracks_existing_path_list)))
+    logger.debug("{}".format(tracks_existing_list[:4]))
+    logger.debug("{}".format(tracks_existing_path_list[:4]))
+    logger.debug("The founded tracks tracks listed successfully.")
+    logger.debug("Generate random number within a given range of listed tracks:")
+    # Random number between 0 and length of listed tracks
+    random_num = random.randrange(len(tracks_existing_list))
+    logger.debug("Check if the tracks are the same in the same random index in both lists")
+    logger.debug("{}".format(tracks_existing_list[random_num]))
+    logger.debug("{}".format(tracks_existing_path_list[random_num]))
+
+    # create the dataframe with tracks that are both in low-level files and the GT file
+    df_tracks = pd.DataFrame(data=tracks_existing_list, columns=["track", train_class])
+    logger.debug("Shape of tracks DF created before cleaning: {}".format(df_tracks.shape))
+    null_rows = df_tracks.isnull().any(axis=1)
+    logger.debug("Check the shape of a temporary DF that includes if there are any NULL values:")
+    logger.debug("{}".format(df_tracks[null_rows].shape))
+    if null_rows.any():
+        logger.debug("Drop rows with NULL values and re-index the tracks DF..")
+        df_tracks = df_tracks[~null_rows].reset_index(drop=True)
+        # drop the same rows from the paths, otherwise features and labels no longer line up
+        tracks_existing_path_list = [i for i, null in zip(tracks_existing_path_list, null_rows) if not null]
+    else:
+        logger.info("There are no NULL values found.")
+
+    # export shuffled tracks to CSV format
+    tracks_path = create_directory(exports_path, "tracks_csv_format")
+    df_tracks.to_csv(os.path.join(tracks_path, "tracks_{}_shuffled.csv".format(train_class)))
+    logger.debug("COLUMNS CONTAIN OBJECTS: {}".format(
+        df_tracks.select_dtypes(include=['object']).columns))
+
+    df_feats = create_low_level_features_df(tracks_existing_path_list, logger)
+    y = df_tracks[train_class].values
+    logger.info("Features, Labels, and Tracks are exported successfully..")
+    return df_feats, y, df_tracks["track"].values
diff --git a/acousticbrainz/models/sklearn/transformation/load_low_level.py b/acousticbrainz/models/sklearn/transformation/load_low_level.py
new file mode 100644
index 000000000..d1698c81f
--- /dev/null
+++ b/acousticbrainz/models/sklearn/transformation/load_low_level.py
@@ -0,0 +1,45 @@
+import json
+import pandas as pd
+from ..transformation.utils_preprocessing import flatten_dict_full
+
+
+def create_low_level_features_df(list_path_tracks, logger):
+    """
+    Creates the low-level DataFrame. Cleans also the low-level data from the unnecessary features before creating
+    the DF. Files that cannot be loaded are logged and skipped, so the DF may have fewer rows than paths given.
+    Args:
+        list_path_tracks: paths of the low-level JSON files, one per track.
+        logger: logger that receives the progress and error messages.
+    Returns:
+        The low-level features (pandas DataFrame) from all the tracks in the collection.
+    """
+    logger.info("---- CREATE LOW LEVEL DATAFRAME ----")
+
+    list_feats_tracks = []
+
+    for track_low_level_path in list_path_tracks:
+        try:
+            with open(track_low_level_path) as f:
+                data_feats_item = json.load(f, strict=False)
+        except Exception:
+            # Without the `continue` the previous track's data (or an undefined name) would be used below.
+            logger.error("Exception occurred in loading file: %s", track_low_level_path, exc_info=True)
+            continue
+        # remove unnecessary features data
+        try:
+            if 'beats_position' in data_feats_item['rhythm']:
+                del data_feats_item['rhythm']['beats_position']
+        except KeyError:
+            logger.error("There is no 'rhythm' key in the low level data.", exc_info=True)
+
+        # data dictionary transformed to a fully flattened dictionary
+        list_feats_tracks.append(dict(flatten_dict_full(data_feats_item)))
+
+    # The columns follow the key order of the first track; an empty list gives an empty DataFrame
+    columns = list(list_feats_tracks[0].keys()) if list_feats_tracks else None
+    df_feats_tracks = pd.DataFrame(list_feats_tracks, columns=columns)
+    logger.debug("COLUMNS CONTAIN OBJECTS: \n{}".format(
+        df_feats_tracks.select_dtypes(include=['object']).columns))
+    logger.info("Exporting low-level data (DataFrame)..")
+    return df_feats_tracks
+
diff --git a/acousticbrainz/models/sklearn/transformation/transform.py b/acousticbrainz/models/sklearn/transformation/transform.py
new file mode 100644
index 000000000..2396bcd97
--- /dev/null
+++ b/acousticbrainz/models/sklearn/transformation/transform.py
@@ -0,0 +1,252 @@
+import pandas as pd
+from termcolor import colored
+import collections
+import joblib
+import os
+import six
+
+from ..transformation.utils_preprocessing import list_descr_handler
+from ..transformation.utils_preprocessing import feats_selector_list
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, QuantileTransformer
+from sklearn.pipeline import FeatureUnion
+from sklearn.pipeline import Pipeline
+
+
+# Resolve the collections ABC module via six.moves to avoid the deprecated access through `collections`
+try:
+    collectionsAbc = six.moves.collections_abc
+except AttributeError:  # e.g. a six release that has no collections_abc move
+    collectionsAbc = collections
+
+
+class Transform:
+    """Fits and saves the scikit-learn preprocessing pipeline that ``config["processing"][process]`` describes."""
+    def __init__(self, config, df_feats, process, train_class, exports_path, logger):
+        self.config = config
+        self.df_feats = df_feats
+        self.process = process
+        self.train_class = train_class
+        self.exports_path = exports_path
+        self.logger = logger
+        self.list_features = []
+        self.feats_cat_list = []
+        self.feats_num_list = []
+        self.df_cat = pd.DataFrame()
+        self.df_num = pd.DataFrame()
+        self.feats_prepared = []
+
+    def post_processing(self):
+        """Fit the ``self.process`` pipeline on ``self.df_feats``, dump it with joblib under
+        ``<exports_path>/models`` and return the transformed feature matrix."""
+        print(colored("PROCESS: {}".format(self.process), "cyan"))
+        self.logger.debug("PROCESS: {}".format(self.process))
+        self.logger.debug("Process: {}".format(self.config["processing"][self.process]))
+
+        self.list_features = list(self.df_feats.columns)
+
+        models_path = os.path.join(self.exports_path, "models")
+
+        # clean list
+        print(colored("Cleaning..", "yellow"))
+        self.logger.info("Cleaning..")
+        cleaning_conf_list = list_descr_handler(self.config["excludedDescriptors"])
+        feats_clean_list = feats_selector_list(self.df_feats.columns, cleaning_conf_list)
+        self.list_features = [x for x in self.df_feats.columns if x not in feats_clean_list]
+        self.logger.debug("List after cleaning some feats: {}".format(len(self.list_features)))
+
+        # remove list
+        print(colored("Removing unnecessary features..", "yellow"))
+        self.logger.info("Removing unnecessary features..")
+        if self.config["processing"][self.process][0]["transfo"] == "remove":
+            remove_list = list_descr_handler(self.config["processing"][self.process][0]["params"]["descriptorNames"])
+            feats_remove_list = feats_selector_list(self.df_feats.columns, remove_list)
+            self.list_features = [x for x in self.list_features if x not in feats_remove_list]
+            self.logger.debug("List after removing unnecessary feats: {}".format(len(self.list_features)))
+
+        # enumerate list
+        print(colored("Split numerical / categorical features..", "yellow"))
+        if self.config["processing"][self.process][1]["transfo"] == "enumerate":
+            enumerate_list = list_descr_handler(self.config["processing"][self.process][1]["params"]["descriptorNames"])
+            self.feats_cat_list = feats_selector_list(self.list_features, enumerate_list)
+            self.logger.debug("Enumerating feats: {}".format(self.feats_cat_list))
+            self.feats_num_list = [x for x in self.list_features if x not in self.feats_cat_list]
+            self.logger.debug("List Num feats: {}".format(len(self.feats_num_list)))
+            self.logger.debug("List Cat feats: {}".format(len(self.feats_cat_list)))
+
+        # BASIC
+        if self.process == "basic":
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+
+            num_pipeline = Pipeline([
+                ('selector', DataFrameSelector(self.feats_num_list))
+            ])
+
+            cat_pipeline = Pipeline([
+                ('selector', DataFrameSelector(self.feats_cat_list)),
+                ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
+            ])
+
+            full_pipeline = FeatureUnion(transformer_list=[
+                ("num_pipeline", num_pipeline),
+                ("cat_pipeline", cat_pipeline)
+            ])
+
+            self.feats_prepared = full_pipeline.fit_transform(self.df_feats)
+
+            # save pipeline
+            joblib.dump(full_pipeline, os.path.join(models_path, "full_pipeline_{}.pkl".format(self.process)))
+
+        # LOW-LEVEL or MFCC
+        if self.process == "lowlevel" or self.process == "mfcc":
+            sel_list = list_descr_handler(self.config["processing"][self.process][2]["params"]["descriptorNames"])
+            self.feats_num_list = feats_selector_list(self.feats_num_list, sel_list)
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+
+            num_pipeline = Pipeline([
+                ('selector', DataFrameSelector(self.feats_num_list))
+            ])
+
+            cat_pipeline = Pipeline([
+                ('selector', DataFrameSelector(self.feats_cat_list)),
+                ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
+            ])
+
+            full_pipeline = FeatureUnion(transformer_list=[
+                ("num_pipeline", num_pipeline),
+                ("cat_pipeline", cat_pipeline)
+            ])
+
+            self.feats_prepared = full_pipeline.fit_transform(self.df_feats)
+
+            # save pipeline
+            joblib.dump(full_pipeline, os.path.join(models_path, "full_pipeline_{}.pkl".format(self.process)))
+
+        # NOBANDS
+        if self.process == "nobands":
+            sel_list = list_descr_handler(self.config["processing"][self.process][2]["params"]["descriptorNames"])
+            feats_rem_list = feats_selector_list(self.df_feats.columns, sel_list)
+            self.feats_num_list = [x for x in self.feats_num_list if x not in feats_rem_list]
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+
+            num_pipeline = Pipeline([
+                ('selector', DataFrameSelector(self.feats_num_list))
+            ])
+
+            cat_pipeline = Pipeline([
+                ('selector', DataFrameSelector(self.feats_cat_list)),
+                ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
+            ])
+
+            full_pipeline = FeatureUnion(transformer_list=[
+                ("num_pipeline", num_pipeline),
+                ("cat_pipeline", cat_pipeline)
+            ])
+
+            self.feats_prepared = full_pipeline.fit_transform(self.df_feats)
+
+            # save pipeline
+            joblib.dump(full_pipeline, os.path.join(models_path, "full_pipeline_{}.pkl".format(self.process)))
+
+        # NORMALIZED
+        if self.process == "normalized":
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+            num_pipeline = Pipeline([
+                ('selector', DataFrameSelector(self.feats_num_list)),
+                ('minmax_scaler', MinMaxScaler()),
+            ])
+
+            cat_pipeline = Pipeline([
+                ('selector', DataFrameSelector(self.feats_cat_list)),
+                ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
+            ])
+
+            full_pipeline = FeatureUnion(transformer_list=[
+                ("num_pipeline", num_pipeline),
+                ("cat_pipeline", cat_pipeline)
+            ])
+
+            self.feats_prepared = full_pipeline.fit_transform(self.df_feats)
+
+            # save pipeline
+            joblib.dump(full_pipeline, os.path.join(models_path, "full_pipeline_{}.pkl".format(self.process)))
+
+        # GAUSSIANIZED
+        if self.process == "gaussianized":
+            gauss_list = list_descr_handler(self.config["processing"][self.process][3]["params"]["descriptorNames"])
+            feats_num_gauss_list = feats_selector_list(self.feats_num_list, gauss_list)
+            feats_num_no_gauss_list = [x for x in self.feats_num_list if x not in feats_num_gauss_list]
+
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+            self.logger.debug("List post-Num-Gauss feats: {}".format(len(feats_num_gauss_list)))
+            self.logger.debug("List post-Num-No-Gauss feats: {}".format(len(feats_num_no_gauss_list)))
+
+            num_norm_pipeline = Pipeline([
+                ("selector_num", DataFrameSelector(self.feats_num_list)),
+                ("minmax_scaler", MinMaxScaler())
+            ])
+
+            cat_pipeline = Pipeline([
+                ('selector', DataFrameSelector(self.feats_cat_list)),
+                ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
+            ])
+
+            full_normalize_pipeline = FeatureUnion(transformer_list=[
+                ("num_pipeline", num_norm_pipeline),
+                ("cat_pipeline", cat_pipeline)
+            ])
+
+            self.feats_prepared = full_normalize_pipeline.fit_transform(self.df_feats)
+            self.logger.debug("Feats prepared normalized shape: {}".format(self.feats_prepared.shape))
+            # save pipeline
+            joblib.dump(full_normalize_pipeline,
+                        os.path.join(models_path, "full_normalize_pipeline_{}.pkl".format(self.process)))
+            self.df_feats = pd.DataFrame(data=self.feats_prepared)
+            columns = list(self.df_feats.columns)
+            # the union emits the scaled numerical features first: give those columns their names back;
+            # the one-hot encoded columns that follow keep their positional (integer) labels
+            select_rename_list = self.feats_num_list
+            select_no_rename_list = columns[len(self.feats_num_list):]
+            self.logger.debug("Selected no rename list: {}".format(select_no_rename_list))
+            new_feats_columns = select_rename_list + select_no_rename_list
+            self.df_feats.columns = new_feats_columns
+            self.logger.debug("Normalized Features DF:")
+            self.logger.debug("\n{}".format(self.df_feats))
+            self.logger.debug("Shape: {}".format(self.df_feats.shape))
+
+            feats_no_gauss_list = [x for x in new_feats_columns if x not in feats_num_gauss_list]
+
+            num_gauss_pipeline = Pipeline([
+                ("gauss_sel_num", DataFrameSelector(feats_num_gauss_list)),
+                # NOTE(review): output_distribution defaults to "uniform", not "normal" — confirm this is intended
+                ("gauss_scaler", QuantileTransformer(n_quantiles=1000))
+            ])
+
+            num_no_gauss_pipeline = Pipeline([
+                ("gauss_sel_num", DataFrameSelector(feats_no_gauss_list))
+            ])
+
+            full_gauss_pipeline = FeatureUnion(transformer_list=[
+                ("num_gauss_pipeline", num_gauss_pipeline),
+                ("num_no_gauss_pipeline", num_no_gauss_pipeline)
+            ])
+
+            self.feats_prepared = full_gauss_pipeline.fit_transform(self.df_feats)
+
+            # save pipeline
+            joblib.dump(full_gauss_pipeline,
+                        os.path.join(models_path, "full_gauss_pipeline_{}.pkl".format(self.process)))
+
+        return self.feats_prepared
+
+
+# Pipeline step that picks the given columns (numerical or categorical) out of the input and returns their values
+class DataFrameSelector(BaseEstimator, TransformerMixin):
+    def __init__(self, attribute_names):
+        self.attribute_names = attribute_names  # column labels that transform() extracts
+
+    def fit(self, X, y=None):
+        return self  # nothing is learned from X or y
+
+    def transform(self, X):
+        return X[self.attribute_names].values  # presumably X is a pandas DataFrame; labels are dropped
diff --git a/acousticbrainz/models/sklearn/transformation/transform_predictions.py b/acousticbrainz/models/sklearn/transformation/transform_predictions.py
new file mode 100644
index 000000000..81072ef5f
--- /dev/null
+++ b/acousticbrainz/models/sklearn/transformation/transform_predictions.py
@@ -0,0 +1,153 @@
+import pandas as pd
+from termcolor import colored
+import collections
+import joblib
+import os
+import six
+
+from ..transformation.utils_preprocessing import list_descr_handler
+from ..transformation.utils_preprocessing import feats_selector_list
+
+# Resolve the collections ABC module via six.moves to avoid the deprecated access through `collections`
+try:
+    collectionsAbc = six.moves.collections_abc
+except AttributeError:  # e.g. a six release that has no collections_abc move
+    collectionsAbc = collections
+
+
+class TransformPredictions:
+    """Loads the joblib pipeline(s) stored for ``process`` under ``<exports_path>/models`` and applies them."""
+    def __init__(self, config, df_feats, process, train_class, exports_path, logger):
+        self.config = config
+        self.df_feats = df_feats
+        self.process = process
+        self.train_class = train_class
+        self.exports_path = exports_path
+        self.logger = logger
+        self.list_features = []
+        self.feats_cat_list = []
+        self.feats_num_list = []
+
+        self.feats_prepared = []
+
+    def post_processing(self):
+        """Transform ``self.df_feats`` with the pipeline(s) loaded for ``self.process`` and return the
+        resulting feature matrix; nothing is fitted here."""
+        print(colored("PROCESS: {}".format(self.process), "cyan"))
+
+        self.logger.debug("Track Features - Low Level: {}".format(self.df_feats))
+        self.logger.debug("Shape of DF: {}".format(self.df_feats.shape))
+
+        self.list_features = list(self.df_feats.columns)
+
+        models_path = os.path.join(self.exports_path, "models")
+
+        # clean list
+        print(colored("Cleaning..", "yellow"))
+        cleaning_conf_list = list_descr_handler(self.config["excludedDescriptors"])
+        self.logger.debug("cleaning list: {}".format(cleaning_conf_list))
+        feats_clean_list = feats_selector_list(self.df_feats.columns, cleaning_conf_list)
+        self.list_features = [x for x in self.df_feats.columns if x not in feats_clean_list]
+        self.logger.debug("List after cleaning some feats: {}".format(len(self.list_features)))
+
+        # remove list
+        print(colored("Removing unnecessary features..", "yellow"))
+        if self.config["processing"][self.process][0]["transfo"] == "remove":
+            remove_list = list_descr_handler(self.config["processing"][self.process][0]["params"]["descriptorNames"])
+            feats_remove_list = feats_selector_list(self.df_feats.columns, remove_list)
+            self.list_features = [x for x in self.list_features if x not in feats_remove_list]
+            self.logger.debug("List after removing unnecessary feats: {}".format(len(self.list_features)))
+
+        # enumerate list
+        print(colored("Split numerical / categorical features..", "yellow"))
+        if self.config["processing"][self.process][1]["transfo"] == "enumerate":
+            enumerate_list = list_descr_handler(self.config["processing"][self.process][1]["params"]["descriptorNames"])
+            self.feats_cat_list = feats_selector_list(self.list_features, enumerate_list)
+            self.logger.debug("Enumerating feats: {}".format(self.feats_cat_list))
+            self.feats_num_list = [x for x in self.list_features if x not in self.feats_cat_list]
+            self.logger.debug("List Num feats: {}".format(len(self.feats_num_list)))
+            self.logger.debug("List Cat feats: {}".format(len(self.feats_cat_list)))
+
+        # BASIC
+        if self.process == "basic":
+            print(colored("Process doing: {}".format(self.process), "green"))
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+
+            # load pipeline
+            full_pipeline = joblib.load(os.path.join(models_path, "full_pipeline_{}.pkl".format(self.process)))
+
+            self.feats_prepared = full_pipeline.transform(self.df_feats)
+
+        # LOW-LEVEL or MFCC
+        if self.process == "lowlevel" or self.process == "mfcc":
+            print(colored("Process doing: {}".format(self.process), "green"))
+            sel_list = list_descr_handler(self.config["processing"][self.process][2]["params"]["descriptorNames"])
+            self.feats_num_list = feats_selector_list(self.feats_num_list, sel_list)
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+
+            # load pipeline
+            full_pipeline = joblib.load(os.path.join(models_path, "full_pipeline_{}.pkl".format(self.process)))
+
+            self.feats_prepared = full_pipeline.transform(self.df_feats)
+
+        # NOBANDS
+        if self.process == "nobands":
+            print(colored("Process doing: {}".format(self.process), "green"))
+            sel_list = list_descr_handler(self.config["processing"][self.process][2]["params"]["descriptorNames"])
+            feats_rem_list = feats_selector_list(self.df_feats.columns, sel_list)
+            self.feats_num_list = [x for x in self.feats_num_list if x not in feats_rem_list]
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+
+            # load pipeline
+            full_pipeline = joblib.load(os.path.join(models_path, "full_pipeline_{}.pkl".format(self.process)))
+
+            self.feats_prepared = full_pipeline.transform(self.df_feats)
+
+        # NORMALIZED
+        if self.process == "normalized":
+            print(colored("Process doing: {}".format(self.process), "green"))
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+
+            # load pipeline
+            full_pipeline = joblib.load(os.path.join(models_path, "full_pipeline_{}.pkl".format(self.process)))
+
+            self.feats_prepared = full_pipeline.transform(self.df_feats)
+
+        # GAUSSIANIZED
+        if self.process == "gaussianized":
+            print(colored("Process doing: {}".format(self.process), "green"))
+            gauss_list = list_descr_handler(self.config["processing"][self.process][3]["params"]["descriptorNames"])
+            feats_num_gauss_list = feats_selector_list(self.feats_num_list, gauss_list)
+            feats_num_no_gauss_list = [x for x in self.feats_num_list if x not in feats_num_gauss_list]
+
+            self.logger.debug("List post-Num feats: {}".format(len(self.feats_num_list)))
+            self.logger.debug("List post-Num-Gauss feats: {}".format(len(feats_num_gauss_list)))
+            self.logger.debug("List post-Num-No-Gauss feats: {}".format(len(feats_num_no_gauss_list)))
+
+            # load normalization pipeline
+            full_normalize_pipeline = joblib.load(os.path.join(models_path,
+                                                               "full_normalize_pipeline_{}.pkl".format(self.process)))
+            # normalize
+            self.feats_prepared = full_normalize_pipeline.transform(self.df_feats)
+
+            # transform numpy array to pandas DF for gaussianizing
+            self.df_feats = pd.DataFrame(data=self.feats_prepared)
+            columns = list(self.df_feats.columns)
+            # assumes the loaded pipeline emits the len(feats_num_list) numerical columns first — TODO confirm;
+            # those get their names back, the remaining columns keep their positional (integer) labels
+            select_rename_list = self.feats_num_list
+            select_no_rename_list = columns[len(self.feats_num_list):]
+            self.logger.debug("Selected no rename list: {}".format(select_no_rename_list))
+            new_feats_columns = select_rename_list + select_no_rename_list
+            self.df_feats.columns = new_feats_columns
+            self.logger.debug("Normalized Features DF:")
+            self.logger.debug("\n{}".format(self.df_feats))
+            self.logger.debug("Shape: {}".format(self.df_feats.shape))
+
+            # load gaussianization pipeline
+            full_gauss_pipeline = joblib.load(os.path.join(models_path,
+                                                           "full_gauss_pipeline_{}.pkl".format(self.process)))
+
+            self.feats_prepared = full_gauss_pipeline.transform(self.df_feats)
+
+        return self.feats_prepared
diff --git a/acousticbrainz/models/sklearn/transformation/utils_preprocessing.py b/acousticbrainz/models/sklearn/transformation/utils_preprocessing.py
new file mode 100644
index 000000000..097beceab
--- /dev/null
+++ b/acousticbrainz/models/sklearn/transformation/utils_preprocessing.py
@@ -0,0 +1,70 @@
+import re
+import collections
+
+
+def flatten_dict_full(dictionary, sep="_"):
+    """
+    Flatten nested dicts/lists into one ordered, single-level mapping.
+
+    Args:
+        dictionary: nested structure of dicts, lists and leaf values.
+        sep: string placed between the path components of each key.
+    Returns:
+        OrderedDict keyed by the joined path of every leaf (list items use their index).
+    """
+    flat = collections.OrderedDict()
+
+    def walk(node, path=""):
+        if isinstance(node, dict):
+            for key, value in node.items():
+                walk(value, path + sep + key if path else key)
+        elif isinstance(node, list):
+            for index, element in enumerate(node):
+                walk(element, path + sep + str(index) if path else str(index))
+        else:
+            flat[path] = node
+
+    walk(dictionary)
+    return flat
+
+
+def list_descr_handler(descr_list):
+    """
+    Convert dotted descriptor wildcard patterns into underscore-separated key fragments.
+
+    Args:
+        descr_list: iterable of descriptor pattern strings (e.g. "lowlevel.mfcc*").
+    Returns:
+        list with one converted string per input pattern, in the same order.
+    """
+    def convert(pattern):
+        if pattern.endswith(".*"):
+            converted = pattern.replace(".*", "_")
+        elif pattern.startswith("*."):
+            converted = pattern.replace("*.", "_")
+        else:
+            converted = pattern.replace("*", "")
+        return converted.replace(".", "_")
+
+    return [convert(entry) for entry in descr_list]
+
+
+def feats_selector_list(df_feats_columns, feats_select_list):
+    """
+    Select the column names matched by at least one of the given patterns.
+
+    Args:
+        df_feats_columns: iterable of column names (it is copied with ``list()`` before matching).
+        feats_select_list: patterns handed to ``re.search`` as-is (they are not escaped).
+    Returns:
+        list of matching names ordered by pattern, then column; a name hit by several patterns appears once.
+    """
+    columns_list = list(df_feats_columns)
+    columns_select_list, seen = [], set()
+    for item in feats_select_list:
+        for sel_item in columns_list:
+            if sel_item not in seen and re.search(item, sel_item):
+                seen.add(sel_item)
+                columns_select_list.append(sel_item)
+    print("features selected: {}".format(len(columns_select_list)))
+    return columns_select_list
diff --git a/admin/sql/create_indexes.sql b/admin/sql/create_indexes.sql
index 1d5d681ab..4d71b9dd5 100644
--- a/admin/sql/create_indexes.sql
+++ b/admin/sql/create_indexes.sql
@@ -26,4 +26,6 @@ CREATE UNIQUE INDEX lower_musicbrainz_id_ndx_user ON "user" (lower(musicbrainz_i
 
 CREATE INDEX collected_ndx_statistics ON statistics (collected);
 
+CREATE INDEX training_tool_dataset_eval_jobs ON dataset_eval_jobs((options->>'training_tool'));
+
 COMMIT;
diff --git a/admin/updates/20200924-dataset-eval-job-tool-index.sql b/admin/updates/20200924-dataset-eval-job-tool-index.sql
new file mode 100644
index 000000000..4667ed757
--- /dev/null
+++ b/admin/updates/20200924-dataset-eval-job-tool-index.sql
@@ -0,0 +1,3 @@
+BEGIN;
+CREATE INDEX training_tool_dataset_eval_jobs ON dataset_eval_jobs((options->>'training_tool'));
+COMMIT;
\ No newline at end of file
diff --git a/config.py.example b/config.py.example
index 2a202d6a5..aaa358547 100644
--- a/config.py.example
+++ b/config.py.example
@@ -66,4 +66,7 @@ FEATURE_EVAL_FILTERING = True
 # Choose settings used for model training
 FEATURE_EVAL_MODEL_SELECTION = False
 
+# Choose the ML tool used for model training (gaia/sklearn)
+FEATURE_EVAL_TOOL_SELECTION = False
+
 DEBUG_TB_INTERCEPT_REDIRECTS = False
diff --git a/dataset_eval/artistfilter.py b/dataset_eval/artistfilter.py
index a8162ac2c..c534edaf3 100644
--- a/dataset_eval/artistfilter.py
+++ b/dataset_eval/artistfilter.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
 import collections
 import json
 import logging
@@ -28,7 +29,7 @@ def print_datadict_summary(datadict):
     for r, cls in datadict.items():
         counter[cls] += 1
     for cls, count in counter.most_common():
-        print "%s\t\t%s" % (cls, count)
+        print("%s\t\t%s" % (cls, count))
 
 def normalise_datadict(datadict, cut_to):
     """Take a dictionary of groundtruth and cut all classes to
diff --git a/dataset_eval/evaluate.py b/dataset_eval/evaluate.py
index 31ebd6fe6..d08258d34 100644
--- a/dataset_eval/evaluate.py
+++ b/dataset_eval/evaluate.py
@@ -7,7 +7,6 @@
 import tempfile
 import time
 
-import gaia2.fastyaml as yaml
 from flask import current_app
 
 import db
@@ -16,8 +15,21 @@
 import db.dataset_eval
 import db.exceptions
 import utils.path
+import yaml
 from dataset_eval import artistfilter
-from dataset_eval import gaia_wrapper
+
+eval_tool_use = "gaia"  # default; overridden below by the MODEL_TRAINING_* environment variables
+is_sklearn = os.getenv("MODEL_TRAINING_SKLEARN")
+if is_sklearn == "1":
+    from acousticbrainz.models.sklearn.model.classification_project import create_classification_project
+    from acousticbrainz.models.sklearn.classification.matrix_creation import simplified_matrix_export
+    eval_tool_use = "sklearn"
+
+is_gaia = os.getenv("MODEL_TRAINING_GAIA")
+if is_gaia == "1":  # checked last, so "gaia" wins when both variables are "1"
+    # NOTE(review): with neither variable set, eval_tool_use stays "gaia" but gaia_wrapper is never imported
+    from dataset_eval import gaia_wrapper
+    eval_tool_use = "gaia"
 
 SLEEP_DURATION = 30  # number of seconds to wait between runs
 
@@ -25,9 +37,11 @@
 def main():
     logging.info("Starting dataset evaluator...")
     dataset_dir = current_app.config["DATASET_DIR"]
+    logging.info("Dataset dir path: {}".format(dataset_dir))
     storage_dir = os.path.join(current_app.config["FILE_STORAGE_DIR"], "history")
+    logging.info("Storage dir path: {}".format(storage_dir))
     while True:
-        pending_job = db.dataset_eval.get_next_pending_job()
+        pending_job = db.dataset_eval.get_next_pending_job(eval_tool_use)
         if pending_job:
             logging.info("Processing job %s..." % pending_job["id"])
             evaluate_dataset(pending_job, dataset_dir, storage_dir)
@@ -41,7 +55,10 @@ def evaluate_dataset(eval_job, dataset_dir, storage_dir):
 
     eval_location = os.path.join(os.path.abspath(dataset_dir), eval_job["id"])
     utils.path.create_path(eval_location)
-    temp_dir = tempfile.mkdtemp()
+    temp_dir = os.path.join(eval_location, 'temp')
+    utils.path.create_path(temp_dir)
+
+    training_tool = eval_job["options"].get("training_tool", "gaia")
 
     try:
         snapshot = db.dataset.get_snapshot(eval_job["snapshot_id"])
@@ -49,36 +66,32 @@ def evaluate_dataset(eval_job, dataset_dir, storage_dir):
         train, test = artistfilter.filter(eval_job["snapshot_id"], eval_job["options"])
         db.dataset_eval.add_sets_to_job(eval_job["id"], train, test)
 
-        logging.info("Generating filelist.yaml and copying low-level data for evaluation...")
-        filelist_path = os.path.join(eval_location, "filelist.yaml")
-        filelist = dump_lowlevel_data(train.keys(), temp_dir)
-        with open(filelist_path, "w") as f:
-            yaml.dump(filelist, f)
+        if training_tool == "gaia":
+            logging.info("Generating filelist.yaml and copying low-level data for evaluation...")
+            filelist_path = os.path.join(eval_location, "filelist.yaml")
+            filelist = dump_lowlevel_data(train.keys(), temp_dir)
+            with open(filelist_path, "w") as f:
+                yaml.safe_dump(filelist, f)
+        elif training_tool == "sklearn":
+            dump_lowlevel_data_sklearn(train.keys(), dataset_dir)
 
         logging.info("Generating groundtruth.yaml...")
         groundtruth_path = os.path.join(eval_location, "groundtruth.yaml")
         with open(groundtruth_path, "w") as f:
-            yaml.dump(create_groundtruth_dict(snapshot["data"]["name"], train), f)
-
-        # Passing more user preferences to train the model.
-        logging.info("Training model...")
-        results = gaia_wrapper.train_model(
-            project_dir=eval_location,
-            groundtruth_file=groundtruth_path,
-            filelist_file=filelist_path,
-            c_values=eval_job["options"].get("c_values", []),
-            gamma_values=eval_job["options"].get("gamma_values", []),
-            preprocessing_values=eval_job["options"].get("preprocessing_values", []),
-        )
-        logging.info("Saving results...")
-        save_history_file(storage_dir, results["history_path"], eval_job["id"])
-        db.dataset_eval.set_job_result(eval_job["id"], json.dumps({
-            "project_path": eval_location,
-            "parameters": results["parameters"],
-            "accuracy": results["accuracy"],
-            "confusion_matrix": results["confusion_matrix"],
-            "history_path": results["history_path"],
-        }))
+            yaml.safe_dump(create_groundtruth_dict(snapshot["data"]["name"], train), f)
+
+        if training_tool == "gaia":
+            logging.info("Training GAIA model...")
+            evaluate_gaia(eval_job["options"], eval_location, groundtruth_path, filelist_path, storage_dir, eval_job)
+        elif training_tool == "sklearn":
+            logging.info("Training SKLEARN model...")
+            evaluate_sklearn(options=eval_job["options"],
+                             eval_location=eval_location,
+                             ground_truth_file=groundtruth_path,
+                             dataset_dir=dataset_dir,
+                             storage_dir=storage_dir,
+                             eval_job=eval_job)
+
         db.dataset_eval.set_job_status(eval_job["id"], db.dataset_eval.STATUS_DONE)
         logging.info("Evaluation job %s has been completed." % eval_job["id"])
 
@@ -92,24 +105,101 @@ def evaluate_dataset(eval_job, dataset_dir, storage_dir):
         )
         logging.info(e)
 
-    finally:
-        # Clean up the source files used to generate this model.
-        # We can recreate them from the database if we need them
-        # at a later stage.
-        shutil.rmtree(temp_dir)
+
+def evaluate_gaia(options, eval_location, groundtruth_path, filelist_path, storage_dir, eval_job):
+    """Train a model with gaia_wrapper and store the outcome as the result of eval_job."""
+    # Missing SVM grid options are passed to the trainer as empty lists.
+    svm_grid = {key: options.get(key, [])
+                for key in ("c_values", "gamma_values", "preprocessing_values")}
+    results = gaia_wrapper.train_model(project_dir=eval_location,
+                                       groundtruth_file=groundtruth_path,
+                                       filelist_file=filelist_path,
+                                       **svm_grid)
+    logging.info("Saving results...")
+    history_path = results["history_path"]
+    save_history_file(storage_dir, history_path, eval_job["id"])
+    job_result = {"project_path": eval_location,
+                  "parameters": results["parameters"],
+                  "accuracy": results["accuracy"],
+                  "confusion_matrix": results["confusion_matrix"],
+                  "history_path": history_path}
+    db.dataset_eval.set_job_result(eval_job["id"], json.dumps(job_result))
+
+
+def evaluate_sklearn(options, eval_location, ground_truth_file, dataset_dir, storage_dir, eval_job):
+    """Train a model with the sklearn tool and store the best result for eval_job."""
+    # The job id doubles as the project file name; storage_dir is unused here.
+    job_id = eval_job["id"]
+    create_classification_project(
+        ground_truth_file=ground_truth_file,
+        dataset_dir=dataset_dir,
+        project_file=job_id,
+        exports_path=eval_location,
+        c_values=options.get("c_values", []),
+        gamma_values=options.get("gamma_values", []),
+        preprocessing_values=options.get("preprocessing_values", []),
+    )
+
+    logging.info("Saving results...")
+    results = load_best_results_sklearn(exported_path=eval_location, project_file=job_id)
+    job_result = {"project_path": eval_location}
+    for key in ("parameters", "accuracy", "confusion_matrix", "model"):
+        job_result[key] = results[key]
+    db.dataset_eval.set_job_result(job_id, json.dumps(job_result))
+
+
+def load_best_results_sklearn(exported_path, project_file):
+    """Collect the best sklearn training results stored under exported_path.
+
+    Args:
+        exported_path: Directory the project and result files are read from.
+        project_file: Name of the project configuration file, without ".yaml".
+
+    Returns:
+        Dictionary with "parameters", "accuracy" (scaled to 0-100),
+        "confusion_matrix" and "model" (path to the best model pickle file).
+    """
+    def read_json(path, message):
+        logging.info(message.format(path))
+        with open(path) as json_fp:
+            return json.load(json_fp)
+
+    config_path = os.path.join(exported_path, "{}.yaml".format(project_file))
+    logging.info("Config file path: {}".format(config_path))
+    with open(config_path) as fp:
+        # NOTE(review): FullLoader is not the restricted loader; safe_load may suffice here.
+        project_data = yaml.load(fp, Loader=yaml.FullLoader)
+    class_name = project_data['class_name']
+    logging.info("Model: {}".format(class_name))
+
+    data_best_model = read_json(
+        os.path.join(exported_path, "best_model_{}.json".format(class_name)),
+        "Best model path: {}")
+    # Parsed but not used below; a missing or invalid file still raises here.
+    read_json(os.path.join(exported_path, "folded_dataset_instances_cm.json"),
+              "Best Instances and Matrix JSON path: {}")
+
+    simplified_cm = simplified_matrix_export(
+        best_result_file="folded_dataset_results_matrix.json", logger=logging,
+        export_save_path=exported_path, export_name="simplified_cm.json", write_mode=False)
+    parameters = data_best_model["params"]
+    # Accuracy is scaled to 0-100 for consistency with gaia.
+    accuracy = round(data_best_model["score"] * 100, 2)
+    return {"parameters": parameters, "accuracy": accuracy, "confusion_matrix": simplified_cm,
+            "model": os.path.join(exported_path, "best_clf_model.pkl")}  # best model pickle
 
 
 def create_groundtruth_dict(name, datadict):
     groundtruth = {
         "type": "unknown",  # TODO: See if that needs to be modified.
         "version": 1.0,
-        "className": db.dataset._slugify(unicode(name)),
+        "className": db.dataset._slugify(name),
         "groundTruth": {},
     }
     for r, cls in datadict.items():
-        if isinstance(r, unicode):
-            r = r.encode("UTF-8")
-        groundtruth["groundTruth"][r] = cls.encode("UTF-8")
+        # Keys and class names are stored as-is; no UTF-8 encoding is
+        # applied to either of them here.
+        groundtruth["groundTruth"][r] = cls
 
     return groundtruth
 
@@ -118,12 +208,12 @@ def create_groundtruth(dataset):
     groundtruth = {
         "type": "unknown",  # TODO: See if that needs to be modified.
         "version": 1.0,
-        "className": db.dataset._slugify(unicode(dataset["name"])),
+        "className": db.dataset._slugify(dataset["name"]),
         "groundTruth": {},
     }
     for cls in dataset["classes"]:
         for recording_mbid in cls["recordings"]:
-            groundtruth["groundTruth"][recording_mbid] = cls["name"].encode("UTF-8")
+            groundtruth["groundTruth"][recording_mbid] = cls["name"]
     return groundtruth
 
 
@@ -159,7 +249,41 @@ def lowlevel_data_to_yaml(data):
     if 'lossless' in data['metadata']['audio_properties']:
         del data['metadata']['audio_properties']['lossless']
 
-    return yaml.dump(data)
+    return yaml.safe_dump(data)
+
+
+def dump_lowlevel_data_sklearn(recordings, location):
+    """Dump cleaned low-level data of each recording to a JSON file in location.
+
+    Files are named "<recording>.json"; utils.path.create_path is called on
+    location before any file is written.
+
+    Args:
+        recordings: List of MBIDs of recordings.
+        location: Path to directory where low-level data will be saved.
+    """
+    utils.path.create_path(location)
+    for recording in recordings:
+        logging.info("Recording: {}".format(recording))
+        recording_path = os.path.join(location, "%s.json" % recording)
+        logging.info("Recording path: {}".format(recording_path))
+        with open(recording_path, 'w') as outfile:
+            json.dump(lowlevel_data_cleaning(db.data.load_low_level(recording)), outfile)
+    logging.info("JSON data stored successfully.")
+
+
+def lowlevel_data_cleaning(data):
+    """Strip selected metadata descriptors from a recording's low-level data.
+
+    The dictionary is modified in place and the same object is returned. The
+    removed keys are metadata "tags" and the "sample_rate" and "lossless"
+    audio properties, described as breaking gaia_fusion (incompatible layouts).
+    """
+    metadata = data["metadata"]
+    metadata.pop("tags", None)
+    audio_properties = metadata["audio_properties"]
+    for descriptor in ("sample_rate", "lossless"):
+        audio_properties.pop(descriptor, None)
+    return data
 
 
 def extract_recordings(dataset):
diff --git a/db/dataset_eval.py b/db/dataset_eval.py
index aed4e97d3..f8723deb8 100644
--- a/db/dataset_eval.py
+++ b/db/dataset_eval.py
@@ -46,7 +46,7 @@
 
 
 def evaluate_dataset(dataset_id, normalize, eval_location, c_values=None, gamma_values=None,
-                     preprocessing_values=None, filter_type=None):
+                     preprocessing_values=None, filter_type=None, training_tool="gaia"):
     """Add dataset into evaluation queue.
 
     Args:
@@ -67,6 +67,7 @@ def evaluate_dataset(dataset_id, normalize, eval_location, c_values=None, gamma_
         filter_type: Optional filtering that will be applied to the dataset.
             See FILTER_* variables in this module for a list of existing
             filters.
+        training_tool (optional): The tool to use to train the model (gaia or sklearn)
 
     Raises:
         JobExistsException: if the dataset has already been submitted for evaluation
@@ -90,7 +91,8 @@ def evaluate_dataset(dataset_id, normalize, eval_location, c_values=None, gamma_
         # Validate dataset contents
         validate_dataset_contents(db.dataset.get(dataset_id))
         return _create_job(connection, dataset_id, normalize, eval_location,
-                           c_values, gamma_values, preprocessing_values, filter_type)
+                           c_values, gamma_values, preprocessing_values, filter_type,
+                           training_tool)
 
 
 def job_exists(dataset_id):
@@ -164,7 +166,7 @@ def validate_dataset_contents(dataset):
                 )
 
 
-def get_next_pending_job():
+def get_next_pending_job(training_tool="gaia"):
     """
     Get the earliest submitted job which is still in the pending state.
 
@@ -179,10 +181,11 @@ def get_next_pending_job():
                    ON dataset_snapshot.id = dataset_eval_jobs.snapshot_id
                 WHERE status = :status
                   AND eval_location = 'local'
+                  AND options->>'training_tool' = :training_tool
              ORDER BY created ASC
                 LIMIT 1
             """ % EVAL_COLUMNS_COMMA_SEPARATED)
-        result = connection.execute(query, {"status": STATUS_PENDING})
+        result = connection.execute(query, {"status": STATUS_PENDING, "training_tool": training_tool})
         row = result.fetchone()
         return dict(row) if row else None
 
@@ -330,7 +333,7 @@ def add_dataset_eval_set(connection, data):
 
 
 def _create_job(connection, dataset_id, normalize, eval_location, c_value,
-                gamma_value, preprocessing_values, filter_type):
+                gamma_value, preprocessing_values, filter_type, training_tool):
     if not isinstance(normalize, bool):
         raise ValueError("Argument 'normalize' must be a boolean.")
     if filter_type is not None:
@@ -345,6 +348,7 @@ def _create_job(connection, dataset_id, normalize, eval_location, c_value,
             "c_values": c_value,
             "gamma_values": gamma_value,
             "preprocessing_values": preprocessing_values,
+            "training_tool": training_tool
         }
 
     snapshot_id = db.dataset.create_snapshot(dataset_id)
diff --git a/db/test/test_dataset_eval.py b/db/test/test_dataset_eval.py
index 794bc4f87..20eb6412a 100644
--- a/db/test/test_dataset_eval.py
+++ b/db/test/test_dataset_eval.py
@@ -87,7 +87,7 @@ def test_create_job_nonormalize(self):
         # No dataset normalization
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, False, dataset_eval.EVAL_LOCAL,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
         job = dataset_eval.get_job(job_id)
 
         self.assertIsNotNone(job)
@@ -98,7 +98,7 @@ def test_create_job_normalize(self):
         # dataset normalization
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
         job = dataset_eval.get_job(job_id)
 
         self.assertIsNotNone(job)
@@ -109,7 +109,7 @@ def test_create_job_artistfilter(self):
         # Artist filtering as an option
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, False, dataset_eval.EVAL_LOCAL,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=dataset_eval.FILTER_ARTIST)
+                                          filter_type=dataset_eval.FILTER_ARTIST, training_tool="gaia")
         job = dataset_eval.get_job(job_id)
 
         self.assertIsNotNone(job)
@@ -120,7 +120,7 @@ def test_create_job_svm_params(self):
         # C, gamma, and preprocessing values
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=dataset_eval.FILTER_ARTIST)
+                                          filter_type=dataset_eval.FILTER_ARTIST, training_tool="gaia")
         job = dataset_eval.get_job(job_id)
 
         self.assertIsNotNone(job)
@@ -134,27 +134,27 @@ def test_create_job_badfilter(self):
         with self.assertRaises(ValueError):
             dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                      c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                     filter_type="test")
+                                     filter_type="test", training_tool="gaia")
 
     def test_create_job_badlocation(self):
         # an invalid eval_location
         with self.assertRaises(ValueError):
             dataset_eval._create_job(self.conn, self.test_dataset_id, True, "not_a_location",
                                      c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                     filter_type=None)
+                                     filter_type=None, training_tool="gaia")
 
     def test_job_exists(self):
         self.assertFalse(dataset_eval.job_exists(self.test_dataset_id))
         dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                  c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                 filter_type=None)
+                                 filter_type=None, training_tool="gaia")
 
         self.assertTrue(dataset_eval.job_exists(self.test_dataset_id))
 
     def test_get_job(self):
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
         random_id = "f47ac10b-58cc-4372-a567-0e02b2c3d479"
         # just in case
         self.assertNotEqual(random_id, job_id)
@@ -164,7 +164,7 @@ def test_get_job(self):
     def test_set_job_result(self):
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
 
         result = {
             u"accuracy": 1,
@@ -182,7 +182,7 @@ def test_set_job_result(self):
     def test_set_job_status(self):
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
         job = dataset_eval.get_job(job_id)
         self.assertEqual(job["status"], dataset_eval.STATUS_PENDING)
 
@@ -196,12 +196,12 @@ def test_set_job_status(self):
     def test_get_next_pending_job(self):
         job1_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                            c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                           filter_type=None)
+                                           filter_type=None, training_tool="gaia")
         job1 = dataset_eval.get_job(job1_id)
 
         job2_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                            c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                           filter_type=None)
+                                           filter_type=None, training_tool="gaia")
         job2 = dataset_eval.get_job(job2_id)
 
         next_pending = dataset_eval.get_next_pending_job()
@@ -218,12 +218,12 @@ def test_get_next_pending_job_remote(self):
         # If we have a remote pending job with the most recent timestamp, skip it
         job1_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_REMOTE,
                                            c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                           filter_type=None)
+                                           filter_type=None, training_tool="gaia")
         job1 = dataset_eval.get_job(job1_id)
 
         job2_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                            c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                           filter_type=None)
+                                           filter_type=None, training_tool="gaia")
         job2 = dataset_eval.get_job(job2_id)
 
         next_pending = dataset_eval.get_next_pending_job()
@@ -235,7 +235,7 @@ def test_delete_job(self):
 
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
         snapshots = dataset.get_snapshots_for_dataset(self.test_dataset_id)
         self.assertEqual(len(snapshots), 1)
         self.assertIsNotNone(dataset_eval.get_job(job_id))
@@ -247,13 +247,13 @@ def test_delete_job(self):
     def test_eval_job_location(self):
         job1_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_REMOTE,
                                            c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                           filter_type=None)
+                                           filter_type=None, training_tool="gaia")
         job1 = dataset_eval.get_job(job1_id)
         self.assertEqual(job1["eval_location"], dataset_eval.EVAL_REMOTE)
 
         job2_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                            c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                           filter_type=None)
+                                           filter_type=None, training_tool="gaia")
         job2 = dataset_eval.get_job(job2_id)
         self.assertEqual(job2["eval_location"], dataset_eval.EVAL_LOCAL)
 
@@ -262,7 +262,7 @@ def test_get_remote_pending_jobs_for_user(self):
 
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_REMOTE,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
         job_details = db.dataset_eval.get_job(job_id)
 
         response = dataset_eval.get_remote_pending_jobs_for_user(self.test_user_id)
@@ -277,7 +277,7 @@ def test_get_pending_jobs_for_user_local(self):
         """ Check that a local eval dataset for this user doesn't show """
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_LOCAL,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
         job_details = db.dataset_eval.get_job(job_id)
 
         response = dataset_eval.get_remote_pending_jobs_for_user(self.test_user_id)
@@ -290,7 +290,7 @@ def test_get_pending_jobs_for_user_other_user(self):
         another_dataset_id = dataset.create_from_dict(self.test_data, author_id=another_user_id)
         job_id = dataset_eval._create_job(self.conn, another_dataset_id, True, dataset_eval.EVAL_REMOTE,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
 
         response = dataset_eval.get_remote_pending_jobs_for_user(self.test_user_id)
         self.assertEqual(response, [])
@@ -299,7 +299,7 @@ def test_get_pending_jobs_for_user_done(self):
         """ Check that a remote eval job with a done status doesn't show """
         job_id = dataset_eval._create_job(self.conn, self.test_dataset_id, True, dataset_eval.EVAL_REMOTE,
                                           c_value=[1, 2, 3], gamma_value=[4, 5, 6], preprocessing_values=["basic"],
-                                          filter_type=None)
+                                          filter_type=None, training_tool="gaia")
         db.dataset_eval.set_job_status(job_id, db.dataset_eval.STATUS_DONE)
 
         response = dataset_eval.get_remote_pending_jobs_for_user(self.test_user_id)
diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml
index c900ff68c..ec4945f9d 100644
--- a/docker/docker-compose.dev.yml
+++ b/docker/docker-compose.dev.yml
@@ -55,6 +55,8 @@ services:
       context: ..
       dockerfile: Dockerfile
       target: acousticbrainz-dev
+    environment:
+      MODEL_TRAINING_GAIA: 1
     command: python2 worker_manage.py dataset_evaluator
     volumes:
       - ../:/code
@@ -62,3 +64,18 @@ services:
       - ../data/files:/data/files
     depends_on:
       - db
+
+  dataset_evaluator_sklearn:
+    build:
+      context: ..
+      dockerfile: Dockerfile.py3
+      target: acousticbrainz-sklearn
+    environment:
+      MODEL_TRAINING_SKLEARN: 1
+    command: python3 worker_manage.py dataset_evaluator
+    volumes:
+      - ../:/code
+      - ../data/datasets:/data/datasets
+      - ../data/files:/data/files
+    depends_on:
+      - db
diff --git a/requirements.txt b/requirements.txt
index 98505d72d..1e6ffc2c5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ Flask-Login==0.5.0
 Flask-SQLAlchemy==2.4.1
 Flask-Testing==0.8.0
 Flask-WTF == 0.14.3
-futures==3.3.0
+futures == 3.3.0; python_version < '3.0'
 mock==3.0.5
 musicbrainzngs==0.7.1
 ndg-httpsclient==0.5.1
diff --git a/sklearn_manage.py b/sklearn_manage.py
new file mode 100644
index 000000000..9593d2f13
--- /dev/null
+++ b/sklearn_manage.py
@@ -0,0 +1,77 @@
+import click
+
+from acousticbrainz.models.sklearn.model.classification_project import create_classification_project
+from acousticbrainz.models.sklearn.model.predict import prediction
+
+cli = click.Group()  # root group; the commands below are registered on it via @cli.command
+
+@cli.command(name="classification_project")
+@click.option("--ground-truth-file", "-g",
+              help="Path of the dataset's groundtruth file/s.", required=True)
+@click.option("--low-level-dir", "-d", required=True,
+              help="Path of the main datasets dir containing .json file/s.")
+@click.option("--project-file", "-f",
+              help="Name of the project configuration file (.yaml) will be stored. If "
+                   "not specified it takes automatically the name <project_CLASS_NAME>.")
+@click.option("--export-path", "-o",
+              help="Path where the project results will be stored. If empty, the results "
+                   "will be saved in the main app directory.")
+@click.option("--seed", "-s", type=int, default=None,
+              help="Seed is used to generate the random shuffled dataset applied "
+                   "later to folding.")
+@click.option("--jobs", "-j", default=-1, type=int,
+              help="Parallel jobs. Set to -1 to use all the available cores")
+@click.option("--verbose", "-v", default=1, type=int,
+              help="Controls the verbosity: the higher, the more messages.")
+@click.option("--logging", "-l", default="INFO",
+              type=click.Choice(
+                  ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+                  case_sensitive=False
+              ), help="The logging level that will be printed")
+def classification_project(ground_truth_file, low_level_dir, project_file, export_path,
+                           seed, jobs, verbose, logging):
+    """ Creates a classification project from a groundtruth file and the directory
+    containing the low-level .json files. The results are stored in the export
+    path. The seed is used to shuffle the dataset before folding; jobs, verbose
+    and logging set the number of parallel jobs, the verbosity and the logging
+    level.
+    """
+    create_classification_project(
+        ground_truth_file=ground_truth_file,
+        dataset_dir=low_level_dir,
+        project_file=project_file,
+        exports_path=export_path,
+        seed=seed,
+        jobs=jobs,
+        verbose=verbose,
+        logging=logging
+    )
+
+
+@cli.command(name="predict")
+@click.option("--project-file", "-f", required=True,
+              help="Name of the project configuration file (.yaml) that is to be loaded. "
+                   "The .yaml at the end of the file is not necessary. Just put the name "
+                   "of the file.")
+@click.option("--export-path", "-o",
+              help="Path where the project results will be stored. If empty, the results "
+                   "will be saved in the main app directory.")
+@click.option("--track", "-t", required=True,
+              help="MBID of the low-level data from the AcousticBrainz API.")
+@click.option("--logging", "-l", default="INFO",
+              type=click.Choice(
+                  ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+                  case_sensitive=False
+              ), help="The logging level that will be printed")
+def predict(project_file, export_path, track, logging):
+    """ Runs a prediction for the track with the given MBID using the given project file. """
+    prediction(
+        exports_path=export_path,
+        project_file=project_file,
+        mbid=track,
+        log_level=logging
+    )
+
+
+if __name__ == '__main__':
+    cli()  # only when executed as a script: hand control to the click group
diff --git a/webserver/__init__.py b/webserver/__init__.py
index 131f80519..1163e499b 100644
--- a/webserver/__init__.py
+++ b/webserver/__init__.py
@@ -8,7 +8,7 @@
 
 import os
 import time
-import urlparse
+from six.moves import urllib
 
 API_PREFIX = '/api/'
 
@@ -109,7 +109,7 @@ def after_request_callbacks(response):
     init_error_handlers(app)
 
     # Static files
-    import static_manager
+    from webserver import static_manager
 
     # Template utilities
     app.jinja_env.add_extension('jinja2.ext.do')
@@ -136,7 +136,7 @@ def after_request_callbacks(response):
     def prod_https_login_redirect():
         """ Redirect to HTTPS in production except for the API endpoints
         """
-        if urlparse.urlsplit(request.url).scheme == 'http' \
+        if urllib.parse.urlsplit(request.url).scheme == 'http' \
                 and app.config['DEBUG'] == False \
                 and app.config['TESTING'] == False \
                 and request.blueprint not in ('api', 'api_v1_core', 'api_v1_datasets', 'api_v1_dataset_eval'):
diff --git a/webserver/forms.py b/webserver/forms.py
index 0bddfd8c1..07b7d46e8 100644
--- a/webserver/forms.py
+++ b/webserver/forms.py
@@ -10,6 +10,9 @@
 DATASET_EVAL_LOCAL = "local"
 DATASET_EVAL_REMOTE = "remote"
 
+DATASET_TOOL_EVALUATION_GAIA = "gaia"
+DATASET_TOOL_EVALUATION_SKLEARN = "sklearn"
+
 DATASET_PENDING = "pending"
 DATASET_RUNNING = "running"
 DATASET_DONE = "done"
@@ -60,6 +63,11 @@ class DatasetEvaluationForm(FlaskForm):
                                  render_kw={"data-toggle": "collapse",
                                             "data-target": "#collapseSvmOptions"})
 
+    training_tool = SelectField("Model training tool", choices=[
+        (DATASET_TOOL_EVALUATION_GAIA, "gaia"),
+        (DATASET_TOOL_EVALUATION_SKLEARN, "sklearn")],
+                                  default=DATASET_TOOL_EVALUATION_GAIA)
+
     # C parameter to SVM
     c_value = StringField('C Values', default=DATASET_C_VALUE,
                           render_kw={"data-default": DATASET_C_VALUE})
diff --git a/webserver/static/scripts/datasets/eval-jobs-viewer.js b/webserver/static/scripts/datasets/eval-jobs-viewer.js
index 3e89c52bd..2932ee61a 100644
--- a/webserver/static/scripts/datasets/eval-jobs-viewer.js
+++ b/webserver/static/scripts/datasets/eval-jobs-viewer.js
@@ -191,6 +191,7 @@ class JobList extends React.Component {
                             id={cls.id}
                             created={cls.created}
                             status={cls.status}
+                            training_tool={cls.options.training_tool ?? "gaia"}
                             outdated={cls.outdated}
                             showDelete={this.props.showDelete}
                             onViewDetails={this.props.onViewDetails}
@@ -206,6 +207,7 @@ class JobList extends React.Component {
                             <th className="id">Job ID</th>
                             <th className="status">Status</th>
                             <th className="created">Creation time</th>
+                            <th className="tool">Training Tool</th>
                             <th className="controls" />
                         </tr>
                     </thead>
@@ -227,6 +229,7 @@ class JobRow extends React.Component {
         id: PropTypes.string.isRequired,
         created: PropTypes.string.isRequired,
         status: PropTypes.string.isRequired,
+        training_tool: PropTypes.string.isRequired,
         outdated: PropTypes.string.isRequired,
         showDelete: PropTypes.bool.isRequired,
         onViewDetails: PropTypes.func.isRequired,
@@ -283,6 +286,7 @@ class JobRow extends React.Component {
                 <td className="created">
                     <span>{this.props.created}</span>
                 </td>
+                <td className="tool" style={{textTransform: 'capitalize'}}>{this.props.training_tool}</td>
                 <td className="controls">{controls}</td>
             </tr>
         );
diff --git a/webserver/templates/datasets/evaluate.html b/webserver/templates/datasets/evaluate.html
index 177c20b05..bcaab9cde 100644
--- a/webserver/templates/datasets/evaluate.html
+++ b/webserver/templates/datasets/evaluate.html
@@ -56,6 +56,12 @@ <h2 class="page-title">Evaluate dataset "{{ dataset['name'] }}"</h2>
                 </div>
             </div>
             <div class="collapse {% if form.svm_filtering.data %}in{% endif %}" id="collapseSvmOptions">
+                {% if config.get('FEATURE_EVAL_TOOL_SELECTION') %}
+                    <div class="form-group">
+                        <label class="col-sm-2 control-label">{{ form.training_tool.label.text }}</label>
+                        <div class="col-sm-4">{{ form.training_tool(class="form-control", required="required") }}</div>
+                    </div>
+                {% endif %}
                 <div class="form-group">
                     <label class="col-sm-2 control-label">{{ form.c_value.label.text }}</label>
                     <div class="col-sm-4">
@@ -85,8 +91,8 @@ <h2 class="page-title">Evaluate dataset "{{ dataset['name'] }}"</h2>
                     <div class="col-sm-4">{{ form.preprocessing_values(required="required") }}</div>
                 </div>
             </div>
+            </fieldset>
         {% endif %}
-        </fieldset>
         <div class="form-group">
             <div class="col-sm-offset-2 col-sm-4">
                 <button type="submit" class="btn btn-primary">Evaluate</button>
diff --git a/webserver/views/datasets.py b/webserver/views/datasets.py
index 931734219..a85ebebda 100644
--- a/webserver/views/datasets.py
+++ b/webserver/views/datasets.py
@@ -15,7 +15,7 @@
 import csv
 import math
 import six
-import StringIO
+from six import StringIO
 
 from webserver.views.api.exceptions import APIUnauthorized
 # Below values are defined in 'classification_project_template.yaml' file.
@@ -127,7 +127,7 @@ def _convert_dataset_to_csv_stringio(dataset):
     #   - dataset description, class names, class descriptions
     # TODO: On upgrade to python 3, check that stringio accepts the correct data
     #       (may have to change to bytesio if we encode this data)
-    fp = StringIO.StringIO()
+    fp = StringIO()
     writer = csv.writer(fp)
 
     # write dataset description only if it is set
@@ -253,6 +253,7 @@ def evaluate(dataset_id):
                 gamma_values=gamma_values,
                 preprocessing_values=preprocessing_values,
                 filter_type=form.filter_type.data,
+                training_tool=form.training_tool.data
             )
             flash.info("Dataset %s has been added into evaluation queue." % ds["id"])
         except db.dataset_eval.IncompleteDatasetException as e: