Merge branch 'develop'

openml-labs · Nov 7, 2019 · d73b9a8 · d73b9a8
2 parents 5d06114 + 62317c2
commit d73b9a8
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 30 deletions.
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -13,11 +13,9 @@ Using GAMA is as simple as using::
 	automl.predict(X_test)
 	automl.predict_proba(X_test)
 
-You can install GAMA (and its dependencies) by cloning the repository and calling the setup script::
+You can install GAMA from PyPI with pip::
 
-    git clone https://github.com/PGijsbers/gama.git
-    cd gama
-    python setup.py install
+    pip install gama
 
 To get more basic information on GAMA and its AutoML functionality, read more in the :ref:`user_guide_index`.
 If you want to find out everything there is to know about GAMA, also visit the :ref:`technical_guide_index`.

diff --git a/docs/source/user_guide/installation.rst b/docs/source/user_guide/installation.rst
@@ -3,24 +3,28 @@
 Installation
 ------------
 
-GAMA makes use of optional dependencies for its test environment (``test``) and its dash app (``vis``).
-To install GAMA first clone the repository::
+For regular usage, you can install GAMA with pip::
 
-    git clone https://github.com/PGijsbers/gama.git
-    cd gama
-
-Then install GAMA with optional dependencies as desired.
-Installing only the required dependencies allows you to use all of GAMA's AutoML functionality::
+    pip install gama
 
-    pip install -e .
+GAMA features optional dependencies for visualization and development.
+You can install them with::
 
-Installing the visualization dependencies additionally allows you to use the prototype dash app to visualize optimization traces::
+    pip install gama[OPTIONAL]
 
-    pip install -e .[vis]
+where ``OPTIONAL`` is one of:
 
-If you plan on developing GAMA, install the test environment::
+ - ``vis``: allows you to use the prototype dash app to visualize optimization traces.
+ - ``test``: to run GAMA's unit tests locally.
+ - ``doc``: to build docs locally.
+ - ``all``: all of the above.
 
-    pip install -e .[test]
+To see exactly what dependencies will be installed, see `setup.py <https://github.com/PGijsbers/gama/blob/master/setup.py>`_.
+If you plan on developing GAMA, cloning the repository and installing locally is advised::
 
-To see what dependencies will be installed, see `setup.py <https://github.com/PGijsbers/gama/blob/master/setup.py>`_.
+    git clone https://github.com/PGijsbers/gama.git
+    cd gama
+    pip install -e .[all]
 
+This installation will refer to your local GAMA files.
+Changes to the code directly affect the installed GAMA package without requiring a reinstall.
diff --git a/gama/postprocessing/ensemble.py b/gama/postprocessing/ensemble.py
@@ -6,7 +6,6 @@
 from typing import Optional, List
 
 import pandas as pd
-import numpy as np
 from sklearn.preprocessing import OneHotEncoder
 import stopit
 
@@ -45,8 +44,9 @@ def dynamic_defaults(self, gama: 'Gama'):
         self._overwrite_hyperparameter_default('cache', gama._cache_dir)
 
     def post_process(self, x: pd.DataFrame, y: pd.Series, timeout: float, selection: List[Individual]) -> 'model':
-        return build_fit_ensemble(x, y, timeout,
+        return build_fit_ensemble(x, y,
                                   self.hyperparameters['ensemble_size'],
+                                  timeout,
                                   self.hyperparameters['metric'],
                                   self.hyperparameters['cache'])
 
@@ -95,6 +95,7 @@ def __init__(self, metric, y: pd.DataFrame,
         self._y = y
         self._prediction_transformation = None
 
+        self._internal_score = None
         self._fit_models = None
         self._maximize = True
         self._child_ensembles = []
@@ -177,7 +178,8 @@ def expand_ensemble(self, n: int):
                     best_addition, best_addition_score = model, candidate_ensemble_score
 
             self._add_model(best_addition)
-            log.debug('Ensemble size {} , best score: {}'.format(self._total_model_weights(), best_addition_score))
+            self._internal_score = best_addition_score
+            log.info('Ensemble size {} , best score: {}'.format(self._total_model_weights(), best_addition_score))
 
     def fit(self, X, y, timeout=1e6):
         """ Constructs an Ensemble out of the library of models.
@@ -314,7 +316,7 @@ def _ensemble_validation_score(self, prediction_to_validate=None):
             return self._metric.maximizable_score(self._y, prediction_to_validate)
         else:
             # argmax returns (N, 1) matrix, need to squeeze it to (N,) for scoring.
-            class_predictions = np.argmax(prediction_to_validate.toarray(), axis=1)
+            class_predictions = self._one_hot_encoder.inverse_transform(prediction_to_validate.toarray())
             return self._metric.maximizable_score(self._y, class_predictions)
 
     def predict(self, X):
@@ -325,10 +327,11 @@ def predict(self, X):
         else:
             class_probabilities = self._get_weighted_mean_predictions(X, 'predict').toarray()
 
-        class_predictions = np.argmax(class_probabilities, axis=1)
+        class_predictions = self._one_hot_encoder.inverse_transform(class_probabilities)
         if self._label_encoder:
             class_predictions = self._label_encoder.inverse_transform(class_predictions)
-        return class_predictions
+
+        return class_predictions.ravel()
 
     def predict_proba(self, X):
         if self._metric.requires_probabilities:
@@ -349,7 +352,7 @@ def predict(self, X):
         return self._get_weighted_mean_predictions(X)
 
 
-def build_fit_ensemble(x, y, timeout: float, ensemble_size: int,
+def build_fit_ensemble(x, y, ensemble_size: int, timeout: float,
                        metric: Metric, cache: str, encoder: Optional[object]=None) -> Ensemble:
     """ Construct an Ensemble of models from cache, optimizing for metric and fit to (x, y). """
     start_build = time.time()

diff --git a/gama/search_methods/asha.py b/gama/search_methods/asha.py
@@ -12,12 +12,6 @@
 from gama.utilities.generic.async_evaluator import AsyncEvaluator
 from gama.genetic_programming.components.individual import Individual
 
-"""
-TODO:
- - instead of list, use a min-heap by rung.
- - promoted pipelines as set and set-intersection to determine promotability?
-"""
-
 log = logging.getLogger(__name__)
 ASHA_LOG_TOKEN = 'ASHA'