From ecb603c01d1ffcf65838e6b08ab80a627bad4645 Mon Sep 17 00:00:00 2001 From: Felix Wick Date: Mon, 9 Oct 2023 17:11:25 +0200 Subject: [PATCH 1/2] enable hierarchical iterations --- cyclic_boosting/GBSregression.py | 2 + cyclic_boosting/base.py | 42 +++++++++++++++++++-- cyclic_boosting/generic_loss.py | 10 +++++ cyclic_boosting/pipelines.py | 6 +++ cyclic_boosting/regression.py | 2 + tests/test_integration.py | 64 +++++++++++++++++++++++++++++++- 6 files changed, 122 insertions(+), 4 deletions(-) diff --git a/cyclic_boosting/GBSregression.py b/cyclic_boosting/GBSregression.py index 806b375..01c1596 100644 --- a/cyclic_boosting/GBSregression.py +++ b/cyclic_boosting/GBSregression.py @@ -37,6 +37,7 @@ class CBGBSRegressor(RegressorMixin, CyclicBoostingBase, IdentityLinkMixin): def __init__( self, feature_groups=None, + hierarchical_feature_groups=None, feature_properties=None, weight_column=None, minimal_loss_change=1e-10, @@ -52,6 +53,7 @@ def __init__( CyclicBoostingBase.__init__( self, feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, minimal_loss_change=minimal_loss_change, diff --git a/cyclic_boosting/base.py b/cyclic_boosting/base.py index 681b119..1459f37 100644 --- a/cyclic_boosting/base.py +++ b/cyclic_boosting/base.py @@ -15,7 +15,7 @@ from cyclic_boosting import common_smoothers, learning_rate, link from cyclic_boosting.binning import get_feature_column_names_or_indices from cyclic_boosting.common_smoothers import SmootherChoice -from cyclic_boosting.features import create_features, Feature, FeatureList, FeatureTypes +from cyclic_boosting.features import create_features, Feature, FeatureList, FeatureTypes, create_feature_id from cyclic_boosting.link import IdentityLinkMixin, LogLinkMixin from cyclic_boosting.utils import ( slice_finite_semi_positive, @@ -170,6 +170,17 @@ class CyclicBoostingBase( If this argument is omitted, all columns except a possible ``weight_column`` are considered as one-dimensional feature_groups. + hierarchical_feature_groups: sequence of column labels + (:obj:`str` or :obj:`int`) or tuples of such labels or + :class:`cyclic_boosting.base.FeatureID`. + In the first three iterations of the training, only the feature groups + defined here are used, i.e., all other feature groups are excluded. + From the fourth iteration onwards, all feature groups are used. The + idea of such hierarchical iterations is to support the modeling of + hierarchical or causal effects (e.g., mitigate confounding). + + If this argument is omitted, such no hierarchical iterations are run. + feature_properties: :obj:`dict` of :obj:`int` Dictionary listing the names of all features for the training as keys and their pre-processing flags as values. When using a numpy feature @@ -256,6 +267,7 @@ class CyclicBoostingBase( def __init__( self, feature_groups=None, + hierarchical_feature_groups=None, feature_properties: Optional[Dict[int, int]] = None, weight_column: Optional[Union[str, int, None]] = None, prior_prediction_column: Optional[Union[str, int, None]] = None, @@ -276,9 +288,15 @@ def __init__( raise ValueError("smoother_choice needs to be of type SmootherChoice") self.feature_groups = feature_groups + self.hierarchical_feature_groups = hierarchical_feature_groups self.feature_properties = feature_properties self.features = None + self.hierarchical_features = [] + if self.hierarchical_feature_groups is not None: + for fg in self.hierarchical_feature_groups: + hierarchical_feature = create_feature_id(fg) + self.hierarchical_features.append(hierarchical_feature.feature_group) self.feature_importances = {} self.aggregate = aggregate @@ -305,6 +323,8 @@ def __init__( self.learn_rate = learning_rate.half_linear_learn_rate else: self.learn_rate = learn_rate + if hierarchical_feature_groups is not None: + self.learn_rate = learning_rate.constant_learn_rate_one self._init_features() def loss(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> np.ndarray: @@ -517,12 +537,17 @@ def _call_observe_iterations(self, iteration, X, y, prediction, delta) -> None: observer.observe_iterations(iteration, X, y, prediction, self.weights, self.get_state(), delta) def get_state(self) -> Dict[str, Any]: - return { + est_state = { "link_function": self, "features": self.features, "globale_scale": self.global_scale_, "insample_loss": self.insample_loss_, } + if self.hierarchical_feature_groups is not None and self.iteration_ < 3: + est_state["features"] = [ + feature for feature in self.features if feature.feature_group in self.hierarchical_features + ] + return est_state def remove_preds(self, pred: CBLinkPredictionsFactors, X: np.ndarray) -> None: for feature in self.features: @@ -701,6 +726,13 @@ def _fit_main(self, X: np.ndarray, y: np.ndarray, pred: CBLinkPredictionsFactors self._log_iteration_info(convergence_parameters) for i, feature, pf_data in self.cb_features(X, y, pred, prefit_data): + if ( + self.hierarchical_feature_groups is not None + and self.iteration_ < 3 + and feature.feature_group not in self.hierarchical_features + ): + feature.factors_link_old = feature.factors_link.copy() + continue pred = self.feature_iteration(X, y, feature, pred, pf_data) self._call_observe_feature_iterations(self.iteration_, i, X, y, prediction) @@ -852,6 +884,7 @@ def _check_stop_criteria(self, iterations: int, convergence_parameters: Converge stop_iterations = False stop_factor_change = False stop_loss_change = False + veto_hierarchical = False delta = convergence_parameters.delta loss_change = convergence_parameters.loss_change @@ -887,8 +920,11 @@ def _check_stop_criteria(self, iterations: int, convergence_parameters: Converge "analysis plots." ) + if iterations <= 3 and self.hierarchical_feature_groups is not None: + veto_hierarchical = True + self.stop_criteria_ = (stop_iterations, stop_factor_change, stop_loss_change) - return stop_iterations or stop_factor_change or stop_loss_change + return (stop_iterations or stop_factor_change or stop_loss_change) and not veto_hierarchical def _check_parameters(self) -> None: if self.feature_groups is not None and len(self.feature_groups) == 0: diff --git a/cyclic_boosting/generic_loss.py b/cyclic_boosting/generic_loss.py index 766cfb2..a81fc5c 100644 --- a/cyclic_boosting/generic_loss.py +++ b/cyclic_boosting/generic_loss.py @@ -194,6 +194,7 @@ class CBMultiplicativeQuantileRegressor(CBGenericLoss, sklearn.base.RegressorMix def __init__( self, feature_groups=None, + hierarchical_feature_groups=None, feature_properties=None, weight_column=None, prior_prediction_column=None, @@ -210,6 +211,7 @@ def __init__( CyclicBoostingBase.__init__( self, feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, @@ -284,6 +286,7 @@ class CBAdditiveQuantileRegressor(CBGenericLoss, sklearn.base.RegressorMixin, Id def __init__( self, feature_groups=None, + hierarchical_feature_groups=None, feature_properties=None, weight_column=None, prior_prediction_column=None, @@ -300,6 +303,7 @@ def __init__( CyclicBoostingBase.__init__( self, feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, @@ -509,6 +513,7 @@ class CBMultiplicativeGenericCRegressor(CBGenericLoss, sklearn.base.RegressorMix def __init__( self, feature_groups=None, + hierarchical_feature_groups=None, feature_properties=None, weight_column=None, prior_prediction_column=None, @@ -525,6 +530,7 @@ def __init__( CyclicBoostingBase.__init__( self, feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, @@ -572,6 +578,7 @@ class CBAdditiveGenericCRegressor(CBGenericLoss, sklearn.base.RegressorMixin, Id def __init__( self, feature_groups=None, + hierarchical_feature_groups=None, feature_properties=None, weight_column=None, prior_prediction_column=None, @@ -588,6 +595,7 @@ def __init__( CyclicBoostingBase.__init__( self, feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, @@ -634,6 +642,7 @@ class CBGenericClassifier(CBGenericLoss, sklearn.base.ClassifierMixin, LogitLink def __init__( self, feature_groups=None, + hierarchical_feature_groups=None, feature_properties=None, weight_column=None, prior_prediction_column=None, @@ -650,6 +659,7 @@ def __init__( CyclicBoostingBase.__init__( self, feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, diff --git a/cyclic_boosting/pipelines.py b/cyclic_boosting/pipelines.py index d38bf5a..e9d125d 100644 --- a/cyclic_boosting/pipelines.py +++ b/cyclic_boosting/pipelines.py @@ -21,6 +21,7 @@ def pipeline_CB( estimator=None, feature_groups=None, + hierarchical_feature_groups=None, feature_properties=None, weight_column=None, prior_prediction_column=None, @@ -51,6 +52,7 @@ def pipeline_CB( if estimator in [CBPoissonRegressor, CBLocPoissonRegressor, CBLocationRegressor, CBClassifier]: estimatorCB = estimator( feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, @@ -66,6 +68,7 @@ def pipeline_CB( elif estimator == CBNBinomRegressor: estimatorCB = estimator( feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, @@ -119,6 +122,7 @@ def pipeline_CB( elif estimator == CBGBSRegressor: estimatorCB = estimator( feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, minimal_loss_change=minimal_loss_change, @@ -134,6 +138,7 @@ def pipeline_CB( elif estimator in [CBMultiplicativeQuantileRegressor, CBAdditiveQuantileRegressor]: estimatorCB = estimator( feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, @@ -150,6 +155,7 @@ def pipeline_CB( elif estimator in [CBMultiplicativeGenericCRegressor, CBAdditiveGenericCRegressor, CBGenericClassifier]: estimatorCB = estimator( feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, diff --git a/cyclic_boosting/regression.py b/cyclic_boosting/regression.py index 7d0257f..5493553 100644 --- a/cyclic_boosting/regression.py +++ b/cyclic_boosting/regression.py @@ -106,6 +106,7 @@ class CBNBinomRegressor(CBBaseRegressor): def __init__( self, feature_groups=None, + hierarchical_feature_groups=None, feature_properties=None, weight_column=None, prior_prediction_column=None, @@ -123,6 +124,7 @@ def __init__( CyclicBoostingBase.__init__( self, feature_groups=feature_groups, + hierarchical_feature_groups=hierarchical_feature_groups, feature_properties=feature_properties, weight_column=weight_column, prior_prediction_column=prior_prediction_column, diff --git a/tests/test_integration.py b/tests/test_integration.py index 8877b6e..48970a9 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -65,7 +65,69 @@ def test_poisson_regression(is_plot, prepare_data, cb_poisson_regressor_model): yhat = CB_est.predict(X.copy()) mad = np.nanmean(np.abs(y - yhat)) - np.testing.assert_almost_equal(mad, 1.6997, 3) + np.testing.assert_almost_equal(mad, 1.70, 3) + + +@pytest.fixture(scope="function") +def cb_poisson_regressor_model_hierarchical(features, feature_properties): + explicit_smoothers = { + ("dayofyear",): SeasonalSmoother(order=3), + ("price_ratio",): IsotonicRegressor(increasing=False), + } + + plobs = [ + observers.PlottingObserver(iteration=1), + observers.PlottingObserver(iteration=4), + observers.PlottingObserver(iteration=-1), + ] + + CB_pipeline = pipeline_CBPoissonRegressor( + feature_properties=feature_properties, + feature_groups=[ + "PG_ID_3", + "P_ID", + "L_ID", + ("P_ID", "L_ID"), + "dayofweek", + "PROMOTION_TYPE", + "dayofyear", + "price_ratio", + ], + hierarchical_feature_groups=[ + "PG_ID_3", + "P_ID", + "L_ID", + ("P_ID", "L_ID"), + "dayofweek", + "PROMOTION_TYPE", + "dayofyear", + # "price_ratio", + ], + observers=plobs, + maximal_iterations=50, + smoother_choice=common_smoothers.SmootherChoiceGroupBy( + use_regression_type=True, use_normalization=False, explicit_smoothers=explicit_smoothers + ), + ) + + return CB_pipeline + + +def test_poisson_regression_hierarchical(is_plot, prepare_data, cb_poisson_regressor_model_hierarchical): + X, y = prepare_data + + CB_est = cb_poisson_regressor_model_hierarchical + CB_est.fit(X.copy(), y) + + if is_plot: + plot_CB("analysis_CB_iterfirst", [CB_est[-1].observers[0]], CB_est[-2]) + plot_CB("analysis_CB_iterfourth", [CB_est[-1].observers[1]], CB_est[-2]) + plot_CB("analysis_CB_iterlast", [CB_est[-1].observers[-1]], CB_est[-2]) + + yhat = CB_est.predict(X.copy()) + + mad = np.nanmean(np.abs(y - yhat)) + np.testing.assert_almost_equal(mad, 1.699, 3) def test_poisson_regression_default_features(prepare_data, default_features, feature_properties): From 5a1d7c806243dee6dffad338244112e645762e0d Mon Sep 17 00:00:00 2001 From: Felix Wick Date: Mon, 16 Oct 2023 21:51:58 +0200 Subject: [PATCH 2/2] hierarchical iterations as parameter --- cyclic_boosting/base.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/cyclic_boosting/base.py b/cyclic_boosting/base.py index 1459f37..01c0a06 100644 --- a/cyclic_boosting/base.py +++ b/cyclic_boosting/base.py @@ -179,7 +179,8 @@ class CyclicBoostingBase( idea of such hierarchical iterations is to support the modeling of hierarchical or causal effects (e.g., mitigate confounding). - If this argument is omitted, such no hierarchical iterations are run. + If this argument is not explicitly set, no such hierarchical iterations + are run. feature_properties: :obj:`dict` of :obj:`int` Dictionary listing the names of all features for the training as keys @@ -268,6 +269,7 @@ def __init__( self, feature_groups=None, hierarchical_feature_groups=None, + training_iterations_hierarchical_features=3, feature_properties: Optional[Dict[int, int]] = None, weight_column: Optional[Union[str, int, None]] = None, prior_prediction_column: Optional[Union[str, int, None]] = None, @@ -297,6 +299,7 @@ def __init__( for fg in self.hierarchical_feature_groups: hierarchical_feature = create_feature_id(fg) self.hierarchical_features.append(hierarchical_feature.feature_group) + self.training_iterations_hierarchical_features = training_iterations_hierarchical_features self.feature_importances = {} self.aggregate = aggregate @@ -543,7 +546,10 @@ def get_state(self) -> Dict[str, Any]: "globale_scale": self.global_scale_, "insample_loss": self.insample_loss_, } - if self.hierarchical_feature_groups is not None and self.iteration_ < 3: + if ( + self.hierarchical_feature_groups is not None + and self.iteration_ < self.training_iterations_hierarchical_features + ): est_state["features"] = [ feature for feature in self.features if feature.feature_group in self.hierarchical_features ] @@ -728,7 +734,7 @@ def _fit_main(self, X: np.ndarray, y: np.ndarray, pred: CBLinkPredictionsFactors for i, feature, pf_data in self.cb_features(X, y, pred, prefit_data): if ( self.hierarchical_feature_groups is not None - and self.iteration_ < 3 + and self.iteration_ < self.training_iterations_hierarchical_features and feature.feature_group not in self.hierarchical_features ): feature.factors_link_old = feature.factors_link.copy() @@ -874,7 +880,7 @@ def transform(self, X: pd.DataFrame, y: Optional[np.ndarray] = None) -> pd.DataF def _check_stop_criteria(self, iterations: int, convergence_parameters: ConvergenceParameters) -> bool: """ - Checks the stop criteria and returns True if none are satisfied else False. + Checks the stop criteria and returns True if at least one is satisfied. You can check the stop criteria in the estimated parameter `stop_criteria_`. @@ -920,7 +926,10 @@ def _check_stop_criteria(self, iterations: int, convergence_parameters: Converge "analysis plots." ) - if iterations <= 3 and self.hierarchical_feature_groups is not None: + if ( + iterations <= self.training_iterations_hierarchical_features + and self.hierarchical_feature_groups is not None + ): veto_hierarchical = True self.stop_criteria_ = (stop_iterations, stop_factor_change, stop_loss_change)