hierarchical iterations #52

Merged · 2 commits · Oct 16, 2023

2 changes: 2 additions & 0 deletions cyclic_boosting/GBSregression.py
@@ -37,6 +37,7 @@ class CBGBSRegressor(RegressorMixin, CyclicBoostingBase, IdentityLinkMixin):
def __init__(
self,
feature_groups=None,
hierarchical_feature_groups=None,
feature_properties=None,
weight_column=None,
minimal_loss_change=1e-10,
@@ -52,6 +53,7 @@ def __init__(
CyclicBoostingBase.__init__(
self,
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
minimal_loss_change=minimal_loss_change,
53 changes: 49 additions & 4 deletions cyclic_boosting/base.py
@@ -15,7 +15,7 @@
from cyclic_boosting import common_smoothers, learning_rate, link
from cyclic_boosting.binning import get_feature_column_names_or_indices
from cyclic_boosting.common_smoothers import SmootherChoice
from cyclic_boosting.features import create_features, Feature, FeatureList, FeatureTypes
from cyclic_boosting.features import create_features, Feature, FeatureList, FeatureTypes, create_feature_id
from cyclic_boosting.link import IdentityLinkMixin, LogLinkMixin
from cyclic_boosting.utils import (
slice_finite_semi_positive,
@@ -170,6 +170,18 @@ class CyclicBoostingBase(
If this argument is omitted, all columns except a possible
``weight_column`` are considered as one-dimensional feature_groups.

hierarchical_feature_groups: sequence of column labels
(:obj:`str` or :obj:`int`) or tuples of such labels or
:class:`cyclic_boosting.base.FeatureID`.
In the first iterations of the training (three by default, adjustable
via ``training_iterations_hierarchical_features``), only the feature
groups defined here are used, i.e., all other feature groups are
excluded. From then on, all feature groups are used. The
idea of such hierarchical iterations is to support the modeling of
hierarchical or causal effects (e.g., mitigate confounding).

If this argument is not explicitly set, no such hierarchical iterations
are run.

feature_properties: :obj:`dict` of :obj:`int`
Dictionary listing the names of all features for the training as keys
and their pre-processing flags as values. When using a numpy feature
@@ -256,6 +268,8 @@ class CyclicBoostingBase(
def __init__(
self,
feature_groups=None,
hierarchical_feature_groups=None,
training_iterations_hierarchical_features=3,
feature_properties: Optional[Dict[int, int]] = None,
weight_column: Optional[Union[str, int, None]] = None,
prior_prediction_column: Optional[Union[str, int, None]] = None,
@@ -276,9 +290,16 @@ def __init__(
raise ValueError("smoother_choice needs to be of type SmootherChoice")

self.feature_groups = feature_groups
self.hierarchical_feature_groups = hierarchical_feature_groups
self.feature_properties = feature_properties

self.features = None
self.hierarchical_features = []
if self.hierarchical_feature_groups is not None:
for fg in self.hierarchical_feature_groups:
hierarchical_feature = create_feature_id(fg)
self.hierarchical_features.append(hierarchical_feature.feature_group)
self.training_iterations_hierarchical_features = training_iterations_hierarchical_features
self.feature_importances = {}
self.aggregate = aggregate

@@ -305,6 +326,8 @@ def __init__(
self.learn_rate = learning_rate.half_linear_learn_rate
else:
self.learn_rate = learn_rate
if hierarchical_feature_groups is not None:
self.learn_rate = learning_rate.constant_learn_rate_one
self._init_features()

def loss(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> np.ndarray:
@@ -517,12 +540,20 @@ def _call_observe_iterations(self, iteration, X, y, prediction, delta) -> None:
observer.observe_iterations(iteration, X, y, prediction, self.weights, self.get_state(), delta)

def get_state(self) -> Dict[str, Any]:
return {
est_state = {
"link_function": self,
"features": self.features,
"globale_scale": self.global_scale_,
"insample_loss": self.insample_loss_,
}
if (
self.hierarchical_feature_groups is not None
and self.iteration_ < self.training_iterations_hierarchical_features
):
est_state["features"] = [
feature for feature in self.features if feature.feature_group in self.hierarchical_features
]
return est_state

def remove_preds(self, pred: CBLinkPredictionsFactors, X: np.ndarray) -> None:
for feature in self.features:
@@ -701,6 +732,13 @@ def _fit_main(self, X: np.ndarray, y: np.ndarray, pred: CBLinkPredictionsFactors

self._log_iteration_info(convergence_parameters)
for i, feature, pf_data in self.cb_features(X, y, pred, prefit_data):
if (
self.hierarchical_feature_groups is not None
and self.iteration_ < self.training_iterations_hierarchical_features
and feature.feature_group not in self.hierarchical_features
):
feature.factors_link_old = feature.factors_link.copy()
continue
pred = self.feature_iteration(X, y, feature, pred, pf_data)
self._call_observe_feature_iterations(self.iteration_, i, X, y, prediction)

@@ -842,7 +880,7 @@ def transform(self, X: pd.DataFrame, y: Optional[np.ndarray] = None) -> pd.DataF

def _check_stop_criteria(self, iterations: int, convergence_parameters: ConvergenceParameters) -> bool:
"""
Checks the stop criteria and returns True if none are satisfied else False.
Checks the stop criteria and returns True if at least one is satisfied.

You can check the stop criteria in the estimated parameter
`stop_criteria_`.
@@ -852,6 +890,7 @@ def _check_stop_criteria(self, iterations: int, convergence_parameters: Converge
stop_iterations = False
stop_factor_change = False
stop_loss_change = False
veto_hierarchical = False

delta = convergence_parameters.delta
loss_change = convergence_parameters.loss_change
@@ -887,8 +926,14 @@ def _check_stop_criteria(self, iterations: int, convergence_parameters: Converge
"analysis plots."
)

if (
iterations <= self.training_iterations_hierarchical_features
and self.hierarchical_feature_groups is not None
):
veto_hierarchical = True

self.stop_criteria_ = (stop_iterations, stop_factor_change, stop_loss_change)
return stop_iterations or stop_factor_change or stop_loss_change
return (stop_iterations or stop_factor_change or stop_loss_change) and not veto_hierarchical

def _check_parameters(self) -> None:
if self.feature_groups is not None and len(self.feature_groups) == 0:
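Taken together, the base.py changes gate the training loop: during the first iterations only the hierarchical feature groups are updated (_fit_main skips all others), get_state reports only those features, and _check_stop_criteria vetoes early convergence until the hierarchical phase is over. A minimal, self-contained sketch of the gating idea (plain Python; the function and variable names here are illustrative, not part of the library API):

def active_feature_groups(iteration, all_groups, hierarchical_groups, n_hierarchical=3):
    # During the first n_hierarchical iterations, restrict updates to the
    # hierarchical feature groups; afterwards, update all feature groups.
    if hierarchical_groups and iteration < n_hierarchical:
        return [fg for fg in all_groups if fg in hierarchical_groups]
    return list(all_groups)

all_groups = ["P_ID", "L_ID", ("P_ID", "L_ID"), "price_ratio"]
hierarchical = ["P_ID", "L_ID", ("P_ID", "L_ID")]
for it in range(5):
    print(it, active_feature_groups(it, all_groups, hierarchical))
# Iterations 0-2 update only the hierarchical groups; from iteration 3
# onwards, "price_ratio" is updated as well.

The convergence veto ensures the model always reaches the iterations in which all feature groups participate, even if the loss change would otherwise trigger an early stop.
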
10 changes: 10 additions & 0 deletions cyclic_boosting/generic_loss.py
@@ -194,6 +194,7 @@ class CBMultiplicativeQuantileRegressor(CBGenericLoss, sklearn.base.RegressorMix
def __init__(
self,
feature_groups=None,
hierarchical_feature_groups=None,
feature_properties=None,
weight_column=None,
prior_prediction_column=None,
@@ -210,6 +211,7 @@ def __init__(
CyclicBoostingBase.__init__(
self,
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
@@ -284,6 +286,7 @@ class CBAdditiveQuantileRegressor(CBGenericLoss, sklearn.base.RegressorMixin, Id
def __init__(
self,
feature_groups=None,
hierarchical_feature_groups=None,
feature_properties=None,
weight_column=None,
prior_prediction_column=None,
@@ -300,6 +303,7 @@ def __init__(
CyclicBoostingBase.__init__(
self,
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
@@ -509,6 +513,7 @@ class CBMultiplicativeGenericCRegressor(CBGenericLoss, sklearn.base.RegressorMix
def __init__(
self,
feature_groups=None,
hierarchical_feature_groups=None,
feature_properties=None,
weight_column=None,
prior_prediction_column=None,
@@ -525,6 +530,7 @@ def __init__(
CyclicBoostingBase.__init__(
self,
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
@@ -572,6 +578,7 @@ class CBAdditiveGenericCRegressor(CBGenericLoss, sklearn.base.RegressorMixin, Id
def __init__(
self,
feature_groups=None,
hierarchical_feature_groups=None,
feature_properties=None,
weight_column=None,
prior_prediction_column=None,
@@ -588,6 +595,7 @@ def __init__(
CyclicBoostingBase.__init__(
self,
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
@@ -634,6 +642,7 @@ class CBGenericClassifier(CBGenericLoss, sklearn.base.ClassifierMixin, LogitLink
def __init__(
self,
feature_groups=None,
hierarchical_feature_groups=None,
feature_properties=None,
weight_column=None,
prior_prediction_column=None,
@@ -650,6 +659,7 @@ def __init__(
CyclicBoostingBase.__init__(
self,
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
6 changes: 6 additions & 0 deletions cyclic_boosting/pipelines.py
@@ -21,6 +21,7 @@
def pipeline_CB(
estimator=None,
feature_groups=None,
hierarchical_feature_groups=None,
feature_properties=None,
weight_column=None,
prior_prediction_column=None,
@@ -51,6 +52,7 @@ def pipeline_CB(
if estimator in [CBPoissonRegressor, CBLocPoissonRegressor, CBLocationRegressor, CBClassifier]:
estimatorCB = estimator(
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
@@ -66,6 +68,7 @@ def pipeline_CB(
elif estimator == CBNBinomRegressor:
estimatorCB = estimator(
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
@@ -119,6 +122,7 @@ def pipeline_CB(
elif estimator == CBGBSRegressor:
estimatorCB = estimator(
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
minimal_loss_change=minimal_loss_change,
@@ -134,6 +138,7 @@ def pipeline_CB(
elif estimator in [CBMultiplicativeQuantileRegressor, CBAdditiveQuantileRegressor]:
estimatorCB = estimator(
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
@@ -150,6 +155,7 @@ def pipeline_CB(
elif estimator in [CBMultiplicativeGenericCRegressor, CBAdditiveGenericCRegressor, CBGenericClassifier]:
estimatorCB = estimator(
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
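Since all pipeline branches now pass the new argument through unchanged, enabling hierarchical iterations is a single extra keyword at construction time. A minimal usage sketch (the feature names and property flags below are placeholders, and the fit/predict calls assume a pandas DataFrame with matching columns; see the integration test further down for a complete, working configuration):

from cyclic_boosting import flags
from cyclic_boosting.pipelines import pipeline_CBPoissonRegressor

# Placeholder feature setup, for illustration only.
feature_properties = {
    "P_ID": flags.IS_UNORDERED,
    "price_ratio": flags.IS_CONTINUOUS,
}
est = pipeline_CBPoissonRegressor(
    feature_groups=["P_ID", "price_ratio"],
    # Trained exclusively during the first (hierarchical) iterations;
    # "price_ratio" only joins from the fourth iteration onwards.
    hierarchical_feature_groups=["P_ID"],
    feature_properties=feature_properties,
)
# est.fit(X, y); y_hat = est.predict(X)
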
2 changes: 2 additions & 0 deletions cyclic_boosting/regression.py
@@ -106,6 +106,7 @@ class CBNBinomRegressor(CBBaseRegressor):
def __init__(
self,
feature_groups=None,
hierarchical_feature_groups=None,
feature_properties=None,
weight_column=None,
prior_prediction_column=None,
@@ -123,6 +124,7 @@ def __init__(
CyclicBoostingBase.__init__(
self,
feature_groups=feature_groups,
hierarchical_feature_groups=hierarchical_feature_groups,
feature_properties=feature_properties,
weight_column=weight_column,
prior_prediction_column=prior_prediction_column,
64 changes: 63 additions & 1 deletion tests/test_integration.py
@@ -65,7 +65,69 @@ def test_poisson_regression(is_plot, prepare_data, cb_poisson_regressor_model):
yhat = CB_est.predict(X.copy())

mad = np.nanmean(np.abs(y - yhat))
np.testing.assert_almost_equal(mad, 1.6997, 3)
np.testing.assert_almost_equal(mad, 1.70, 3)


@pytest.fixture(scope="function")
def cb_poisson_regressor_model_hierarchical(features, feature_properties):
explicit_smoothers = {
("dayofyear",): SeasonalSmoother(order=3),
("price_ratio",): IsotonicRegressor(increasing=False),
}

plobs = [
observers.PlottingObserver(iteration=1),
observers.PlottingObserver(iteration=4),
observers.PlottingObserver(iteration=-1),
]

CB_pipeline = pipeline_CBPoissonRegressor(
feature_properties=feature_properties,
feature_groups=[
"PG_ID_3",
"P_ID",
"L_ID",
("P_ID", "L_ID"),
"dayofweek",
"PROMOTION_TYPE",
"dayofyear",
"price_ratio",
],
hierarchical_feature_groups=[
"PG_ID_3",
"P_ID",
"L_ID",
("P_ID", "L_ID"),
"dayofweek",
"PROMOTION_TYPE",
"dayofyear",
# "price_ratio",
],
observers=plobs,
maximal_iterations=50,
smoother_choice=common_smoothers.SmootherChoiceGroupBy(
use_regression_type=True, use_normalization=False, explicit_smoothers=explicit_smoothers
),
)

return CB_pipeline


def test_poisson_regression_hierarchical(is_plot, prepare_data, cb_poisson_regressor_model_hierarchical):
X, y = prepare_data

CB_est = cb_poisson_regressor_model_hierarchical
CB_est.fit(X.copy(), y)

if is_plot:
plot_CB("analysis_CB_iterfirst", [CB_est[-1].observers[0]], CB_est[-2])
plot_CB("analysis_CB_iterfourth", [CB_est[-1].observers[1]], CB_est[-2])
plot_CB("analysis_CB_iterlast", [CB_est[-1].observers[-1]], CB_est[-2])

yhat = CB_est.predict(X.copy())

mad = np.nanmean(np.abs(y - yhat))
np.testing.assert_almost_equal(mad, 1.699, 3)
Collaborator:
Can I read this slight improvement in MAD (compared to the previous test) as a result of feature hierarchization, or is the difference too small to attribute it to that?

Collaborator (author):
The improvement is small, yes, but I think it is due to the hierarchical training. In this data set, it helps to account for the strong confounding of the price effect by the different products. I couldn't find a better example in our integration test.



def test_poisson_regression_default_features(prepare_data, default_features, feature_properties):