Skip to content

Commit

Permalink
Merge pull request #47 from lbventura/refactor-feature-importance-calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixWick authored Sep 22, 2023
2 parents 817500b + 18fbb0a commit 46eb36b
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 53 deletions.
20 changes: 14 additions & 6 deletions cyclic_boosting/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,10 +722,8 @@ def _fit_main(self, X: np.ndarray, y: np.ndarray, pred: CBLinkPredictionsFactors

self.iteration_ += 1

for feature in self.features:
feature.prepare_feature()
feature.set_feature_bin_deviations_from_neutral(self.neutral_factor_link)
self.feature_importances[feature.feature_id] = feature.bin_weighted_average
# compute feature importances
self.set_feature_importances()

if len(self.observers) > 0:
self.prepare_plots(X, y, prediction)
Expand Down Expand Up @@ -925,13 +923,23 @@ def get_subestimators_as_items(self, prototypes=True) -> List[Tuple]:
self._check_fitted()
return [(feature.feature_id, feature.smoother) for feature in self.features]

def set_feature_importances(self) -> None:
    """Compute and store the raw, unnormalized importance of every feature.

    For each feature in ``self.features`` the feature is prepared, its
    per-bin deviations from the neutral factor link are computed, and the
    resulting ``bin_weighted_average`` is recorded in
    ``self.feature_importances`` keyed by the feature object itself.
    Normalization happens later in ``get_feature_importances``.
    """
    for feature in self.features:
        feature.prepare_feature()
        feature.set_feature_bin_deviations_from_neutral(self.neutral_factor_link)
        self.feature_importances[feature] = feature.bin_weighted_average

def get_feature_importances(self) -> Dict[str, float]:
    """Return the relative importance of each input feature.

    The raw importances stored by ``set_feature_importances`` are
    normalized (the returned values sum to 1) and keyed by the feature-group
    name, with multi-feature groups joined by underscores
    (e.g. ``"P_ID_L_ID"``).

    Raises
    ------
    ValueError
        If no importances have been computed yet, i.e. before fitting.
    """
    if not self.feature_importances:
        raise ValueError("_fit_main has to be called first to compute the feature importance.")
    normalized_values = get_normalized_values(self.feature_importances.values())
    return {
        "_".join(feature.feature_id.feature_group): normalized_values[i]
        for i, feature in enumerate(self.feature_importances.keys())
    }

Expand Down
92 changes: 45 additions & 47 deletions tests/test_features.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,65 @@
from cyclic_boosting import CBPoissonRegressor
import numpy as np
import pytest
from typing import List
import pandas as pd
from typing import Dict, Tuple

from cyclic_boosting.regression import CBBaseRegressor


@pytest.fixture
def expected_feature_importances() -> Dict[str, float]:
    """Reference normalized feature importances for the fitted regressor."""
    importances = dict(
        dayofweek=0.08183693583617015,
        L_ID=0.14191802307396523,
        PG_ID_3=0.12016395453139928,
        P_ID=0.23511743026016937,
        PROMOTION_TYPE=0.10313172776022547,
        price_ratio=0.030753319720274865,
        dayofyear=0.09212591146822456,
        P_ID_L_ID=0.19495269734957096,
    )
    return importances


@pytest.fixture
def expected_feature_contributions() -> Dict[str, float]:
    """Reference mean feature contributions of the fitted regressor.

    Keys follow ``get_feature_contributions``: feature-group names, with
    multi-feature groups joined by a space (e.g. ``"P_ID L_ID"``).
    """
    return {
        "dayofweek": 1.0033225561393633,
        "L_ID": 0.9966915915554274,
        "PG_ID_3": 0.9962981313257777,
        "P_ID": 0.9581821452147931,
        "PROMOTION_TYPE": 0.9896018791652068,
        "price_ratio": 1.0,
        "dayofyear": 1.0506461325899688,
        "P_ID L_ID": 0.9140640045438535,
    }


@pytest.fixture(scope="session")
def estimator_data(prepare_data, features, feature_properties) -> Tuple[CBBaseRegressor, pd.DataFrame]:
    """Fit a CBPoissonRegressor on the shared test data once per session.

    Returns the fitted estimator together with the design matrix ``X`` so
    the individual tests can reuse a single (expensive) fit.
    """
    X, y = prepare_data
    est = CBPoissonRegressor(
        feature_groups=features,
        feature_properties=feature_properties,
    )
    est.fit(X, y)
    return est, X

def test_feature_importance(estimator_data, expected_feature_importances):
    """Normalized importances match the reference values and sum to 1."""
    estimator, _ = estimator_data
    norm_feature_importances = estimator.get_feature_importances()

    for feature_name, feature_importance in norm_feature_importances.items():
        assert feature_name in expected_feature_importances
        np.testing.assert_almost_equal(feature_importance, expected_feature_importances[feature_name], 4)
    # importances are relative, so they must add up to one
    np.testing.assert_almost_equal(sum(norm_feature_importances.values()), 1.0, 3)

def test_feature_contributions(estimator_data, expected_feature_contributions):
    """Mean contribution of every feature matches the reference values."""
    estimator, X = estimator_data
    feature_contributions = estimator.get_feature_contributions(X)

    for feature_name, feature_contribution in feature_contributions.items():
        assert feature_name in expected_feature_contributions
        np.testing.assert_almost_equal(feature_contribution.mean(), expected_feature_contributions[feature_name], 3)

0 comments on commit 46eb36b

Please sign in to comment.