From cd5208c82a77981a722ead08890c4087c5c47f01 Mon Sep 17 00:00:00 2001
From: Reid Johnson
Date: Fri, 27 Sep 2024 21:41:05 -0500
Subject: [PATCH] Refactor Usage of scikit-learn Utilities (#95)

Updates for sklearn 1.5+
---
 quantile_forest/_quantile_forest.py           | 74 ++++---------------
 quantile_forest/tests/test_quantile_forest.py | 29 ++++++++
 2 files changed, 42 insertions(+), 61 deletions(-)

diff --git a/quantile_forest/_quantile_forest.py b/quantile_forest/_quantile_forest.py
index 412fe5d..da04f5e 100755
--- a/quantile_forest/_quantile_forest.py
+++ b/quantile_forest/_quantile_forest.py
@@ -30,7 +30,6 @@ class calls the ``fit`` method of the ``ForestRegressor`` and creates a
 
 import joblib
 import numpy as np
-import sklearn
 from sklearn.ensemble._forest import (
     ForestRegressor,
     _generate_sample_indices,
@@ -38,24 +37,12 @@ class calls the ``fit`` method of the ``ForestRegressor`` and creates a
 )
 from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
 from sklearn.tree._tree import DTYPE
-
-try:
-    from sklearn.utils.fixes import parse_version
-except ImportError:
-    from sklearn.utils import parse_version
-
-param_validation = True
-try:
-    from sklearn.utils._param_validation import Interval, RealNotInt
-except ImportError:
-    param_validation = False
+from sklearn.utils._param_validation import Interval, RealNotInt
 from sklearn.utils.validation import check_is_fitted
 
 from ._quantile_forest_fast import QuantileForest
 from ._utils import generate_unsampled_indices, group_indices_by_value, map_indices_to_leaves
 
-sklearn_version = parse_version(sklearn.__version__)
-
 
 class BaseForestQuantileRegressor(ForestRegressor):
     """Base class for quantile regression forests.
@@ -64,17 +51,16 @@ class BaseForestQuantileRegressor(ForestRegressor):
     instead.
     """
 
-    if param_validation:
-        _parameter_constraints: dict = {
-            **ForestRegressor._parameter_constraints,
-            **DecisionTreeRegressor._parameter_constraints,
-            "max_samples_leaf": [
-                None,
-                Interval(RealNotInt, 0, 1, closed="right"),
-                Interval(Integral, 1, None, closed="left"),
-            ],
-        }
-        _parameter_constraints.pop("splitter")
+    _parameter_constraints: dict = {
+        **ForestRegressor._parameter_constraints,
+        **DecisionTreeRegressor._parameter_constraints,
+        "max_samples_leaf": [
+            None,
+            Interval(RealNotInt, 0, 1, closed="right"),
+            Interval(Integral, 1, None, closed="left"),
+        ],
+    }
+    _parameter_constraints.pop("splitter")
 
     @abstractmethod
     def __init__(
@@ -107,8 +93,6 @@ def __init__(
         }
         super().__init__(**init_dict)
 
-        self.param_validation = hasattr(self, "_parameter_constraints")
-
     def fit(self, X, y, sample_weight=None, sparse_pickle=False):
         """Build a forest from the training set (X, y).
 
@@ -135,26 +119,8 @@ def fit(self, X, y, sample_weight=None, sparse_pickle=False):
         self : object
             Fitted estimator.
         """
-        if self.param_validation:
-            self._validate_params()
-        else:
-            if isinstance(self.max_samples_leaf, (Integral, np.integer)):
-                if self.max_samples_leaf < 1:
-                    raise ValueError(
-                        "If max_samples_leaf is an integer, it must be be >= 1, "
-                        f"got {self.max_samples_leaf}."
-                    )
-            elif isinstance(self.max_samples_leaf, Real):
-                if not 0.0 < self.max_samples_leaf <= 1.0:
-                    raise ValueError(
-                        "If max_samples_leaf is a float, it must be in range (0, 1], "
-                        f"got {self.max_samples_leaf}."
-                    )
-            elif self.max_samples_leaf is not None:
-                raise ValueError(
-                    "max_samples_leaf must be of integer, float, or None type, got "
-                    f"{self.max_samples_leaf}."
-                )
+        self._validate_params()
+
         if self.monotonic_cst is not None:
             if (
                 not isinstance(self.max_samples_leaf, (Integral, np.integer))
@@ -1210,17 +1176,12 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor):
           - regressions trained on data with missing values,
           - trees with multi-sample leaves (i.e. when `max_samples_leaf > 1`).
 
-        .. sklearn-versionadded:: 1.4
-
     Attributes
     ----------
     estimator_ : :class:`~sklearn.tree.DecisionTreeRegressor`
         The child estimator template used to create the collection of fitted
         sub-estimators.
 
-        .. sklearn-versionadded:: 1.2
-           `base_estimator_` was renamed to `estimator_`.
-
     estimators_ : list of DecisionTreeRegressor
         The collection of fitted sub-estimators.
 
@@ -1257,8 +1218,6 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor):
         The subset of drawn samples (i.e., the in-bag samples) for each base
         estimator. Each subset is defined by an array of the indices selected.
 
-        .. sklearn-versionadded:: 1.4
-
     See Also
     --------
     ExtraTreesQuantileRegressor : Quantile ensemble of extremely randomized
@@ -1556,17 +1515,12 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor):
          - regressions trained on data with missing values,
          - trees with multi-sample leaves (i.e. when `max_samples_leaf > 1`).
 
-        .. sklearn-versionadded:: 1.4
-
     Attributes
     ----------
     estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor`
         The child estimator template used to create the collection of fitted
         sub-estimators.
 
-        .. sklearn-versionadded:: 1.2
-           `base_estimator_` was renamed to `estimator_`.
-
     estimators_ : list of DecisionTreeRegressor
         The collection of fitted sub-estimators.
 
@@ -1603,8 +1557,6 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor):
         The subset of drawn samples (i.e., the in-bag samples) for each base
         estimator. Each subset is defined by an array of the indices selected.
 
-        .. sklearn-versionadded:: 1.4
-
     See Also
     --------
     RandomForestQuantileRegressor : Quantile ensemble regressor using trees.
diff --git a/quantile_forest/tests/test_quantile_forest.py b/quantile_forest/tests/test_quantile_forest.py
index 1d51e58..94e0c09 100755
--- a/quantile_forest/tests/test_quantile_forest.py
+++ b/quantile_forest/tests/test_quantile_forest.py
@@ -95,6 +95,35 @@ def test_regression_toy(name, weighted_quantile):
     check_regression_toy(name, weighted_quantile)
 
 
+def check_regression_params(name):
+    params = {
+        "criterion": "squared_error",
+        "max_depth": 2,
+        "min_samples_split": 2,
+        "min_samples_leaf": 1,
+        "min_weight_fraction_leaf": 0.0,
+        "max_features": 1.0,
+        "max_leaf_nodes": 16,
+        "min_impurity_decrease": 0.0,
+        "ccp_alpha": 0.0,
+        "monotonic_cst": [0, 1, -1, 0],
+    }
+
+    ForestRegressor = FOREST_REGRESSORS[name]
+
+    X, y = datasets.make_regression(n_features=4, n_informative=2, shuffle=True, random_state=0)
+
+    est = ForestRegressor(**params, random_state=0).fit(X, y)
+
+    for param in params:
+        assert getattr(est, param) == getattr(est.estimators_[0], param)
+
+
+@pytest.mark.parametrize("name", FOREST_REGRESSORS)
+def test_regression_params(name):
+    check_regression_params(name)
+
+
 def check_california_criterion(name, criterion):
     """Check for consistency on the California Housing dataset."""
     ForestRegressor = FOREST_REGRESSORS[name]
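
Illustrative sketch (not part of the patch): the hand-rolled `max_samples_leaf`
checks removed from `fit` above are replaced by scikit-learn's declarative
validation, where `_validate_params()` checks `self.get_params()` against the
class-level `_parameter_constraints` dict. The `DemoEstimator` below is a
hypothetical, minimal estimator (not from quantile-forest) that exercises the
same `max_samples_leaf` constraint; it assumes scikit-learn >= 1.2, where
`Interval`, `RealNotInt`, and `BaseEstimator._validate_params` are available.

from numbers import Integral

from sklearn.base import BaseEstimator
from sklearn.utils._param_validation import Interval, InvalidParameterError, RealNotInt


class DemoEstimator(BaseEstimator):
    """Hypothetical estimator illustrating declarative parameter validation."""

    # Same constraint the patch declares for ``max_samples_leaf``:
    # None, a float in (0, 1], or an integer >= 1.
    _parameter_constraints: dict = {
        "max_samples_leaf": [
            None,
            Interval(RealNotInt, 0, 1, closed="right"),
            Interval(Integral, 1, None, closed="left"),
        ],
    }

    def __init__(self, max_samples_leaf=1):
        self.max_samples_leaf = max_samples_leaf

    def fit(self, X=None, y=None):
        # Validates self.get_params() against _parameter_constraints,
        # replacing the hand-written isinstance/raise checks.
        self._validate_params()
        return self


DemoEstimator(max_samples_leaf=0.5).fit()    # OK: float in (0, 1]
DemoEstimator(max_samples_leaf=3).fit()      # OK: integer >= 1
try:
    DemoEstimator(max_samples_leaf=0).fit()  # rejected: integer < 1
except InvalidParameterError as err:
    print(err)

Declaring the constraint once keeps the error messages consistent with
scikit-learn's own estimators and removes the need for the version-gated
`param_validation` flag that the patch deletes.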