diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 0c00e3e..b814795 100755
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -119,17 +119,16 @@ Multi-target quantile regression is also supported. If the target values are mul
 Quantile Weighting
 ~~~~~~~~~~~~~~~~~~
 
-By default, the predict method calculates quantiles by weighting each sample inversely according to the size of its leaf node (`weighted_leaves = True`). If `weighted_leaves = False`, each sample in a leaf (including repeated bootstrap samples) will be given equal weight. Note that this leaf-based weighting can only be used with weighted quantiles.
-
 By default, the predict method calculates quantiles using a weighted quantile method (`weighted_quantile = True`), which assigns a weight to each sample in the training set based on the number of times that it co-occurs in the same leaves as the test sample. When the number of samples in the training set is larger than the expected size of this list (i.e., :math:`n_{train} \gg n_{trees} \cdot n_{leaves} \cdot n_{leafsamples}`), it can be more efficient to calculate an unweighted quantile (`weighted_quantile = False`), which aggregates the list of training `y` values for each leaf node to which the test sample belongs across all trees. For a given input, both methods can return the same output values::
 
     >>> import numpy as np
-    >>> kwargs = {"weighted_leaves": False}
-    >>> y_pred_weighted = reg.predict(X_test, weighted_quantile=True, **kwargs)
-    >>> y_pred_unweighted = reg.predict(X_test, weighted_quantile=False, **kwargs)
+    >>> y_pred_weighted = reg.predict(X_test, weighted_quantile=True)
+    >>> y_pred_unweighted = reg.predict(X_test, weighted_quantile=False)
     >>> np.allclose(y_pred_weighted, y_pred_unweighted)
     True
 
+By default, the predict method calculates quantiles by giving each sample in a leaf (including repeated bootstrap samples) equal weight (`weighted_leaves = False`). If `weighted_leaves = True`, each sample will be weighted inversely according to the size of its leaf node. Note that this leaf-based weighting can only be used with weighted quantiles.
+
 Out-of-Bag Estimation
 ~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/quantile_forest/_quantile_forest.py b/quantile_forest/_quantile_forest.py
index fe6a7c8..64a64c7 100755
--- a/quantile_forest/_quantile_forest.py
+++ b/quantile_forest/_quantile_forest.py
@@ -449,7 +449,7 @@ def predict(
         quantiles=None,
         interpolation="linear",
         weighted_quantile=True,
-        weighted_leaves=True,
+        weighted_leaves=False,
         aggregate_leaves_first=True,
         oob_score=False,
         indices=None,
@@ -490,7 +490,7 @@ def predict(
             number of training samples relative to siblings is small, weighted
             quantiles can be more efficient to compute than unweighted ones.
 
-        weighted_leaves : bool, default=True
+        weighted_leaves : bool, default=False
             Weight samples inversely to the size of their leaf node. Only
             used if `weighted_quantile=True` and `max_samples_leaf!=1`.
 
diff --git a/quantile_forest/_quantile_forest_fast.pyx b/quantile_forest/_quantile_forest_fast.pyx
index 35d0560..63288c8 100755
--- a/quantile_forest/_quantile_forest_fast.pyx
+++ b/quantile_forest/_quantile_forest_fast.pyx
@@ -617,7 +617,7 @@ cdef class QuantileForest:
         UINT8_t[:, :] X_indices=None,
         char* interpolation=b"linear",
         bint weighted_quantile=True,
-        bint weighted_leaves=True,
+        bint weighted_leaves=False,
         bint aggregate_leaves_first=True,
     ):
         """Return predictions for ``est.apply`` outputs.
@@ -644,7 +644,7 @@ cdef class QuantileForest:
         weighted_quantile : bool, default=True
             Calculate weighted quantiles.
 
-        weighted_leaves : bool, default=True
+        weighted_leaves : bool, default=False
             Weight samples inversely to the size of their leaf node.
 
         aggregate_leaves_first : bool, default=True
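
Below is a quick, non-authoritative sketch of the behavior after this change. It assumes the package's `RandomForestQuantileRegressor` estimator and a synthetic scikit-learn dataset (neither appears in the patch) and shows that equal within-leaf weighting is now the default, while inverse-leaf-size weighting must be requested explicitly together with weighted quantiles::

    >>> from quantile_forest import RandomForestQuantileRegressor
    >>> from sklearn.datasets import make_regression
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = make_regression(n_samples=500, n_features=4, random_state=0)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    >>> reg = RandomForestQuantileRegressor(random_state=0).fit(X_train, y_train)
    >>> # Default after this patch: each sample in a leaf gets equal weight
    >>> # (weighted_leaves=False), so no extra keyword is needed.
    >>> y_pred = reg.predict(X_test, quantiles=[0.1, 0.5, 0.9])
    >>> # Opting in to inverse-leaf-size weighting, which is only valid when
    >>> # weighted quantiles are used.
    >>> y_pred_leaf_weighted = reg.predict(
    ...     X_test, quantiles=[0.1, 0.5, 0.9], weighted_quantile=True, weighted_leaves=True
    ... )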