Skip to content

Commit

Permalink
Reorder multioutputs (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
reidjohnson authored Feb 22, 2024
1 parent 6a1c9d0 commit 928a489
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 24 deletions.
6 changes: 3 additions & 3 deletions docs/user_guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ The output of the `predict` method is an array with one column for each specifie
>>> (y_pred[:, 0] >= y_pred[:, 1]).all()
True

Multi-target quantile regression is also supported. If the target values are multi-dimensional, then the final output column will correspond to the number of targets::
Multi-target quantile regression is also supported. If the target values are multi-dimensional, then the second output column will correspond to the number of targets::

>>> from sklearn import datasets
>>> from quantile_forest import RandomForestQuantileRegressor
Expand All @@ -111,9 +111,9 @@ Multi-target quantile regression is also supported. If the target values are mul
True
>>> y_pred.shape[0] == len(X)
True
>>> y_pred.shape[1] == len(quantiles)
>>> y_pred.shape[-1] == len(quantiles)
True
>>> y_pred.shape[-1] == y.shape[1]
>>> y_pred.shape[1] == y.shape[1]
True

Quantile Weighting
Expand Down
22 changes: 11 additions & 11 deletions quantile_forest/_quantile_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,8 +516,8 @@ def predict(
Returns
-------
y_pred : array of shape (n_samples, n_quantiles, n_outputs)
If quantiles is set to None, then return ``E(Y | X)``. Else, for
y_pred : array of shape (n_samples, n_outputs, n_quantiles)
If quantiles is set to 'mean', then return ``E(Y | X)``. Else, for
all quantiles, return ``y`` at ``q`` for which ``F(Y=y|x) = q``,
where ``q`` is the quantile.
"""
Expand Down Expand Up @@ -565,27 +565,27 @@ def predict(
if self.max_samples_leaf == 1: # optimize for single-sample-per-leaf performance
y_train_leaves = np.asarray(self.forest_.y_train_leaves)
y_train = np.asarray(self.forest_.y_train).T
y_pred = np.empty((len(X), len(quantiles), y_train.shape[1]))
for i in range(y_train.shape[1]):
y_pred = np.empty((len(X), y_train.shape[1], len(quantiles)))
for output in range(y_train.shape[1]):
leaf_values = np.empty((len(X), self.n_estimators))
for tree in range(self.n_estimators):
if X_indices is None:
train_indices = y_train_leaves[tree, X_leaves[:, tree], i, 0]
if X_indices is None: # IB scoring
train_indices = y_train_leaves[tree, X_leaves[:, tree], output, 0]
else: # OOB scoring
indices = X_indices[:, tree] == 1
leaves = X_leaves[indices, tree]
train_indices = np.zeros(len(X), dtype=int)
train_indices[indices] = y_train_leaves[tree, leaves, i, 0]
leaf_values[:, tree] = y_train[train_indices - 1, i]
train_indices[indices] = y_train_leaves[tree, leaves, output, 0]
leaf_values[:, tree] = y_train[train_indices - 1, output]
leaf_values[train_indices == 0, tree] = np.nan
if len(quantiles) == 1 and quantiles[0] == -1: # calculate mean
func = np.mean if X_indices is None else np.nanmean
y_pred[..., i] = np.expand_dims(func(leaf_values, axis=1), axis=1)
y_pred[:, output, :] = np.expand_dims(func(leaf_values, axis=1), axis=1)
else: # calculate quantiles
func = np.quantile if X_indices is None else np.nanquantile
method = interpolation.decode()
y_pred[..., i] = func(leaf_values, quantiles, method=method, axis=1).T
else:
y_pred[:, output, :] = func(leaf_values, quantiles, method=method, axis=1).T
else: # get predictions for arbitrary leaf sizes
y_pred = self.forest_.predict(
quantiles,
X_leaves,
Expand Down
10 changes: 5 additions & 5 deletions quantile_forest/_quantile_forest_fast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ cdef class QuantileForest:
raise ValueError(f"Invalid interpolation method {interpolation}.")

# Initialize NumPy array with NaN values and get view for nogil.
preds = np.full((n_samples, n_quantiles, n_outputs), np.nan, dtype=np.float64)
preds = np.full((n_samples, n_outputs, n_quantiles), np.nan, dtype=np.float64)
preds_view = preds # memoryview

with nogil:
Expand Down Expand Up @@ -832,20 +832,20 @@ cdef class QuantileForest:
if not use_mean:
for k in range(<SIZE_t>(leaf_preds.size())):
if leaf_preds[k].size() == 1:
preds_view[i, k, j] = leaf_preds[k][0]
preds_view[i, j, k] = leaf_preds[k][0]
elif leaf_preds[k].size() > 1:
pred = calc_quantile(
leaf_preds[k],
median,
interpolation,
issorted=False,
)
preds_view[i, k, j] = pred[0]
preds_view[i, j, k] = pred[0]
else:
if leaf_preds[0].size() == 1:
preds_view[i, 0, j] = leaf_preds[0][0]
preds_view[i, j, 0] = leaf_preds[0][0]
elif leaf_preds[0].size() > 1:
preds_view[i, 0, j] = calc_mean(leaf_preds[0])
preds_view[i, j, 0] = calc_mean(leaf_preds[0])

return np.asarray(preds_view)

Expand Down
6 changes: 3 additions & 3 deletions quantile_forest/tests/examples/plot_quantile_multioutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ def make_func_Xy(funcs, bounds, n_samples):
"x": np.tile(X.squeeze(), len(funcs)),
"y": y.reshape(-1, order="F"),
"y_true": np.concatenate([f["signal"](X.squeeze()) for f in funcs]),
"y_pred": np.concatenate([y_pred[:, 1, i] for i in range(len(funcs))]),
"y_pred_low": np.concatenate([y_pred[:, 0, i] for i in range(len(funcs))]),
"y_pred_upp": np.concatenate([y_pred[:, 2, i] for i in range(len(funcs))]),
"y_pred": np.concatenate([y_pred[:, i, 1] for i in range(len(funcs))]),
"y_pred_low": np.concatenate([y_pred[:, i, 0] for i in range(len(funcs))]),
"y_pred_upp": np.concatenate([y_pred[:, i, 2] for i in range(len(funcs))]),
"target": np.concatenate([[f"{i}"] * len(X) for i in range(len(funcs))]),
}
)
Expand Down
4 changes: 2 additions & 2 deletions quantile_forest/tests/test_quantile_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,8 +470,8 @@ def check_predict_quantiles(
)
score = est.score(X.reshape(-1, 1), y, quantiles=0.5)
assert y_pred.ndim == (3 if isinstance(quantiles, list) else 2)
assert y_pred.shape[-1] == y.shape[1]
assert np.any(y_pred[..., 0] != y_pred[..., 1])
assert y_pred.shape[1] == y.shape[1]
assert np.any(y_pred[:, 0, ...] != y_pred[:, 1, ...])
assert score > 0.97

# Check that specifying `quantiles` overwrites `default_quantiles`.
Expand Down

0 comments on commit 928a489

Please sign in to comment.