Skip to content

Commit

Permalink
Update examples
Browse files Browse the repository at this point in the history
  • Loading branch information
reidjohnson committed Feb 19, 2024
1 parent 8b1170d commit b69c2ae
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 42 deletions.
5 changes: 5 additions & 0 deletions quantile_forest/tests/examples/plot_quantile_extrapolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,15 @@ def get_test_X(X):
return X_test


# Create the full dataset.
X, y = make_func_Xy(func, bounds, n_samples)

# Calculate the extrapolation bounds.
extrap_min_idx = int(n_samples * (extrap_frac / 2))
extrap_max_idx = int(n_samples - (n_samples * (extrap_frac / 2)))

# Based on the extrapolation bounds, get the training and test data.
# Training data excludes extrapolated regions; test data includes them.
X_train, y_train = get_train_Xy(X, y, extrap_min_idx, extrap_max_idx)
X_test = get_test_X(X)

Expand All @@ -61,6 +65,7 @@ def get_test_X(X):
)
qrf.fit(np.expand_dims(X_train, axis=-1), y_train)

# Get predictions for the median and the bounds of the 95% prediction interval.
y_pred = qrf.predict(X_test, quantiles=[0.025, 0.5, 0.975])


Expand Down
32 changes: 9 additions & 23 deletions quantile_forest/tests/examples/plot_quantile_interpolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,38 +41,24 @@
)
est.fit(X, y)

y_medians = []
y_errs = []
for interpolation in interpolations:
y_pred = est.predict(
X,
quantiles=[0.025, 0.5, 0.975],
interpolation=interpolation.lower(),
)
y_medians.append(y_pred[:, 1])
y_errs.append(
np.concatenate(
(
[y_pred[:, 1] - y_pred[:, 0]],
[y_pred[:, 2] - y_pred[:, 1]],
),
axis=0,
)
)

# Initialize data with actual values.
data = {
"method": ["Actual"] * len(y),
"x": [f"Sample {idx + 1} ({x})" for idx, x in enumerate(X.tolist())],
"y_med": y.tolist(),
"y_low": y.tolist(),
"y_upp": y.tolist(),
}
for idx, interpolation in enumerate(interpolations):

# Populate data based on prediction results with different interpolations.
for interpolation in interpolations:
y_pred = est.predict(X, quantiles=[0.025, 0.5, 0.975], interpolation=interpolation.lower())

data["method"].extend([interpolation] * len(y))
data["x"].extend([f"Sample {idx + 1} ({x})" for idx, x in enumerate(X.tolist())])
data["y_med"].extend(y_medians[idx])
data["y_low"].extend(y_medians[idx] - y_errs[idx][0])
data["y_upp"].extend(y_medians[idx] + y_errs[idx][1])
data["y_low"].extend(y_pred[:, 0])
data["y_med"].extend(y_pred[:, 1])
data["y_upp"].extend(y_pred[:, 2])

df = pd.DataFrame(data)

Expand Down
1 change: 1 addition & 0 deletions quantile_forest/tests/examples/plot_quantile_intervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
y_pred_low = []
y_pred_upp = []

# Using k-fold cross-validation, get predictions for all samples.
for train_index, test_index in kf.split(X):
X_train, X_test, y_train, y_test = (
X[train_index],
Expand Down
8 changes: 5 additions & 3 deletions quantile_forest/tests/examples/plot_quantile_multioutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
n_samples = 2500
bounds = [0, 100]

# Define functions that generate targets; each function maps to one target.
funcs = [
{
"signal": lambda x: np.log1p(x + 1),
Expand All @@ -46,15 +47,16 @@ def make_func_Xy(funcs, bounds, n_samples):
return np.atleast_2d(x).T, y


# Create the dataset with multiple target variables.
X, y = make_func_Xy(funcs, bounds, n_samples)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

qrf = RandomForestQuantileRegressor(max_samples_leaf=None, max_depth=4, random_state=0)
qrf.fit(X_train, y_train)
qrf.fit(X_train, y_train) # fit on all of the targets simultaneously

y_pred = qrf.predict(X, quantiles=[0.025, 0.5, 0.975], weighted_quantile=False)
y_pred = y_pred.reshape(-1, 3, len(funcs))
# Get multiple-output predictions for the median and the bounds of the 95% prediction interval.
y_pred = qrf.predict(X, quantiles=[0.025, 0.5, 0.975])

df = pd.DataFrame(
{
Expand Down
4 changes: 2 additions & 2 deletions quantile_forest/tests/examples/plot_quantile_vs_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
regr_rf.fit(X_train, y_train)
regr_qrf.fit(X_train, y_train)

y_pred_rf = regr_rf.predict(X_test)
y_pred_qrf = regr_qrf.predict(X_test, quantiles=0.5)
y_pred_rf = regr_rf.predict(X_test) # standard RF predictions (mean)
y_pred_qrf = regr_qrf.predict(X_test, quantiles=0.5) # QRF predictions (median)

legend = {
"Actual": "#c0c0c0",
Expand Down
23 changes: 9 additions & 14 deletions quantile_forest/tests/examples/plot_quantile_weighting.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,10 @@ def timing():
est_sizes = [1, 5, 10, 25, 50, 75, 100]
n_repeats = 5

timings = np.empty((len(est_sizes), n_repeats, 3))
# Populate data with timing results over estimators.
data = {"name": [], "n_estimators": [], "iteration": [], "runtime": []}
for i, n_estimators in enumerate(est_sizes):
for j in range(n_repeats):
result = {}

rf = RandomForestRegressor(
n_estimators=n_estimators,
random_state=0,
Expand All @@ -68,19 +67,15 @@ def timing():
with timing() as qrf_unweighted_time:
_ = qrf.predict(X_test, quantiles=0.5, weighted_quantile=False)

timings[i, j, :] = [rf_time(), qrf_weighted_time(), qrf_unweighted_time()]
timings[i, j, :] *= 1000 # convert from seconds to milliseconds
timings = [rf_time(), qrf_weighted_time(), qrf_unweighted_time()]

timings /= timings.min() # normalize by minimum runtime
timings = np.transpose(timings, axes=[2, 0, 1]) # put the estimator name first
for name, runtime in zip(legend.keys(), timings):
runtime *= 1000 # convert from seconds to milliseconds

data = {"name": [], "n_estimators": [], "iteration": [], "runtime": []}
for i, name in enumerate(legend):
for j in range(timings.shape[1]):
data["name"].extend([name] * n_repeats)
data["n_estimators"].extend([est_sizes[j]] * n_repeats)
data["iteration"].extend(list(range(n_repeats)))
data["runtime"].extend(timings[i, j])
data["name"].extend([name])
data["n_estimators"].extend([est_sizes[i]])
data["iteration"].extend([j])
data["runtime"].extend([runtime])

df = (
pd.DataFrame(data)
Expand Down

0 comments on commit b69c2ae

Please sign in to comment.