From b69c2ae8e1412a78d194a7dabbc5a9ff6c118331 Mon Sep 17 00:00:00 2001 From: Reid Johnson Date: Mon, 19 Feb 2024 04:47:08 -0800 Subject: [PATCH] Update examples --- .../examples/plot_quantile_extrapolation.py | 5 +++ .../examples/plot_quantile_interpolation.py | 32 ++++++------------- .../tests/examples/plot_quantile_intervals.py | 1 + .../examples/plot_quantile_multioutput.py | 8 +++-- .../examples/plot_quantile_vs_standard.py | 4 +-- .../tests/examples/plot_quantile_weighting.py | 23 ++++++------- 6 files changed, 31 insertions(+), 42 deletions(-) diff --git a/quantile_forest/tests/examples/plot_quantile_extrapolation.py b/quantile_forest/tests/examples/plot_quantile_extrapolation.py index 8b5dfc0..ae1ef9f 100755 --- a/quantile_forest/tests/examples/plot_quantile_extrapolation.py +++ b/quantile_forest/tests/examples/plot_quantile_extrapolation.py @@ -46,11 +46,15 @@ def get_test_X(X): return X_test +# Create the full dataset. X, y = make_func_Xy(func, bounds, n_samples) +# Calculate the extrapolation bounds. extrap_min_idx = int(n_samples * (extrap_frac / 2)) extrap_max_idx = int(n_samples - (n_samples * (extrap_frac / 2))) +# Based on the extrapolation bounds, get the training and test data. +# Training data excludes extrapolated regions; test data includes them. X_train, y_train = get_train_Xy(X, y, extrap_min_idx, extrap_max_idx) X_test = get_test_X(X) @@ -61,6 +65,7 @@ def get_test_X(X): ) qrf.fit(np.expand_dims(X_train, axis=-1), y_train) +# Get predictions at 95% prediction intervals and median. 
y_pred = qrf.predict(X_test, quantiles=[0.025, 0.5, 0.975]) diff --git a/quantile_forest/tests/examples/plot_quantile_interpolation.py b/quantile_forest/tests/examples/plot_quantile_interpolation.py index 3b3c839..3656ae0 100755 --- a/quantile_forest/tests/examples/plot_quantile_interpolation.py +++ b/quantile_forest/tests/examples/plot_quantile_interpolation.py @@ -41,25 +41,7 @@ ) est.fit(X, y) -y_medians = [] -y_errs = [] -for interpolation in interpolations: - y_pred = est.predict( - X, - quantiles=[0.025, 0.5, 0.975], - interpolation=interpolation.lower(), - ) - y_medians.append(y_pred[:, 1]) - y_errs.append( - np.concatenate( - ( - [y_pred[:, 1] - y_pred[:, 0]], - [y_pred[:, 2] - y_pred[:, 1]], - ), - axis=0, - ) - ) - +# Initialize data with actual values. data = { "method": ["Actual"] * len(y), "x": [f"Sample {idx + 1} ({x})" for idx, x in enumerate(X.tolist())], @@ -67,12 +49,16 @@ "y_low": y.tolist(), "y_upp": y.tolist(), } -for idx, interpolation in enumerate(interpolations): + +# Populate data based on prediction results with different interpolations. 
+for interpolation in interpolations: + y_pred = est.predict(X, quantiles=[0.025, 0.5, 0.975], interpolation=interpolation.lower()) + data["method"].extend([interpolation] * len(y)) data["x"].extend([f"Sample {idx + 1} ({x})" for idx, x in enumerate(X.tolist())]) - data["y_med"].extend(y_medians[idx]) - data["y_low"].extend(y_medians[idx] - y_errs[idx][0]) - data["y_upp"].extend(y_medians[idx] + y_errs[idx][1]) + data["y_low"].extend(y_pred[:, 0]) + data["y_med"].extend(y_pred[:, 1]) + data["y_upp"].extend(y_pred[:, 2]) df = pd.DataFrame(data) diff --git a/quantile_forest/tests/examples/plot_quantile_intervals.py b/quantile_forest/tests/examples/plot_quantile_intervals.py index 8d76fc9..3477d2e 100755 --- a/quantile_forest/tests/examples/plot_quantile_intervals.py +++ b/quantile_forest/tests/examples/plot_quantile_intervals.py @@ -36,6 +36,7 @@ y_pred_low = [] y_pred_upp = [] +# Using k-fold cross-validation, get predictions for all samples. for train_index, test_index in kf.split(X): X_train, X_test, y_train, y_test = ( X[train_index], diff --git a/quantile_forest/tests/examples/plot_quantile_multioutput.py b/quantile_forest/tests/examples/plot_quantile_multioutput.py index a8cd225..e0dcc4f 100755 --- a/quantile_forest/tests/examples/plot_quantile_multioutput.py +++ b/quantile_forest/tests/examples/plot_quantile_multioutput.py @@ -21,6 +21,7 @@ n_samples = 2500 bounds = [0, 100] +# Define functions that generate targets; each function maps to one target. funcs = [ { "signal": lambda x: np.log1p(x + 1), @@ -46,15 +47,16 @@ def make_func_Xy(funcs, bounds, n_samples): return np.atleast_2d(x).T, y +# Create the dataset with multiple target variables. 
X, y = make_func_Xy(funcs, bounds, n_samples) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) qrf = RandomForestQuantileRegressor(max_samples_leaf=None, max_depth=4, random_state=0) -qrf.fit(X_train, y_train) +qrf.fit(X_train, y_train) # fit on all of the targets simultaneously -y_pred = qrf.predict(X, quantiles=[0.025, 0.5, 0.975], weighted_quantile=False) -y_pred = y_pred.reshape(-1, 3, len(funcs)) +# Get multiple-output predictions at 95% prediction intervals and median. +y_pred = qrf.predict(X, quantiles=[0.025, 0.5, 0.975]) df = pd.DataFrame( { diff --git a/quantile_forest/tests/examples/plot_quantile_vs_standard.py b/quantile_forest/tests/examples/plot_quantile_vs_standard.py index 64dc023..5b0cf0f 100755 --- a/quantile_forest/tests/examples/plot_quantile_vs_standard.py +++ b/quantile_forest/tests/examples/plot_quantile_vs_standard.py @@ -37,8 +37,8 @@ regr_rf.fit(X_train, y_train) regr_qrf.fit(X_train, y_train) -y_pred_rf = regr_rf.predict(X_test) -y_pred_qrf = regr_qrf.predict(X_test, quantiles=0.5) +y_pred_rf = regr_rf.predict(X_test) # standard RF predictions (mean) +y_pred_qrf = regr_qrf.predict(X_test, quantiles=0.5) # QRF predictions (median) legend = { "Actual": "#c0c0c0", diff --git a/quantile_forest/tests/examples/plot_quantile_weighting.py b/quantile_forest/tests/examples/plot_quantile_weighting.py index 39e9c91..3e7efe3 100755 --- a/quantile_forest/tests/examples/plot_quantile_weighting.py +++ b/quantile_forest/tests/examples/plot_quantile_weighting.py @@ -43,11 +43,10 @@ def timing(): est_sizes = [1, 5, 10, 25, 50, 75, 100] n_repeats = 5 -timings = np.empty((len(est_sizes), n_repeats, 3)) +# Populate data with timing results over estimators. 
+data = {"name": [], "n_estimators": [], "iteration": [], "runtime": []} for i, n_estimators in enumerate(est_sizes): for j in range(n_repeats): - result = {} - rf = RandomForestRegressor( n_estimators=n_estimators, random_state=0, @@ -68,19 +67,15 @@ def timing(): with timing() as qrf_unweighted_time: _ = qrf.predict(X_test, quantiles=0.5, weighted_quantile=False) - timings[i, j, :] = [rf_time(), qrf_weighted_time(), qrf_unweighted_time()] - timings[i, j, :] *= 1000 # convert from milliseconds to seconds + timings = [rf_time(), qrf_weighted_time(), qrf_unweighted_time()] -timings /= timings.min() # normalize by minimum runtime -timings = np.transpose(timings, axes=[2, 0, 1]) # put the estimator name first + for name, runtime in zip(legend.keys(), timings): + runtime *= 1000 # convert from seconds to milliseconds -data = {"name": [], "n_estimators": [], "iteration": [], "runtime": []} -for i, name in enumerate(legend): - for j in range(timings.shape[1]): - data["name"].extend([name] * n_repeats) - data["n_estimators"].extend([est_sizes[j]] * n_repeats) - data["iteration"].extend(list(range(n_repeats))) - data["runtime"].extend(timings[i, j]) + data["name"].extend([name]) + data["n_estimators"].extend([est_sizes[i]]) + data["iteration"].extend([j]) + data["runtime"].extend([runtime]) df = ( pd.DataFrame(data)