From 0e09712a8ec8520fc75529d4d20e96c3478f9f49 Mon Sep 17 00:00:00 2001
From: Reid Johnson
Date: Fri, 16 Feb 2024 17:10:42 -0800
Subject: [PATCH] Tidy up examples

---
 .../examples/plot_quantile_extrapolation.py   | 10 ++++----
 .../examples/plot_quantile_interpolation.py   | 25 ++++++++-----------
 .../tests/examples/plot_quantile_intervals.py | 10 +++++---
 .../examples/plot_quantile_multioutput.py     | 12 ++++-----
 .../tests/examples/plot_quantile_weighting.py |  5 ++--
 5 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/quantile_forest/tests/examples/plot_quantile_extrapolation.py b/quantile_forest/tests/examples/plot_quantile_extrapolation.py
index 5c5492a..8ffca17 100755
--- a/quantile_forest/tests/examples/plot_quantile_extrapolation.py
+++ b/quantile_forest/tests/examples/plot_quantile_extrapolation.py
@@ -60,7 +60,7 @@ def get_test_X(X):
 )
 qrf.fit(np.expand_dims(X_train, axis=-1), y_train)
 
-y_pred = qrf.predict(X_test, quantiles=[0.025, 0.5, 0.975])  # extrapolate
+y_pred = qrf.predict(X_test, quantiles=[0.025, 0.5, 0.975])
 
 
 df = pd.DataFrame(
@@ -156,7 +156,7 @@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None
         tooltip=tooltip_pred,
     )
 
-    base1 = bar_pred + points_true + line_true + line_pred
+    chart = bar_pred + points_true + line_true + line_pred
 
     if legend:
         # For desired legend ordering.
@@ -175,10 +175,10 @@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None
             blank = blank.encode(
                 color=alt.Color(f"{k}:N", scale=alt.Scale(range=[v["color"]]), title=None)
             )
-            base1 += blank
-        base1 = base1.resolve_scale(color="independent")
+            chart += blank
+        chart = chart.resolve_scale(color="independent")
 
-    chart = base1.properties(height=200, width=300, title=title)
+    chart = chart.properties(height=200, width=300, title=title)
 
     return chart
 
diff --git a/quantile_forest/tests/examples/plot_quantile_interpolation.py b/quantile_forest/tests/examples/plot_quantile_interpolation.py
index ff6c660..1474c82 100755
--- a/quantile_forest/tests/examples/plot_quantile_interpolation.py
+++ b/quantile_forest/tests/examples/plot_quantile_interpolation.py
@@ -68,6 +68,7 @@
     data["y_med"].extend(y_medians[idx])
     data["y_low"].extend(y_medians[idx] - y_errs[idx][0])
     data["y_upp"].extend(y_medians[idx] + y_errs[idx][1])
+
 df = pd.DataFrame(data)
 
 
@@ -80,6 +81,14 @@ def plot_interpolations(df, legend):
         alt.value("lightgray"),
     )
 
+    tooltip = [
+        alt.Tooltip("method:N", title="Method"),
+        alt.Tooltip("x:N", title="X Values"),
+        alt.Tooltip("y_med:N", format=".3f", title="Median Y Value"),
+        alt.Tooltip("y_low:N", format=".3f", title="Lower Y Value"),
+        alt.Tooltip("y_upp:N", format=".3f", title="Upper Y Value"),
+    ]
+
     point = (
         alt.Chart(df, width=alt.Step(20))
         .mark_circle(opacity=1, size=75)
         .encode(
@@ -92,13 +101,7 @@ def plot_interpolations(df, legend):
             ),
             y=alt.Y("y_med:Q", title="Actual and Predicted Values"),
             color=color,
-            tooltip=[
-                alt.Tooltip("method:N", title="Method"),
-                alt.Tooltip("x:N", title="X Values"),
-                alt.Tooltip("y_med:N", format=".3f", title="Median Y Value"),
-                alt.Tooltip("y_low:N", format=".3f", title="Lower Y Value"),
-                alt.Tooltip("y_upp:N", format=".3f", title="Upper Y Value"),
-            ],
+            tooltip=tooltip,
         )
     )
 
@@ -115,13 +118,7 @@ def plot_interpolations(df, legend):
             y=alt.Y("y_low:Q", title=""),
             y2=alt.Y2("y_upp:Q", title=None),
             color=color,
-            tooltip=[
-                alt.Tooltip("method:N", title="Method"),
-                alt.Tooltip("x:N", title="X Values"),
-                alt.Tooltip("y_med:N", format=".3f", title="Median Y Value"),
-                alt.Tooltip("y_low:N", format=".3f", title="Lower Y Value"),
-                alt.Tooltip("y_upp:N", format=".3f", title="Upper Y Value"),
-            ],
+            tooltip=tooltip,
         )
     )
 
diff --git a/quantile_forest/tests/examples/plot_quantile_intervals.py b/quantile_forest/tests/examples/plot_quantile_intervals.py
index 837ac71..8d76fc9 100755
--- a/quantile_forest/tests/examples/plot_quantile_intervals.py
+++ b/quantile_forest/tests/examples/plot_quantile_intervals.py
@@ -2,8 +2,10 @@
 Quantile Regression Forests Prediction Intervals
 ================================================
 
-An example of how to use a quantile regression forest to plot prediction
-intervals on the California Housing dataset.
+An example of how to use quantile regression forests to generate prediction
+intervals on the California Housing dataset. Inspired by Figure 3 of
+"Quantile Regression Forests" by Meinshausen:
+https://jmlr.org/papers/v7/meinshausen06a.html.
 """
 
 import altair as alt
@@ -61,8 +63,8 @@
         "y_pred_upp": np.concatenate(y_pred_upp),
     }
 ).pipe(
-    lambda x: x * 100_000
-)  # convert to dollars
+    lambda x: x * 100_000  # convert to dollars
+)
 
 
 def plot_calibration_and_intervals(df):
diff --git a/quantile_forest/tests/examples/plot_quantile_multioutput.py b/quantile_forest/tests/examples/plot_quantile_multioutput.py
index 69379dd..1a8a900 100755
--- a/quantile_forest/tests/examples/plot_quantile_multioutput.py
+++ b/quantile_forest/tests/examples/plot_quantile_multioutput.py
@@ -21,11 +21,11 @@
 funcs = [
     {
-        "truth": lambda x: np.log1p(x + 1),
-        "noise": lambda x: np.log1p(x + 1) * np.random.uniform(size=len(x)),
+        "signal": lambda x: np.log1p(x + 1),
+        "noise": lambda x: np.log1p(x) * np.random.uniform(size=len(x)),
     },
     {
-        "truth": lambda x: np.log1p(np.sqrt(x)),
+        "signal": lambda x: np.log1p(np.sqrt(x)),
         "noise": lambda x: np.log1p(x / 2) * np.random.uniform(size=len(x)),
     },
 ]
 
@@ -40,7 +40,7 @@ def make_func_Xy(funcs, bounds, n_samples):
     x = np.linspace(*bounds, n_samples)
     y = np.empty((len(x), len(funcs)))
     for i, func in enumerate(funcs):
-        y[:, i] = func["truth"](x) + func["noise"](x)
+        y[:, i] = func["signal"](x) + func["noise"](x)
     return np.atleast_2d(x).T, y
 
 
@@ -51,14 +51,14 @@ def make_func_Xy(funcs, bounds, n_samples):
 qrf = RandomForestQuantileRegressor(max_samples_leaf=None, max_depth=4, random_state=0)
 qrf.fit(X_train, y_train)
 
-y_pred = qrf.predict(X, quantiles=[0.025, 0.5, 0.975], weighted_leaves=False)
+y_pred = qrf.predict(X, quantiles=[0.025, 0.5, 0.975], weighted_quantile=False)
 y_pred = y_pred.reshape(-1, 3, len(funcs))
 
 df = pd.DataFrame(
     {
         "x": np.tile(X.squeeze(), len(funcs)),
         "y": y.reshape(-1, order="F"),
-        "y_true": np.concatenate([f["truth"](X.squeeze()) for f in funcs]),
+        "y_true": np.concatenate([f["signal"](X.squeeze()) for f in funcs]),
         "y_pred": np.concatenate([y_pred[:, 1, i] for i in range(len(funcs))]),
         "y_pred_low": np.concatenate([y_pred[:, 0, i] for i in range(len(funcs))]),
         "y_pred_upp": np.concatenate([y_pred[:, 2, i] for i in range(len(funcs))]),
diff --git a/quantile_forest/tests/examples/plot_quantile_weighting.py b/quantile_forest/tests/examples/plot_quantile_weighting.py
index 23f0472..02587ce 100755
--- a/quantile_forest/tests/examples/plot_quantile_weighting.py
+++ b/quantile_forest/tests/examples/plot_quantile_weighting.py
@@ -28,7 +28,7 @@ def timing():
     t1 = time.time()
 
 
-X, y = datasets.make_regression(n_samples=500, n_features=4, random_state=0)
+X, y = datasets.make_regression(n_samples=250, n_features=4, n_targets=5, random_state=0)
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
 
 
@@ -69,6 +69,7 @@ def timing():
         timings[i, j, :] = [rf_time(), qrf_weighted_time(), qrf_unweighted_time()]
         timings[i, j, :] *= 1000  # convert from seconds to milliseconds
 
+timings /= timings.min()  # normalize by minimum runtime
 timings = np.transpose(timings, axes=[2, 0, 1])  # put the estimator name first
 
 data = {"name": [], "n_estimators": [], "iteration": [], "runtime": []}
@@ -115,7 +116,7 @@ def plot_timings_by_size(df, legend):
         .mark_line()
         .encode(
             x=alt.X("n_estimators:Q", title="Number of Estimators"),
-            y=alt.Y("mean:Q", title="Prediction Runtime (seconds)"),
+            y=alt.Y("mean:Q", title="Prediction Runtime (normalized)"),
             color=color,
         )
     )