From 0e09712a8ec8520fc75529d4d20e96c3478f9f49 Mon Sep 17 00:00:00 2001
From: Reid Johnson
Date: Fri, 16 Feb 2024 17:10:42 -0800
Subject: [PATCH] Tidy up examples

---
 .../examples/plot_quantile_extrapolation.py   | 10 ++++----
 .../examples/plot_quantile_interpolation.py   | 25 ++++++++-----------
 .../tests/examples/plot_quantile_intervals.py | 10 +++++---
 .../examples/plot_quantile_multioutput.py     | 12 ++++-----
 .../tests/examples/plot_quantile_weighting.py |  5 ++--
 5 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/quantile_forest/tests/examples/plot_quantile_extrapolation.py b/quantile_forest/tests/examples/plot_quantile_extrapolation.py
index 5c5492a..8ffca17 100755
--- a/quantile_forest/tests/examples/plot_quantile_extrapolation.py
+++ b/quantile_forest/tests/examples/plot_quantile_extrapolation.py
@@ -60,7 +60,7 @@ def get_test_X(X):
 )
 qrf.fit(np.expand_dims(X_train, axis=-1), y_train)
 
-y_pred = qrf.predict(X_test, quantiles=[0.025, 0.5, 0.975])  # extrapolate
+y_pred = qrf.predict(X_test, quantiles=[0.025, 0.5, 0.975])
 
 
 df = pd.DataFrame(
@@ -156,7 +156,7 @@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None
         tooltip=tooltip_pred,
     )
 
-    base1 = bar_pred + points_true + line_true + line_pred
+    chart = bar_pred + points_true + line_true + line_pred
 
     if legend:
         # For desired legend ordering.
@@ -175,10 +175,10 @@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None
             blank = blank.encode(
                 color=alt.Color(f"{k}:N", scale=alt.Scale(range=[v["color"]]), title=None)
             )
-            base1 += blank
-        base1 = base1.resolve_scale(color="independent")
+            chart += blank
+        chart = chart.resolve_scale(color="independent")
 
-    chart = base1.properties(height=200, width=300, title=title)
+    chart = chart.properties(height=200, width=300, title=title)
 
     return chart
 
diff --git a/quantile_forest/tests/examples/plot_quantile_interpolation.py b/quantile_forest/tests/examples/plot_quantile_interpolation.py
index ff6c660..1474c82 100755
--- a/quantile_forest/tests/examples/plot_quantile_interpolation.py
+++ b/quantile_forest/tests/examples/plot_quantile_interpolation.py
@@ -68,6 +68,7 @@
     data["y_med"].extend(y_medians[idx])
     data["y_low"].extend(y_medians[idx] - y_errs[idx][0])
     data["y_upp"].extend(y_medians[idx] + y_errs[idx][1])
+
 df = pd.DataFrame(data)
 
 
@@ -80,6 +81,14 @@ def plot_interpolations(df, legend):
         alt.value("lightgray"),
     )
 
+    tooltip = [
+        alt.Tooltip("method:N", title="Method"),
+        alt.Tooltip("x:N", title="X Values"),
+        alt.Tooltip("y_med:N", format=".3f", title="Median Y Value"),
+        alt.Tooltip("y_low:N", format=".3f", title="Lower Y Value"),
+        alt.Tooltip("y_upp:N", format=".3f", title="Upper Y Value"),
+    ]
+
     point = (
         alt.Chart(df, width=alt.Step(20))
         .mark_circle(opacity=1, size=75)
         .encode(
@@ -92,13 +101,7 @@ def plot_interpolations(df, legend):
             ),
             y=alt.Y("y_med:Q", title="Actual and Predicted Values"),
             color=color,
-            tooltip=[
-                alt.Tooltip("method:N", title="Method"),
-                alt.Tooltip("x:N", title="X Values"),
-                alt.Tooltip("y_med:N", format=".3f", title="Median Y Value"),
-                alt.Tooltip("y_low:N", format=".3f", title="Lower Y Value"),
-                alt.Tooltip("y_upp:N", format=".3f", title="Upper Y Value"),
-            ],
+            tooltip=tooltip,
         )
     )
 
@@ -115,13 +118,7 @@ def plot_interpolations(df, legend):
             y=alt.Y("y_low:Q", title=""),
             y2=alt.Y2("y_upp:Q", title=None),
             color=color,
-            tooltip=[
-                alt.Tooltip("method:N", title="Method"),
-                alt.Tooltip("x:N", title="X Values"),
-                alt.Tooltip("y_med:N", format=".3f", title="Median Y Value"),
-                alt.Tooltip("y_low:N", format=".3f", title="Lower Y Value"),
-                alt.Tooltip("y_upp:N", format=".3f", title="Upper Y Value"),
-            ],
+            tooltip=tooltip,
         )
     )
 
diff --git a/quantile_forest/tests/examples/plot_quantile_intervals.py b/quantile_forest/tests/examples/plot_quantile_intervals.py
index 837ac71..8d76fc9 100755
--- a/quantile_forest/tests/examples/plot_quantile_intervals.py
+++ b/quantile_forest/tests/examples/plot_quantile_intervals.py
@@ -2,8 +2,10 @@
 Quantile Regression Forests Prediction Intervals
 ================================================
 
-An example of how to use a quantile regression forest to plot prediction
-intervals on the California Housing dataset.
+An example of how to use quantile regression forests to generate prediction
+intervals on the California Housing dataset. Inspired by Figure 3 of
+"Quantile Regression Forests" by Meinshausen:
+https://jmlr.org/papers/v7/meinshausen06a.html.
 """
 
 import altair as alt
@@ -61,8 +63,8 @@
         "y_pred_upp": np.concatenate(y_pred_upp),
     }
 ).pipe(
-    lambda x: x * 100_000
-)  # convert to dollars
+    lambda x: x * 100_000  # convert to dollars
+)
 
 
 def plot_calibration_and_intervals(df):
diff --git a/quantile_forest/tests/examples/plot_quantile_multioutput.py b/quantile_forest/tests/examples/plot_quantile_multioutput.py
index 69379dd..1a8a900 100755
--- a/quantile_forest/tests/examples/plot_quantile_multioutput.py
+++ b/quantile_forest/tests/examples/plot_quantile_multioutput.py
@@ -21,11 +21,11 @@
 funcs = [
     {
-        "truth": lambda x: np.log1p(x + 1),
-        "noise": lambda x: np.log1p(x + 1) * np.random.uniform(size=len(x)),
+        "signal": lambda x: np.log1p(x + 1),
+        "noise": lambda x: np.log1p(x) * np.random.uniform(size=len(x)),
     },
     {
-        "truth": lambda x: np.log1p(np.sqrt(x)),
+        "signal": lambda x: np.log1p(np.sqrt(x)),
         "noise": lambda x: np.log1p(x / 2) * np.random.uniform(size=len(x)),
     },
 ]
 
@@ -40,7 +40,7 @@ def make_func_Xy(funcs, bounds, n_samples):
     x = np.linspace(*bounds, n_samples)
     y = np.empty((len(x), len(funcs)))
     for i, func in enumerate(funcs):
-        y[:, i] = func["truth"](x) + func["noise"](x)
+        y[:, i] = func["signal"](x) + func["noise"](x)
     return np.atleast_2d(x).T, y
 
 
@@ -51,14 +51,14 @@ def make_func_Xy(funcs, bounds, n_samples):
 qrf = RandomForestQuantileRegressor(max_samples_leaf=None, max_depth=4, random_state=0)
 qrf.fit(X_train, y_train)
 
-y_pred = qrf.predict(X, quantiles=[0.025, 0.5, 0.975], weighted_leaves=False)
+y_pred = qrf.predict(X, quantiles=[0.025, 0.5, 0.975], weighted_quantile=False)
 y_pred = y_pred.reshape(-1, 3, len(funcs))
 
 df = pd.DataFrame(
     {
         "x": np.tile(X.squeeze(), len(funcs)),
         "y": y.reshape(-1, order="F"),
-        "y_true": np.concatenate([f["truth"](X.squeeze()) for f in funcs]),
+        "y_true": np.concatenate([f["signal"](X.squeeze()) for f in funcs]),
         "y_pred": np.concatenate([y_pred[:, 1, i] for i in range(len(funcs))]),
         "y_pred_low": np.concatenate([y_pred[:, 0, i] for i in range(len(funcs))]),
         "y_pred_upp": np.concatenate([y_pred[:, 2, i] for i in range(len(funcs))]),
diff --git a/quantile_forest/tests/examples/plot_quantile_weighting.py b/quantile_forest/tests/examples/plot_quantile_weighting.py
index 23f0472..02587ce 100755
--- a/quantile_forest/tests/examples/plot_quantile_weighting.py
+++ b/quantile_forest/tests/examples/plot_quantile_weighting.py
@@ -28,7 +28,7 @@ def timing():
     t1 = time.time()
 
 
-X, y = datasets.make_regression(n_samples=500, n_features=4, random_state=0)
+X, y = datasets.make_regression(n_samples=250, n_features=4, n_targets=5, random_state=0)
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
 
 
@@ -69,6 +69,7 @@ def timing():
         timings[i, j, :] = [rf_time(), qrf_weighted_time(), qrf_unweighted_time()]
         timings[i, j, :] *= 1000  # convert from seconds to milliseconds
 
+timings /= timings.min()  # normalize by minimum runtime
 timings = np.transpose(timings, axes=[2, 0, 1])  # put the estimator name first
 
 data = {"name": [], "n_estimators": [], "iteration": [], "runtime": []}
@@ -115,7 +116,7 @@ def plot_timings_by_size(df, legend):
         .mark_line()
         .encode(
             x=alt.X("n_estimators:Q", title="Number of Estimators"),
-            y=alt.Y("mean:Q", title="Prediction Runtime (seconds)"),
+            y=alt.Y("mean:Q", title="Prediction Runtime (normalized)"),
             color=color,
         )
     )