From 8b1170da2c603a7731d33b5c6502083b8430a89d Mon Sep 17 00:00:00 2001
From: Reid Johnson <reidj@zillowgroup.com>
Date: Sun, 18 Feb 2024 23:53:10 -0800
Subject: [PATCH] Update examples

---
 .../examples/plot_quantile_conformalized.py   | 12 +++--
 .../examples/plot_quantile_multioutput.py     |  2 +-
 .../examples/plot_quantile_vs_standard.py     | 44 +++++++++----------
 .../tests/examples/plot_quantile_weighting.py |  6 +--
 4 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/quantile_forest/tests/examples/plot_quantile_conformalized.py b/quantile_forest/tests/examples/plot_quantile_conformalized.py
index 6e0caf4..8b0418d 100755
--- a/quantile_forest/tests/examples/plot_quantile_conformalized.py
+++ b/quantile_forest/tests/examples/plot_quantile_conformalized.py
@@ -6,8 +6,10 @@
 construct reliable prediction intervals using conformalized quantile
 regression (CQR). CQR offers prediction intervals that attain valid coverage,
 while QRF may require additional calibration for reliable interval estimates.
-Adapted from "Prediction intervals: Quantile Regression Forests" by Carl McBride
-Ellis:
+Notice that in this example, by using CQR we obtain a level of coverage (i.e.,
+percentage of samples that actaully fall within their prediction interval)
+that is closer to the target level. Adapted from "Prediction intervals:
+Quantile Regression Forests" by Carl McBride Ellis:
 https://www.kaggle.com/code/carlmcbrideellis/prediction-intervals-quantile-regression-forests.
 """
 
@@ -225,7 +227,7 @@ def plot_prediction_intervals(df, domain):
         )
         .transform_calculate(
             coverage_text=(
-                "'Coverage: ' + format(datum.coverage * 100, '.1f') + '%'"
+                f"'Coverage: ' + format({alt.datum['coverage'] * 100}, '.1f') + '%'"
                 f" + ' (target = {cov_pct}%)'"
             )
         )
@@ -240,7 +242,9 @@ def plot_prediction_intervals(df, domain):
         base.transform_aggregate(
             coverage="mean(coverage)", width="mean(width)", groupby=["strategy"]
         )
-        .transform_calculate(width_text="'Interval Width: ' + format(datum.width, '$,d')")
+        .transform_calculate(
+            width_text=f"'Interval Width: ' + format({alt.datum['width']}, '$,d')"
+        )
         .mark_text(align="left", baseline="top")
         .encode(
             x=alt.value(5),
diff --git a/quantile_forest/tests/examples/plot_quantile_multioutput.py b/quantile_forest/tests/examples/plot_quantile_multioutput.py
index 4f2a76c..a8cd225 100755
--- a/quantile_forest/tests/examples/plot_quantile_multioutput.py
+++ b/quantile_forest/tests/examples/plot_quantile_multioutput.py
@@ -6,7 +6,7 @@
 regressor for multiple target variables. For each target, multiple quantiles
 can be estimated simultaneously. In this example, the target variable has
 two output values for each sample, with a single regressor used to estimate
-three quantiles (the median and interval) for each target output.
+three quantiles (the median and 95% interval) for each target output.
 """
 
 import altair as alt
diff --git a/quantile_forest/tests/examples/plot_quantile_vs_standard.py b/quantile_forest/tests/examples/plot_quantile_vs_standard.py
index 4cb45bd..64dc023 100755
--- a/quantile_forest/tests/examples/plot_quantile_vs_standard.py
+++ b/quantile_forest/tests/examples/plot_quantile_vs_standard.py
@@ -46,49 +46,49 @@
     "QRF (Median)": "#006aff",
 }
 
-df = pd.DataFrame({"Actual": y_test, "RF (Mean)": y_pred_rf, "QRF (Median)": y_pred_qrf})
+df = pd.DataFrame({"actual": y_test, "rf": y_pred_rf, "qrf": y_pred_qrf})
 
 
 def plot_prediction_histograms(df, legend):
-    click = alt.selection_point(fields=["estimator"], bind="legend")
+    click = alt.selection_point(fields=["label"], bind="legend")
 
     color = alt.condition(
         click,
-        alt.Color("estimator:N", sort=list(legend.keys()), title=None),
+        alt.Color("label:N", sort=list(legend.keys()), title=None),
         alt.value("lightgray"),
     )
 
     chart = (
-        alt.Chart(df, width=alt.Step(6))
-        .transform_fold(list(legend.keys()), as_=["estimator", "y_pred"])
-        .transform_joinaggregate(total="count(*)", groupby=["estimator"])
+        alt.Chart(df)
+        .transform_calculate(calculate=f"round({alt.datum['actual']} * 10) / 10", as_="Actual")
+        .transform_calculate(calculate=f"round({alt.datum['rf']} * 10) / 10", as_="RF (Mean)")
+        .transform_calculate(calculate=f"round({alt.datum['qrf']} * 10) / 10", as_="QRF (Median)")
+        .transform_fold(["Actual", "RF (Mean)", "QRF (Median)"], as_=["label", "value"])
+        .transform_joinaggregate(total="count(*)", groupby=["label"])
         .transform_calculate(pct="1 / datum.total")
         .mark_bar()
         .encode(
-            x=alt.X("estimator:N", axis=alt.Axis(labels=False, title=None)),
-            y=alt.Y("sum(pct):Q", axis=alt.Axis(title="Frequency")),
-            color=color,
-            column=alt.Column(
-                "y_pred:Q",
-                bin=alt.Bin(maxbins=80),
-                header=alt.Header(
-                    labelExpr="datum.value % 1 == 0 ? floor(datum.value) : null",
-                    labelOrient="bottom",
-                    titleOrient="bottom",
+            x=alt.X(
+                "value:O",
+                axis=alt.Axis(
+                    labelAngle=0,
+                    labelExpr="datum.value % 0.5 == 0 ? datum.value : null",
                 ),
-                title="Actual and Predicted Target Values",
+                title="Value",
             ),
+            y=alt.Y("sum(pct):Q", axis=alt.Axis(format=".0%", title="Percentage")),
+            color=color,
+            xOffset=alt.XOffset("label:N"),
             tooltip=[
-                alt.Tooltip("estimator:N", title=" "),
+                alt.Tooltip("label:N", title="Label"),
+                alt.Tooltip("value:O", title="Value (binned)"),
+                alt.Tooltip("sum(pct):Q", format=".0%", title="Percentage"),
             ],
         )
         .add_params(click)
-        .configure_facet(spacing=0)
         .configure_range(category=alt.RangeScheme(list(legend.values())))
-        .configure_scale(bandPaddingInner=0.2)
-        .configure_view(stroke=None)
+        .properties(height=400, width=650)
     )
-
     return chart
 
 
diff --git a/quantile_forest/tests/examples/plot_quantile_weighting.py b/quantile_forest/tests/examples/plot_quantile_weighting.py
index 00c6c58..39e9c91 100755
--- a/quantile_forest/tests/examples/plot_quantile_weighting.py
+++ b/quantile_forest/tests/examples/plot_quantile_weighting.py
@@ -30,7 +30,7 @@ def timing():
     t1 = time.time()
 
 
-X, y = datasets.make_regression(n_samples=250, n_features=4, n_targets=5, random_state=0)
+X, y = datasets.make_regression(n_samples=500, n_features=4, n_targets=5, random_state=0)
 
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
 
@@ -40,8 +40,8 @@ def timing():
     "QRF Unweighted Quantile": "#001751",
 }
 
-est_sizes = [1, 5, 10, 25, 50, 100]
-n_repeats = 10
+est_sizes = [1, 5, 10, 25, 50, 75, 100]
+n_repeats = 5
 
 timings = np.empty((len(est_sizes), n_repeats, 3))
 for i, n_estimators in enumerate(est_sizes):