From c93668e03447837517835f75f4020a35b4b5ed46 Mon Sep 17 00:00:00 2001
From: Reid Johnson <reid.johnson@gmail.com>
Date: Fri, 23 Aug 2024 21:43:36 +0900
Subject: [PATCH] Update example plots

---
 examples/plot_huggingface_model.py      |  1 +
 examples/plot_predict_custom.py         |  1 +
 examples/plot_proximity_counts.py       |  1 +
 examples/plot_quantile_conformalized.py |  6 ++++--
 examples/plot_quantile_example.py       |  5 +++--
 examples/plot_quantile_extrapolation.py | 25 +++++++++++++++++--------
 examples/plot_quantile_interpolation.py |  5 +++--
 examples/plot_quantile_intervals.py     | 14 ++++++++------
 examples/plot_quantile_multioutput.py   |  7 ++++---
 examples/plot_quantile_ranks.py         |  3 ++-
 examples/plot_quantile_vs_standard.py   |  1 +
 examples/plot_treeshap_example.py       |  1 +
 12 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/examples/plot_huggingface_model.py b/examples/plot_huggingface_model.py
index be93c63..62d6905 100755
--- a/examples/plot_huggingface_model.py
+++ b/examples/plot_huggingface_model.py
@@ -176,6 +176,7 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo", random_state=
 
 
 def plot_quantiles_by_latlon(df, quantiles, color_scheme="cividis"):
+    """Plot quantile predictions on California Housing dataset by lat/lon."""
     # Slider for varying the displayed quantile estimates.
     slider = alt.binding_range(
         name="Predicted Quantile: ",
diff --git a/examples/plot_predict_custom.py b/examples/plot_predict_custom.py
index 38483a1..c236ea3 100755
--- a/examples/plot_predict_custom.py
+++ b/examples/plot_predict_custom.py
@@ -103,6 +103,7 @@ def predict(qrf, X, quantiles=0.5, what=None):
 
 
 def plot_ecdf(df):
+    """Plot the ECDF for test samples."""
     min_idx = df["index"].min()
     max_idx = df["index"].max()
 
diff --git a/examples/plot_proximity_counts.py b/examples/plot_proximity_counts.py
index bf701f4..0747108 100644
--- a/examples/plot_proximity_counts.py
+++ b/examples/plot_proximity_counts.py
@@ -125,6 +125,7 @@ def plot_digits_proximities(
     height=225,
     width=225,
 ):
+    """Plot Digits dataset proximities for test samples."""
     dim_x, dim_y = pixel_dim[0], pixel_dim[1]
     dgt_x, dgt_y = len(str(dim_x)), len(str(dim_y))
 
diff --git a/examples/plot_quantile_conformalized.py b/examples/plot_quantile_conformalized.py
index 25b8713..35c8b07 100755
--- a/examples/plot_quantile_conformalized.py
+++ b/examples/plot_quantile_conformalized.py
@@ -157,7 +157,9 @@ def cqr_strategy(alpha, X_train, X_test, y_train, y_test, random_state=None):
 
 
 def plot_prediction_intervals_by_strategy(df):
-    def plot_prediction_intervals(df, domain):
+    """Plot prediction intervals by interval estimate strategy."""
+
+    def _plot_prediction_intervals(df, domain):
         # Slider for varying the target coverage level.
         slider = alt.binding_range(name="Coverage Target: ", min=0, max=1, step=0.1)
         coverage_val = alt.param(name="coverage", value=0.9, bind=slider)
@@ -280,7 +282,7 @@ def plot_prediction_intervals(df, domain):
             int(np.max((df[["y_test", "y_pred"]].max(axis=0)))),  # max of all axes
         ]
         df_i = df.query(f"strategy == '{strategy}'").reset_index(drop=True)
-        base = plot_prediction_intervals(df_i, domain)
+        base = _plot_prediction_intervals(df_i, domain)
         chart |= base.properties(height=225, width=300, title=strategies[strategy])
 
     return chart
diff --git a/examples/plot_quantile_example.py b/examples/plot_quantile_example.py
index 554a7bc..c7bcb56 100755
--- a/examples/plot_quantile_example.py
+++ b/examples/plot_quantile_example.py
@@ -60,7 +60,8 @@ def make_toy_dataset(n_samples, bounds, add_noise=True, random_state=0):
 )
 
 
-def plot_fit_and_intervals(df):
+def plot_predictions_and_intervals(df):
+    """Plot model predictions and prediction intervals with ground truth."""
     area_pred = (
         alt.Chart(df)
         .transform_filter(~alt.datum["test"])  # filter to non-test data
@@ -143,5 +144,5 @@ def plot_fit_and_intervals(df):
     return chart
 
 
-chart = plot_fit_and_intervals(df)
+chart = plot_predictions_and_intervals(df)
 chart
diff --git a/examples/plot_quantile_extrapolation.py b/examples/plot_quantile_extrapolation.py
index 9aa0e1e..b69f81a 100755
--- a/examples/plot_quantile_extrapolation.py
+++ b/examples/plot_quantile_extrapolation.py
@@ -30,7 +30,7 @@
 func = lambda x: x * np.sin(x)
 func_str = "f(x) = x sin(x)"
 quantiles = [0.025, 0.975, 0.5]
-qrf_params = {"max_samples_leaf": None, "min_samples_leaf": 4, "random_state": random_state}
+qrf_params = {"min_samples_leaf": 4, "max_samples_leaf": None, "random_state": random_state}
 
 
 def make_func_Xy(func, bounds, n_samples, add_noise=True, random_state=0):
@@ -404,7 +404,16 @@ def get_coverage_xtr(bounds_list, train_indices, test_indices, y_train, level, *
 
 
 def plot_qrf_vs_xtrapolation_comparison(df, func_str):
-    def plot_extrapolations(df, title="", legend=False, func_str="", x_domain=None, y_domain=None):
+    """Plot comparison of QRF vs Xtrapolation on extrapolated data."""
+
+    def _plot_extrapolations(
+        df,
+        title="",
+        legend=False,
+        func_str="",
+        x_domain=None,
+        y_domain=None,
+    ):
         x_scale = None
         if x_domain is not None:
             x_scale = alt.Scale(domain=x_domain, nice=False, padding=0)
@@ -521,29 +530,29 @@ def plot_extrapolations(df, title="", legend=False, func_str="", x_domain=None,
     xtra_mapper = {"bb_mid": "y_pred", "bb_low": "y_pred_low", "bb_upp": "y_pred_upp"}
 
     chart1 = alt.layer(
-        plot_extrapolations(
+        _plot_extrapolations(
             df.query("~(test_left | test_right)").assign(**{"coverage": lambda x: x["cov_qrf"]}),
             title="Extrapolation with Standard QRF",
             **kwargs,
         ).resolve_scale(color="independent"),
-        plot_extrapolations(df.query("test_left").assign(extrapolate=True), **kwargs),
-        plot_extrapolations(df.query("test_right").assign(extrapolate=True), **kwargs),
+        _plot_extrapolations(df.query("test_left").assign(extrapolate=True), **kwargs),
+        _plot_extrapolations(df.query("test_right").assign(extrapolate=True), **kwargs),
     )
     chart2 = alt.layer(
-        plot_extrapolations(
+        _plot_extrapolations(
             df.query("~(test_left | test_right)").assign(**{"coverage": lambda x: x["cov_xtr"]}),
             title="Extrapolation with Xtrapolation Procedure",
             legend=True,
             **kwargs,
         ).resolve_scale(color="independent"),
-        plot_extrapolations(
+        _plot_extrapolations(
             df.query("test_left")
             .assign(extrapolate=True)
             .drop(columns=["y_pred", "y_pred_low", "y_pred_upp"])
             .rename(xtra_mapper, axis="columns"),
             **kwargs,
         ),
-        plot_extrapolations(
+        _plot_extrapolations(
             df.query("test_right")
             .assign(extrapolate=True)
             .drop(columns=["y_pred", "y_pred_low", "y_pred_upp"])
diff --git a/examples/plot_quantile_interpolation.py b/examples/plot_quantile_interpolation.py
index 3b19c9c..2ae4e4e 100755
--- a/examples/plot_quantile_interpolation.py
+++ b/examples/plot_quantile_interpolation.py
@@ -80,7 +80,8 @@
 df = pd.concat(dfs, ignore_index=True)
 
 
-def plot_interpolations(df, legend):
+def plot_interpolation_predictions(df, legend):
+    """Plot predictions by quantile interpolation methods."""
     # Slider for varying the prediction interval that determines the quantiles being interpolated.
     slider = alt.binding_range(name="Prediction Interval: ", min=0, max=1, step=0.01)
     interval_val = alt.param(name="interval", value=0.9, bind=slider)
@@ -163,5 +164,5 @@ def plot_interpolations(df, legend):
     return chart
 
 
-chart = plot_interpolations(df, legend)
+chart = plot_interpolation_predictions(df, legend)
 chart
diff --git a/examples/plot_quantile_intervals.py b/examples/plot_quantile_intervals.py
index 6ee60a2..e418568 100755
--- a/examples/plot_quantile_intervals.py
+++ b/examples/plot_quantile_intervals.py
@@ -50,8 +50,10 @@
 df = pd.DataFrame(data).pipe(lambda x: x * 100_000)  # convert to dollars
 
 
-def plot_calibration_and_intervals(df):
-    def plot_calibration(df):
+def plot_california_calibration_and_intervals(df):
+    """Plot calibration and intervals on California Housing dataset."""
+
+    def _plot_calibration(df):
         domain = [
             int(np.min(np.minimum(df["y_true"], df["y_pred"]))),  # min of both axes
             int(np.max(np.maximum(df["y_true"], df["y_pred"]))),  # max of both axes
@@ -111,7 +113,7 @@ def plot_calibration(df):
         chart = bar + tick_low + tick_upp + circle + diagonal
         return chart
 
-    def plot_intervals(df):
+    def _plot_intervals(df):
         df = df.copy()
 
         # Order samples by interval width.
@@ -175,12 +177,12 @@ def plot_intervals(df):
         chart = bar + tick_low + tick_upp + circle
         return chart
 
-    chart1 = plot_calibration(df).properties(height=250, width=325)
-    chart2 = plot_intervals(df).properties(height=250, width=325)
+    chart1 = _plot_calibration(df).properties(height=250, width=325)
+    chart2 = _plot_intervals(df).properties(height=250, width=325)
     chart = chart1 | chart2
 
     return chart
 
 
-chart = plot_calibration_and_intervals(df)
+chart = plot_california_calibration_and_intervals(df)
 chart
diff --git a/examples/plot_quantile_multioutput.py b/examples/plot_quantile_multioutput.py
index a4a4aff..1eef472 100644
--- a/examples/plot_quantile_multioutput.py
+++ b/examples/plot_quantile_multioutput.py
@@ -40,7 +40,7 @@
 
 
 def make_func_Xy(funcs, bounds, n_samples):
-    """Make a dataset from a specified function."""
+    """Make a dataset from specified function(s)."""
     x = np.linspace(*bounds, n_samples)
     y = np.empty((len(x), len(funcs)))
     for i, func in enumerate(funcs):
@@ -53,11 +53,11 @@ def make_func_Xy(funcs, bounds, n_samples):
 
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
 
-qrf = RandomForestQuantileRegressor(max_samples_leaf=None, max_depth=4, random_state=random_state)
+qrf = RandomForestQuantileRegressor(max_depth=4, max_samples_leaf=None, random_state=random_state)
 qrf.fit(X_train, y_train)  # fit on all of the targets simultaneously
 
 # Get multi-target predictions at specified quantiles.
-y_pred = qrf.predict(X, quantiles=quantiles)  # shape = (n_samples, n_targets, n_quantiles)
+y_pred = qrf.predict(X, quantiles=quantiles)  # output shape = (n_samples, n_targets, n_quantiles)
 
 df = pd.DataFrame(
     {
@@ -72,6 +72,7 @@ def make_func_Xy(funcs, bounds, n_samples):
 
 
 def plot_multitargets(df, legend):
+    """Plot predictions and prediction intervals for multi-target outputs."""
     # Slider for varying the displayed prediction intervals.
     slider = alt.binding_range(name="Prediction Interval: ", min=0, max=1, step=0.05)
     interval_val = alt.param(name="interval", value=0.95, bind=slider)
diff --git a/examples/plot_quantile_ranks.py b/examples/plot_quantile_ranks.py
index 24e28a3..68e5ba2 100644
--- a/examples/plot_quantile_ranks.py
+++ b/examples/plot_quantile_ranks.py
@@ -37,8 +37,8 @@ def make_toy_dataset(n_samples, bounds, random_state=0):
 X, y = make_toy_dataset(n_samples, bounds, random_state=0)
 
 qrf = RandomForestQuantileRegressor(
-    max_samples_leaf=None,
     min_samples_leaf=50,
+    max_samples_leaf=None,
     random_state=random_state,
 ).fit(X, y)
 
@@ -51,6 +51,7 @@ def make_toy_dataset(n_samples, bounds, random_state=0):
 
 
 def plot_pred_and_ranks(df):
+    """Plot quantile predictions and ranks."""
     # Slider for varying the interval that defines the upper and lower quantile rank thresholds.
     slider = alt.binding_range(name="Rank Interval Threshold: ", min=0, max=1, step=0.01)
     interval_val = alt.param(name="interval", value=0.05, bind=slider)
diff --git a/examples/plot_quantile_vs_standard.py b/examples/plot_quantile_vs_standard.py
index 3b1f2dd..9d4a4a2 100755
--- a/examples/plot_quantile_vs_standard.py
+++ b/examples/plot_quantile_vs_standard.py
@@ -66,6 +66,7 @@ def make_skewed_dataset(a=7, loc=-1, scale=1, random_state=0):
 
 
 def plot_prediction_histograms(df, legend):
+    """Plot histogram of predictions by model."""
     # Slider for varying the quantile value used for generating the QRF histogram.
     slider = alt.binding_range(
         name="Predicted Quantile: ",
diff --git a/examples/plot_treeshap_example.py b/examples/plot_treeshap_example.py
index b34f442..f4df453 100644
--- a/examples/plot_treeshap_example.py
+++ b/examples/plot_treeshap_example.py
@@ -126,6 +126,7 @@ def get_shap_value_by_index(shap_values, index):
 
 
 def plot_shap_waterfall_with_quantiles(df, height=300):
+    """Plot SHAP waterfall plot by quantile predictions."""
     df = df.copy()
 
     # Slider for varying the applied quantile estimates.