From 8db4ac6c2a0bbe3c5198038997f0d7d39c49682e Mon Sep 17 00:00:00 2001
From: Reid Johnson <reid.johnson@gmail.com>
Date: Fri, 9 Aug 2024 18:43:24 -0700
Subject: [PATCH] Update example plots

---
 examples/plot_huggingface_model.py      | 48 +++++++++++-----------
 examples/plot_predict_custom.py         | 25 ++++++------
 examples/plot_proximity_counts.py       | 53 +++++++++----------------
 examples/plot_quantile_conformalized.py | 33 +++++++--------
 examples/plot_quantile_example.py       | 29 +++++++-------
 examples/plot_quantile_extrapolation.py |  6 +--
 examples/plot_quantile_interpolation.py | 29 ++++++++------
 examples/plot_quantile_intervals.py     |  4 +-
 examples/plot_quantile_multioutput.py   | 39 +++++++++---------
 examples/plot_quantile_ranks.py         | 43 ++++++++++----------
 examples/plot_quantile_vs_standard.py   | 37 ++++++++---------
 examples/plot_treeshap_example.py       | 49 ++++++++++++++++++-----
 12 files changed, 203 insertions(+), 192 deletions(-)

diff --git a/examples/plot_huggingface_model.py b/examples/plot_huggingface_model.py
index 9508bfb..2310b8d 100755
--- a/examples/plot_huggingface_model.py
+++ b/examples/plot_huggingface_model.py
@@ -6,15 +6,16 @@
 (QRF) model from Hugging Face Hub and use it to estimate new quantiles. In
 this scenario, a QRF has been trained with default parameters on a train-test
 split of the California housing dataset and uploaded to Hugging Face Hub. The
-model is downloaded, and inference is performed over several quantiles for
-each instance in the dataset. The estimates are visualized by the latitude and
-longitude of each instance. The model used is available on Hugging Face Hub
+model is downloaded and used to perform inference across several quantiles for
+each dataset sample. The results are visualized by the latitude and longitude
+of each sample. The model used is available on Hugging Face Hub
 `here <https://huggingface.co/quantile-forest/california-housing-example>`_.
 """
 
 import os
 import pickle
 import shutil
+import tempfile
 
 import altair as alt
 import numpy as np
@@ -134,13 +135,16 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo", random_state=
 
     # Create the repository on the Hugging Face Hub if it does not exist.
     # Push the model to the repository.
-    hub_utils.push(
-        repo_id=repo_id,
-        source=local_dir,
-        token=token,  # personal token to be downloaded from Hugging Face
-        commit_message="Model commit",
-        create_remote=True,
-    )
+    try:
+        hub_utils.push(
+            repo_id=repo_id,
+            source=local_dir,
+            token=token,  # personal token to be downloaded from Hugging Face
+            commit_message="Model commit",
+            create_remote=True,
+        )
+    except Exception as e:
+        print(f"Error pushing model to Hugging Face Hub: {e}")
 
     os.remove(model_filename)
     shutil.rmtree(local_dir)
@@ -149,17 +153,13 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo", random_state=
 if not load_existing:
     fit_and_upload_model(token, repo_id, random_state=random_seed)
 
-# Download the repository locally.
-local_dir = "./local_repo"
-if os.path.exists(local_dir):
-    shutil.rmtree(local_dir)
-hub_utils.download(repo_id=repo_id, dst=local_dir)
-
-# Load the fitted model.
+# Download the repository locally and load the fitted model.
 model_filename = "model.pkl"
-with open(f"{local_dir}/{model_filename}", "rb") as file:
-    qrf = pickle.load(file)
-shutil.rmtree(local_dir)
+local_dir = "./local_repo"
+with tempfile.TemporaryDirectory() as local_dir:
+    hub_utils.download(repo_id=repo_id, dst=local_dir)
+    with open(f"{local_dir}/{model_filename}", "rb") as file:
+        qrf = pickle.load(file)
 
 # Estimate quantiles.
 X, y = datasets.fetch_california_housing(as_frame=True, return_X_y=True)
@@ -174,7 +174,7 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo", random_state=
 )
 
 
-def plot_quantiles_by_latlon(df, quantiles):
+def plot_quantiles_by_latlon(df, quantiles, color_scheme="cividis"):
     # Slider for varying the displayed quantile estimates.
     slider = alt.binding_range(
         min=0,
@@ -183,11 +183,11 @@ def plot_quantiles_by_latlon(df, quantiles):
         name="Predicted Quantile: ",
     )
 
-    quantile_selection = alt.param(value=0.5, bind=slider, name="quantile")
+    quantile_val = alt.param(value=0.5, bind=slider, name="quantile")
 
     chart = (
         alt.Chart(df)
-        .add_params(quantile_selection)
+        .add_params(quantile_val)
         .transform_filter("datum.quantile == quantile")
         .mark_circle()
         .encode(
@@ -203,7 +203,7 @@ def plot_quantiles_by_latlon(df, quantiles):
                 scale=alt.Scale(zero=False),
                 title="Latitude",
             ),
-            color=alt.Color("value:Q", scale=alt.Scale(scheme="cividis"), title="Prediction"),
+            color=alt.Color("value:Q", scale=alt.Scale(scheme=color_scheme), title="Prediction"),
             size=alt.Size("Population:Q"),
             tooltip=[
                 alt.Tooltip("index:N", title="Row ID"),
diff --git a/examples/plot_predict_custom.py b/examples/plot_predict_custom.py
index 94f434f..bc5e3cc 100755
--- a/examples/plot_predict_custom.py
+++ b/examples/plot_predict_custom.py
@@ -3,12 +3,11 @@
 ============================================
 
 This example demonstrates how to extract the empirical distribution from a
-quantile regression forest (QRF) for one or more samples to calculate a
-user-specified function of interest. While a QRF is designed to estimate
-quantiles from the empirical distribution calculated for each sample, it can
-also be useful to use this empirical distribution to calculate other
-quantities of interest. Here, we calculate the empirical cumulative
-distribution function (ECDF) for a test sample.
+quantile regression forest (QRF) to calculate user-specified functions of
+interest. While a QRF is designed to estimate quantiles, their empirical
+distributions can also be used to calculate other statistical quantities. In
+this scenario, we compute the empirical cumulative distribution function
+(ECDF) for test samples.
 """
 
 from itertools import chain
@@ -93,7 +92,7 @@ def predict(reg, X, quantiles=0.5, what=None):
             "y_val": list(chain.from_iterable([y_i.quantiles for y_i in y_ecdf])),
             "y_val2": list(chain.from_iterable([y_i.quantiles for y_i in y_ecdf]))[1:] + [np.nan],
             "proba": list(chain.from_iterable([y_i.probabilities for y_i in y_ecdf])),
-            "sample_idx": [idx] * n_quantiles,
+            "index": [idx] * n_quantiles,
         }
     )
     dfs.append(df_i)
@@ -101,12 +100,12 @@ def predict(reg, X, quantiles=0.5, what=None):
 
 
 def plot_ecdf(df):
-    min_idx = df["sample_idx"].min()
-    max_idx = df["sample_idx"].max()
+    min_idx = df["index"].min()
+    max_idx = df["index"].max()
 
     # Slider for determining the sample index for which the custom function is being visualized.
-    slider = alt.binding_range(min=min_idx, max=max_idx, step=1, name="Sample Index: ")
-    sample_selection = alt.param(value=0, bind=slider, name="sample_idx")
+    slider = alt.binding_range(min=min_idx, max=max_idx, step=1, name="Test Sample Index: ")
+    index_selection = alt.selection_point(value=0, bind=slider, fields=["index"])
 
     tooltip = [
         alt.Tooltip("y_val:Q", title="Response Value"),
@@ -136,8 +135,8 @@ def plot_ecdf(df):
 
     chart = (
         (circles + lines)
-        .add_params(sample_selection)
-        .transform_filter(alt.datum["sample_idx"] == sample_selection)
+        .add_params(index_selection)
+        .transform_filter(index_selection)
         .properties(
             height=400,
             width=650,
diff --git a/examples/plot_proximity_counts.py b/examples/plot_proximity_counts.py
index f927de9..c35d87d 100644
--- a/examples/plot_proximity_counts.py
+++ b/examples/plot_proximity_counts.py
@@ -3,16 +3,15 @@
 ==================================================
 
 This example demonstrates the use of quantile regression forest (QRF)
-proximity counts to identify similar samples in an unsupervised manner, as the
-target values are not used during model fitting. In this scenario, we train a
-QRF on a noisy dataset to predict individual pixel values (i.e., denoise). We
-then retrieve the proximity values for samples in a noisy test set. For each
-test sample digit, we visualize it alongside a set of similar (non-noisy)
-training samples determined by their proximity counts, as well as the
-non-noisy digit. The similar samples are ordered from the highest to the
-lowest proximity count for each digit, arranged from left to right and top to
-bottom. This example illustrates the effectiveness of proximity counts in
-identifying similar samples, even when using noisy training and test data.
+proximity counts to identify similar samples. In this scenario, we train a QRF
+on a noisy dataset to predict individual pixel values in an unsupervised
+manner (the target labels are not used during training) for denoising
+purposes. We then retrieve the proximity values for the noisy test samples. We
+visualize each test sample alongside a set of similar (non-noisy) training
+samples determined by their proximity counts. These similar samples are
+ordered from the highest to the lowest proximity count. This illustrates how
+proximity counts can effectively identify similar samples even in noisy
+conditions.
 """
 
 import altair as alt
@@ -41,12 +40,7 @@
 
 def add_gaussian_noise(X, mean=0, std=0.1, random_state=None):
     """Add Gaussian noise to input data."""
-    if random_state is None:
-        rng = check_random_state(0)
-    elif isinstance(random_state, int):
-        rng = check_random_state(random_state)
-    else:
-        rng = random_state
+    rng = check_random_state(random_state)
 
     scaler = MinMaxScaler()
     X_scaled = scaler.fit_transform(X)
@@ -97,7 +91,6 @@ def extract_floats(combined_df, scale=100):
     .join(y_test)
     .reset_index()
     .join(df_prox)
-    .iloc[:n_test_samples]
     .explode("prox")
     .assign(
         **{
@@ -140,26 +133,16 @@ def plot_digits_proximities(
     subplot_dim = (width - subplot_spacing * (n_subplot_rows - 1)) / n_subplot_rows
 
     # Slider for determining the test index for which the data is being visualized.
-    slider = alt.binding_range(
-        min=0,
-        max=n_samples - 1,
-        step=1,
-        name="Test Index: ",
-    )
-
-    idx_val = alt.selection_point(
-        value=0,
-        bind=slider,
-        fields=["index"],
-    )
+    slider = alt.binding_range(min=0, max=n_samples - 1, step=1, name="Test Sample Index: ")
+    index_selection = alt.selection_point(value=0, bind=slider, fields=["index"])
 
     scale = alt.Scale(domain=[x_min, x_max], scheme="greys")
-    opacity = (alt.value(0), alt.value(0.67))
+    opacity = (alt.value(0), alt.value(0.5))
 
-    base = alt.Chart(df).add_params(idx_val).transform_filter(idx_val)
+    base = alt.Chart(df).add_params(index_selection).transform_filter(index_selection)
 
     chart1 = (
-        base.transform_filter(f"datum.prox_idx == 0")
+        base.transform_filter("datum.prox_idx == 0")
         .transform_fold(fold=pixel_cols, as_=["pixel", "value"])
         .transform_calculate(value_clean=f"floor(datum.value / {pixel_scale})")
         .transform_calculate(value_noisy=f"datum.value - (datum.value_clean * {pixel_scale})")
@@ -172,7 +155,7 @@ def plot_digits_proximities(
             opacity=alt.condition(alt.datum["value_noisy"] == 0, *opacity),
             tooltip=[
                 alt.Tooltip("target:Q", title="Digit"),
-                alt.Tooltip("value_noisy:Q", format=".3f", title="Pixel Value"),
+                alt.Tooltip("value_noisy:Q", format=",.3f", title="Pixel Value"),
                 alt.Tooltip("x:Q", title="Pixel X"),
                 alt.Tooltip("y:Q", title="Pixel Y"),
             ],
@@ -213,7 +196,7 @@ def plot_digits_proximities(
     )
 
     chart3 = (
-        base.transform_filter(f"datum.prox_idx == 0")
+        base.transform_filter("datum.prox_idx == 0")
         .transform_fold(fold=pixel_cols, as_=["pixel", "value"])
         .transform_calculate(value_clean=f"floor(datum.value / {pixel_scale})")
         .transform_calculate(x=pixel_x, y=pixel_y)
@@ -225,7 +208,7 @@ def plot_digits_proximities(
             opacity=alt.condition(alt.datum["value_clean"] == 0, *opacity),
             tooltip=[
                 alt.Tooltip("target:Q", title="Digit"),
-                alt.Tooltip("value_clean:Q", title="Pixel Value"),
+                alt.Tooltip("value_clean:Q", format=",.3f", title="Pixel Value"),
                 alt.Tooltip("x:Q", title="Pixel X"),
                 alt.Tooltip("y:Q", title="Pixel Y"),
             ],
diff --git a/examples/plot_quantile_conformalized.py b/examples/plot_quantile_conformalized.py
index 52d1857..9c14879 100755
--- a/examples/plot_quantile_conformalized.py
+++ b/examples/plot_quantile_conformalized.py
@@ -4,12 +4,13 @@
 
 This example demonstrates the use of a quantile regression forest (QRF) to
 construct reliable prediction intervals using conformalized quantile
-regression (CQR). CQR provides prediction intervals that attain valid
-coverage, whereas QRF may require additional calibration for reliable interval
-estimates. In this example, by using CQR, we achieve a level of coverage
-(i.e., the percentage of samples that actually fall within their prediction
-interval) that is generally closer to the target level. This example is
-adapted from `"Prediction intervals: Quantile Regression Forests"
+regression (CQR). While QRFs can estimate quantiles, they may require
+additional calibration to provide reliable interval estimates. CQR provides
+prediction intervals that attain valid coverage. In this example, we use CQR
+to enhance QRF by producing prediction intervals that achieve a level of
+coverage (i.e., the percentage of samples that actually fall within their
+prediction interval) that is generally closer to the target level. This
+example is adapted from `"Prediction intervals: Quantile Regression Forests"
 <https://www.kaggle.com/code/carlmcbrideellis/prediction-intervals-quantile-regression-forests>`_
 by Carl McBride Ellis.
 """
@@ -69,6 +70,7 @@ def mean_width_score(y_pred_low, y_pred_upp):
 
 
 def qrf_strategy(alpha, X_train, X_test, y_train, y_test, random_state=None):
+    """QRF (baseline) strategy."""
     quantiles = [alpha / 2, 1 - alpha / 2]
 
     qrf = RandomForestQuantileRegressor(random_state=random_state)
@@ -89,6 +91,7 @@ def qrf_strategy(alpha, X_train, X_test, y_train, y_test, random_state=None):
 
 
 def cqr_strategy(alpha, X_train, X_test, y_train, y_test, random_state=None):
+    """Conformalized Quantile Regression (CQR) strategy with a QRF."""
     quantiles = [alpha / 2, 1 - alpha / 2]
 
     # Create calibration set.
@@ -160,8 +163,7 @@ def plot_prediction_intervals_by_strategy(df):
     def plot_prediction_intervals(df, domain):
         # Slider for varying the target coverage level.
         slider = alt.binding_range(min=0, max=1, step=0.1, name="Coverage Target: ")
-        cov_selection = alt.param(value=0.9, bind=slider, name="coverage")
-        cov_tol = 0.01
+        coverage_val = alt.param(value=0.9, bind=slider, name="coverage")
 
         click = alt.selection_point(fields=["y_label"], bind="legend")
 
@@ -175,10 +177,7 @@ def plot_prediction_intervals(df, domain):
 
         base = (
             alt.Chart(df)
-            .transform_filter(
-                (1 - alt.datum["alpha"] - cov_tol <= cov_selection)
-                & (1 - alt.datum["alpha"] + cov_tol >= cov_selection)
-            )
+            .transform_filter("round((1 - datum.alpha) * 100) / 100 == coverage")
             .transform_calculate(
                 y_label=(
                     "((datum.y_test >= datum.y_pred_low) & (datum.y_test <= datum.y_pred_upp))"
@@ -249,8 +248,8 @@ def plot_prediction_intervals(df, domain):
             )
             .transform_calculate(
                 coverage_text=(
-                    f"'Coverage: ' + format(datum.coverage * 100, '.1f') + '%'"
-                    f" + ' (target = ' + format((1 - datum.alpha) * 100, '.1f') + '%)'"
+                    "'Coverage: ' + format(datum.coverage * 100, '.1f') + '%'"
+                    " + ' (target = ' + format((1 - datum.alpha) * 100, '.1f') + '%)'"
                 )
             )
             .mark_text(align="left", baseline="top")
@@ -262,9 +261,7 @@ def plot_prediction_intervals(df, domain):
         )
         text_with = (
             base.transform_aggregate(width="mean(width)", groupby=["strategy"])
-            .transform_calculate(
-                width_text=f"'Interval Width: ' + format({alt.datum['width']}, '$,d')"
-            )
+            .transform_calculate(width_text="'Interval Width: ' + format(datum.width, '$,d')")
             .mark_text(align="left", baseline="top")
             .encode(
                 x=alt.value(5),
@@ -275,7 +272,7 @@ def plot_prediction_intervals(df, domain):
 
         chart = (
             bar + tick_low + tick_upp + circle + diagonal + text_coverage + text_with
-        ).add_params(cov_selection)
+        ).add_params(coverage_val)
 
         return chart
 
diff --git a/examples/plot_quantile_example.py b/examples/plot_quantile_example.py
index a3a1008..7910b37 100755
--- a/examples/plot_quantile_example.py
+++ b/examples/plot_quantile_example.py
@@ -3,8 +3,9 @@
 ===========================================
 
 This example demonstrates the use of a quantile regression forest (QRF) to
-predict a conditional median and prediction intervals. The example compares
-the QRF predictions to a ground truth function used to generate noisy samples.
+predict the conditional median and construct prediction intervals. The QRF
+predictions are compared to the ground truth function used to generate noisy
+samples.
 """
 
 import altair as alt
@@ -27,8 +28,8 @@ def make_toy_dataset(n_samples, bounds, add_noise=True, random_seed=0):
     f = x * np.sin(x)
 
     sigma = 0.25 + x / 10
-    noise = rng.lognormal(sigma=sigma) - np.exp(sigma**2 / 2)
-    y = f + (noise if add_noise else 0)
+    noise = rng.lognormal(sigma=sigma) - np.exp(sigma**2 / 2) if add_noise else np.zeros_like(f)
+    y = f + noise
 
     return np.atleast_2d(x).T, y
 
@@ -47,7 +48,7 @@ def make_toy_dataset(n_samples, bounds, add_noise=True, random_seed=0):
 
 df = pd.DataFrame(
     {
-        "X": np.concatenate([X_func.reshape(-1), X_test.reshape(-1)]),
+        "x": np.concatenate([X_func.reshape(-1), X_test.reshape(-1)]),
         "y": np.concatenate([y_func, y_test]),
         "y_pred": np.concatenate([y_pred_func[:, 1], y_pred_test[:, 1]]),
         "y_pred_low": np.concatenate([y_pred_func[:, 0], y_pred_test[:, 0]]),
@@ -63,11 +64,11 @@ def plot_fit_and_intervals(df):
         .transform_filter(alt.datum["test"])  # filter to test data
         .mark_circle(color="#f2a619")
         .encode(
-            x=alt.X("X:Q", scale=alt.Scale(nice=False)),
+            x=alt.X("x:Q", scale=alt.Scale(nice=False)),
             y=alt.Y("y:Q", title=""),
             color=alt.Color("point_label:N", scale=alt.Scale(range=["#f2a619"]), title=None),
             tooltip=[
-                alt.Tooltip("X:Q", format=",.3f", title="X"),
+                alt.Tooltip("x:Q", format=",.3f", title="X"),
                 alt.Tooltip("y:Q", format=",.3f", title="Y"),
             ],
         )
@@ -78,11 +79,11 @@ def plot_fit_and_intervals(df):
         .transform_filter(~alt.datum["test"])  # filter to training data
         .mark_line(color="black", size=3)
         .encode(
-            x=alt.X("X:Q", scale=alt.Scale(nice=False)),
-            y=alt.Y("y:Q", title="f(x)"),
+            x=alt.X("x:Q", scale=alt.Scale(nice=False)),
+            y=alt.Y("y:Q", title="Y"),
             color=alt.Color("y_true_label:N", scale=alt.Scale(range=["black"]), title=None),
             tooltip=[
-                alt.Tooltip("X:Q", format=",.3f", title="X"),
+                alt.Tooltip("x:Q", format=",.3f", title="X"),
                 alt.Tooltip("y:Q", format=",.3f", title="Y"),
             ],
         )
@@ -93,11 +94,11 @@ def plot_fit_and_intervals(df):
         .transform_filter(~alt.datum["test"])  # filter to training data
         .mark_line(color="#006aff", size=5)
         .encode(
-            x=alt.X("X:Q", scale=alt.Scale(nice=False)),
+            x=alt.X("x:Q", scale=alt.Scale(nice=False)),
             y=alt.Y("y_pred:Q", title=""),
             color=alt.Color("y_pred_label:N", scale=alt.Scale(range=["#006aff"]), title=None),
             tooltip=[
-                alt.Tooltip("X:Q", format=",.3f", title="X"),
+                alt.Tooltip("x:Q", format=",.3f", title="X"),
                 alt.Tooltip("y:Q", format=",.3f", title="Y"),
                 alt.Tooltip("y_pred:Q", format=",.3f", title="Predicted Y"),
             ],
@@ -109,11 +110,11 @@ def plot_fit_and_intervals(df):
         .transform_filter(~alt.datum["test"])  # filter to training data
         .mark_area(color="#e0f2ff", opacity=0.8)
         .encode(
-            x=alt.X("X:Q", scale=alt.Scale(nice=False), title="x"),
+            x=alt.X("x:Q", scale=alt.Scale(nice=False), title="X"),
             y=alt.Y("y_pred_low:Q", title=""),
             y2=alt.Y2("y_pred_upp:Q", title=None),
             tooltip=[
-                alt.Tooltip("X:Q", format=",.3f", title="X"),
+                alt.Tooltip("x:Q", format=",.3f", title="X"),
                 alt.Tooltip("y:Q", format=",.3f", title="Y"),
                 alt.Tooltip("y_pred:Q", format=",.3f", title="Predicted Y"),
                 alt.Tooltip("y_pred_low:Q", format=",.3f", title="Predicted Lower Y"),
diff --git a/examples/plot_quantile_extrapolation.py b/examples/plot_quantile_extrapolation.py
index 0ce3a5f..6718ac4 100755
--- a/examples/plot_quantile_extrapolation.py
+++ b/examples/plot_quantile_extrapolation.py
@@ -435,8 +435,8 @@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None
         base = alt.Chart(df.assign(**{"point_label": "Observations", "line_label": func_str}))
 
         points_true = base.mark_circle(size=20).encode(
-            x=alt.X("X_true:Q", scale=x_scale, title="x"),
-            y=alt.Y("y_true:Q", scale=y_scale, title="y"),
+            x=alt.X("X_true:Q", scale=x_scale, title="X"),
+            y=alt.Y("y_true:Q", scale=y_scale, title="Y"),
             color=points_color,
             tooltip=tooltip_true,
         )
@@ -471,7 +471,7 @@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None
                 base.transform_aggregate(coverage="mean(coverage)")
                 .transform_calculate(
                     coverage_text=(
-                        f"'Extrapolated Coverage: '"
+                        "'Extrapolated Coverage: '"
                         f" + format({alt.datum['coverage'] * 100}, '.1f') + '%'"
                         f" + ' (target = {(quantiles[1] - quantiles[0]) * 100}%)'"
                     )
diff --git a/examples/plot_quantile_interpolation.py b/examples/plot_quantile_interpolation.py
index 147ecc9..c9f589d 100755
--- a/examples/plot_quantile_interpolation.py
+++ b/examples/plot_quantile_interpolation.py
@@ -2,12 +2,14 @@
 Comparing Quantile Interpolation Methods
 ========================================
 
-This example illustrates the interpolation methods that can be applied during
-prediction when the desired quantile lies between two data points. In this toy
-example, the forest estimator creates a single split that separates samples
-1–3 and samples 4–5, with quantiles calculated separately for these two groups
-based on the actual sample values. The interpolation methods are used when a
-calculated quantile does not precisely correspond to one of the actual values.
+This example illustrates different interpolation methods that can be used
+during prediction in quantile regression forests (QRF). When a desired quantile
+lies between two data points, interpolation methods determine the predicted
+value. In this toy example, the QRF creates a split that divides the samples
+into two groups (samples 1–3 and samples 4–5), with quantiles calculated
+separately for each. The interpolation methods demonstrate how predictions are
+handled when a quantile does not exactly match a data point.
+
 """
 
 import altair as alt
@@ -23,6 +25,8 @@
 X = np.array([[-1, -1], [-1, -1], [-1, -1], [1, 1], [1, 1]])
 y = np.array([-2, -1, 0, 1, 2])
 
+# We use a single estimator that retains all leaf samples and is trained without bootstrap.
+# By construction of the data, this leads to samples split between two terminal leaf nodes.
 qrf = RandomForestQuantileRegressor(
     n_estimators=1,
     max_samples_leaf=None,
@@ -55,10 +59,11 @@
         "quantile_upp": [None] * len(y),
     }
 
+    quantiles = [0.5, round(0.5 - interval / 2, 3), round(0.5 + interval / 2, 3)]
+
     # Populate data based on prediction results with different interpolations.
     for interpolation in interpolations:
         # Get predictions at median and prediction intervals.
-        quantiles = [0.5, round(0.5 - interval / 2, 3), round(0.5 + interval / 2, 3)]
         y_pred = qrf.predict(X, quantiles=quantiles, interpolation=interpolation.lower())
 
         data["method"].extend([interpolation] * len(y))
@@ -77,7 +82,7 @@
 def plot_interpolations(df, legend):
     # Slider for varying the prediction interval that determines the quantiles being interpolated.
     slider = alt.binding_range(min=0, max=1, step=0.01, name="Prediction Interval: ")
-    interval_selection = alt.param(value=0.9, bind=slider, name="interval")
+    interval_val = alt.param(value=0.9, bind=slider, name="interval")
 
     click = alt.selection_point(fields=["method"], bind="legend")
 
@@ -90,9 +95,9 @@ def plot_interpolations(df, legend):
     tooltip = [
         alt.Tooltip("method:N", title="Method"),
         alt.Tooltip("X:N", title="X Values"),
-        alt.Tooltip("y_pred:Q", format=".3f", title="Predicted Y"),
-        alt.Tooltip("y_pred_low:Q", format=".3f", title="Predicted Lower Y"),
-        alt.Tooltip("y_pred_upp:Q", format=".3f", title="Predicted Upper Y"),
+        alt.Tooltip("y_pred:Q", format=",.3f", title="Predicted Y"),
+        alt.Tooltip("y_pred_low:Q", format=",.3f", title="Predicted Lower Y"),
+        alt.Tooltip("y_pred_upp:Q", format=",.3f", title="Predicted Upper Y"),
         alt.Tooltip("quantile_low:Q", format=".3f", title="Lower Quantile"),
         alt.Tooltip("quantile_upp:Q", format=".3f", title="Upper Quantile"),
     ]
@@ -132,7 +137,7 @@ def plot_interpolations(df, legend):
 
     chart = (
         (area + point)
-        .add_params(interval_selection, click)
+        .add_params(interval_val, click)
         .transform_filter(
             "(datum.method == 'Actual')"
             "| (datum.quantile_low == round((0.5 - interval / 2) * 1000) / 1000)"
diff --git a/examples/plot_quantile_intervals.py b/examples/plot_quantile_intervals.py
index 72412a4..e678ac1 100755
--- a/examples/plot_quantile_intervals.py
+++ b/examples/plot_quantile_intervals.py
@@ -3,8 +3,8 @@
 ================================================
 
 This example demonstrates how to use quantile regression forests (QRF) to
-generate prediction intervals on the California Housing dataset. Inspired by
-Figure 3 of `"Quantile Regression Forests"
+generate prediction intervals on the California Housing dataset. The
+visualization is inspired by Figure 3 of `"Quantile Regression Forests"
 <https://jmlr.org/papers/v7/meinshausen06a.html>`_ by Meinshausen.
 """
 
diff --git a/examples/plot_quantile_multioutput.py b/examples/plot_quantile_multioutput.py
index af035bf..be97688 100644
--- a/examples/plot_quantile_multioutput.py
+++ b/examples/plot_quantile_multioutput.py
@@ -5,7 +5,7 @@
 This example demonstrates how to fit a single quantile regressor for multiple
 target variables on a toy dataset. For each target, multiple quantiles can be
 estimated simultaneously. In this example, the target variable has two output
-values for each sample, with a single regressor used to estimate many
+values for each sample, with a single regressor used to estimate multiple
 quantiles simultaneously. Three of these quantiles are visualized concurrently
 for each target: the median line and the area defined by the interval points.
 """
@@ -52,8 +52,7 @@ def make_func_Xy(funcs, bounds, n_samples):
 
 
 def format_frac(fraction):
-    formatted = ("%.3g" % fraction).rstrip("0").rstrip(".")
-    return formatted if formatted else "0"
+    return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"
 
 
 # Create the dataset with multiple target variables.
@@ -67,28 +66,26 @@ def format_frac(fraction):
 # Get multi-target predictions at specified quantiles.
 y_pred = qrf.predict(X, quantiles=quantiles)
 
+# Prepare the data frame.
 df = pd.DataFrame(
     {
         "x": np.tile(X.squeeze(), len(funcs)),
         "y": y.reshape(-1, order="F"),
         "y_true": np.concatenate([f["signal"](X.squeeze()) for f in funcs]),
         "y_pred": np.concatenate([y_pred[:, i, len(quantiles) // 2] for i in range(len(funcs))]),
-        "target": np.concatenate([[f"{i}"] * len(X) for i in range(len(funcs))]),
+        "target": np.concatenate([[str(i)] * len(X) for i in range(len(funcs))]),
     }
-).join(
-    pd.DataFrame(
-        {
-            f"q_{format_frac(q)}": np.concatenate([y_pred[:, t, idx] for t in range(len(funcs))])
-            for idx, q in enumerate(quantiles)
-        }
-    )
 )
 
+# Add quantile predictions to the data frame.
+for idx, q in enumerate(quantiles):
+    df[f"q_{format_frac(q)}"] = y_pred[:, :, idx].flatten(order="F")
+
 
 def plot_multitargets(df, legend):
     # Slider for varying the displayed prediction intervals.
     slider = alt.binding_range(min=0, max=1, step=0.05, name="Prediction Interval: ")
-    interval_selection = alt.param(value=0.95, bind=slider, name="interval")
+    interval_val = alt.param(value=0.95, bind=slider, name="interval")
 
     click = alt.selection_point(fields=["target"], bind="legend")
 
@@ -119,14 +116,14 @@ def plot_multitargets(df, legend):
     base = (
         alt.Chart(df)
         .transform_calculate(
-            quantile_low=f"round((0.5 - interval / 2) * 1000) / 1000",
-            quantile_upp=f"round((0.5 + interval / 2) * 1000) / 1000",
+            quantile_low="round((0.5 - interval / 2) * 1000) / 1000",
+            quantile_upp="round((0.5 + interval / 2) * 1000) / 1000",
             quantile_low_col="'q_' + datum.quantile_low",
             quantile_upp_col="'q_' + datum.quantile_upp",
         )
         .transform_calculate(
-            y_pred_low=f"datum[datum.quantile_low_col]",
-            y_pred_upp=f"datum[datum.quantile_upp_col]",
+            y_pred_low="datum[datum.quantile_low_col]",
+            y_pred_upp="datum[datum.quantile_upp_col]",
         )
     )
 
@@ -138,15 +135,15 @@ def plot_multitargets(df, legend):
     )
 
     line = base.mark_line(color="black", size=3).encode(
-        x=alt.X("x:Q", scale=alt.Scale(nice=False), title="x"),
-        y=alt.Y("y_pred:Q", title="y"),
+        x=alt.X("x:Q", scale=alt.Scale(nice=False), title="X"),
+        y=alt.Y("y_pred:Q", title="Y"),
         color=color,
         tooltip=tooltip,
     )
 
     area = base.mark_area(opacity=0.25).encode(
-        x=alt.X("x:Q", scale=alt.Scale(nice=False), title="x"),
-        y=alt.Y("y_pred_low:Q", title="y"),
+        x=alt.X("x:Q", scale=alt.Scale(nice=False), title="X"),
+        y=alt.Y("y_pred_low:Q", title="Y"),
         y2=alt.Y2("y_pred_upp:Q", title=None),
         color=color,
         tooltip=tooltip,
@@ -154,7 +151,7 @@ def plot_multitargets(df, legend):
 
     chart = (
         (points + area + line)
-        .add_params(interval_selection, click)
+        .add_params(interval_val, click)
         .configure_range(category=alt.RangeScheme(list(legend.values())))
         .properties(
             height=400,
diff --git a/examples/plot_quantile_ranks.py b/examples/plot_quantile_ranks.py
index 746052e..730d333 100644
--- a/examples/plot_quantile_ranks.py
+++ b/examples/plot_quantile_ranks.py
@@ -3,10 +3,10 @@
 ===================================================
 
 This example demonstrates the use of quantile regression forest (QRF) quantile
-ranks to identify potential outlier samples. In this scenario, we train a QRF
-model on a toy dataset and use quantile ranks to highlight values that deviate
-significantly from the expected range. Potential outliers are defined as
-points whose quantile rank falls outside the specified threshold interval
+ranks to identify potential outliers in a dataset. In this scenario, we train
+a QRF model on a toy dataset and use quantile ranks to highlight values that
+deviate significantly from the expected range. Potential outliers are defined
+as points whose quantile rank falls outside the specified threshold interval
 around the median.
 """
 
@@ -33,37 +33,33 @@ def make_toy_dataset(n_samples, bounds, random_seed=0):
 
 X, y = make_toy_dataset(n_samples, bounds, random_seed=random_seed)
 
-params = {"max_samples_leaf": None, "min_samples_leaf": 50, "random_state": random_seed}
-qrf = RandomForestQuantileRegressor(**params).fit(X, y)
+qrf = RandomForestQuantileRegressor(
+    max_samples_leaf=None,
+    min_samples_leaf=50,
+    random_state=random_seed,
+).fit(X, y)
 
 y_pred = qrf.predict(X, quantiles=0.5)
 
 # Get the quantile rank for all samples.
 y_ranks = qrf.quantile_ranks(X, y)
 
-df = pd.DataFrame(
-    {
-        "x": X.reshape(-1),
-        "y": y,
-        "y_pred": y_pred,
-        "y_rank": y_ranks,
-    }
-)
+df = pd.DataFrame({"x": X.reshape(-1), "y": y, "y_pred": y_pred, "y_rank": y_ranks})
 
 
 def plot_fit_and_ranks(df):
     # Slider for varying the interval that defines the upper and lower quantile rank thresholds.
     slider = alt.binding_range(min=0, max=1, step=0.01, name="Rank Interval Threshold: ")
-    interval_selection = alt.param("interval_selection", bind=slider, value=0.05)
+    interval_val = alt.param(value=0.05, bind=slider, name="interval")
 
     click = alt.selection_point(fields=["outlier"], bind="legend")
 
     base = alt.Chart(df)
 
     points = (
-        base.add_params(interval_selection, click)
+        base.add_params(interval_val, click)
         .transform_calculate(
-            outlier="abs(datum.y_rank - 0.5) > (0.5 - interval_selection / 2) ? 'Yes' : 'No'"
+            outlier="abs(datum.y_rank - 0.5) > (0.5 - interval / 2) ? 'Yes' : 'No'"
         )
         .mark_circle(opacity=0.5, size=25)
         .encode(
@@ -79,8 +75,8 @@ def plot_fit_and_ranks(df):
                 alt.value("lightgray"),
             ),
             tooltip=[
-                alt.Tooltip("x:Q", format=".3f", title="x"),
-                alt.Tooltip("y:Q", format=".3f", title="f(x)"),
+                alt.Tooltip("x:Q", format=",.3f", title="X"),
+                alt.Tooltip("y:Q", format=",.3f", title="Y"),
                 alt.Tooltip("y_rank:Q", format=".3f", title="Quantile Rank"),
                 alt.Tooltip("outlier:N", title="Outlier"),
             ],
@@ -88,8 +84,13 @@ def plot_fit_and_ranks(df):
     )
 
     line_pred = base.mark_line(color="#006aff", size=4).encode(
-        x=alt.X("x:Q", axis=alt.Axis(title="x")),
-        y=alt.Y("y_pred:Q", axis=alt.Axis(title="f(x)")),
+        x=alt.X("x:Q", axis=alt.Axis(title="X")),
+        y=alt.Y("y_pred:Q", axis=alt.Axis(title="Y")),
+        tooltip=[
+            alt.Tooltip("x:Q", format=",.3f", title="X"),
+            alt.Tooltip("y:Q", format=",.3f", title="Y"),
+            alt.Tooltip("y_pred:Q", format=",.3f", title="Predicted Y"),
+        ],
     )
 
     # For desired legend labels.
diff --git a/examples/plot_quantile_vs_standard.py b/examples/plot_quantile_vs_standard.py
index 56aa5ea..ee7ccd8 100755
--- a/examples/plot_quantile_vs_standard.py
+++ b/examples/plot_quantile_vs_standard.py
@@ -3,12 +3,12 @@
 ==============================================
 
 This example compares the predictions generated by a quantile regression
-forest (QRF) and a standard random forest regressor on a synthetic
+forest (QRF) and a standard random forest regressor (RF) on a synthetic
 right-skewed dataset. In a right-skewed distribution, the mean is to the right
-of the median. As illustrated by a greater overlap in the frequencies of the
-actual and predicted values, the median (quantile = 0.5) predicted by a
-quantile regressor can be a more reliable estimator of a skewed distribution
-than the mean.
+of the median. This example demonstrates how the median (quantile = 0.5)
+predicted by a quantile regressor (QRF) can be a more reliable estimator than
+the mean predicted by a standard random forest when dealing with skewed
+distributions.
 """
 
 import altair as alt
@@ -24,18 +24,18 @@
 random_seed = 0
 rng = check_random_state(random_seed)
 
-quantiles = np.linspace(0, 1, num=21, endpoint=True).round(2).tolist()
+quantiles = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist()
 
 # Create right-skewed dataset.
 n_samples = 5000
-a, loc, scale = 5, -1, 1
+a, loc, scale = 7, -1, 1
 skewnorm_rv = sp.stats.skewnorm(a, loc, scale)
 skewnorm_rv.random_state = rng
 y = skewnorm_rv.rvs(n_samples)
 X = rng.randn(n_samples, 2) * y.reshape(-1, 1)
 
-regr_rf = RandomForestRegressor(n_estimators=10, random_state=random_seed)
-regr_qrf = RandomForestQuantileRegressor(n_estimators=10, random_state=random_seed)
+regr_rf = RandomForestRegressor(random_state=random_seed)
+regr_qrf = RandomForestQuantileRegressor(random_state=random_seed)
 
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_seed)
 
@@ -53,14 +53,15 @@
 
 
 def format_frac(fraction):
-    formatted = ("%.3g" % fraction).rstrip("0").rstrip(".")
-    return formatted if formatted else "0"
+    return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"
 
 
-df = pd.DataFrame({"actual": y_test, "rf": y_pred_rf}).join(
-    pd.DataFrame(
-        {f"qrf_{format_frac(q)}": y_pred_qrf[..., idx] for idx, q in enumerate(quantiles)}
-    )
+df = pd.DataFrame(
+    {
+        "actual": y_test,
+        "rf": y_pred_rf,
+        **{f"qrf_{format_frac(q)}": y_pred_qrf[..., idx] for idx, q in enumerate(quantiles)},
+    }
 )
 
 
@@ -72,13 +73,13 @@ def plot_prediction_histograms(df, legend):
         step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1),
         name="Predicted Quantile: ",
     )
-    quantile_selection = alt.param(value=0.5, bind=slider, name="quantile")
+    quantile_val = alt.param(value=0.5, bind=slider, name="quantile")
 
     click = alt.selection_point(fields=["label"], bind="legend")
 
     chart = (
         alt.Chart(df)
-        .add_params(quantile_selection, click)
+        .add_params(quantile_val, click)
         .transform_calculate(qrf_col="'qrf_' + quantile")
         .transform_calculate(qrf="datum[datum.qrf_col]")
         .transform_calculate(calculate="round(datum.actual * 10) / 10", as_="Actual")
@@ -112,7 +113,7 @@ def plot_prediction_histograms(df, legend):
         .properties(
             height=400,
             width=650,
-            title="Distribution of RF vs. QRF Predictions on Right-Skewed Distribution",
+            title="Distribution of QRF vs. RF Predictions on Right-Skewed Distribution",
         )
     )
     return chart
diff --git a/examples/plot_treeshap_example.py b/examples/plot_treeshap_example.py
index 6bc13b2..6d05c2f 100644
--- a/examples/plot_treeshap_example.py
+++ b/examples/plot_treeshap_example.py
@@ -3,14 +3,15 @@
 ==========================================
 
 This example demonstrates the use of SHAP (SHapley Additive exPlanations) to
-explain the predictions of a quantile regression forest (QRF) model. We
-generate a waterfall plot using the `Tree SHAP
+explain the predictions of a quantile regression forest (QRF) model. SHAP
+values offer insight into the contribution of each feature to the predicted
+value. We generate a waterfall plot using the `Tree SHAP
 <https://shap.readthedocs.io/en/latest/generated/shap.TreeExplainer.html#shap.TreeExplainer>`_
-method to visualize the explanations for a single instance across multiple
+method to visualize the explanations for a single sample across multiple
 quantiles. In a QRF, quantile estimation is applied during inference, meaning
-the selected quantile affects the expected value of the model output but does
-not alter the feature contributions. This plot allows us to observe how the
-SHAP explanations vary with different quantile choices.
+the selected quantile affects the specific value of the model output but does
+not alter the underlying feature contributions. This plot allows us to observe
+how the SHAP explanations vary with different quantile choices.
 """
 
 import altair as alt
@@ -29,6 +30,27 @@
 
 
 def get_shap_values(qrf, X, quantile=0.5, **kwargs):
+    """Get QRF model SHAP values using Tree SHAP.
+
+    Note that, at the time of writing, SHAP does not natively provide support
+    for models in which quantile estimation is applied during inference, such
+    as with QRFs. To address this limitation, this function adjusts the
+    explainer outputs based on the difference between the mean and quantile
+    predictions for each sample, creating new base and expected values.
+
+    Parameters
+    ----------
+    qrf : BaseForestQuantileRegressor
+        Quantile forest model object.
+    X : array-like, of shape (n_samples, n_features)
+        Input dataset for which SHAP values are calculated.
+    quantiles : float, default=0.5
+        Quantile for which SHAP values are calculated.
+
+    Returns
+    -------
+    shap.Explanation: The SHAP values explanation object.
+    """
     # Define a custom tree model.
     model = {
         "objective": qrf.criterion,
@@ -60,6 +82,7 @@ def get_shap_values(qrf, X, quantile=0.5, **kwargs):
 
 
 def get_shap_value_by_index(shap_values, index):
+    """Get QRF model SHAP values for a specified index."""
     shap_values_i = shap_values[index]
     shap_values_i.base_values = shap_values.base_values[index]
     return shap_values_i
@@ -75,6 +98,10 @@ def get_shap_value_by_index(shap_values, index):
 qrf = RandomForestQuantileRegressor(random_state=random_seed)
 qrf.fit(X_train, y_train)
 
+shap_values_list = [
+    get_shap_value_by_index(get_shap_values(qrf, X_test, quantile=q), test_idx) for q in quantiles
+]
+
 df = pd.concat(
     [
         pd.DataFrame(
@@ -89,8 +116,7 @@ def get_shap_value_by_index(shap_values, index):
                 "quantile": q,
             }
         )
-        for q in quantiles
-        for shap_i in [get_shap_value_by_index(get_shap_values(qrf, X_test, quantile=q), test_idx)]
+        for shap_i, q in zip(shap_values_list, quantiles)
     ],
     ignore_index=True,
 )
@@ -106,7 +132,7 @@ def plot_shap_waterfall_with_quantiles(df, height=300):
         step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1),
         name="Predicted Quantile: ",
     )
-    quantile_selection = alt.param(value=0.5, bind=slider, name="quantile")
+    quantile_val = alt.param(value=0.5, bind=slider, name="quantile")
 
     df_grouped = (
         df.groupby("quantile")[df.columns.tolist()]
@@ -187,7 +213,8 @@ def plot_shap_waterfall_with_quantiles(df, height=300):
             alt.datum["shap_value"] > 0, alt.value("#ff0251"), alt.value("#018bfb")
         ),
         tooltip=[
-            alt.Tooltip("feature_name:N", title="Feature"),
+            alt.Tooltip("feature_name:N", title="Feature Name"),
+            alt.Tooltip("feature_value:Q", title="Feature Value"),
             alt.Tooltip("shap_value:Q", format=",.2f", title="SHAP Value"),
             alt.Tooltip("start:Q", format=",.2f", title="SHAP Start"),
             alt.Tooltip("end:Q", format=",.2f", title="SHAP End"),
@@ -270,7 +297,7 @@ def plot_shap_waterfall_with_quantiles(df, height=300):
 
     chart = (
         (bars + points + text + rules)
-        .add_params(quantile_selection)
+        .add_params(quantile_val)
         .configure_view(strokeOpacity=0)
         .properties(
             width=600, height=height, title="Waterfall Plot of SHAP Values for QRF Predictions"