diff --git a/examples/plot_huggingface_model.py b/examples/plot_huggingface_model.py index 7d1c373..de01c67 100755 --- a/examples/plot_huggingface_model.py +++ b/examples/plot_huggingface_model.py @@ -2,14 +2,14 @@ Using a Trained QRF Model via Hugging Face Hub ============================================== -An example of downloading a trained quantile regression forest (QRF) model -from Hugging Face Hub and using it to estimate new quantiles. Here, a QRF has -been trained with default parameters on a train-test split of the California -housing dataset and uploaded to Hugging Face Hub. The model is downloaded and -inference is performed over several quantiles for each instance in the dataset. -The estimates are visualized by the latitude and longitude of each instance. -The model used is available on Hugging Face Hub: -https://huggingface.co/quantile-forest/california-housing-example +This example demonstrates how to download a trained quantile regression forest +(QRF) model from Hugging Face Hub and use it to estimate new quantiles. In +this scenario, a QRF has been trained with default parameters on a train-test +split of the California housing dataset and uploaded to Hugging Face Hub. The +model is downloaded, and inference is performed over several quantiles for +each instance in the dataset. The estimates are visualized by the latitude and +longitude of each instance. The model used is available on Hugging Face Hub +`here <https://huggingface.co/quantile-forest/california-housing-example>`_. 
""" import os @@ -170,7 +170,6 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo"): .melt(id_vars=["index"], var_name="quantile", value_name="value") .merge(X[["Latitude", "Longitude", "Population"]].reset_index(), on="index", how="right") ) -print(df) def plot_quantiles_by_latlon(df, quantiles): diff --git a/examples/plot_predict_custom.py b/examples/plot_predict_custom.py index d70f563..03ba49a 100755 --- a/examples/plot_predict_custom.py +++ b/examples/plot_predict_custom.py @@ -2,12 +2,13 @@ Computing User-Specified Functions with QRFs ============================================ -An example that demonstrates a way of extracting the empirical distribution -from a quantile regression forest (QRF) for one or more samples in order to -calculate a user-specified function of interest. While a QRF is designed to -estimate quantiles from the empirical distribution calculated for each sample, -in many cases it may be useful to use the empirical distribution to calculate -other quantities of interest. Here, we calculate the ECDF for a test sample. +This example demonstrates how to extract the empirical distribution from a +quantile regression forest (QRF) for one or more samples to calculate a +user-specified function of interest. While a QRF is designed to estimate +quantiles from the empirical distribution calculated for each sample, it can +also be useful to use this empirical distribution to calculate other +quantities of interest. Here, we calculate the empirical cumulative +distribution function (ECDF) for a test sample. 
""" from itertools import chain diff --git a/examples/plot_quantile_conformalized.py b/examples/plot_quantile_conformalized.py index 62689b6..34dc98a 100755 --- a/examples/plot_quantile_conformalized.py +++ b/examples/plot_quantile_conformalized.py @@ -2,15 +2,16 @@ QRFs for Conformalized Quantile Regression ========================================== -An example that demonstrates the use of a quantile regression forest (QRF) to +This example demonstrates the use of a quantile regression forest (QRF) to construct reliable prediction intervals using conformalized quantile -regression (CQR). CQR offers prediction intervals that attain valid coverage, -while QRF may require additional calibration for reliable interval estimates. -Notice that in this example, by using CQR we obtain a level of coverage (i.e., -percentage of samples that actaully fall within their prediction interval) -that is generally closer to the target level. Adapted from `"Prediction -intervals: Quantile Regression Forests" by Carl McBride Ellis -`_. +regression (CQR). CQR provides prediction intervals that attain valid +coverage, whereas QRF may require additional calibration for reliable interval +estimates. In this example, by using CQR, we achieve a level of coverage +(i.e., the percentage of samples that actually fall within their prediction +interval) that is generally closer to the target level. This example is +adapted from `"Prediction intervals: Quantile Regression Forests" +`_ +by Carl McBride Ellis. 
""" import altair as alt diff --git a/examples/plot_quantile_example.py b/examples/plot_quantile_example.py index 13e7e52..6a5a96b 100755 --- a/examples/plot_quantile_example.py +++ b/examples/plot_quantile_example.py @@ -2,9 +2,9 @@ Predicting with Quantile Regression Forests =========================================== -An example that demonstrates the use of a quantile regression forest to +This example demonstrates the use of a quantile regression forest (QRF) to predict a conditional median and prediction intervals. The example compares -the predictions to a ground truth function used to generate noisy samples. +the QRF predictions to a ground truth function used to generate noisy samples. """ import altair as alt diff --git a/examples/plot_quantile_extrapolation.py b/examples/plot_quantile_extrapolation.py index b0a9a5a..6f86e0f 100755 --- a/examples/plot_quantile_extrapolation.py +++ b/examples/plot_quantile_extrapolation.py @@ -2,16 +2,16 @@ Extrapolation with Quantile Regression Forests ============================================== -An example on a toy dataset that illustrates the prediction intervals produced -by a quantile regression forest (QRF) on extrapolated data. QRFs do not -intrinsically extrapolate outside of the bounds of the training data, an -important limitation of the approach; notice that the extrapolated interval -with a standard QRF fails to reliably cover values outside of those observed -in the training set. To overcome this limitation, we can use a procedure known -as Xtrapolation that can estimate the extrapolation bounds for samples that -fall outside the range of the training data. Adapted from "Extrapolation-Aware -Nonparametric Statistical Inference" by Niklas Pfister and Peter Bühlmann: -https://arxiv.org/abs/2402.09758. +This example uses a toy dataset to illustrate the prediction intervals +produced by a quantile regression forest (QRF) on extrapolated data. 
QRFs do +not intrinsically extrapolate outside the bounds of the training data, which +is an important limitation of the approach. Notice that the extrapolated +interval with a standard QRF fails to reliably cover values outside those +observed in the training set. To overcome this limitation, we can use a +procedure known as Xtrapolation, which can estimate the extrapolation bounds +for samples that fall outside the range of the training data. This example is +adapted from `"Extrapolation-Aware Nonparametric Statistical Inference" +<https://arxiv.org/abs/2402.09758>`_ by Niklas Pfister and Peter Bühlmann. """ import math diff --git a/examples/plot_quantile_interpolation.py b/examples/plot_quantile_interpolation.py index 2cabb1f..7292fac 100755 --- a/examples/plot_quantile_interpolation.py +++ b/examples/plot_quantile_interpolation.py @@ -2,13 +2,12 @@ Comparing Quantile Interpolation Methods ======================================== -An example illustration of the interpolation methods that can be applied -during prediction when the desired quantile lies between two data points. In -this toy example, the forest estimator creates a single split that separates -samples 1–3 and samples 4–5, with quantiles calculated separately for these -two groups based on the actual sample values. The interpolation methods are -used when a calculated quantile does not precisely correspond to one of the -actual values. +This example illustrates the interpolation methods that can be applied during +prediction when the desired quantile lies between two data points. In this toy +example, the forest estimator creates a single split that separates samples +1–3 and samples 4–5, with quantiles calculated separately for these two groups +based on the actual sample values. The interpolation methods are used when a +calculated quantile does not precisely correspond to one of the actual values. 
""" import altair as alt diff --git a/examples/plot_quantile_intervals.py b/examples/plot_quantile_intervals.py index 13a0603..e85ad59 100755 --- a/examples/plot_quantile_intervals.py +++ b/examples/plot_quantile_intervals.py @@ -2,10 +2,10 @@ Quantile Regression Forests Prediction Intervals ================================================ -An example of how to use quantile regression forests to generate prediction -intervals on the California Housing dataset. Inspired by Figure 3 of -"Quantile Regression Forests" by Meinshausen: -https://jmlr.org/papers/v7/meinshausen06a.html. +This example demonstrates how to use quantile regression forests (QRF) to +generate prediction intervals on the California Housing dataset. Inspired by +Figure 3 of `"Quantile Regression Forests" +<https://jmlr.org/papers/v7/meinshausen06a.html>`_ by Meinshausen. """ import altair as alt diff --git a/examples/plot_quantile_multioutput.py b/examples/plot_quantile_multioutput.py index b922605..81716be 100755 --- a/examples/plot_quantile_multioutput.py +++ b/examples/plot_quantile_multioutput.py @@ -2,11 +2,11 @@ Multiple-Output Quantile Regression with QRFs ============================================= -An example on a toy dataset that demonstrates fitting a single quantile -regressor for multiple target variables. For each target, multiple quantiles -can be estimated simultaneously. In this example, the target variable has -two output values for each sample, with a single regressor used to estimate -three quantiles (the median and interval points) for each target. +This example demonstrates fitting a single quantile regressor for multiple +target variables on a toy dataset. For each target, multiple quantiles can be +estimated simultaneously. In this example, the target variable has two output +values for each sample, with a single regressor used to estimate three +quantiles (the median and interval points) for each target. 
""" import altair as alt diff --git a/examples/plot_quantile_vs_standard.py b/examples/plot_quantile_vs_standard.py index d8626bd..6aff07c 100755 --- a/examples/plot_quantile_vs_standard.py +++ b/examples/plot_quantile_vs_standard.py @@ -2,8 +2,8 @@ Quantile Regression Forests vs. Random Forests ============================================== -An example comparison between the estimates generated by a quantile regression -forest and a standard random forest regressor on a synthetic, right-skewed +This example compares the estimates generated by a quantile regression forest +(QRF) and a standard random forest regressor on a synthetic, right-skewed dataset. In a right-skewed distribution, the mean is to the right of the median. As illustrated by a greater overlap in the frequencies of the actual and predicted values, the median (quantile = 0.5) estimated by a quantile diff --git a/examples/plot_treeshap_example.py b/examples/plot_treeshap_example.py index d1f6826..44aaeb7 100644 --- a/examples/plot_treeshap_example.py +++ b/examples/plot_treeshap_example.py @@ -2,8 +2,8 @@ Tree SHAP with Quantile Regression Forests ========================================== -An example that demonstrates the use of SHAP (SHapley Additive exPlanations) -to explain the predictions of a quantile regression forest (QRF) model. We +This example demonstrates the use of SHAP (SHapley Additive exPlanations) to +explain the predictions of a quantile regression forest (QRF) model. We generate a waterfall plot using the `Tree SHAP `_ method to visualize the explanations for a single instance across multiple