Skip to content

Commit

Permalink
Update examples
Browse files Browse the repository at this point in the history
  • Loading branch information
reidjohnson committed Feb 20, 2024
1 parent 722f8ee commit 6a1c9d0
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 237 deletions.
6 changes: 3 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,21 @@ quantile-forest
:link-type: ref
:link-alt: Getting started

A guide that provides installation instructions and information on testing the package.
A guide that provides installation requirements and instructions, as well as procedures for developers.

.. grid-item-card:: User Guide
:link: user_guide
:link-type: ref
:link-alt: User guide

Check out the User Guide for information on the key concepts behind quantile forests.
Information on the key concepts behind quantile forests and how they apply to this package.

.. grid-item-card:: Examples
:link: example-gallery
:link-type: ref
:link-alt: Examples

General-purpose, introductory and illustrative examples of using quantile forests.
Examples that demonstrate the broad applications and introductory concepts of quantile forests.

.. grid-item-card:: API
:link: api
Expand Down
36 changes: 17 additions & 19 deletions quantile_forest/tests/examples/plot_quantile_conformalized.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ def qrf_strategy(alpha, X_train, X_test, y_train, y_test):
# Calculate the point predictions on the test data.
y_pred = qrf.predict(X_test, quantiles="mean", aggregate_leaves_first=False)

return sort_y_values(y_test, y_pred, y_pis)
y_values = sort_y_values(y_test, y_pred, y_pis)

return pd.DataFrame(y_values).pipe(lambda x: x * 100_000).assign(strategy="qrf")


def cqr_strategy(alpha, X_train, X_test, y_train, y_test):
Expand Down Expand Up @@ -121,32 +123,32 @@ def cqr_strategy(alpha, X_train, X_test, y_train, y_test):
# Calculate the point predictions on the test data.
y_pred = qrf.predict(X_test, quantiles="mean", aggregate_leaves_first=False)

return sort_y_values(y_test, y_pred, y_pis)
y_values = sort_y_values(y_test, y_pred, y_pis)

return pd.DataFrame(y_values).pipe(lambda x: x * 100_000).assign(strategy="cqr")


# Get strategy outputs as a data frame.
args = (alpha, X_train, X_test, y_train, y_test)
df = pd.concat(
[
pd.DataFrame(qrf_strategy(*args)).pipe(lambda x: x * 100_000).assign(strategy="qrf"),
pd.DataFrame(cqr_strategy(*args)).pipe(lambda x: x * 100_000).assign(strategy="cqr"),
]
)
df = pd.concat([qrf_strategy(*args), cqr_strategy(*args)])

# Add coverage and mean width metrics to the data frame.
df = df.merge(
# Calculate coverage and width metrics.
metrics = (
df.groupby("strategy")
.apply(
lambda x: pd.Series(
lambda grp: pd.Series(
{
"coverage": coverage_score(x["y_test"], x["y_pred_low"], x["y_pred_upp"]),
"width": mean_width_score(x["y_pred_low"], x["y_pred_upp"]),
"coverage": coverage_score(grp["y_test"], grp["y_pred_low"], grp["y_pred_upp"]),
"width": mean_width_score(grp["y_pred_low"], grp["y_pred_upp"]),
}
)
)
.reset_index()
)

# Merge the metrics into the data frame.
df = df.merge(metrics, on="strategy", how="left")


def plot_prediction_intervals(df, domain):
click = alt.selection_point(fields=["y_label"], bind="legend")
Expand Down Expand Up @@ -222,9 +224,7 @@ def plot_prediction_intervals(df, domain):
)

text_coverage = (
base.transform_aggregate(
coverage="mean(coverage)", width="mean(width)", groupby=["strategy"]
)
base.transform_aggregate(coverage="mean(coverage)", groupby=["strategy"])
.transform_calculate(
coverage_text=(
f"'Coverage: ' + format({alt.datum['coverage'] * 100}, '.1f') + '%'"
Expand All @@ -239,9 +239,7 @@ def plot_prediction_intervals(df, domain):
)
)
text_with = (
base.transform_aggregate(
coverage="mean(coverage)", width="mean(width)", groupby=["strategy"]
)
base.transform_aggregate(width="mean(width)", groupby=["strategy"])
.transform_calculate(
width_text=f"'Interval Width: ' + format({alt.datum['width']}, '$,d')"
)
Expand Down
100 changes: 43 additions & 57 deletions quantile_forest/tests/examples/plot_quantile_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,127 +8,113 @@
"""

import altair as alt
import pandas as pd
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from quantile_forest import RandomForestQuantileRegressor

n_samples = 1000
bounds = [0, 10]
quantiles = [0.025, 0.5, 0.975]


def make_toy_dataset(n_samples, bounds, random_seed=0):
def make_toy_dataset(n_samples, bounds, add_noise=True, random_seed=0):
rng = np.random.RandomState(random_seed)

x = rng.uniform(*bounds, size=n_samples)
f = x * np.sin(x)

sigma = 0.25 + x / 10
noise = rng.lognormal(sigma=sigma) - np.exp(sigma**2 / 2)
y = f + noise
y = f + (noise if add_noise else 0)

return np.atleast_2d(x).T, y


X, y = make_toy_dataset(n_samples, bounds)
# Create noisy data for modeling and non-noisy function data for illustration.
X, y = make_toy_dataset(n_samples, bounds, add_noise=True, random_seed=0)
X_func, y_func = make_toy_dataset(n_samples, bounds, add_noise=False, random_seed=0)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

X_sampled = np.atleast_2d(np.linspace(*bounds, n_samples)).T
y_sampled = (X_sampled * np.sin(X_sampled)).reshape(-1)

qrf = RandomForestQuantileRegressor(max_depth=3, min_samples_leaf=5, random_state=0)
qrf.fit(X_train, y_train)

y_pred = qrf.predict(X_sampled, quantiles=[0.025, 0.5, 0.975])
y_pred_func = qrf.predict(X_func, quantiles=quantiles)
y_pred_test = qrf.predict(X_test, quantiles=quantiles)

df_train = pd.DataFrame(
df = pd.DataFrame(
{
"X_sampled": X_sampled.reshape(-1),
"y_sampled": y_sampled,
"y_pred_low": y_pred[:, 0],
"y_pred_med": y_pred[:, 1],
"y_pred_upp": y_pred[:, 2],
"X": np.concatenate([X_func.reshape(-1), X_test.reshape(-1)]),
"y": np.concatenate([y_func, y_test]),
"y_pred": np.concatenate([y_pred_func[:, 1], y_pred_test[:, 1]]),
"y_pred_low": np.concatenate([y_pred_func[:, 0], y_pred_test[:, 0]]),
"y_pred_upp": np.concatenate([y_pred_func[:, 2], y_pred_test[:, 2]]),
"test": [False] * len(y_func) + [True] * len(y_test),
}
)

df_test = pd.DataFrame(
{
"X_test": X_test.reshape(-1),
"y_test": y_test,
}
)


def plot_fit_and_intervals(df_train, df_test):
df_train = df_train.copy()
df_test = df_test.copy()

df_train = df_train.assign(
**{
"y_true_label": "f(x) = x sin(x)",
"y_pred_label": "Predicted Median",
"y_area_label": "Predicted 95% Interval",
}
)

df_test["point_label"] = "Test Observations"

def plot_fit_and_intervals(df):
points = (
alt.Chart(df_test)
alt.Chart(df.assign(**{"point_label": "Test Observations"}))
.transform_filter(alt.datum["test"]) # filter to test data
.mark_circle(color="#f2a619")
.encode(
x=alt.X("X_test:Q", scale=alt.Scale(nice=False)),
y=alt.Y("y_test:Q", title=""),
x=alt.X("X:Q", scale=alt.Scale(nice=False)),
y=alt.Y("y:Q", title=""),
color=alt.Color("point_label:N", scale=alt.Scale(range=["#f2a619"]), title=None),
tooltip=[
alt.Tooltip("X_test:Q", format=",.3f", title="X"),
alt.Tooltip("y_test:Q", format=",.3f", title="Y"),
alt.Tooltip("X:Q", format=",.3f", title="X"),
alt.Tooltip("y:Q", format=",.3f", title="Y"),
],
)
)

line_true = (
alt.Chart(df_train)
alt.Chart(df.assign(**{"y_true_label": "f(x) = x sin(x)"}))
.transform_filter(~alt.datum["test"]) # filter to non-test rows (the noise-free function data)
.mark_line(color="black", size=3)
.encode(
x=alt.X("X_sampled:Q", scale=alt.Scale(nice=False)),
y=alt.Y("y_sampled:Q", title="f(x)"),
x=alt.X("X:Q", scale=alt.Scale(nice=False)),
y=alt.Y("y:Q", title="f(x)"),
color=alt.Color("y_true_label:N", scale=alt.Scale(range=["black"]), title=None),
tooltip=[
alt.Tooltip("X_sampled:Q", format=",.3f", title="X"),
alt.Tooltip("y_sampled:Q", format=",.3f", title="Y"),
alt.Tooltip("X:Q", format=",.3f", title="X"),
alt.Tooltip("y:Q", format=",.3f", title="Y"),
],
)
)

line_pred = (
alt.Chart(df_train)
alt.Chart(df.assign(**{"y_pred_label": "Predicted Median"}))
.transform_filter(~alt.datum["test"]) # filter to non-test rows (the noise-free function data)
.mark_line(color="#006aff", size=5)
.encode(
x=alt.X("X_sampled:Q", scale=alt.Scale(nice=False)),
y=alt.Y("y_pred_med:Q", title=""),
x=alt.X("X:Q", scale=alt.Scale(nice=False)),
y=alt.Y("y_pred:Q", title=""),
color=alt.Color("y_pred_label:N", scale=alt.Scale(range=["#006aff"]), title=None),
tooltip=[
alt.Tooltip("X_sampled:Q", format=",.3f", title="X"),
alt.Tooltip("y_sampled:Q", format=",.3f", title="Y"),
alt.Tooltip("y_pred_med:Q", format=",.3f", title="Predicted Y"),
alt.Tooltip("X:Q", format=",.3f", title="X"),
alt.Tooltip("y:Q", format=",.3f", title="Y"),
alt.Tooltip("y_pred:Q", format=",.3f", title="Predicted Y"),
],
)
)

area_pred = (
alt.Chart(df_train)
alt.Chart(df)
.transform_filter(~alt.datum["test"]) # filter to non-test rows (the noise-free function data)
.mark_area(color="#e0f2ff", opacity=0.8)
.encode(
x=alt.X("X_sampled:Q", scale=alt.Scale(nice=False), title="x"),
x=alt.X("X:Q", scale=alt.Scale(nice=False), title="x"),
y=alt.Y("y_pred_low:Q", title=""),
y2=alt.Y2("y_pred_upp:Q", title=None),
tooltip=[
alt.Tooltip("X_sampled:Q", format=",.3f", title="X"),
alt.Tooltip("y_sampled:Q", format=",.3f", title="Y"),
alt.Tooltip("y_pred_med:Q", format=",.3f", title="Predicted Y"),
alt.Tooltip("X:Q", format=",.3f", title="X"),
alt.Tooltip("y:Q", format=",.3f", title="Y"),
alt.Tooltip("y_pred:Q", format=",.3f", title="Predicted Y"),
alt.Tooltip("y_pred_low:Q", format=",.3f", title="Predicted Lower Y"),
alt.Tooltip("y_pred_upp:Q", format=",.3f", title="Predicted Upper Y"),
],
Expand All @@ -153,5 +139,5 @@ def plot_fit_and_intervals(df_train, df_test):
return chart


chart = plot_fit_and_intervals(df_train, df_test)
chart = plot_fit_and_intervals(df)
chart
43 changes: 8 additions & 35 deletions quantile_forest/tests/examples/plot_quantile_extrapolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,12 @@ def get_test_X(X):
X_train, y_train = get_train_Xy(X, y, extrap_min_idx, extrap_max_idx)
X_test = get_test_X(X)

qrf = RandomForestQuantileRegressor(
max_samples_leaf=None,
min_samples_leaf=10,
random_state=0,
)
qrf = RandomForestQuantileRegressor(max_samples_leaf=None, min_samples_leaf=10, random_state=0)
qrf.fit(np.expand_dims(X_train, axis=-1), y_train)

# Get the median and the lower/upper bounds of the 95% prediction interval.
y_pred = qrf.predict(X_test, quantiles=[0.025, 0.5, 0.975])


df = pd.DataFrame(
{
"X_true": X,
Expand All @@ -77,35 +72,13 @@ def get_test_X(X):
"y_pred": y_pred[:, 1],
"y_pred_low": y_pred[:, 0],
"y_pred_upp": y_pred[:, 2],
"train": np.concatenate(
[
np.zeros(extrap_min_idx),
np.ones(extrap_max_idx - extrap_min_idx),
np.zeros(len(y) - extrap_max_idx),
]
),
"test_left": np.concatenate(
[
np.ones(extrap_min_idx),
np.zeros(len(y) - extrap_min_idx),
]
),
"test_right": np.concatenate(
[
np.zeros(extrap_max_idx),
np.ones(len(y) - extrap_max_idx),
]
),
"test_left": [True] * extrap_min_idx + [False] * (len(y) - extrap_min_idx),
"test_right": [False] * extrap_max_idx + [True] * (len(y) - extrap_max_idx),
}
)


def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None):
df = df.copy()

df["point_label"] = "Observations"
df["line_label"] = func_str

x_scale = None
if x_domain is not None:
x_scale = alt.Scale(domain=x_domain, nice=False, padding=0)
Expand All @@ -130,7 +103,7 @@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None
alt.Tooltip("y_pred_upp:Q", format=",.3f", title="Predicted Upper Y"),
]

base = alt.Chart(df)
base = alt.Chart(df.assign(**{"point_label": "Observations", "line_label": func_str}))

points_true = base.mark_circle(size=20).encode(
x=alt.X("X_true:Q", scale=x_scale, title="x"),
Expand Down Expand Up @@ -195,17 +168,17 @@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None
}

chart1 = plot_extrapolations(
df.query(f"train == 1"), title="Prediction Intervals on Training Data", **kwargs
df.query("~(test_left | test_right)"), title="Prediction Intervals on Training Data", **kwargs
)
chart2 = alt.layer(
plot_extrapolations(
df.query(f"(train == 1)"),
df.query("~(test_left | test_right)"),
title="Prediction Intervals with Extrapolated Values",
legend=True,
**kwargs,
).resolve_scale(color="independent"),
plot_extrapolations(df.query(f"(test_left == 1)").assign(extrapolate=True), **kwargs),
plot_extrapolations(df.query(f"(test_right == 1)").assign(extrapolate=True), **kwargs),
plot_extrapolations(df.query("test_left").assign(extrapolate=True), **kwargs),
plot_extrapolations(df.query("test_right").assign(extrapolate=True), **kwargs),
)
chart = chart1 | chart2
chart
Loading

0 comments on commit 6a1c9d0

Please sign in to comment.