Skip to content

Commit

Permalink
deploy: b40f858
Browse files Browse the repository at this point in the history
  • Loading branch information
reidjohnson committed Aug 17, 2024
1 parent 30a3d13 commit 184d350
Show file tree
Hide file tree
Showing 10 changed files with 62 additions and 83 deletions.
28 changes: 18 additions & 10 deletions _sources/gallery/plot_proximity_counts.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ conditions.
n_test_samples = 25
noise_std = 0.1

pixel_dim = (8, 8) # pixel dimensions (width and height)
pixel_scale = 100 # scale multiplier for combining clean and noisy values

# Load the Digits dataset.
X, y = datasets.load_digits(return_X_y=True, as_frame=True)

Expand Down Expand Up @@ -98,7 +101,7 @@ conditions.
)
df = (
combine_floats(X_test, X_test_noisy) # combine to reduce transmitted data
combine_floats(X_test, X_test_noisy, scale=pixel_scale) # combine to reduce transmitted data
.join(y_test)
.reset_index()
.join(df_prox)
Expand All @@ -116,7 +119,7 @@ conditions.
# Create a data frame for looking up training proximities.
df_lookup = (
combine_floats(X_train, X_train_noisy) # combine to reduce transmitted data
combine_floats(X_train, X_train_noisy, scale=pixel_scale) # combine to reduce transmitted data
.assign(**{"index": np.arange(len(X_train))})
.join(y_train)
)
Expand All @@ -125,14 +128,15 @@ conditions.
def plot_digits_proximities(
df,
df_lookup,
pixel_dim=(8, 8),
pixel_scale=100,
n_prox=25,
n_prox_per_row=5,
subplot_spacing=10,
height=225,
width=225,
):
pixel_scale = 100
pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(8) for x in range(8)]
pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(pixel_dim[1]) for x in range(pixel_dim[0])]
pixel_x = "split(datum.pixel, '_')[2]"
pixel_y = "split(datum.pixel, '_')[1]"

Expand Down Expand Up @@ -240,7 +244,7 @@ conditions.
return chart


chart = plot_digits_proximities(df, df_lookup)
chart = plot_digits_proximities(df, df_lookup, pixel_dim=pixel_dim, pixel_scale=pixel_scale)
chart


Expand All @@ -261,6 +265,9 @@ conditions.
n_test_samples = 25
noise_std = 0.1
pixel_dim = (8, 8) # pixel dimensions (width and height)
pixel_scale = 100 # scale multiplier for combining clean and noisy values
# Load the Digits dataset.
X, y = datasets.load_digits(return_X_y=True, as_frame=True)
Expand Down Expand Up @@ -318,7 +325,7 @@ conditions.
)
df = (
combine_floats(X_test, X_test_noisy) # combine to reduce transmitted data
combine_floats(X_test, X_test_noisy, scale=pixel_scale) # combine to reduce transmitted data
.join(y_test)
.reset_index()
.join(df_prox)
Expand All @@ -336,7 +343,7 @@ conditions.
# Create a data frame for looking up training proximities.
df_lookup = (
combine_floats(X_train, X_train_noisy) # combine to reduce transmitted data
combine_floats(X_train, X_train_noisy, scale=pixel_scale) # combine to reduce transmitted data
.assign(**{"index": np.arange(len(X_train))})
.join(y_train)
)
Expand All @@ -345,14 +352,15 @@ conditions.
def plot_digits_proximities(
df,
df_lookup,
pixel_dim=(8, 8),
pixel_scale=100,
n_prox=25,
n_prox_per_row=5,
subplot_spacing=10,
height=225,
width=225,
):
pixel_scale = 100
pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(8) for x in range(8)]
pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(pixel_dim[1]) for x in range(pixel_dim[0])]
pixel_x = "split(datum.pixel, '_')[2]"
pixel_y = "split(datum.pixel, '_')[1]"
Expand Down Expand Up @@ -460,5 +468,5 @@ conditions.
return chart
chart = plot_digits_proximities(df, df_lookup)
chart = plot_digits_proximities(df, df_lookup, pixel_dim=pixel_dim, pixel_scale=pixel_scale)
chart
22 changes: 10 additions & 12 deletions _sources/gallery/plot_quantile_extrapolation.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,10 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"

random_state = np.random.RandomState(0)
n_samples = 500
bounds = [0, 15]
extrap_frac = 0.25
bounds = [0, 15]
func = lambda x: x * np.sin(x)
func_str = "f(x) = x sin(x)"

quantiles = [0.025, 0.975, 0.5]
qrf_params = {"max_samples_leaf": None, "min_samples_leaf": 4, "random_state": random_state}

Expand Down Expand Up @@ -416,8 +415,8 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
)


def plot_qrf_vs_xtrapolation_comparison(df):
def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None):
def plot_qrf_vs_xtrapolation_comparison(df, func_str):
def plot_extrapolations(df, title="", legend=False, func_str="", x_domain=None, y_domain=None):
x_scale = None
if x_domain is not None:
x_scale = alt.Scale(domain=x_domain, nice=False, padding=0)
Expand Down Expand Up @@ -530,7 +529,7 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
chart = chart.properties(title=title, height=200, width=300)
return chart

kwargs = {"x_domain": [0, 15], "y_domain": [-15, 20]}
kwargs = {"func_str": func_str, "x_domain": [0, 15], "y_domain": [-15, 20]}
xtra_mapper = {"bb_mid": "y_pred", "bb_low": "y_pred_low", "bb_upp": "y_pred_upp"}

chart1 = alt.layer(
Expand Down Expand Up @@ -568,7 +567,7 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
return chart

chart = plot_qrf_vs_xtrapolation_comparison(df)
chart = plot_qrf_vs_xtrapolation_comparison(df, func_str)
chart


Expand All @@ -586,11 +585,10 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
random_state = np.random.RandomState(0)
n_samples = 500
bounds = [0, 15]
extrap_frac = 0.25
bounds = [0, 15]
func = lambda x: x * np.sin(x)
func_str = "f(x) = x sin(x)"
quantiles = [0.025, 0.975, 0.5]
qrf_params = {"max_samples_leaf": None, "min_samples_leaf": 4, "random_state": random_state}
Expand Down Expand Up @@ -964,8 +962,8 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
)
def plot_qrf_vs_xtrapolation_comparison(df):
def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None):
def plot_qrf_vs_xtrapolation_comparison(df, func_str):
def plot_extrapolations(df, title="", legend=False, func_str="", x_domain=None, y_domain=None):
x_scale = None
if x_domain is not None:
x_scale = alt.Scale(domain=x_domain, nice=False, padding=0)
Expand Down Expand Up @@ -1078,7 +1076,7 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
chart = chart.properties(title=title, height=200, width=300)
return chart
kwargs = {"x_domain": [0, 15], "y_domain": [-15, 20]}
kwargs = {"func_str": func_str, "x_domain": [0, 15], "y_domain": [-15, 20]}
xtra_mapper = {"bb_mid": "y_pred", "bb_low": "y_pred_low", "bb_upp": "y_pred_upp"}
chart1 = alt.layer(
Expand Down Expand Up @@ -1116,5 +1114,5 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
return chart
chart = plot_qrf_vs_xtrapolation_comparison(df)
chart = plot_qrf_vs_xtrapolation_comparison(df, func_str)
chart
26 changes: 8 additions & 18 deletions _sources/gallery/plot_quantile_multioutput.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,16 @@ for each target: the median line and the area defined by the interval points.
{
"signal": lambda x: np.log1p(x + 1),
"noise": lambda x: np.log1p(x) * random_state.uniform(size=len(x)),
"legend": {"0": "#f2a619"}, # plot legend value and color
},
{
"signal": lambda x: np.log1p(np.sqrt(x)),
"noise": lambda x: np.log1p(x / 2) * random_state.uniform(size=len(x)),
"legend": {"1": "#006aff"}, # plot legend value and color
},
]

legend = {
"0": "#f2a619",
"1": "#006aff",
}
legend = {k: v for f in funcs for k, v in f["legend"].items()}


def make_func_Xy(funcs, bounds, n_samples):
Expand All @@ -61,10 +60,6 @@ for each target: the median line and the area defined by the interval points.
return np.atleast_2d(x).T, y

def format_frac(fraction):
return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"


# Create the dataset with multiple target variables.
X, y = make_func_Xy(funcs, bounds, n_samples)

Expand All @@ -83,7 +78,7 @@ for each target: the median line and the area defined by the interval points.
"y_true": np.concatenate([f["signal"](X.squeeze()) for f in funcs]),
"y_pred": np.concatenate([y_pred[:, i, len(quantiles) // 2] for i in range(len(funcs))]),
"target": np.concatenate([[str(i)] * len(X) for i in range(len(funcs))]),
**{f"q_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)},
**{f"q_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)},
}
)
Expand Down Expand Up @@ -193,17 +188,16 @@ for each target: the median line and the area defined by the interval points.
{
"signal": lambda x: np.log1p(x + 1),
"noise": lambda x: np.log1p(x) * random_state.uniform(size=len(x)),
"legend": {"0": "#f2a619"}, # plot legend value and color
},
{
"signal": lambda x: np.log1p(np.sqrt(x)),
"noise": lambda x: np.log1p(x / 2) * random_state.uniform(size=len(x)),
"legend": {"1": "#006aff"}, # plot legend value and color
},
]
legend = {
"0": "#f2a619",
"1": "#006aff",
}
legend = {k: v for f in funcs for k, v in f["legend"].items()}
def make_func_Xy(funcs, bounds, n_samples):
Expand All @@ -214,10 +208,6 @@ for each target: the median line and the area defined by the interval points.
return np.atleast_2d(x).T, y
def format_frac(fraction):
return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"
# Create the dataset with multiple target variables.
X, y = make_func_Xy(funcs, bounds, n_samples)
Expand All @@ -236,7 +226,7 @@ for each target: the median line and the area defined by the interval points.
"y_true": np.concatenate([f["signal"](X.squeeze()) for f in funcs]),
"y_pred": np.concatenate([y_pred[:, i, len(quantiles) // 2] for i in range(len(funcs))]),
"target": np.concatenate([[str(i)] * len(X) for i in range(len(funcs))]),
**{f"q_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)},
**{f"q_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)},
}
)
Expand Down
18 changes: 4 additions & 14 deletions _sources/gallery/plot_quantile_vs_standard.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ distributions.
from quantile_forest import RandomForestQuantileRegressor

random_state = np.random.RandomState(0)
n_samples = 5000
quantiles = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist()

# Create right-skewed dataset.
n_samples = 5000
a, loc, scale = 7, -1, 1
skewnorm_rv = sp.stats.skewnorm(a, loc, scale)
skewnorm_rv.random_state = random_state
Expand All @@ -61,16 +61,11 @@ distributions.
"QRF (Median)": "#006aff",
}


def format_frac(fraction):
return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"


df = pd.DataFrame(
{
"actual": y_test,
"rf": y_pred_rf,
**{f"qrf_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)},
**{f"qrf_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)},
}
)
Expand Down Expand Up @@ -147,10 +142,10 @@ distributions.
from quantile_forest import RandomForestQuantileRegressor
random_state = np.random.RandomState(0)
n_samples = 5000
quantiles = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist()
# Create right-skewed dataset.
n_samples = 5000
a, loc, scale = 7, -1, 1
skewnorm_rv = sp.stats.skewnorm(a, loc, scale)
skewnorm_rv.random_state = random_state
Expand All @@ -174,16 +169,11 @@ distributions.
"QRF (Median)": "#006aff",
}
def format_frac(fraction):
return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"
df = pd.DataFrame(
{
"actual": y_test,
"rf": y_pred_rf,
**{f"qrf_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)},
**{f"qrf_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)},
}
)
Expand Down
2 changes: 1 addition & 1 deletion _static/_image_hashes.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"plot_quantile_interpolation.png": "a33104b33d451e87a61b63c0441186fc", "plot_predict_custom.png": "c165b6e2ec6d97e99ee0dcb308a33165", "plot_quantile_extrapolation.png": "254b782f7f280133734238b20b05b8e3", "plot_quantile_multioutput.png": "92bc73718cd417baa7c4c2bb3a738039", "plot_quantile_example.png": "5224ce52d0004de1b163f849090d146d", "plot_quantile_conformalized.png": "aa558540d703f9df5a841644c43a9a11", "plot_quantile_intervals.png": "55469191cce728f146eb6dc148a50a62", "plot_quantile_vs_standard.png": "951d2aa38319a848a163dae38a72a9db", "plot_treeshap_example.png": "f3315a150f9bb5f55c9c0cfaefcedf4a", "plot_proximity_counts.png": "dbb1c0dc04c4809529f6cd6cfc3c0649", "plot_quantile_ranks.png": "765687aea83e00d18de5404bf6210c9f", "plot_huggingface_model.png": "54f6ba0d8875e499e02114fd00fb3197"}
{"plot_quantile_interpolation.png": "a33104b33d451e87a61b63c0441186fc", "plot_predict_custom.png": "c165b6e2ec6d97e99ee0dcb308a33165", "plot_quantile_extrapolation.png": "24853dd6e2acbf54d608fa7ee6953cf7", "plot_quantile_multioutput.png": "cb1bed13ecf47420f01a2a5d181aeb4c", "plot_quantile_example.png": "5224ce52d0004de1b163f849090d146d", "plot_quantile_conformalized.png": "aa558540d703f9df5a841644c43a9a11", "plot_quantile_intervals.png": "55469191cce728f146eb6dc148a50a62", "plot_quantile_vs_standard.png": "72d33d65c1a254072fcce892e2236d1d", "plot_treeshap_example.png": "f3315a150f9bb5f55c9c0cfaefcedf4a", "plot_proximity_counts.png": "3ef1800f1d1d128c966f3ca868b76897", "plot_quantile_ranks.png": "765687aea83e00d18de5404bf6210c9f", "plot_huggingface_model.png": "54f6ba0d8875e499e02114fd00fb3197"}
Loading

0 comments on commit 184d350

Please sign in to comment.