From b40f858a9134309c3c391e555d380d4e1472e315 Mon Sep 17 00:00:00 2001 From: Reid Johnson Date: Sat, 17 Aug 2024 13:59:33 -0700 Subject: [PATCH] Update example plots --- examples/plot_proximity_counts.py | 14 +++++++++----- examples/plot_quantile_extrapolation.py | 11 +++++------ examples/plot_quantile_multioutput.py | 13 ++++--------- examples/plot_quantile_vs_standard.py | 9 ++------- 4 files changed, 20 insertions(+), 27 deletions(-) diff --git a/examples/plot_proximity_counts.py b/examples/plot_proximity_counts.py index e084b8b..ce48d38 100644 --- a/examples/plot_proximity_counts.py +++ b/examples/plot_proximity_counts.py @@ -28,6 +28,9 @@ n_test_samples = 25 noise_std = 0.1 +pixel_dim = (8, 8) # pixel dimensions (width and height) +pixel_scale = 100 # scale multiplier for combining clean and noisy values + # Load the Digits dataset. X, y = datasets.load_digits(return_X_y=True, as_frame=True) @@ -85,7 +88,7 @@ def extract_floats(combined_df, scale=100): ) df = ( - combine_floats(X_test, X_test_noisy) # combine to reduce transmitted data + combine_floats(X_test, X_test_noisy, scale=pixel_scale) # combine to reduce transmitted data .join(y_test) .reset_index() .join(df_prox) @@ -103,7 +106,7 @@ def extract_floats(combined_df, scale=100): # Create a data frame for looking up training proximities. 
df_lookup = ( - combine_floats(X_train, X_train_noisy) # combine to reduce transmitted data + combine_floats(X_train, X_train_noisy, scale=pixel_scale) # combine to reduce transmitted data .assign(**{"index": np.arange(len(X_train))}) .join(y_train) ) @@ -112,14 +115,15 @@ def extract_floats(combined_df, scale=100): def plot_digits_proximities( df, df_lookup, + pixel_dim=(8, 8), + pixel_scale=100, n_prox=25, n_prox_per_row=5, subplot_spacing=10, height=225, width=225, ): - pixel_scale = 100 - pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(8) for x in range(8)] + pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(pixel_dim[1]) for x in range(pixel_dim[0])] pixel_x = "split(datum.pixel, '_')[2]" pixel_y = "split(datum.pixel, '_')[1]" @@ -227,5 +231,5 @@ def plot_digits_proximities( return chart -chart = plot_digits_proximities(df, df_lookup) +chart = plot_digits_proximities(df, df_lookup, pixel_dim=pixel_dim, pixel_scale=pixel_scale) chart diff --git a/examples/plot_quantile_extrapolation.py b/examples/plot_quantile_extrapolation.py index ad3e300..ad1f19a 100755 --- a/examples/plot_quantile_extrapolation.py +++ b/examples/plot_quantile_extrapolation.py @@ -25,11 +25,10 @@ random_state = np.random.RandomState(0) n_samples = 500 -bounds = [0, 15] extrap_frac = 0.25 +bounds = [0, 15] func = lambda x: x * np.sin(x) func_str = "f(x) = x sin(x)" - quantiles = [0.025, 0.975, 0.5] qrf_params = {"max_samples_leaf": None, "min_samples_leaf": 4, "random_state": random_state} @@ -403,8 +402,8 @@ def get_coverage_xtr(bounds_list, train_indices, test_indices, y_train, level, * ) -def plot_qrf_vs_xtrapolation_comparison(df): - def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None): +def plot_qrf_vs_xtrapolation_comparison(df, func_str): + def plot_extrapolations(df, title="", legend=False, func_str="", x_domain=None, y_domain=None): x_scale = None if x_domain is not None: x_scale = alt.Scale(domain=x_domain, nice=False, padding=0) @@ -517,7 +516,7 
@@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None chart = chart.properties(title=title, height=200, width=300) return chart - kwargs = {"x_domain": [0, 15], "y_domain": [-15, 20]} + kwargs = {"func_str": func_str, "x_domain": [0, 15], "y_domain": [-15, 20]} xtra_mapper = {"bb_mid": "y_pred", "bb_low": "y_pred_low", "bb_upp": "y_pred_upp"} chart1 = alt.layer( @@ -555,5 +554,5 @@ def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None return chart -chart = plot_qrf_vs_xtrapolation_comparison(df) +chart = plot_qrf_vs_xtrapolation_comparison(df, func_str) chart diff --git a/examples/plot_quantile_multioutput.py b/examples/plot_quantile_multioutput.py index 465ba8d..713fda0 100644 --- a/examples/plot_quantile_multioutput.py +++ b/examples/plot_quantile_multioutput.py @@ -27,17 +27,16 @@ { "signal": lambda x: np.log1p(x + 1), "noise": lambda x: np.log1p(x) * random_state.uniform(size=len(x)), + "legend": {"0": "#f2a619"}, # plot legend value and color }, { "signal": lambda x: np.log1p(np.sqrt(x)), "noise": lambda x: np.log1p(x / 2) * random_state.uniform(size=len(x)), + "legend": {"1": "#006aff"}, # plot legend value and color }, ] -legend = { - "0": "#f2a619", - "1": "#006aff", -} +legend = {k: v for f in funcs for k, v in f["legend"].items()} def make_func_Xy(funcs, bounds, n_samples): @@ -48,10 +47,6 @@ def make_func_Xy(funcs, bounds, n_samples): return np.atleast_2d(x).T, y -def format_frac(fraction): - return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0" - - # Create the dataset with multiple target variables. 
X, y = make_func_Xy(funcs, bounds, n_samples) @@ -70,7 +65,7 @@ def format_frac(fraction): "y_true": np.concatenate([f["signal"](X.squeeze()) for f in funcs]), "y_pred": np.concatenate([y_pred[:, i, len(quantiles) // 2] for i in range(len(funcs))]), "target": np.concatenate([[str(i)] * len(X) for i in range(len(funcs))]), - **{f"q_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)}, + **{f"q_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)}, } ) diff --git a/examples/plot_quantile_vs_standard.py b/examples/plot_quantile_vs_standard.py index d1648af..298777b 100755 --- a/examples/plot_quantile_vs_standard.py +++ b/examples/plot_quantile_vs_standard.py @@ -21,10 +21,10 @@ from quantile_forest import RandomForestQuantileRegressor random_state = np.random.RandomState(0) +n_samples = 5000 quantiles = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist() # Create right-skewed dataset. -n_samples = 5000 a, loc, scale = 7, -1, 1 skewnorm_rv = sp.stats.skewnorm(a, loc, scale) skewnorm_rv.random_state = random_state @@ -48,16 +48,11 @@ "QRF (Median)": "#006aff", } - -def format_frac(fraction): - return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0" - - df = pd.DataFrame( { "actual": y_test, "rf": y_pred_rf, - **{f"qrf_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)}, + **{f"qrf_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)}, } )