From c7d61481f96bee492d4d65c7539fb4a5c6fe6fad Mon Sep 17 00:00:00 2001 From: Reid Johnson Date: Sat, 17 Aug 2024 03:37:53 -0700 Subject: [PATCH] Update example plots --- examples/plot_huggingface_model.py | 7 +++---- examples/plot_predict_custom.py | 5 ++--- examples/plot_proximity_counts.py | 4 ++-- examples/plot_quantile_conformalized.py | 8 ++++---- examples/plot_quantile_example.py | 2 +- examples/plot_quantile_extrapolation.py | 2 +- examples/plot_quantile_interpolation.py | 11 +++++------ examples/plot_quantile_intervals.py | 2 +- examples/plot_quantile_multioutput.py | 9 ++++----- examples/plot_quantile_ranks.py | 8 ++++---- examples/plot_quantile_vs_standard.py | 8 ++++---- examples/plot_treeshap_example.py | 7 +++---- 12 files changed, 34 insertions(+), 39 deletions(-) diff --git a/examples/plot_huggingface_model.py b/examples/plot_huggingface_model.py index 2b43f61..560fd62 100755 --- a/examples/plot_huggingface_model.py +++ b/examples/plot_huggingface_model.py @@ -21,7 +21,6 @@ import numpy as np import pandas as pd from sklearn import datasets -from sklearn.utils.validation import check_random_state from skops import hub_utils import quantile_forest @@ -33,7 +32,7 @@ repo_id = "quantile-forest/california-housing-example" load_existing = True -random_state = check_random_state(0) +random_state = np.random.RandomState(0) quantiles = np.linspace(0, 1, num=5, endpoint=True).round(2).tolist() sample_frac = 1 @@ -177,13 +176,13 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo", random_state= def plot_quantiles_by_latlon(df, quantiles, color_scheme="cividis"): # Slider for varying the displayed quantile estimates. slider = alt.binding_range( + name="Predicted Quantile: ", min=0, max=1, step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1), - name="Predicted Quantile: ", ) - quantile_val = alt.param(value=0.5, bind=slider, name="quantile") + quantile_val = alt.param(name="quantile", value=0.5, bind=slider) chart = ( alt.Chart(df) diff --git a/examples/plot_predict_custom.py b/examples/plot_predict_custom.py index 3b229cc..60edb6e 100755 --- a/examples/plot_predict_custom.py +++ b/examples/plot_predict_custom.py @@ -18,11 +18,10 @@ import scipy as sp from sklearn import datasets from sklearn.model_selection import train_test_split -from sklearn.utils.validation import check_random_state from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) n_test_samples = 100 @@ -105,7 +104,7 @@ def plot_ecdf(df): max_idx = df["index"].max() # Slider for determining the sample index for which the custom function is being visualized. - slider = alt.binding_range(min=min_idx, max=max_idx, step=1, name="Test Sample Index: ") + slider = alt.binding_range(name="Test Sample Index: ", min=min_idx, max=max_idx, step=1) index_selection = alt.selection_point( value=0, bind=slider, diff --git a/examples/plot_proximity_counts.py b/examples/plot_proximity_counts.py index 47158ac..e084b8b 100644 --- a/examples/plot_proximity_counts.py +++ b/examples/plot_proximity_counts.py @@ -24,7 +24,7 @@ from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) n_test_samples = 25 noise_std = 0.1 @@ -131,7 +131,7 @@ def plot_digits_proximities( subplot_dim = (width - subplot_spacing * (n_subplot_rows - 1)) / n_subplot_rows # Slider for determining the test index for which the data is being visualized. - slider = alt.binding_range(min=0, max=n_samples - 1, step=1, name="Test Sample Index: ") + slider = alt.binding_range(name="Test Sample Index: ", min=0, max=n_samples - 1, step=1) index_selection = alt.selection_point(value=0, bind=slider, fields=["index"]) scale = alt.Scale(domain=[x_min, x_max], scheme="greys") diff --git a/examples/plot_quantile_conformalized.py b/examples/plot_quantile_conformalized.py index 9a645fc..5899f28 100755 --- a/examples/plot_quantile_conformalized.py +++ b/examples/plot_quantile_conformalized.py @@ -25,7 +25,7 @@ from quantile_forest import RandomForestQuantileRegressor random_seed = 0 -random_state = check_random_state(random_seed) +random_state = np.random.RandomState(random_seed) n_samples = 900 coverages = np.linspace(0, 1, num=11, endpoint=True).round(1).tolist() # the "coverage level" @@ -160,10 +160,10 @@ def cqr_strategy(alpha, X_train, X_test, y_train, y_test, random_state=None): def plot_prediction_intervals_by_strategy(df): def plot_prediction_intervals(df, domain): # Slider for varying the target coverage level. - slider = alt.binding_range(min=0, max=1, step=0.1, name="Coverage Target: ") - coverage_val = alt.param(value=0.9, bind=slider, name="coverage") + slider = alt.binding_range(name="Coverage Target: ", min=0, max=1, step=0.1) + coverage_val = alt.param(name="coverage", value=0.9, bind=slider) - click = alt.selection_point(fields=["y_label"], bind="legend") + click = alt.selection_point(bind="legend", fields=["y_label"]) tooltip = [ alt.Tooltip("y_test:Q", format="$,d", title="True Price"), diff --git a/examples/plot_quantile_example.py b/examples/plot_quantile_example.py index 103a8de..1c6067e 100755 --- a/examples/plot_quantile_example.py +++ b/examples/plot_quantile_example.py @@ -16,7 +16,7 @@ from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) n_samples = 1000 bounds = [0, 10] quantiles = [0.025, 0.5, 0.975] diff --git a/examples/plot_quantile_extrapolation.py b/examples/plot_quantile_extrapolation.py index b89b83d..ad3e300 100755 --- a/examples/plot_quantile_extrapolation.py +++ b/examples/plot_quantile_extrapolation.py @@ -23,7 +23,7 @@ from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) n_samples = 500 bounds = [0, 15] extrap_frac = 0.25 diff --git a/examples/plot_quantile_interpolation.py b/examples/plot_quantile_interpolation.py index 1d302e3..53defcc 100755 --- a/examples/plot_quantile_interpolation.py +++ b/examples/plot_quantile_interpolation.py @@ -15,11 +15,10 @@ import altair as alt import numpy as np import pandas as pd -from sklearn.utils.validation import check_random_state from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) intervals = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist() # Create toy dataset. @@ -82,10 +81,10 @@ def plot_interpolations(df, legend): # Slider for varying the prediction interval that determines the quantiles being interpolated. - slider = alt.binding_range(min=0, max=1, step=0.01, name="Prediction Interval: ") - interval_val = alt.param(value=0.9, bind=slider, name="interval") + slider = alt.binding_range(name="Prediction Interval: ", min=0, max=1, step=0.01) + interval_val = alt.param(name="interval", value=0.9, bind=slider) - click = alt.selection_point(fields=["method"], bind="legend") + click = alt.selection_point(bind="legend", fields=["method"], on="click") color = alt.condition( click, @@ -151,7 +150,7 @@ def plot_interpolations(df, legend): header=alt.Header(labelOrient="bottom", titleOrient="bottom"), title="Samples (Feature Values)", ), - title="QRF Prediction Intervals by Quantile Interpolation on Toy Dataset", + title="QRF Predictions by Quantile Interpolation on Toy Dataset", ) .configure_facet(spacing=15) .configure_range(category=alt.RangeScheme(list(legend.values()))) diff --git a/examples/plot_quantile_intervals.py b/examples/plot_quantile_intervals.py index d1fa69f..0eeadf0 100755 --- a/examples/plot_quantile_intervals.py +++ b/examples/plot_quantile_intervals.py @@ -17,7 +17,7 @@ from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) n_samples = 1000 # Load the California Housing Prices dataset. diff --git a/examples/plot_quantile_multioutput.py b/examples/plot_quantile_multioutput.py index a4e749c..465ba8d 100644 --- a/examples/plot_quantile_multioutput.py +++ b/examples/plot_quantile_multioutput.py @@ -14,11 +14,10 @@ import numpy as np import pandas as pd from sklearn.model_selection import train_test_split -from sklearn.utils.validation import check_random_state from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) n_samples = 2500 bounds = [0, 100] quantiles = np.linspace(0, 1, num=41, endpoint=True).round(3).tolist() @@ -78,10 +77,10 @@ def format_frac(fraction): def plot_multitargets(df, legend): # Slider for varying the displayed prediction intervals. - slider = alt.binding_range(min=0, max=1, step=0.05, name="Prediction Interval: ") - interval_val = alt.param(value=0.95, bind=slider, name="interval") + slider = alt.binding_range(name="Prediction Interval: ", min=0, max=1, step=0.05) + interval_val = alt.param(name="interval", value=0.95, bind=slider) - click = alt.selection_point(fields=["target"], bind="legend") + click = alt.selection_point(bind="legend", fields=["target"], on="click") color = alt.condition( click, diff --git a/examples/plot_quantile_ranks.py b/examples/plot_quantile_ranks.py index 95ce0f4..b479a3e 100644 --- a/examples/plot_quantile_ranks.py +++ b/examples/plot_quantile_ranks.py @@ -19,7 +19,7 @@ from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) n_samples = 5000 bounds = [0, 10] @@ -50,10 +50,10 @@ def make_toy_dataset(n_samples, bounds, random_state=0): def plot_pred_and_ranks(df): # Slider for varying the interval that defines the upper and lower quantile rank thresholds. - slider = alt.binding_range(min=0, max=1, step=0.01, name="Rank Interval Threshold: ") - interval_val = alt.param(value=0.05, bind=slider, name="interval") + slider = alt.binding_range(name="Rank Interval Threshold: ", min=0, max=1, step=0.01) + interval_val = alt.param(name="interval", value=0.05, bind=slider) - click = alt.selection_point(fields=["outlier"], bind="legend") + click = alt.selection_point(bind="legend", fields=["outlier"], on="click") base = alt.Chart(df) diff --git a/examples/plot_quantile_vs_standard.py b/examples/plot_quantile_vs_standard.py index 771a517..d1648af 100755 --- a/examples/plot_quantile_vs_standard.py +++ b/examples/plot_quantile_vs_standard.py @@ -17,11 +17,10 @@ import scipy as sp from sklearn.ensemble import RandomForestRegressor from sklearn.model_selection import train_test_split -from sklearn.utils.validation import check_random_state from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) quantiles = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist() # Create right-skewed dataset. @@ -66,14 +65,14 @@ def format_frac(fraction): def plot_prediction_histograms(df, legend): # Slider for varying the quantile value used for generating the QRF histogram. slider = alt.binding_range( + name="Predicted Quantile: ", min=0, max=1, step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1), - name="Predicted Quantile: ", ) quantile_val = alt.param(value=0.5, bind=slider, name="quantile") - click = alt.selection_point(fields=["label"], bind="legend") + click = alt.selection_point(bind="legend", fields=["label"], on="click") chart = ( alt.Chart(df) @@ -100,6 +99,7 @@ def plot_prediction_histograms(df, legend): alt.Color("label:N", sort=list(legend.keys()), title=None), alt.value("lightgray"), ), + opacity=alt.condition(click, alt.value(1), alt.value(0.5)), xOffset=alt.XOffset("label:N"), tooltip=[ alt.Tooltip("label:N", title="Label"), diff --git a/examples/plot_treeshap_example.py b/examples/plot_treeshap_example.py index 10b148b..6bd0b8b 100644 --- a/examples/plot_treeshap_example.py +++ b/examples/plot_treeshap_example.py @@ -20,11 +20,10 @@ import shap from sklearn import datasets from sklearn.model_selection import train_test_split -from sklearn.utils.validation import check_random_state from quantile_forest import RandomForestQuantileRegressor -random_state = check_random_state(0) +random_state = np.random.RandomState(0) n_samples = 1000 test_idx = 0 quantiles = np.linspace(0, 1, num=11, endpoint=True).round(1).tolist() @@ -130,12 +129,12 @@ def plot_shap_waterfall_with_quantiles(df, height=300): # Slider for varying the applied quantile estimates. slider = alt.binding_range( + name="Predicted Quantile: ", min=0, max=1, step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1), - name="Predicted Quantile: ", ) - quantile_val = alt.param(value=0.5, bind=slider, name="quantile") + quantile_val = alt.param(name="quantile", value=0.5, bind=slider) df_grouped = ( df.groupby("quantile")[df.columns.tolist()]