Update example plots

zillow · Aug 17, 2024 · c7d6148
1 parent 35bf344
commit c7d6148
Show file tree

Hide file tree

Showing 12 changed files with 34 additions and 39 deletions.
diff --git a/examples/plot_huggingface_model.py b/examples/plot_huggingface_model.py
@@ -21,7 +21,6 @@
 import numpy as np
 import pandas as pd
 from sklearn import datasets
-from sklearn.utils.validation import check_random_state
 from skops import hub_utils
 
 import quantile_forest
@@ -33,7 +32,7 @@
 repo_id = "quantile-forest/california-housing-example"
 load_existing = True
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 quantiles = np.linspace(0, 1, num=5, endpoint=True).round(2).tolist()
 sample_frac = 1
 
@@ -177,13 +176,13 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo", random_state=
 def plot_quantiles_by_latlon(df, quantiles, color_scheme="cividis"):
     # Slider for varying the displayed quantile estimates.
     slider = alt.binding_range(
+        name="Predicted Quantile: ",
         min=0,
         max=1,
         step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1),
-        name="Predicted Quantile: ",
     )
 
-    quantile_val = alt.param(value=0.5, bind=slider, name="quantile")
+    quantile_val = alt.param(name="quantile", value=0.5, bind=slider)
 
     chart = (
         alt.Chart(df)

diff --git a/examples/plot_predict_custom.py b/examples/plot_predict_custom.py
@@ -18,11 +18,10 @@
 import scipy as sp
 from sklearn import datasets
 from sklearn.model_selection import train_test_split
-from sklearn.utils.validation import check_random_state
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 n_test_samples = 100
 
 
@@ -105,7 +104,7 @@ def plot_ecdf(df):
     max_idx = df["index"].max()
 
     # Slider for determining the sample index for which the custom function is being visualized.
-    slider = alt.binding_range(min=min_idx, max=max_idx, step=1, name="Test Sample Index: ")
+    slider = alt.binding_range(name="Test Sample Index: ", min=min_idx, max=max_idx, step=1)
     index_selection = alt.selection_point(
         value=0,
         bind=slider,

diff --git a/examples/plot_proximity_counts.py b/examples/plot_proximity_counts.py
@@ -24,7 +24,7 @@
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 n_test_samples = 25
 noise_std = 0.1
 
@@ -131,7 +131,7 @@ def plot_digits_proximities(
     subplot_dim = (width - subplot_spacing * (n_subplot_rows - 1)) / n_subplot_rows
 
     # Slider for determining the test index for which the data is being visualized.
-    slider = alt.binding_range(min=0, max=n_samples - 1, step=1, name="Test Sample Index: ")
+    slider = alt.binding_range(name="Test Sample Index: ", min=0, max=n_samples - 1, step=1)
     index_selection = alt.selection_point(value=0, bind=slider, fields=["index"])
 
     scale = alt.Scale(domain=[x_min, x_max], scheme="greys")

diff --git a/examples/plot_quantile_conformalized.py b/examples/plot_quantile_conformalized.py
@@ -25,7 +25,7 @@
 from quantile_forest import RandomForestQuantileRegressor
 
 random_seed = 0
-random_state = check_random_state(random_seed)
+random_state = np.random.RandomState(random_seed)
 
 n_samples = 900
 coverages = np.linspace(0, 1, num=11, endpoint=True).round(1).tolist()  # the "coverage level"
@@ -160,10 +160,10 @@ def cqr_strategy(alpha, X_train, X_test, y_train, y_test, random_state=None):
 def plot_prediction_intervals_by_strategy(df):
     def plot_prediction_intervals(df, domain):
         # Slider for varying the target coverage level.
-        slider = alt.binding_range(min=0, max=1, step=0.1, name="Coverage Target: ")
-        coverage_val = alt.param(value=0.9, bind=slider, name="coverage")
+        slider = alt.binding_range(name="Coverage Target: ", min=0, max=1, step=0.1)
+        coverage_val = alt.param(name="coverage", value=0.9, bind=slider)
 
-        click = alt.selection_point(fields=["y_label"], bind="legend")
+        click = alt.selection_point(bind="legend", fields=["y_label"])
 
         tooltip = [
             alt.Tooltip("y_test:Q", format="$,d", title="True Price"),

diff --git a/examples/plot_quantile_example.py b/examples/plot_quantile_example.py
@@ -16,7 +16,7 @@
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 n_samples = 1000
 bounds = [0, 10]
 quantiles = [0.025, 0.5, 0.975]

diff --git a/examples/plot_quantile_extrapolation.py b/examples/plot_quantile_extrapolation.py
@@ -23,7 +23,7 @@
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 n_samples = 500
 bounds = [0, 15]
 extrap_frac = 0.25

diff --git a/examples/plot_quantile_interpolation.py b/examples/plot_quantile_interpolation.py
@@ -15,11 +15,10 @@
 import altair as alt
 import numpy as np
 import pandas as pd
-from sklearn.utils.validation import check_random_state
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 intervals = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist()
 
 # Create toy dataset.
@@ -82,10 +81,10 @@
 
 def plot_interpolations(df, legend):
     # Slider for varying the prediction interval that determines the quantiles being interpolated.
-    slider = alt.binding_range(min=0, max=1, step=0.01, name="Prediction Interval: ")
-    interval_val = alt.param(value=0.9, bind=slider, name="interval")
+    slider = alt.binding_range(name="Prediction Interval: ", min=0, max=1, step=0.01)
+    interval_val = alt.param(name="interval", value=0.9, bind=slider)
 
-    click = alt.selection_point(fields=["method"], bind="legend")
+    click = alt.selection_point(bind="legend", fields=["method"], on="click")
 
     color = alt.condition(
         click,
@@ -151,7 +150,7 @@ def plot_interpolations(df, legend):
                 header=alt.Header(labelOrient="bottom", titleOrient="bottom"),
                 title="Samples (Feature Values)",
             ),
-            title="QRF Prediction Intervals by Quantile Interpolation on Toy Dataset",
+            title="QRF Predictions by Quantile Interpolation on Toy Dataset",
         )
         .configure_facet(spacing=15)
         .configure_range(category=alt.RangeScheme(list(legend.values())))

diff --git a/examples/plot_quantile_intervals.py b/examples/plot_quantile_intervals.py
@@ -17,7 +17,7 @@
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 n_samples = 1000
 
 # Load the California Housing Prices dataset.

diff --git a/examples/plot_quantile_multioutput.py b/examples/plot_quantile_multioutput.py
@@ -14,11 +14,10 @@
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import train_test_split
-from sklearn.utils.validation import check_random_state
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 n_samples = 2500
 bounds = [0, 100]
 quantiles = np.linspace(0, 1, num=41, endpoint=True).round(3).tolist()
@@ -78,10 +77,10 @@ def format_frac(fraction):
 
 def plot_multitargets(df, legend):
     # Slider for varying the displayed prediction intervals.
-    slider = alt.binding_range(min=0, max=1, step=0.05, name="Prediction Interval: ")
-    interval_val = alt.param(value=0.95, bind=slider, name="interval")
+    slider = alt.binding_range(name="Prediction Interval: ", min=0, max=1, step=0.05)
+    interval_val = alt.param(name="interval", value=0.95, bind=slider)
 
-    click = alt.selection_point(fields=["target"], bind="legend")
+    click = alt.selection_point(bind="legend", fields=["target"], on="click")
 
     color = alt.condition(
         click,

diff --git a/examples/plot_quantile_ranks.py b/examples/plot_quantile_ranks.py
@@ -19,7 +19,7 @@
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 n_samples = 5000
 bounds = [0, 10]
 
@@ -50,10 +50,10 @@ def make_toy_dataset(n_samples, bounds, random_state=0):
 
 def plot_pred_and_ranks(df):
     # Slider for varying the interval that defines the upper and lower quantile rank thresholds.
-    slider = alt.binding_range(min=0, max=1, step=0.01, name="Rank Interval Threshold: ")
-    interval_val = alt.param(value=0.05, bind=slider, name="interval")
+    slider = alt.binding_range(name="Rank Interval Threshold: ", min=0, max=1, step=0.01)
+    interval_val = alt.param(name="interval", value=0.05, bind=slider)
 
-    click = alt.selection_point(fields=["outlier"], bind="legend")
+    click = alt.selection_point(bind="legend", fields=["outlier"], on="click")
 
     base = alt.Chart(df)
 

diff --git a/examples/plot_quantile_vs_standard.py b/examples/plot_quantile_vs_standard.py
@@ -17,11 +17,10 @@
 import scipy as sp
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import train_test_split
-from sklearn.utils.validation import check_random_state
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 quantiles = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist()
 
 # Create right-skewed dataset.
@@ -66,14 +65,14 @@ def format_frac(fraction):
 def plot_prediction_histograms(df, legend):
     # Slider for varying the quantile value used for generating the QRF histogram.
     slider = alt.binding_range(
+        name="Predicted Quantile: ",
         min=0,
         max=1,
         step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1),
-        name="Predicted Quantile: ",
     )
     quantile_val = alt.param(value=0.5, bind=slider, name="quantile")
 
-    click = alt.selection_point(fields=["label"], bind="legend")
+    click = alt.selection_point(bind="legend", fields=["label"], on="click")
 
     chart = (
         alt.Chart(df)
@@ -100,6 +99,7 @@ def plot_prediction_histograms(df, legend):
                 alt.Color("label:N", sort=list(legend.keys()), title=None),
                 alt.value("lightgray"),
             ),
+            opacity=alt.condition(click, alt.value(1), alt.value(0.5)),
             xOffset=alt.XOffset("label:N"),
             tooltip=[
                 alt.Tooltip("label:N", title="Label"),

diff --git a/examples/plot_treeshap_example.py b/examples/plot_treeshap_example.py
@@ -20,11 +20,10 @@
 import shap
 from sklearn import datasets
 from sklearn.model_selection import train_test_split
-from sklearn.utils.validation import check_random_state
 
 from quantile_forest import RandomForestQuantileRegressor
 
-random_state = check_random_state(0)
+random_state = np.random.RandomState(0)
 n_samples = 1000
 test_idx = 0
 quantiles = np.linspace(0, 1, num=11, endpoint=True).round(1).tolist()
@@ -130,12 +129,12 @@ def plot_shap_waterfall_with_quantiles(df, height=300):
 
     # Slider for varying the applied quantile estimates.
     slider = alt.binding_range(
+        name="Predicted Quantile: ",
         min=0,
         max=1,
         step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1),
-        name="Predicted Quantile: ",
     )
-    quantile_val = alt.param(value=0.5, bind=slider, name="quantile")
+    quantile_val = alt.param(name="quantile", value=0.5, bind=slider)
 
     df_grouped = (
         df.groupby("quantile")[df.columns.tolist()]