
Commit

deploy: b95444d
reidjohnson committed Aug 6, 2024
1 parent 5c4fc53 commit ad1586b
Showing 19 changed files with 139 additions and 115 deletions.
12 changes: 8 additions & 4 deletions _sources/gallery/plot_predict_custom.rst.txt
@@ -106,12 +106,14 @@ distribution function (ECDF) for a test sample.
}
)
dfs.append(df_i)
df = pd.concat(dfs)
df = pd.concat(dfs, ignore_index=True)


def plot_ecdf(df):
min_idx = df["sample_idx"].min()
max_idx = df["sample_idx"].max()

# Slider for determining the sample index for which the custom function is being visualized.
slider = alt.binding_range(min=min_idx, max=max_idx, step=1, name="Sample Index: ")
sample_selection = alt.param(value=0, bind=slider, name="sample_idx")

@@ -143,8 +145,8 @@ distribution function (ECDF) for a test sample.

chart = (
(circles + lines)
.transform_filter(alt.datum.sample_idx == sample_selection)
.add_params(sample_selection)
.transform_filter(alt.datum.sample_idx == sample_selection)
.properties(
height=400,
width=650,
@@ -243,12 +245,14 @@ distribution function (ECDF) for a test sample.
}
)
dfs.append(df_i)
df = pd.concat(dfs)
df = pd.concat(dfs, ignore_index=True)
def plot_ecdf(df):
min_idx = df["sample_idx"].min()
max_idx = df["sample_idx"].max()
# Slider for determining the sample index for which the custom function is being visualized.
slider = alt.binding_range(min=min_idx, max=max_idx, step=1, name="Sample Index: ")
sample_selection = alt.param(value=0, bind=slider, name="sample_idx")
@@ -280,8 +284,8 @@ distribution function (ECDF) for a test sample.
chart = (
(circles + lines)
.transform_filter(alt.datum.sample_idx == sample_selection)
.add_params(sample_selection)
.transform_filter(alt.datum.sample_idx == sample_selection)
.properties(
height=400,
width=650,
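The hunks above pass ignore_index=True to pd.concat and reorder the .add_params(sample_selection) and .transform_filter(...) calls. For context, ignore_index=True relabels the concatenated rows 0..n-1 instead of preserving each per-sample frame's original index, which avoids duplicate index labels when many small frames are stacked. A minimal sketch with toy data (not the gallery code):

    import pandas as pd

    dfs = [pd.DataFrame({"y": [0.1, 0.2]}), pd.DataFrame({"y": [0.3, 0.4]})]

    # Default concat keeps each frame's index, yielding duplicate labels.
    print(pd.concat(dfs).index.tolist())                     # [0, 1, 0, 1]

    # ignore_index=True relabels rows with a fresh RangeIndex.
    print(pd.concat(dfs, ignore_index=True).index.tolist())  # [0, 1, 2, 3]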
14 changes: 8 additions & 6 deletions _sources/gallery/plot_proximity_counts.rst.txt
@@ -40,13 +40,13 @@ identifying similar samples, even when using noisy training and test data.

rng = check_random_state(0)

n_test = 25
n_test_samples = 25
noise_std = 0.1

# Load the Digits dataset.
X, y = datasets.load_digits(return_X_y=True, as_frame=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=n_test, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=n_test_samples, random_state=0)


def add_gaussian_noise(X, mean=0, std=0.1, random_state=None):
@@ -107,7 +107,7 @@ identifying similar samples, even when using noisy training and test data.
.join(y_test)
.reset_index()
.join(df_prox)
.iloc[:n_test]
.iloc[:n_test_samples]
.explode("prox")
.assign(
**{
@@ -140,6 +140,7 @@ identifying similar samples, even when using noisy training and test data.
n_subplot_rows = n_prox // n_prox_per_row
subplot_dim = (width - subplot_spacing * (n_subplot_rows - 1)) / n_subplot_rows

# Slider for determining the test index for which the data is being visualized.
slider = alt.binding_range(
min=0,
max=n_samples - 1,
@@ -274,13 +275,13 @@ identifying similar samples, even when using noisy training and test data.
rng = check_random_state(0)
n_test = 25
n_test_samples = 25
noise_std = 0.1
# Load the Digits dataset.
X, y = datasets.load_digits(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=n_test, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=n_test_samples, random_state=0)
def add_gaussian_noise(X, mean=0, std=0.1, random_state=None):
@@ -341,7 +342,7 @@ identifying similar samples, even when using noisy training and test data.
.join(y_test)
.reset_index()
.join(df_prox)
.iloc[:n_test]
.iloc[:n_test_samples]
.explode("prox")
.assign(
**{
@@ -374,6 +375,7 @@ identifying similar samples, even when using noisy training and test data.
n_subplot_rows = n_prox // n_prox_per_row
subplot_dim = (width - subplot_spacing * (n_subplot_rows - 1)) / n_subplot_rows
# Slider for determining the test index for which the data is being visualized.
slider = alt.binding_range(
min=0,
max=n_samples - 1,
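The rename from n_test to n_test_samples clarifies that the value is a sample count: scikit-learn's train_test_split treats an integer test_size as an absolute number of test samples, while a float is interpreted as a proportion. A small standalone sketch of that behavior, using the same Digits dataset:

    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    X, y = datasets.load_digits(return_X_y=True, as_frame=True)

    n_test_samples = 25  # an integer test_size is an absolute count, not a fraction
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=n_test_samples, random_state=0
    )
    print(len(X_test))  # 25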
10 changes: 6 additions & 4 deletions _sources/gallery/plot_quantile_conformalized.rst.txt
@@ -170,6 +170,7 @@ by Carl McBride Ellis.


def plot_prediction_intervals(df, domain):
# Slider for varying the target coverage level.
slider = alt.binding_range(min=0, max=1, step=0.1, name="Coverage: ")
cov_selection = alt.param(value=0.9, bind=slider, name="coverage")
cov_tol = 0.01
@@ -206,7 +207,8 @@ by Carl McBride Ellis.
)

circle = (
base.mark_circle(size=30)
base.add_params(click)
.mark_circle(size=30)
.encode(
x=alt.X(
"y_pred:Q",
@@ -224,7 +226,6 @@ by Carl McBride Ellis.
opacity=alt.condition(click, alt.value(1), alt.value(0)),
tooltip=tooltip,
)
.add_params(click)
)

bar = base.mark_bar(width=2).encode(
@@ -448,6 +449,7 @@ by Carl McBride Ellis.
def plot_prediction_intervals(df, domain):
# Slider for varying the target coverage level.
slider = alt.binding_range(min=0, max=1, step=0.1, name="Coverage: ")
cov_selection = alt.param(value=0.9, bind=slider, name="coverage")
cov_tol = 0.01
@@ -484,7 +486,8 @@ by Carl McBride Ellis.
)
circle = (
base.mark_circle(size=30)
base.add_params(click)
.mark_circle(size=30)
.encode(
x=alt.X(
"y_pred:Q",
@@ -502,7 +505,6 @@ by Carl McBride Ellis.
opacity=alt.condition(click, alt.value(1), alt.value(0)),
tooltip=tooltip,
)
.add_params(click)
)
bar = base.mark_bar(width=2).encode(
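Both hunks register the click selection with .add_params(click) at the start of the chained chart definition instead of after .encode(...). Either position attaches the parameter to the same chart object. A minimal sketch of the pattern with placeholder data (the gallery's actual click definition is not shown in these hunks, so a simple point selection is assumed):

    import altair as alt
    import pandas as pd

    df = pd.DataFrame({"y_pred": [1.0, 2.0, 3.0], "y_label": ["a", "b", "c"]})  # hypothetical data

    click = alt.selection_point(fields=["y_label"])  # assumed selection definition

    circle = (
        alt.Chart(df)
        .add_params(click)        # register the selection up front, as in the updated ordering
        .mark_circle(size=30)
        .encode(
            x="y_pred:Q",
            y="y_label:N",
            # Selected points stay fully opaque; unselected points are hidden.
            opacity=alt.condition(click, alt.value(1), alt.value(0)),
        )
    )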
36 changes: 12 additions & 24 deletions _sources/gallery/plot_quantile_interpolation.rst.txt
@@ -84,13 +84,13 @@ calculated quantile does not precisely correspond to one of the actual values.

df_i = pd.DataFrame(data)
dfs.append(df_i)
df = pd.concat(dfs)
df = pd.concat(dfs, ignore_index=True)


def plot_interpolations(df, legend):
# Slider for varying the prediction interval that determines the quantiles being interpolated.
slider = alt.binding_range(min=0, max=1, step=0.01, name="Prediction Interval: ")
interval_selection = alt.param(value=0.9, bind=slider, name="interval")
interval_tol = 0.001

click = alt.selection_point(fields=["method"], bind="legend")

@@ -145,18 +145,12 @@ calculated quantile does not precisely correspond to one of the actual values.

chart = (
(area + point)
.add_params(interval_selection, click)
.transform_filter(
(
(alt.datum.quantile_low >= (0.5 - interval_selection / 2 - interval_tol))
& (alt.datum.quantile_low <= (0.5 - interval_selection / 2 + interval_tol))
)
| (
(alt.datum.quantile_upp >= (0.5 + interval_selection / 2 - interval_tol))
& (alt.datum.quantile_upp <= (0.5 + interval_selection / 2 + interval_tol))
)
| (alt.datum.method == "Actual")
"(datum.quantile_low == round((0.5 - interval / 2) * 1000) / 1000)"
"| (datum.quantile_upp == round((0.5 + interval / 2) * 1000) / 1000)"
"| (datum.method == 'Actual')"
)
.add_params(interval_selection, click)
.properties(height=400)
.facet(
column=alt.Column(
@@ -242,13 +236,13 @@ calculated quantile does not precisely correspond to one of the actual values.
df_i = pd.DataFrame(data)
dfs.append(df_i)
df = pd.concat(dfs)
df = pd.concat(dfs, ignore_index=True)
def plot_interpolations(df, legend):
# Slider for varying the prediction interval that determines the quantiles being interpolated.
slider = alt.binding_range(min=0, max=1, step=0.01, name="Prediction Interval: ")
interval_selection = alt.param(value=0.9, bind=slider, name="interval")
interval_tol = 0.001
click = alt.selection_point(fields=["method"], bind="legend")
@@ -303,18 +297,12 @@ calculated quantile does not precisely correspond to one of the actual values.
chart = (
(area + point)
.add_params(interval_selection, click)
.transform_filter(
(
(alt.datum.quantile_low >= (0.5 - interval_selection / 2 - interval_tol))
& (alt.datum.quantile_low <= (0.5 - interval_selection / 2 + interval_tol))
)
| (
(alt.datum.quantile_upp >= (0.5 + interval_selection / 2 - interval_tol))
& (alt.datum.quantile_upp <= (0.5 + interval_selection / 2 + interval_tol))
)
| (alt.datum.method == "Actual")
"(datum.quantile_low == round((0.5 - interval / 2) * 1000) / 1000)"
"| (datum.quantile_upp == round((0.5 + interval / 2) * 1000) / 1000)"
"| (datum.method == 'Actual')"
)
.add_params(interval_selection, click)
.properties(height=400)
.facet(
column=alt.Column(
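The updated filter replaces the tolerance-based comparisons against interval_tol with a single Vega expression string. The three quoted fragments are adjacent Python string literals, so they concatenate into one expression, and rounding (0.5 ± interval / 2) to three decimals makes an exact equality test practical without a tolerance band. A minimal sketch of the pattern with placeholder data (column names follow the hunks above):

    import altair as alt
    import pandas as pd

    df = pd.DataFrame(  # hypothetical rows standing in for the interpolated quantiles
        {"quantile_low": [0.05, 0.25], "quantile_upp": [0.95, 0.75], "method": ["Linear", "Lower"], "y": [1.0, 2.0]}
    )

    slider = alt.binding_range(min=0, max=1, step=0.01, name="Prediction Interval: ")
    interval_selection = alt.param(value=0.9, bind=slider, name="interval")

    chart = (
        alt.Chart(df)
        .mark_point()
        .encode(x="method:N", y="y:Q")
        .transform_filter(
            # Adjacent string literals join into one Vega expression referencing the "interval" param.
            "(datum.quantile_low == round((0.5 - interval / 2) * 1000) / 1000)"
            " | (datum.quantile_upp == round((0.5 + interval / 2) * 1000) / 1000)"
        )
        .add_params(interval_selection)
    )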
2 changes: 2 additions & 0 deletions _sources/gallery/plot_quantile_multioutput.rst.txt
@@ -97,6 +97,7 @@ for each target: the median line and the area defined by the interval points.


def plot_multioutputs(df, legend):
# Slider for varying the displayed prediction intervals.
slider = alt.binding_range(min=0, max=1, step=0.05, name="Prediction Interval: ")
interval_selection = alt.param(value=0.95, bind=slider, name="interval")

@@ -252,6 +253,7 @@ for each target: the median line and the area defined by the interval points.
def plot_multioutputs(df, legend):
# Slider for varying the displayed prediction intervals.
slider = alt.binding_range(min=0, max=1, step=0.05, name="Prediction Interval: ")
interval_selection = alt.param(value=0.95, bind=slider, name="interval")
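The comment added here documents the slider that controls the displayed prediction intervals. Assuming the centered-interval convention used in the other gallery plots, the slider's default value of 0.95 corresponds to a band between the 0.025 and 0.975 quantiles; a one-line check of that arithmetic:

    interval = 0.95  # slider default from the hunk above
    quantile_low, quantile_upp = 0.5 - interval / 2, 0.5 + interval / 2
    print(round(quantile_low, 3), round(quantile_upp, 3))  # 0.025 0.975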
38 changes: 24 additions & 14 deletions _sources/gallery/plot_quantile_ranks.rst.txt
@@ -62,25 +62,30 @@ significantly from the expected range.


def plot_fit_and_ranks(df):
# Slider for varying the interval that defines the upper and lower quantile rank thresholds.
slider = alt.binding_range(min=0, max=1, step=0.01, name="Rank Interval Threshold: ")
rank_val = alt.param("rank_val", bind=slider, value=0.05)

click = alt.selection_point(fields=["outlier"], bind="legend")

base = alt.Chart(df)

color_points = alt.Color(
"outlier:N",
scale=alt.Scale(domain=["Yes", "No"], range=["red", "#f2a619"]),
title="Outlier",
)

points = (
base.transform_calculate(
base.add_params(rank_val, click)
.transform_calculate(
outlier="abs(datum.y_rank - 0.5) > (0.5 - rank_val / 2) ? 'Yes' : 'No'"
)
.add_params(rank_val)
.mark_circle(opacity=0.5, size=25)
.encode(
x=alt.X("x:Q"),
y=alt.Y("y:Q"),
color=alt.Color(
"outlier:N",
scale=alt.Scale(domain=["Yes", "No"], range=["red", "#f2a619"]),
title="Outlier",
),
color=alt.condition(click, color_points, alt.value("lightgray")),
tooltip=[
alt.Tooltip("x:Q", format=".3f", title="x"),
alt.Tooltip("y:Q", format=".3f", title="f(x)"),
@@ -156,25 +161,30 @@ significantly from the expected range.
def plot_fit_and_ranks(df):
# Slider for varying the interval that defines the upper and lower quantile rank thresholds.
slider = alt.binding_range(min=0, max=1, step=0.01, name="Rank Interval Threshold: ")
rank_val = alt.param("rank_val", bind=slider, value=0.05)
click = alt.selection_point(fields=["outlier"], bind="legend")
base = alt.Chart(df)
color_points = alt.Color(
"outlier:N",
scale=alt.Scale(domain=["Yes", "No"], range=["red", "#f2a619"]),
title="Outlier",
)
points = (
base.transform_calculate(
base.add_params(rank_val, click)
.transform_calculate(
outlier="abs(datum.y_rank - 0.5) > (0.5 - rank_val / 2) ? 'Yes' : 'No'"
)
.add_params(rank_val)
.mark_circle(opacity=0.5, size=25)
.encode(
x=alt.X("x:Q"),
y=alt.Y("y:Q"),
color=alt.Color(
"outlier:N",
scale=alt.Scale(domain=["Yes", "No"], range=["red", "#f2a619"]),
title="Outlier",
),
color=alt.condition(click, color_points, alt.value("lightgray")),
tooltip=[
alt.Tooltip("x:Q", format=".3f", title="x"),
alt.Tooltip("y:Q", format=".3f", title="f(x)"),
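Both hunks register the parameters up front with .add_params(rank_val, click), pull the alt.Color encoding out into color_points, and route it through alt.condition so that clicking a legend entry keeps that category colored and grays out the rest. A minimal self-contained sketch of the legend-bound selection pattern (toy data; the gallery derives the outlier flag from quantile ranks):

    import altair as alt
    import pandas as pd

    # Hypothetical stand-in data with a precomputed outlier flag.
    df = pd.DataFrame({"x": [0.1, 0.2, 0.3], "y": [1.0, 5.0, 1.2], "outlier": ["No", "Yes", "No"]})

    click = alt.selection_point(fields=["outlier"], bind="legend")

    color_points = alt.Color(
        "outlier:N",
        scale=alt.Scale(domain=["Yes", "No"], range=["red", "#f2a619"]),
        title="Outlier",
    )

    points = (
        alt.Chart(df)
        .add_params(click)
        .mark_circle(opacity=0.5, size=25)
        .encode(
            x="x:Q",
            y="y:Q",
            # Clicking a legend entry keeps that category colored; others fall back to lightgray.
            color=alt.condition(click, color_points, alt.value("lightgray")),
        )
    )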
(Diffs for the remaining 13 changed files are not shown.)
