deploy: b40f858

zillow · Aug 17, 2024 · 184d350 · 184d350
1 parent 30a3d13
commit 184d350
Show file tree

Hide file tree

Showing 10 changed files with 62 additions and 83 deletions.
diff --git a/_sources/gallery/plot_proximity_counts.rst.txt b/_sources/gallery/plot_proximity_counts.rst.txt
@@ -41,6 +41,9 @@ conditions.
     n_test_samples = 25
     noise_std = 0.1
 
+    pixel_dim = (8, 8)  # pixel dimensions (width and height)
+    pixel_scale = 100  # scale multiplier for combining clean and noisy values
+
     # Load the Digits dataset.
     X, y = datasets.load_digits(return_X_y=True, as_frame=True)
 
@@ -98,7 +101,7 @@ conditions.
     )
 
     df = (
-        combine_floats(X_test, X_test_noisy)  # combine to reduce transmitted data
+        combine_floats(X_test, X_test_noisy, scale=pixel_scale)  # combine to reduce transmitted data
         .join(y_test)
         .reset_index()
         .join(df_prox)
@@ -116,7 +119,7 @@ conditions.
 
     # Create a data frame for looking up training proximities.
     df_lookup = (
-        combine_floats(X_train, X_train_noisy)  # combine to reduce transmitted data
+        combine_floats(X_train, X_train_noisy, scale=pixel_scale)  # combine to reduce transmitted data
         .assign(**{"index": np.arange(len(X_train))})
         .join(y_train)
     )
@@ -125,14 +128,15 @@ conditions.
     def plot_digits_proximities(
         df,
         df_lookup,
+        pixel_dim=(8, 8),
+        pixel_scale=100,
         n_prox=25,
         n_prox_per_row=5,
         subplot_spacing=10,
         height=225,
         width=225,
     ):
-        pixel_scale = 100
-        pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(8) for x in range(8)]
+        pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(pixel_dim[1]) for x in range(pixel_dim[0])]
         pixel_x = "split(datum.pixel, '_')[2]"
         pixel_y = "split(datum.pixel, '_')[1]"
 
@@ -240,7 +244,7 @@ conditions.
         return chart
 
 
-    chart = plot_digits_proximities(df, df_lookup)
+    chart = plot_digits_proximities(df, df_lookup, pixel_dim=pixel_dim, pixel_scale=pixel_scale)
     chart
 
 
@@ -261,6 +265,9 @@ conditions.
             n_test_samples = 25
             noise_std = 0.1
 
+            pixel_dim = (8, 8)  # pixel dimensions (width and height)
+            pixel_scale = 100  # scale multiplier for combining clean and noisy values
+
             # Load the Digits dataset.
             X, y = datasets.load_digits(return_X_y=True, as_frame=True)
 
@@ -318,7 +325,7 @@ conditions.
             )
 
             df = (
-                combine_floats(X_test, X_test_noisy)  # combine to reduce transmitted data
+                combine_floats(X_test, X_test_noisy, scale=pixel_scale)  # combine to reduce transmitted data
                 .join(y_test)
                 .reset_index()
                 .join(df_prox)
@@ -336,7 +343,7 @@ conditions.
 
             # Create a data frame for looking up training proximities.
             df_lookup = (
-                combine_floats(X_train, X_train_noisy)  # combine to reduce transmitted data
+                combine_floats(X_train, X_train_noisy, scale=pixel_scale)  # combine to reduce transmitted data
                 .assign(**{"index": np.arange(len(X_train))})
                 .join(y_train)
             )
@@ -345,14 +352,15 @@ conditions.
             def plot_digits_proximities(
                 df,
                 df_lookup,
+                pixel_dim=(8, 8),
+                pixel_scale=100,
                 n_prox=25,
                 n_prox_per_row=5,
                 subplot_spacing=10,
                 height=225,
                 width=225,
             ):
-                pixel_scale = 100
-                pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(8) for x in range(8)]
+                pixel_cols = [f"pixel_{y:01}_{x:01}" for y in range(pixel_dim[1]) for x in range(pixel_dim[0])]
                 pixel_x = "split(datum.pixel, '_')[2]"
                 pixel_y = "split(datum.pixel, '_')[1]"
 
@@ -460,5 +468,5 @@ conditions.
                 return chart
 
 
-            chart = plot_digits_proximities(df, df_lookup)
+            chart = plot_digits_proximities(df, df_lookup, pixel_dim=pixel_dim, pixel_scale=pixel_scale)
             chart
diff --git a/_sources/gallery/plot_quantile_extrapolation.rst.txt b/_sources/gallery/plot_quantile_extrapolation.rst.txt
@@ -38,11 +38,10 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
 
     random_state = np.random.RandomState(0)
     n_samples = 500
-    bounds = [0, 15]
     extrap_frac = 0.25
+    bounds = [0, 15]
     func = lambda x: x * np.sin(x)
     func_str = "f(x) = x sin(x)"
-
     quantiles = [0.025, 0.975, 0.5]
     qrf_params = {"max_samples_leaf": None, "min_samples_leaf": 4, "random_state": random_state}
 
@@ -416,8 +415,8 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
     )
 
 
-    def plot_qrf_vs_xtrapolation_comparison(df):
-        def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None):
+    def plot_qrf_vs_xtrapolation_comparison(df, func_str):
+        def plot_extrapolations(df, title="", legend=False, func_str="", x_domain=None, y_domain=None):
             x_scale = None
             if x_domain is not None:
                 x_scale = alt.Scale(domain=x_domain, nice=False, padding=0)
@@ -530,7 +529,7 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
             chart = chart.properties(title=title, height=200, width=300)
             return chart
 
-        kwargs = {"x_domain": [0, 15], "y_domain": [-15, 20]}
+        kwargs = {"func_str": func_str, "x_domain": [0, 15], "y_domain": [-15, 20]}
         xtra_mapper = {"bb_mid": "y_pred", "bb_low": "y_pred_low", "bb_upp": "y_pred_upp"}
 
         chart1 = alt.layer(
@@ -568,7 +567,7 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
         return chart
 
 
-    chart = plot_qrf_vs_xtrapolation_comparison(df)
+    chart = plot_qrf_vs_xtrapolation_comparison(df, func_str)
     chart
 
 
@@ -586,11 +585,10 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
 
             random_state = np.random.RandomState(0)
             n_samples = 500
-            bounds = [0, 15]
             extrap_frac = 0.25
+            bounds = [0, 15]
             func = lambda x: x * np.sin(x)
             func_str = "f(x) = x sin(x)"
-
             quantiles = [0.025, 0.975, 0.5]
             qrf_params = {"max_samples_leaf": None, "min_samples_leaf": 4, "random_state": random_state}
 
@@ -964,8 +962,8 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
             )
 
 
-            def plot_qrf_vs_xtrapolation_comparison(df):
-                def plot_extrapolations(df, title="", legend=False, x_domain=None, y_domain=None):
+            def plot_qrf_vs_xtrapolation_comparison(df, func_str):
+                def plot_extrapolations(df, title="", legend=False, func_str="", x_domain=None, y_domain=None):
                     x_scale = None
                     if x_domain is not None:
                         x_scale = alt.Scale(domain=x_domain, nice=False, padding=0)
@@ -1078,7 +1076,7 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
                     chart = chart.properties(title=title, height=200, width=300)
                     return chart
 
-                kwargs = {"x_domain": [0, 15], "y_domain": [-15, 20]}
+                kwargs = {"func_str": func_str, "x_domain": [0, 15], "y_domain": [-15, 20]}
                 xtra_mapper = {"bb_mid": "y_pred", "bb_low": "y_pred_low", "bb_upp": "y_pred_upp"}
 
                 chart1 = alt.layer(
@@ -1116,5 +1114,5 @@ adapted from `"Extrapolation-Aware Nonparametric Statistical Inference"
                 return chart
 
 
-            chart = plot_qrf_vs_xtrapolation_comparison(df)
+            chart = plot_qrf_vs_xtrapolation_comparison(df, func_str)
             chart
diff --git a/_sources/gallery/plot_quantile_multioutput.rst.txt b/_sources/gallery/plot_quantile_multioutput.rst.txt
@@ -40,17 +40,16 @@ for each target: the median line and the area defined by the interval points.
         {
             "signal": lambda x: np.log1p(x + 1),
             "noise": lambda x: np.log1p(x) * random_state.uniform(size=len(x)),
+            "legend": {"0": "#f2a619"},  # plot legend value and color
         },
         {
             "signal": lambda x: np.log1p(np.sqrt(x)),
             "noise": lambda x: np.log1p(x / 2) * random_state.uniform(size=len(x)),
+            "legend": {"1": "#006aff"},  # plot legend value and color
         },
     ]
 
-    legend = {
-        "0": "#f2a619",
-        "1": "#006aff",
-    }
+    legend = {k: v for f in funcs for k, v in f["legend"].items()}
 
 
     def make_func_Xy(funcs, bounds, n_samples):
@@ -61,10 +60,6 @@ for each target: the median line and the area defined by the interval points.
         return np.atleast_2d(x).T, y
 
 
-    def format_frac(fraction):
-        return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"
-
-
     # Create the dataset with multiple target variables.
     X, y = make_func_Xy(funcs, bounds, n_samples)
 
@@ -83,7 +78,7 @@ for each target: the median line and the area defined by the interval points.
             "y_true": np.concatenate([f["signal"](X.squeeze()) for f in funcs]),
             "y_pred": np.concatenate([y_pred[:, i, len(quantiles) // 2] for i in range(len(funcs))]),
             "target": np.concatenate([[str(i)] * len(X) for i in range(len(funcs))]),
-            **{f"q_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)},
+            **{f"q_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)},
         }
     )
 
@@ -193,17 +188,16 @@ for each target: the median line and the area defined by the interval points.
                 {
                     "signal": lambda x: np.log1p(x + 1),
                     "noise": lambda x: np.log1p(x) * random_state.uniform(size=len(x)),
+                    "legend": {"0": "#f2a619"},  # plot legend value and color
                 },
                 {
                     "signal": lambda x: np.log1p(np.sqrt(x)),
                     "noise": lambda x: np.log1p(x / 2) * random_state.uniform(size=len(x)),
+                    "legend": {"1": "#006aff"},  # plot legend value and color
                 },
             ]
 
-            legend = {
-                "0": "#f2a619",
-                "1": "#006aff",
-            }
+            legend = {k: v for f in funcs for k, v in f["legend"].items()}
 
 
             def make_func_Xy(funcs, bounds, n_samples):
@@ -214,10 +208,6 @@ for each target: the median line and the area defined by the interval points.
                 return np.atleast_2d(x).T, y
 
 
-            def format_frac(fraction):
-                return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"
-
-
             # Create the dataset with multiple target variables.
             X, y = make_func_Xy(funcs, bounds, n_samples)
 
@@ -236,7 +226,7 @@ for each target: the median line and the area defined by the interval points.
                     "y_true": np.concatenate([f["signal"](X.squeeze()) for f in funcs]),
                     "y_pred": np.concatenate([y_pred[:, i, len(quantiles) // 2] for i in range(len(funcs))]),
                     "target": np.concatenate([[str(i)] * len(X) for i in range(len(funcs))]),
-                    **{f"q_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)},
+                    **{f"q_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred.T)},
                 }
             )
 

diff --git a/_sources/gallery/plot_quantile_vs_standard.rst.txt b/_sources/gallery/plot_quantile_vs_standard.rst.txt
@@ -34,10 +34,10 @@ distributions.
     from quantile_forest import RandomForestQuantileRegressor
 
     random_state = np.random.RandomState(0)
+    n_samples = 5000
     quantiles = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist()
 
     # Create right-skewed dataset.
-    n_samples = 5000
     a, loc, scale = 7, -1, 1
     skewnorm_rv = sp.stats.skewnorm(a, loc, scale)
     skewnorm_rv.random_state = random_state
@@ -61,16 +61,11 @@ distributions.
         "QRF (Median)": "#006aff",
     }
 
-
-    def format_frac(fraction):
-        return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"
-
-
     df = pd.DataFrame(
         {
             "actual": y_test,
             "rf": y_pred_rf,
-            **{f"qrf_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)},
+            **{f"qrf_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)},
         }
     )
 
@@ -147,10 +142,10 @@ distributions.
             from quantile_forest import RandomForestQuantileRegressor
 
             random_state = np.random.RandomState(0)
+            n_samples = 5000
             quantiles = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist()
 
             # Create right-skewed dataset.
-            n_samples = 5000
             a, loc, scale = 7, -1, 1
             skewnorm_rv = sp.stats.skewnorm(a, loc, scale)
             skewnorm_rv.random_state = random_state
@@ -174,16 +169,11 @@ distributions.
                 "QRF (Median)": "#006aff",
             }
 
-
-            def format_frac(fraction):
-                return f"{fraction:.3g}".rstrip("0").rstrip(".") or "0"
-
-
             df = pd.DataFrame(
                 {
                     "actual": y_test,
                     "rf": y_pred_rf,
-                    **{f"qrf_{format_frac(q_i)}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)},
+                    **{f"qrf_{q_i:.3g}": y_i.ravel() for q_i, y_i in zip(quantiles, y_pred_qrf.T)},
                 }
             )
 

diff --git a/_static/_image_hashes.json b/_static/_image_hashes.json
@@ -1 +1 @@
-{"plot_quantile_interpolation.png": "a33104b33d451e87a61b63c0441186fc", "plot_predict_custom.png": "c165b6e2ec6d97e99ee0dcb308a33165", "plot_quantile_extrapolation.png": "254b782f7f280133734238b20b05b8e3", "plot_quantile_multioutput.png": "92bc73718cd417baa7c4c2bb3a738039", "plot_quantile_example.png": "5224ce52d0004de1b163f849090d146d", "plot_quantile_conformalized.png": "aa558540d703f9df5a841644c43a9a11", "plot_quantile_intervals.png": "55469191cce728f146eb6dc148a50a62", "plot_quantile_vs_standard.png": "951d2aa38319a848a163dae38a72a9db", "plot_treeshap_example.png": "f3315a150f9bb5f55c9c0cfaefcedf4a", "plot_proximity_counts.png": "dbb1c0dc04c4809529f6cd6cfc3c0649", "plot_quantile_ranks.png": "765687aea83e00d18de5404bf6210c9f", "plot_huggingface_model.png": "54f6ba0d8875e499e02114fd00fb3197"}
+{"plot_quantile_interpolation.png": "a33104b33d451e87a61b63c0441186fc", "plot_predict_custom.png": "c165b6e2ec6d97e99ee0dcb308a33165", "plot_quantile_extrapolation.png": "24853dd6e2acbf54d608fa7ee6953cf7", "plot_quantile_multioutput.png": "cb1bed13ecf47420f01a2a5d181aeb4c", "plot_quantile_example.png": "5224ce52d0004de1b163f849090d146d", "plot_quantile_conformalized.png": "aa558540d703f9df5a841644c43a9a11", "plot_quantile_intervals.png": "55469191cce728f146eb6dc148a50a62", "plot_quantile_vs_standard.png": "72d33d65c1a254072fcce892e2236d1d", "plot_treeshap_example.png": "f3315a150f9bb5f55c9c0cfaefcedf4a", "plot_proximity_counts.png": "3ef1800f1d1d128c966f3ca868b76897", "plot_quantile_ranks.png": "765687aea83e00d18de5404bf6210c9f", "plot_huggingface_model.png": "54f6ba0d8875e499e02114fd00fb3197"}
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"plot_quantile_interpolation.png": "a33104b33d451e87a61b63c0441186fc", "plot_predict_custom.png": "c165b6e2ec6d97e99ee0dcb308a33165", "plot_quantile_extrapolation.png": "254b782f7f280133734238b20b05b8e3", "plot_quantile_multioutput.png": "92bc73718cd417baa7c4c2bb3a738039", "plot_quantile_example.png": "5224ce52d0004de1b163f849090d146d", "plot_quantile_conformalized.png": "aa558540d703f9df5a841644c43a9a11", "plot_quantile_intervals.png": "55469191cce728f146eb6dc148a50a62", "plot_quantile_vs_standard.png": "951d2aa38319a848a163dae38a72a9db", "plot_treeshap_example.png": "f3315a150f9bb5f55c9c0cfaefcedf4a", "plot_proximity_counts.png": "dbb1c0dc04c4809529f6cd6cfc3c0649", "plot_quantile_ranks.png": "765687aea83e00d18de5404bf6210c9f", "plot_huggingface_model.png": "54f6ba0d8875e499e02114fd00fb3197"}
		{"plot_quantile_interpolation.png": "a33104b33d451e87a61b63c0441186fc", "plot_predict_custom.png": "c165b6e2ec6d97e99ee0dcb308a33165", "plot_quantile_extrapolation.png": "24853dd6e2acbf54d608fa7ee6953cf7", "plot_quantile_multioutput.png": "cb1bed13ecf47420f01a2a5d181aeb4c", "plot_quantile_example.png": "5224ce52d0004de1b163f849090d146d", "plot_quantile_conformalized.png": "aa558540d703f9df5a841644c43a9a11", "plot_quantile_intervals.png": "55469191cce728f146eb6dc148a50a62", "plot_quantile_vs_standard.png": "72d33d65c1a254072fcce892e2236d1d", "plot_treeshap_example.png": "f3315a150f9bb5f55c9c0cfaefcedf4a", "plot_proximity_counts.png": "3ef1800f1d1d128c966f3ca868b76897", "plot_quantile_ranks.png": "765687aea83e00d18de5404bf6210c9f", "plot_huggingface_model.png": "54f6ba0d8875e499e02114fd00fb3197"}