From 6f81ee7ec1831350328b80c273843c674c94efeb Mon Sep 17 00:00:00 2001 From: Reid Johnson Date: Tue, 6 Aug 2024 18:19:47 -0700 Subject: [PATCH] Update example plots --- examples/plot_huggingface_model.py | 18 ++++---- examples/plot_predict_custom.py | 12 +++-- examples/plot_proximity_counts.py | 16 ++++--- examples/plot_quantile_conformalized.py | 36 +++++++-------- examples/plot_quantile_example.py | 11 ++--- examples/plot_quantile_extrapolation.py | 58 +++++++++++++++---------- examples/plot_quantile_interpolation.py | 13 +++--- examples/plot_quantile_intervals.py | 20 +++++---- examples/plot_quantile_multioutput.py | 20 ++++++--- examples/plot_quantile_ranks.py | 9 ++-- examples/plot_quantile_vs_standard.py | 33 ++++++++------ examples/plot_treeshap_example.py | 9 ++-- 12 files changed, 149 insertions(+), 106 deletions(-) diff --git a/examples/plot_huggingface_model.py b/examples/plot_huggingface_model.py index c263159..f2a546c 100755 --- a/examples/plot_huggingface_model.py +++ b/examples/plot_huggingface_model.py @@ -27,15 +27,17 @@ alt.data_transformers.disable_max_rows() +random_seed = 0 + token = "" repo_id = "quantile-forest/california-housing-example" load_existing = True -quantiles = np.arange(0, 1.25, 0.25).round(2).tolist() +quantiles = np.linspace(0, 1, num=5, endpoint=True).round(2).tolist() sample_frac = 1 -def fit_and_upload_model(token, repo_id, local_dir="./local_repo"): +def fit_and_upload_model(token, repo_id, local_dir="./local_repo", random_state=None): """Function used to fit the model and upload it to Hugging Face Hub.""" from pathlib import Path @@ -49,10 +51,10 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo"): from skops import card X, y = datasets.fetch_california_housing(as_frame=True, return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state) # Fit the model. - qrf = RandomForestQuantileRegressor(random_state=0).fit(X_train, y_train) + qrf = RandomForestQuantileRegressor(random_state=random_state).fit(X_train, y_train) # Save the model to a file. model_filename = "model.pkl" @@ -145,7 +147,7 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo"): if not load_existing: - fit_and_upload_model(token, repo_id) + fit_and_upload_model(token, repo_id, random_state=random_seed) # Download the repository locally. 
local_dir = "./local_repo" @@ -166,7 +168,7 @@ def fit_and_upload_model(token, repo_id, local_dir="./local_repo"): df = ( pd.DataFrame(y_pred, columns=quantiles) .reset_index() - .sample(frac=sample_frac, random_state=0) + .sample(frac=sample_frac, random_state=random_seed) .melt(id_vars=["index"], var_name="quantile", value_name="value") .merge(X[["Latitude", "Longitude", "Population"]].reset_index(), on="index", how="right") ) @@ -178,7 +180,7 @@ def plot_quantiles_by_latlon(df, quantiles): min=0, max=1, step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1), - name="Quantile: ", + name="Predicted Quantile: ", ) q_val = alt.selection_point( @@ -217,7 +219,7 @@ def plot_quantiles_by_latlon(df, quantiles): .properties( height=650, width=650, - title="Quantile Estimates on the California Housing Dataset", + title="Quantile Predictions on the California Housing Dataset", ) ) return chart diff --git a/examples/plot_predict_custom.py b/examples/plot_predict_custom.py index e1c69a7..526e5a6 100755 --- a/examples/plot_predict_custom.py +++ b/examples/plot_predict_custom.py @@ -19,10 +19,12 @@ import scipy as sp from sklearn import datasets from sklearn.model_selection import train_test_split +from sklearn.utils.validation import check_random_state from quantile_forest import RandomForestQuantileRegressor -np.random.seed(0) +random_seed = 0 +rng = check_random_state(random_seed) n_test_samples = 100 @@ -68,12 +70,14 @@ def predict(reg, X, quantiles=0.5, what=None): X, y = datasets.load_diabetes(return_X_y=True) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=n_test_samples, random_state=0) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=n_test_samples, random_state=random_seed +) -reg = RandomForestQuantileRegressor().fit(X_train, y_train) +reg = RandomForestQuantileRegressor(random_state=random_seed).fit(X_train, y_train) # Define a user-specified function; here we randomly sample 1000 values with replacement. -func = lambda x: np.random.choice(x, size=1000) +func = lambda x: rng.choice(x, size=1000) # Output array with the user-specified function applied to each sample's empirical distribution. y_out = predict(reg, X_test, what=func) diff --git a/examples/plot_proximity_counts.py b/examples/plot_proximity_counts.py index ba2fd06..b28c4dd 100644 --- a/examples/plot_proximity_counts.py +++ b/examples/plot_proximity_counts.py @@ -25,7 +25,8 @@ from quantile_forest import RandomForestQuantileRegressor -rng = check_random_state(0) +random_seed = 0 +rng = check_random_state(random_seed) n_test_samples = 25 noise_std = 0.1 @@ -33,13 +34,17 @@ # Load the Digits dataset. X, y = datasets.load_digits(return_X_y=True, as_frame=True) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=n_test_samples, random_state=0) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=n_test_samples, random_state=random_seed +) def add_gaussian_noise(X, mean=0, std=0.1, random_state=None): """Add Gaussian noise to input data.""" if random_state is None: rng = check_random_state(0) + elif isinstance(random_state, int): + rng = check_random_state(random_state) else: rng = random_state @@ -73,13 +78,13 @@ def extract_floats(combined_df, scale=100): # Randomly add noise to the training and test data. 
-X_train_noisy = X_train.pipe(add_gaussian_noise, std=noise_std, random_state=rng) -X_test_noisy = X_test.pipe(add_gaussian_noise, std=noise_std, random_state=rng) +X_train_noisy = X_train.pipe(add_gaussian_noise, std=noise_std, random_state=random_seed) +X_test_noisy = X_test.pipe(add_gaussian_noise, std=noise_std, random_state=random_seed) # We set `max_samples_leaf=None` to ensure that every sample in the training # data is stored in the leaf nodes. By doing this, we allow the model to # consider all samples as potential candidates for proximity calculations. -qrf = RandomForestQuantileRegressor(max_samples_leaf=None, random_state=0) +qrf = RandomForestQuantileRegressor(max_samples_leaf=None, random_state=random_seed) qrf.fit(X_train_noisy, X_train) # Get the proximity counts. @@ -177,6 +182,7 @@ def plot_digits_proximities( color=alt.Color("value_clean:Q", legend=None, scale=alt.Scale(scheme="greys")), opacity=alt.condition(alt.datum["value_clean"] == 0, alt.value(0), alt.value(0.67)), tooltip=[ + alt.Tooltip("prox_idx", title="Proximity Index"), alt.Tooltip("prox_cnt", title="Proximity Count"), alt.Tooltip("target:Q", title="Digit"), ], diff --git a/examples/plot_quantile_conformalized.py b/examples/plot_quantile_conformalized.py index 60d03e7..5bc2f1f 100755 --- a/examples/plot_quantile_conformalized.py +++ b/examples/plot_quantile_conformalized.py @@ -25,31 +25,31 @@ alt.data_transformers.disable_max_rows() +random_seed = 0 +rng = check_random_state(random_seed) + +n_samples = 1000 +coverages = np.linspace(0, 1, num=11, endpoint=True).round(1).tolist() # the "coverage level" + strategies = { "qrf": "Quantile Regression Forest (QRF)", "cqr": "Conformalized Quantile Regression (CQR)", } -random_state = 0 -rng = check_random_state(random_state) - -coverages = np.arange(0, 1.1, 0.1).round(1).tolist() # the "coverage level" - # Load the California Housing Prices dataset. -california = datasets.fetch_california_housing() -n_samples = min(california.target.size, 1000) -perm = rng.permutation(n_samples) -X = california.data[perm] -y = california.target[perm] +X, y = datasets.fetch_california_housing(as_frame=True, return_X_y=True) +perm = rng.permutation(min(len(X), n_samples)) +X = X.iloc[perm] +y = y.iloc[perm] -X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_seed) def sort_y_values(y_test, y_pred, y_pis): """Sort the target values and predictions.""" indices = np.argsort(y_test) return { - "y_test": y_test[indices], + "y_test": np.asarray(y_test)[indices], "y_pred": y_pred[indices], "y_pred_low": y_pis[:, 0][indices], "y_pred_upp": y_pis[:, 1][indices], @@ -68,10 +68,10 @@ def mean_width_score(y_pred_low, y_pred_upp): return float(mean_width) -def qrf_strategy(alpha, X_train, X_test, y_train, y_test): +def qrf_strategy(alpha, X_train, X_test, y_train, y_test, random_state=None): quantiles = [alpha / 2, 1 - alpha / 2] - qrf = RandomForestQuantileRegressor(random_state=0) + qrf = RandomForestQuantileRegressor(random_state=random_state) qrf.fit(X_train, y_train) # Calculate the lower and upper quantile values on the test data. @@ -88,15 +88,15 @@ def qrf_strategy(alpha, X_train, X_test, y_train, y_test): return pd.DataFrame(y_values).pipe(lambda x: x * 100_000).assign(strategy="qrf") -def cqr_strategy(alpha, X_train, X_test, y_train, y_test): +def cqr_strategy(alpha, X_train, X_test, y_train, y_test, random_state=None): quantiles = [alpha / 2, 1 - alpha / 2] # Create calibration set. 
X_train, X_calib, y_train, y_calib = train_test_split( - X_train, y_train, test_size=0.5, random_state=0 + X_train, y_train, test_size=0.5, random_state=random_state ) - qrf = RandomForestQuantileRegressor(random_state=0) + qrf = RandomForestQuantileRegressor(random_state=random_state) qrf.fit(X_train, y_train) # Calculate the lower and upper quantile values on the test data. @@ -134,7 +134,7 @@ def cqr_strategy(alpha, X_train, X_test, y_train, y_test): dfs = [] for cov_frac in coverages: alpha = float(round(1 - cov_frac, 2)) - args = (alpha, X_train, X_test, y_train, y_test) + args = (alpha, X_train, X_test, y_train, y_test, random_seed) dfs.append(pd.concat([qrf_strategy(*args), cqr_strategy(*args)]).assign(alpha=alpha)) df = pd.concat(dfs) diff --git a/examples/plot_quantile_example.py b/examples/plot_quantile_example.py index 6a5a96b..a3a1008 100755 --- a/examples/plot_quantile_example.py +++ b/examples/plot_quantile_example.py @@ -14,6 +14,7 @@ from quantile_forest import RandomForestQuantileRegressor +random_seed = 0 n_samples = 1000 bounds = [0, 10] quantiles = [0.025, 0.5, 0.975] @@ -33,12 +34,12 @@ def make_toy_dataset(n_samples, bounds, add_noise=True, random_seed=0): # Create noisy data for modeling and non-noisy function data for illustration. -X, y = make_toy_dataset(n_samples, bounds, add_noise=True, random_seed=0) -X_func, y_func = make_toy_dataset(n_samples, bounds, add_noise=False, random_seed=0) +X, y = make_toy_dataset(n_samples, bounds, add_noise=True, random_seed=random_seed) +X_func, y_func = make_toy_dataset(n_samples, bounds, add_noise=False, random_seed=random_seed) -X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_seed) -qrf = RandomForestQuantileRegressor(max_depth=3, min_samples_leaf=5, random_state=0) +qrf = RandomForestQuantileRegressor(max_depth=3, min_samples_leaf=5, random_state=random_seed) qrf.fit(X_train, y_train) y_pred_func = qrf.predict(X_func, quantiles=quantiles) @@ -133,7 +134,7 @@ def plot_fit_and_intervals(df): chart = ( (area_pred + points + line_true + line_pred + blank) .resolve_scale(color="independent") - .properties(height=400, width=650) + .properties(height=400, width=650, title="QRF Predictions vs. 
Ground Truth on Toy Dataset") ) return chart diff --git a/examples/plot_quantile_extrapolation.py b/examples/plot_quantile_extrapolation.py index 6f86e0f..ff3c430 100755 --- a/examples/plot_quantile_extrapolation.py +++ b/examples/plot_quantile_extrapolation.py @@ -19,10 +19,12 @@ import altair as alt import numpy as np import pandas as pd +from sklearn.utils.validation import check_random_state from quantile_forest import RandomForestQuantileRegressor -np.random.seed(0) +random_seed = 0 +rng = check_random_state(random_seed) n_samples = 500 bounds = [0, 15] @@ -31,14 +33,15 @@ func_str = "f(x) = x sin(x)" quantiles = [0.025, 0.975, 0.5] -qrf_params = {"max_samples_leaf": None, "min_samples_leaf": 4, "random_state": 0} +qrf_params = {"max_samples_leaf": None, "min_samples_leaf": 4, "random_state": random_seed} -def make_func_Xy(func, bounds, n_samples): +def make_func_Xy(func, bounds, n_samples, random_seed=0): + rng = np.random.RandomState(random_seed) x = np.linspace(bounds[0], bounds[1], n_samples) f = func(x) std = 0.01 + np.abs(x - 5.0) / 5.0 - noise = np.random.normal(scale=std) + noise = rng.normal(scale=std) y = f + noise return np.atleast_2d(x).T, y @@ -103,7 +106,7 @@ def _penalized_locpol(fval, v, X, weights, degree, pen=0, penalize_intercept=Fal return deriv_mat @staticmethod - def _get_tree_weight_matrix(X, Y, X_eval=None, n_trees=100, **kwargs): + def _get_tree_weight_matrix(X, Y, X_eval=None, n_trees=100, rng=None, **kwargs): """Fit forest and extract weights. This implementation extracts the weight matrix from a list of quantile @@ -118,6 +121,8 @@ def _get_tree_weight_matrix(X, Y, X_eval=None, n_trees=100, **kwargs): del kwargs["random_state"] kwargs["bootstrap"] = False + rng = np.random.RandomState(0) if rng is None else rng + trees = [RandomForestQuantileRegressor(random_state=i, **kwargs) for i in range(n_trees)] n = X.shape[0] @@ -132,7 +137,7 @@ def _get_tree_weight_matrix(X, Y, X_eval=None, n_trees=100, **kwargs): for tree in trees: # Draw bootstrap sample. - boot_sample = np.random.choice(np.arange(n), bn, replace=False) + boot_sample = rng.choice(np.arange(n), bn, replace=False) split1 = boot_sample[: int(bn / 2)] split2 = np.concatenate([boot_sample[int(bn / 2) :], np.arange(nn) + n]) @@ -155,7 +160,7 @@ def _get_tree_weight_matrix(X, Y, X_eval=None, n_trees=100, **kwargs): return weight_mat - def fit_weights(self, X, fval, x0=None, train=False, **kwargs): + def fit_weights(self, X, fval, x0=None, train=False, rng=None, **kwargs): """Compute random forest weights for derivative estimation.""" n, d = X.shape fval = fval.flatten() @@ -167,19 +172,21 @@ def fit_weights(self, X, fval, x0=None, train=False, **kwargs): for jj, var in enumerate(xtra_features): var_order = list(range(d)) var_order = np.array([var] + var_order[:var] + var_order[var + 1 :]) - weights[jj] = self._get_tree_weight_matrix(X[:, var_order], fval, x0, **kwargs) + weights[jj] = self._get_tree_weight_matrix( + X[:, var_order], fval, x0, rng=rng, **kwargs + ) else: - weights = self._get_tree_weight_matrix(X, fval, x0, **kwargs)[n:, :n] + weights = self._get_tree_weight_matrix(X, fval, x0, rng=rng, **kwargs)[n:, :n] return weights - def fit_derivatives(self, X, fval, pen=0.1, **kwargs): + def fit_derivatives(self, X, fval, pen=0.1, rng=None, **kwargs): """Estimate derivatives.""" n, d = X.shape fval = fval.flatten() # Fit weights for local polynomial. 
- weights = self.fit_weights(X, fval, train=True, **kwargs) + weights = self.fit_weights(X, fval, train=True, rng=rng, **kwargs) # Estimate derivatives with local polynomial. derivatives = np.zeros((self.max_order_ + 1, n, d)) @@ -203,7 +210,7 @@ def fit_derivatives(self, X, fval, pen=0.1, **kwargs): return derivatives - def prediction_bounds(self, X, fval, x0, nn=50, **kwargs): + def prediction_bounds(self, X, fval, x0, nn=50, rng=None, **kwargs): """Compute extrapolation bounds.""" n, d = X.shape fval = fval.flatten() @@ -213,7 +220,7 @@ def prediction_bounds(self, X, fval, x0, nn=50, **kwargs): xtra_features = list(range(d)) # Fit derivatives. - derivatives = self.fit_derivatives(X, fval, **kwargs) + derivatives = self.fit_derivatives(X, fval, rng=rng, **kwargs) # Determine weighting for extrapolation points (using rotation). mu = derivatives[1].mean(axis=0) @@ -293,7 +300,7 @@ def prediction_bounds(self, X, fval, x0, nn=50, **kwargs): return bounds -def train_test_split(train_indices, **kwargs): +def train_test_split(train_indices, rng=None, **kwargs): """Fit model on training samples and extrapolate on test samples.""" X_train = X[train_indices, :] y_train = y[train_indices] @@ -308,7 +315,9 @@ def train_test_split(train_indices, **kwargs): for i in range(len(quantiles)): # Run Xtrapolation on quantile. xtra = Xtrapolation() - bounds_list[i] = xtra.prediction_bounds(X_train, qmat[train_indices, i], X, **kwargs) + bounds_list[i] = xtra.prediction_bounds( + X_train, qmat[train_indices, i], X, rng=rng, **kwargs + ) return { "train_indices": train_indices, @@ -331,33 +340,34 @@ def prob_randomized_pi(qmat, y, coverage): return prob_si -def randomized_pi(qmat, prob_si, y): +def randomized_pi(qmat, prob_si, y, random_state=None): """Calculate coverage.""" - si_index = np.random.choice([False, True], len(y), replace=True, p=[prob_si, 1 - prob_si]) + rng = np.random.RandomState(0) if random_state is None else random_state + si_index = rng.choice([False, True], len(y), replace=True, p=[prob_si, 1 - prob_si]) included = (qmat[:, 0] < y) & (y < qmat[:, 1]) boundary = (qmat[:, 0] == y) | (qmat[:, 1] == y) return included | (boundary & si_index) -def get_coverage_qrf(qmat, train_indices, test_indices, y_train, level): +def get_coverage_qrf(qmat, train_indices, test_indices, y_train, level, *args): """Calculate extrapolation coverage for regular quantile forest.""" prob_si = prob_randomized_pi(qmat[train_indices, :], y_train, level) - qrf = randomized_pi(qmat, prob_si, y) + qrf = randomized_pi(qmat, prob_si, y, *args) return np.mean(qrf[test_indices]) -def get_coverage_xtr(bounds_list, train_indices, test_indices, y_train, level): +def get_coverage_xtr(bounds_list, train_indices, test_indices, y_train, level, *args): """Calculate extrapolation coverage for Xtrapolation.""" bb_low = np.max(bounds_list[0][:, :, 0], axis=1) bb_upp = np.min(bounds_list[1][:, :, 1], axis=1) bb_low_train, bb_upp_train = bb_low[train_indices], bb_upp[train_indices] prob_si = prob_randomized_pi(np.c_[bb_low_train, bb_upp_train], y_train, level) - xtra = randomized_pi(np.c_[bb_low, bb_upp], prob_si, y) + xtra = randomized_pi(np.c_[bb_low, bb_upp], prob_si, y, *args) return np.mean(xtra[test_indices]) # Create the full dataset. -X, y = make_func_Xy(func, bounds, n_samples) +X, y = make_func_Xy(func, bounds, n_samples, random_seed=random_seed) # Fit and extrapolate based on train-test split (depending on X). 
extrap_min_idx = int(n_samples * (extrap_frac / 2)) @@ -365,10 +375,10 @@ def get_coverage_xtr(bounds_list, train_indices, test_indices, y_train, level): sort_X = np.argsort(X.squeeze()) train_indices = np.repeat(False, len(y)) train_indices[sort_X[extrap_min_idx] : sort_X[extrap_max_idx]] = True -res = train_test_split(train_indices, **qrf_params) +res = train_test_split(train_indices, rng=rng, **qrf_params) # Get coverages on extrapolated samples. -args = (train_indices, ~train_indices, y[train_indices], quantiles[1] - quantiles[0]) +args = (train_indices, ~train_indices, y[train_indices], quantiles[1] - quantiles[0], rng) cov_qrf = get_coverage_qrf(res["qmat"], *args) cov_xtr = get_coverage_xtr(res["bounds_list"], *args) diff --git a/examples/plot_quantile_interpolation.py b/examples/plot_quantile_interpolation.py index 5a940cf..37cf5f5 100755 --- a/examples/plot_quantile_interpolation.py +++ b/examples/plot_quantile_interpolation.py @@ -16,7 +16,8 @@ from quantile_forest import RandomForestQuantileRegressor -intervals = np.arange(0, 1.01, 0.01).round(2).tolist() +random_seed = 0 +intervals = np.linspace(0, 1, num=101, endpoint=True).round(2).tolist() # Create toy dataset. X = np.array([[-1, -1], [-1, -1], [-1, -1], [1, 1], [1, 1]]) @@ -26,7 +27,7 @@ n_estimators=1, max_samples_leaf=None, bootstrap=False, - random_state=0, + random_state=random_seed, ) qrf.fit(X, y) @@ -134,9 +135,9 @@ def plot_interpolations(df, legend): (area + point) .add_params(interval_selection, click) .transform_filter( - "(datum.quantile_low == round((0.5 - interval / 2) * 1000) / 1000)" + "(datum.method == 'Actual')" + "| (datum.quantile_low == round((0.5 - interval / 2) * 1000) / 1000)" "| (datum.quantile_upp == round((0.5 + interval / 2) * 1000) / 1000)" - "| (datum.method == 'Actual')" ) .properties(height=400) .facet( @@ -144,11 +145,13 @@ def plot_interpolations(df, legend): "X:N", header=alt.Header(labelOrient="bottom", titleOrient="bottom"), title="Samples (Feature Values)", - ) + ), + title="QRF Prediction Intervals by Quantile Interpolation on Toy Dataset", ) .configure_facet(spacing=15) .configure_range(category=alt.RangeScheme(list(legend.values()))) .configure_scale(bandPaddingInner=0.9) + .configure_title(anchor="middle") .configure_view(stroke=None) ) diff --git a/examples/plot_quantile_intervals.py b/examples/plot_quantile_intervals.py index e85ad59..72412a4 100755 --- a/examples/plot_quantile_intervals.py +++ b/examples/plot_quantile_intervals.py @@ -17,16 +17,18 @@ from quantile_forest import RandomForestQuantileRegressor -rng = check_random_state(0) +random_seed = 0 +rng = check_random_state(random_seed) + +n_samples = 1000 # Load the California Housing Prices dataset. -california = datasets.fetch_california_housing() -n_samples = min(california.target.size, 1000) -perm = rng.permutation(n_samples) -X = california.data[perm] -y = california.target[perm] +X, y = datasets.fetch_california_housing(as_frame=True, return_X_y=True) +perm = rng.permutation(min(len(X), n_samples)) +X = X.iloc[perm] +y = y.iloc[perm] -qrf = RandomForestQuantileRegressor(n_estimators=100, random_state=0) +qrf = RandomForestQuantileRegressor(random_state=random_seed) kf = KFold(n_splits=5) kf.get_n_splits(X) @@ -34,8 +36,8 @@ # Using k-fold cross-validation, get predictions for all samples. 
data = {"y_true": [], "y_pred": [], "y_pred_low": [], "y_pred_upp": []} for train_index, test_index in kf.split(X): - X_train, y_train = X[train_index], y[train_index] - X_test, y_test = X[test_index], y[test_index] + X_train, y_train = X.iloc[train_index], y.iloc[train_index] + X_test, y_test = X.iloc[test_index], y.iloc[test_index] qrf.set_params(max_features=X_train.shape[1] // 3) qrf.fit(X_train, y_train) diff --git a/examples/plot_quantile_multioutput.py b/examples/plot_quantile_multioutput.py index a632ded..f763e74 100755 --- a/examples/plot_quantile_multioutput.py +++ b/examples/plot_quantile_multioutput.py @@ -14,24 +14,26 @@ import numpy as np import pandas as pd from sklearn.model_selection import train_test_split +from sklearn.utils.validation import check_random_state from quantile_forest import RandomForestQuantileRegressor -np.random.seed(0) +random_seed = 0 +rng = check_random_state(random_seed) n_samples = 2500 bounds = [0, 100] -quantiles = np.arange(0, 1.025, 0.025).round(3).tolist() +quantiles = np.linspace(0, 1, num=41, endpoint=True).round(3).tolist() # Define functions that generate targets; each function maps to one target. funcs = [ { "signal": lambda x: np.log1p(x + 1), - "noise": lambda x: np.log1p(x) * np.random.uniform(size=len(x)), + "noise": lambda x: np.log1p(x) * rng.uniform(size=len(x)), }, { "signal": lambda x: np.log1p(np.sqrt(x)), - "noise": lambda x: np.log1p(x / 2) * np.random.uniform(size=len(x)), + "noise": lambda x: np.log1p(x / 2) * rng.uniform(size=len(x)), }, ] @@ -57,9 +59,9 @@ def format_frac(fraction): # Create the dataset with multiple target variables. X, y = make_func_Xy(funcs, bounds, n_samples) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_seed) -qrf = RandomForestQuantileRegressor(max_samples_leaf=None, max_depth=4, random_state=0) +qrf = RandomForestQuantileRegressor(max_samples_leaf=None, max_depth=4, random_state=random_seed) qrf.fit(X_train, y_train) # fit on all of the targets simultaneously # Get multiple-output predictions at many quantiles. 
@@ -153,7 +155,11 @@ def plot_multioutputs(df, legend): (points + area + line) .add_params(interval_selection, click) .configure_range(category=alt.RangeScheme(list(legend.values()))) - .properties(height=400, width=650, title="Multi-target Prediction Intervals") + .properties( + height=400, + width=650, + title="Multi-target Predictions and Prediction Intervals on Toy Dataset", + ) ) return chart diff --git a/examples/plot_quantile_ranks.py b/examples/plot_quantile_ranks.py index b950c7d..4eb5452 100644 --- a/examples/plot_quantile_ranks.py +++ b/examples/plot_quantile_ranks.py @@ -16,6 +16,7 @@ from quantile_forest import RandomForestQuantileRegressor +random_seed = 0 n_samples = 5000 bounds = [0, 10] @@ -28,9 +29,9 @@ def make_toy_dataset(n_samples, bounds, random_seed=0): return X, y -X, y = make_toy_dataset(n_samples, bounds, random_seed=0) +X, y = make_toy_dataset(n_samples, bounds, random_seed=random_seed) -params = {"max_samples_leaf": None, "min_samples_leaf": 50, "random_state": 0} +params = {"max_samples_leaf": None, "min_samples_leaf": 50, "random_state": random_seed} qrf = RandomForestQuantileRegressor(**params).fit(X, y) y_pred = qrf.predict(X, quantiles=0.5) @@ -94,7 +95,9 @@ def plot_fit_and_ranks(df): ) chart = (dummy_legend + points + line_pred).properties( - height=400, width=650, title="QRF Predictions with Quantile Rank Thresholding" + height=400, + width=650, + title="QRF Predictions with Quantile Rank Thresholding on Toy Dataset", ) return chart diff --git a/examples/plot_quantile_vs_standard.py b/examples/plot_quantile_vs_standard.py index 224b9b2..b63d69d 100755 --- a/examples/plot_quantile_vs_standard.py +++ b/examples/plot_quantile_vs_standard.py @@ -2,13 +2,13 @@ Quantile Regression Forests vs. Random Forests ============================================== -This example compares the estimates generated by a quantile regression forest -(QRF) and a standard random forest regressor on a synthetic, right-skewed -dataset. In a right-skewed distribution, the mean is to the right of the -median. As illustrated by a greater overlap in the frequencies of the actual -and predicted values, the median (quantile = 0.5) estimated by a quantile -regressor can be a more reliable estimator of a skewed distribution than the -mean. +This example compares the predictions generated by a quantile regression +forest (QRF) and a standard random forest regressor on a synthetic +right-skewed dataset. In a right-skewed distribution, the mean is to the right +of the median. As illustrated by a greater overlap in the frequencies of the +actual and predicted values, the median (quantile = 0.5) predicted by a +quantile regressor can be a more reliable estimator of a skewed distribution +than the mean. """ import altair as alt @@ -23,9 +23,10 @@ alt.data_transformers.disable_max_rows() -rng = check_random_state(0) +random_seed = 0 +rng = check_random_state(random_seed) -quantiles = np.arange(0, 1.05, 0.05).round(2).tolist() +quantiles = np.linspace(0, 1, num=21, endpoint=True).round(2).tolist() # Create right-skewed dataset. 
n_samples = 5000 @@ -35,10 +36,10 @@ y = skewnorm_rv.rvs(n_samples) X = rng.randn(n_samples, 2) * y.reshape(-1, 1) -regr_rf = RandomForestRegressor(n_estimators=10, random_state=0) -regr_qrf = RandomForestQuantileRegressor(n_estimators=10, random_state=0) +regr_rf = RandomForestRegressor(n_estimators=10, random_state=random_seed) +regr_qrf = RandomForestQuantileRegressor(n_estimators=10, random_state=random_seed) -X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_seed) regr_rf.fit(X_train, y_train) regr_qrf.fit(X_train, y_train) @@ -69,7 +70,7 @@ def plot_prediction_histograms(df, legend): min=0, max=1, step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1), - name="Quantile: ", + name="Predicted Quantile: ", ) q_val = alt.selection_point( @@ -114,7 +115,11 @@ def plot_prediction_histograms(df, legend): ], ) .configure_range(category=alt.RangeScheme(list(legend.values()))) - .properties(height=400, width=650) + .properties( + height=400, + width=650, + title="Distribution of RF vs. QRF Predictions on Right-Skewed Distribution", + ) ) return chart diff --git a/examples/plot_treeshap_example.py b/examples/plot_treeshap_example.py index 271ad72..a7dc33c 100644 --- a/examples/plot_treeshap_example.py +++ b/examples/plot_treeshap_example.py @@ -22,9 +22,10 @@ from quantile_forest import RandomForestQuantileRegressor +random_seed = 0 n_samples = 500 test_idx = 0 -quantiles = np.arange(0, 1.1, 0.1).round(1).tolist() +quantiles = np.linspace(0, 1, num=11, endpoint=True).round(1).tolist() def get_shap_values(qrf, X, quantile=0.5, **kwargs): @@ -69,9 +70,9 @@ def get_shap_value_by_index(shap_values, index): X = X.iloc[:n_samples] y = y[:n_samples] y *= 100_000 # convert to dollars -X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_seed) -qrf = RandomForestQuantileRegressor(random_state=0) +qrf = RandomForestQuantileRegressor(random_state=random_seed) qrf.fit(X_train, y_train) df = pd.concat( @@ -103,7 +104,7 @@ def plot_shap_waterfall_with_quantiles(df, height=300): min=0, max=1, step=0.5 if len(quantiles) == 1 else 1 / (len(quantiles) - 1), - name="Quantile: ", + name="Predicted Quantile: ", ) q_val = alt.selection_point(