From fb795899de2d6cbc899af88d3483bb4911e01a4e Mon Sep 17 00:00:00 2001 From: Paul-Saves Date: Fri, 18 Oct 2024 18:32:29 +0200 Subject: [PATCH] ruff --- tutorial/Kernels/hale_periodic_gp.py | 121 ++++++++++-------- .../Misc/Split_Conformal_prediction_SMT.ipynb | 113 ++++++++-------- tutorial/README.md | 2 +- 3 files changed, 128 insertions(+), 108 deletions(-) diff --git a/tutorial/Kernels/hale_periodic_gp.py b/tutorial/Kernels/hale_periodic_gp.py index 4e518472e..741a0bebc 100644 --- a/tutorial/Kernels/hale_periodic_gp.py +++ b/tutorial/Kernels/hale_periodic_gp.py @@ -7,21 +7,15 @@ """ from sklearn.datasets import fetch_openml -from smt.kernels import Product -from scipy.io import loadmat -from smt.kernels import SquarSinExp from smt.kernels import Kernel from smt.kernels import Matern32 -from smt.kernels import Matern52 -from smt.kernels import Constant -from smt.kernels import PowExp import numpy as np from smt.surrogate_models import KRG import pickle -import os import matplotlib.pyplot as plt + co2 = fetch_openml(data_id=41187, as_frame=True) import polars as pl @@ -30,7 +24,7 @@ pl.date("year", "month", "day"), "co2" ) -co2_data = ( +co2_data = ( co2_data.sort(by="date") .group_by_dynamic("date", every="1mo") .agg(pl.col("co2").mean()) @@ -41,72 +35,93 @@ pl.col("date").dt.year() + pl.col("date").dt.month() / 12 ).to_numpy()[:100] y = co2_data["co2"].to_numpy()[:100] -X_smt=X.reshape(-1)[:100] +X_smt = X.reshape(-1)[:100] + +with open("./hale.pickle", "rb") as pickle_file: + ( + nodes_tr, + edges_in_tr, + edges_tar_tr, + globals_tr, + senders, + receivers, + list_tse_raw, + ) = pickle.load(pickle_file) -with open('./hale.pickle', 'rb') as pickle_file: - nodes_tr, edges_in_tr, edges_tar_tr, globals_tr, senders, receivers, list_tse_raw = pickle.load(pickle_file) - - class Period(Kernel): - def __call__(self,d,grad_ind=None,hess_ind=None,derivative_params=None): - n=self.theta.shape[0] - theta2=self.theta[:n//2] - theta3=self.theta[n//2:] - return np.atleast_2d(np.exp(-np.sum(theta2*np.sin(theta3*d)**2,axis=1))).T + def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None): + n = self.theta.shape[0] + theta2 = self.theta[: n // 2] + theta3 = self.theta[n // 2 :] + return np.atleast_2d( + np.exp(-np.sum(theta2 * np.sin(theta3 * d) ** 2, axis=1)) + ).T + + class RBF(Kernel): def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None): - theta=self.theta.reshape(1, d.shape[1]) - #r=np.zeros((d.shape[0],1)) - r=np.exp(-np.sum(theta*d**2,axis=1)) + theta = self.theta.reshape(1, d.shape[1]) + # r=np.zeros((d.shape[0],1)) + r = np.exp(-np.sum(theta * d**2, axis=1)) return np.atleast_2d(r).T + + class Rat_quad(Kernel): def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None): - n=self.theta.shape[0] - theta4=self.theta[:n//2] - theta5=self.theta[n//2:] - r3=(1+d**2/(2*theta4*theta5))**(-theta4) + n = self.theta.shape[0] + theta4 = self.theta[: n // 2] + theta5 = self.theta[n // 2 :] + r3 = (1 + d**2 / (2 * theta4 * theta5)) ** (-theta4) return r3 + + class LocalPeriod(Kernel): def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None): - n=self.theta.shape[0] - theta1=self.theta[:n//6] - theta2=self.theta[n//6:2*n//6] - theta3=self.theta[2*n//6:3*n//6] - theta4=self.theta[3*n//6:4*n//6] - theta5=self.theta[4*n//6:5*n//6] - theta6=self.theta[5*n//6:] - r1=np.exp(-np.sum(theta1*d**2,axis=1)) - r2=np.exp(-np.sum(theta2*np.sin(theta3*d)**2,axis=1)) - r3=(1+d**2/(2*theta4*theta5))**(-theta4) - r4=np.exp(-np.sum(theta6*d**2,axis=1)) 
- r=(np.atleast_2d(r4).T+r3+np.atleast_2d(r2).T*np.atleast_2d(r1).T)/3 + n = self.theta.shape[0] + theta1 = self.theta[: n // 6] + theta2 = self.theta[n // 6 : 2 * n // 6] + theta3 = self.theta[2 * n // 6 : 3 * n // 6] + theta4 = self.theta[3 * n // 6 : 4 * n // 6] + theta5 = self.theta[4 * n // 6 : 5 * n // 6] + theta6 = self.theta[5 * n // 6 :] + r1 = np.exp(-np.sum(theta1 * d**2, axis=1)) + r2 = np.exp(-np.sum(theta2 * np.sin(theta3 * d) ** 2, axis=1)) + r3 = (1 + d**2 / (2 * theta4 * theta5)) ** (-theta4) + r4 = np.exp(-np.sum(theta6 * d**2, axis=1)) + r = (np.atleast_2d(r4).T + r3 + np.atleast_2d(r2).T * np.atleast_2d(r1).T) / 3 return np.atleast_2d(r).T -k_test=LocalPeriod([0.01,0.01,0.01,0.01,0.01,0.01]) +k_test = LocalPeriod([0.01, 0.01, 0.01, 0.01, 0.01, 0.01]) -k=Period([0.01,0.01])*Period([0.01,0.01])+Matern32([0.01]) +k = Period([0.01, 0.01]) * Period([0.01, 0.01]) + Matern32([0.01]) -for i in range(1,2): - for j in range(10,11): - - x_Hale=np.arange(0,nodes_tr.shape[2]//84,7) - y_Hale=nodes_tr[i,j,x_Hale] - sm=KRG(corr=k, noise0=[1e-6], hyper_opt="Cobyla",n_start=15) +for i in range(1, 2): + for j in range(10, 11): + x_Hale = np.arange(0, nodes_tr.shape[2] // 84, 7) + y_Hale = nodes_tr[i, j, x_Hale] + sm = KRG(corr=k, noise0=[1e-6], hyper_opt="Cobyla", n_start=15) sm.set_training_values(x_Hale, y_Hale) sm.train() print(k.theta) - X_test = np.arange(0,nodes_tr.shape[2],1) - X_test= X_test[:1125] - mean_y_pred=sm.predict_values(X_test) + X_test = np.arange(0, nodes_tr.shape[2], 1) + X_test = X_test[:1125] + mean_y_pred = sm.predict_values(X_test) plt.plot(X_test, mean_y_pred, color="tab:blue", alpha=0.5, label="Periodic GP") - plt.scatter(x_Hale,y_Hale,color="g",label="data") - plt.plot(X_test,nodes_tr[i,j,X_test],color="black",linestyle="dashed",alpha=0.3,label=f"ref {i} {j}") - plt.legend(loc='upper left') - - plt.show() \ No newline at end of file + plt.scatter(x_Hale, y_Hale, color="g", label="data") + plt.plot( + X_test, + nodes_tr[i, j, X_test], + color="black", + linestyle="dashed", + alpha=0.3, + label=f"ref {i} {j}", + ) + plt.legend(loc="upper left") + + plt.show() diff --git a/tutorial/Misc/Split_Conformal_prediction_SMT.ipynb b/tutorial/Misc/Split_Conformal_prediction_SMT.ipynb index d7eec6508..a984a24b8 100644 --- a/tutorial/Misc/Split_Conformal_prediction_SMT.ipynb +++ b/tutorial/Misc/Split_Conformal_prediction_SMT.ipynb @@ -16,22 +16,12 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", - "from sklearn.gaussian_process import GaussianProcessRegressor\n", - "from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C\n", - "from sklearn.preprocessing import StandardScaler\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", - "#from tqdm import tqdm\n", - "from scipy.stats import norm\n", - "from smt.kernels import Product\n", - "from scipy.io import loadmat\n", - "from smt.kernels import SquarSinExp\n", - "from smt.kernels import Kernel\n", - "from smt.kernels import Matern32\n", - "from smt.kernels import Matern52\n", + "\n", + "# from tqdm import tqdm\n", "from smt.kernels import Constant\n", "from smt.kernels import PowExp\n", - "import numpy as np\n", "from smt.surrogate_models import KRG" ] }, @@ -47,12 +37,13 @@ "# Quantile functions\n", "def quantile_cp(z, alpha):\n", " n = len(z)\n", - " q = np.sort(z)[int(np.ceil((1-alpha)*(n+1))-1)]\n", + " q = np.sort(z)[int(np.ceil((1 - alpha) * (n + 1)) - 1)]\n", " return q\n", "\n", + "\n", "def quantile_cp_minus(z, alpha):\n", " n = len(z)\n", - " q = 
np.sort(z)[int(np.floor(alpha*(n+1))-1)]\n", + " q = np.sort(z)[int(np.floor(alpha * (n + 1)) - 1)]\n", " return q" ] }, @@ -88,19 +79,20 @@ "# Function to generate random heteroskedastic data\n", "def make_random_data(n, std_dev):\n", " x = np.random.uniform(-1, 1, n)\n", - " y = x**3 + 2 * np.exp(-6 * (x - 0.3)**2)\n", + " y = x**3 + 2 * np.exp(-6 * (x - 0.3) ** 2)\n", " y = y + np.random.normal(0, std_dev * np.abs(x), n)\n", - " return pd.DataFrame({'x': x, 'y': y})\n", + " return pd.DataFrame({\"x\": x, \"y\": y})\n", + "\n", "\n", "# Generate train and test data\n", "np.random.seed(12345)\n", "ntrain = 1000\n", "ntest = 1000\n", "nvisu = 1000\n", - "std_dev = 1/5\n", + "std_dev = 1 / 5\n", "train_data = make_random_data(ntrain, std_dev)\n", "test_data = make_random_data(ntest, std_dev)\n", - "visu_data = pd.DataFrame({'x': np.linspace(-1, 1, nvisu)})" + "visu_data = pd.DataFrame({\"x\": np.linspace(-1, 1, nvisu)})" ] }, { @@ -233,48 +225,50 @@ } ], "source": [ - "k=PowExp([0.01])*Constant([0.01])\n", + "k = PowExp([0.01]) * Constant([0.01])\n", "\n", "\n", - "gp = KRG(corr=\"squar_exp\",noise0=[1e-6], hyper_opt=\"Cobyla\",n_start=20)\n", + "gp = KRG(corr=\"squar_exp\", noise0=[1e-6], hyper_opt=\"Cobyla\", n_start=20)\n", "\n", "# Pretraining Gaussian Process\n", - "X_pretrain = pretrain_data['x'].values.reshape(-1, 1)\n", - "y_pretrain = pretrain_data['y'].values\n", + "X_pretrain = pretrain_data[\"x\"].values.reshape(-1, 1)\n", + "y_pretrain = pretrain_data[\"y\"].values\n", "gp.set_training_values(X_pretrain, y_pretrain)\n", "gp.train()\n", - "gp_pred_pretrain = gp.predict_values(X_pretrain)[:,0]\n", - "gp_std_pretrain = np.sqrt(gp.predict_variances(X_pretrain))[:,0]\n", + "gp_pred_pretrain = gp.predict_values(X_pretrain)[:, 0]\n", + "gp_std_pretrain = np.sqrt(gp.predict_variances(X_pretrain))[:, 0]\n", "\n", "# Compute residuals and fit another GP on residuals\n", "res_gp_pred_pretrain = np.abs(y_pretrain - gp_pred_pretrain)\n", - "gp_res = KRG(corr=\"squar_exp\", noise0=[1e-6],hyper_opt=\"Cobyla\",n_start=20)\n", + "gp_res = KRG(corr=\"squar_exp\", noise0=[1e-6], hyper_opt=\"Cobyla\", n_start=20)\n", "gp_res.set_training_values(X_pretrain, res_gp_pred_pretrain)\n", "gp_res.train()\n", "\n", "# Predictions on calibration data\n", - "X_cal = cal_data['x'].values.reshape(-1, 1)\n", - "y_cal = cal_data['y'].values\n", - "gp_pred_cal = gp.predict_values(X_cal)[:,0]\n", - "gp_std_cal = np.sqrt(gp.predict_variances(X_cal))[:,0]\n", - "gp_res_pred_cal = gp_res.predict_values(X_cal)[:,0]\n", + "X_cal = cal_data[\"x\"].values.reshape(-1, 1)\n", + "y_cal = cal_data[\"y\"].values\n", + "gp_pred_cal = gp.predict_values(X_cal)[:, 0]\n", + "gp_std_cal = np.sqrt(gp.predict_variances(X_cal))[:, 0]\n", + "gp_res_pred_cal = gp_res.predict_values(X_cal)[:, 0]\n", "\n", "res_gp_pred_cal = np.abs(y_cal - gp_pred_cal) / gp_res_pred_cal\n", "gp_q_cal = quantile_cp(res_gp_pred_cal, alpha)\n", "\n", "# Predictions for visualization data\n", - "X_visu = visu_data['x'].values.reshape(-1, 1)\n", - "gp_pred_visu = gp.predict_values(X_visu)[:,0]\n", - "gp_std_visu = np.sqrt(gp.predict_variances(X_visu))[:,0]\n", - "gp_res_pred_visu = gp_res.predict_values(X_visu)[:,0]\n", + "X_visu = visu_data[\"x\"].values.reshape(-1, 1)\n", + "gp_pred_visu = gp.predict_values(X_visu)[:, 0]\n", + "gp_std_visu = np.sqrt(gp.predict_variances(X_visu))[:, 0]\n", + "gp_res_pred_visu = gp_res.predict_values(X_visu)[:, 0]\n", "\n", "# Compute lower and upper bounds for the GP with rescaling based on residuals\n", - "gp_pred = 
pd.DataFrame({\n", - " 'x': visu_data['x'],\n", - " '.pred': gp_pred_visu,\n", - " '.pred_lower': gp_pred_visu - gp_res_pred_visu * gp_q_cal,\n", - " '.pred_upper': gp_pred_visu + gp_res_pred_visu * gp_q_cal\n", - "})" + "gp_pred = pd.DataFrame(\n", + " {\n", + " \"x\": visu_data[\"x\"],\n", + " \".pred\": gp_pred_visu,\n", + " \".pred_lower\": gp_pred_visu - gp_res_pred_visu * gp_q_cal,\n", + " \".pred_upper\": gp_pred_visu + gp_res_pred_visu * gp_q_cal,\n", + " }\n", + ")" ] }, { @@ -316,20 +310,22 @@ ], "source": [ "# GP with rescaling from posterior standard deviation\n", - "gp_pred_cal2 = gp.predict_values(X_cal)[:,0]\n", - "gp_std_cal2 = np.sqrt(gp.predict_variances(X_cal))[:,0]\n", + "gp_pred_cal2 = gp.predict_values(X_cal)[:, 0]\n", + "gp_std_cal2 = np.sqrt(gp.predict_variances(X_cal))[:, 0]\n", "res_gp_pred_cal2 = np.abs(y_cal - gp_pred_cal2) / gp_std_cal2\n", "gp_q_cal2 = quantile_cp(res_gp_pred_cal2, alpha)\n", "\n", - "gp_pred_visu2 = gp.predict_values(X_visu)[:,0]\n", - "gp_std_visu2= np.sqrt(gp.predict_variances(X_visu))[:,0]\n", + "gp_pred_visu2 = gp.predict_values(X_visu)[:, 0]\n", + "gp_std_visu2 = np.sqrt(gp.predict_variances(X_visu))[:, 0]\n", "\n", - "gp_pred2 = pd.DataFrame({\n", - " 'x': visu_data['x'],\n", - " '.pred': gp_pred_visu2,\n", - " '.pred_lower': gp_pred_visu2 - gp_std_visu2* gp_q_cal2,\n", - " '.pred_upper': gp_pred_visu2+ gp_std_visu2 * gp_q_cal2\n", - "})" + "gp_pred2 = pd.DataFrame(\n", + " {\n", + " \"x\": visu_data[\"x\"],\n", + " \".pred\": gp_pred_visu2,\n", + " \".pred_lower\": gp_pred_visu2 - gp_std_visu2 * gp_q_cal2,\n", + " \".pred_upper\": gp_pred_visu2 + gp_std_visu2 * gp_q_cal2,\n", + " }\n", + ")" ] }, { @@ -354,8 +350,15 @@ ], "source": [ "# Combine and visualize\n", - "model_list = {'Gaussian Process MAD': gp_pred, 'Gaussian Process Posterior sd': gp_pred2}\n", - "all_pred = pd.concat(model_list, names=[\"id\"]).reset_index(level=0).rename(columns={'id': 'model'})\n", + "model_list = {\n", + " \"Gaussian Process MAD\": gp_pred,\n", + " \"Gaussian Process Posterior sd\": gp_pred2,\n", + "}\n", + "all_pred = (\n", + " pd.concat(model_list, names=[\"id\"])\n", + " .reset_index(level=0)\n", + " .rename(columns={\"id\": \"model\"})\n", + ")\n", "\n", "sns.set(style=\"whitegrid\")\n", "g = sns.FacetGrid(all_pred, col=\"model\", height=5, aspect=1.75)\n", @@ -363,8 +366,10 @@ "g.map(plt.plot, \"x\", \".pred_lower\", color=\"orange\", lw=1.75)\n", "g.map(plt.plot, \"x\", \".pred_upper\", color=\"orange\", lw=1.75)\n", "for ax in g.axes.flat:\n", - " sns.scatterplot(x=pretrain_data['x'], y=pretrain_data['y'], color='blue', alpha=0.05, ax=ax)\n", - " sns.scatterplot(x=cal_data['x'], y=cal_data['y'], color='orange', alpha=0.25, ax=ax)\n", + " sns.scatterplot(\n", + " x=pretrain_data[\"x\"], y=pretrain_data[\"y\"], color=\"blue\", alpha=0.05, ax=ax\n", + " )\n", + " sns.scatterplot(x=cal_data[\"x\"], y=cal_data[\"y\"], color=\"orange\", alpha=0.25, ax=ax)\n", "\n", "g.add_legend()\n", "\n", diff --git a/tutorial/README.md b/tutorial/README.md index 6db8b74cc..9a9f3f3cb 100644 --- a/tutorial/README.md +++ b/tutorial/README.md @@ -74,7 +74,7 @@ These tutorials introduce to use the opensource Surrogate Modeling Toolbox where [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SMTorg/smt/blob/master/tutorial/Misc/SMT_SGP_analytic.ipynb) -### Conformal_prediction +### Conformal prediction [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SMTorg/smt/blob/master/tutorial/Misc/Split_Conformal_prediction_SMT.ipynb)
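
For readers trying the reformatted kernels outside the tutorial: hale_periodic_gp.py above illustrates SMT's custom-kernel pattern — subclass smt.kernels.Kernel, implement __call__ on the componentwise distance matrix d, and compose instances with * and +. Below is a minimal, self-contained sketch of that pattern; the Period class is copied from the patch, while the synthetic training data and the hyperparameter start values are illustrative assumptions, not taken from the tutorial.

    import numpy as np
    from smt.kernels import Kernel, Matern32
    from smt.surrogate_models import KRG

    class Period(Kernel):
        # Periodic correlation exp(-sum(theta2 * sin(theta3 * d)**2)):
        # the first half of theta sets the lengthscale, the second the frequency.
        def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None):
            n = self.theta.shape[0]
            theta2 = self.theta[: n // 2]
            theta3 = self.theta[n // 2 :]
            return np.atleast_2d(
                np.exp(-np.sum(theta2 * np.sin(theta3 * d) ** 2, axis=1))
            ).T

    # Same composition as the tutorial: product of two periodic kernels plus a Matern 3/2 term.
    k = Period([0.01, 0.01]) * Period([0.01, 0.01]) + Matern32([0.01])

    # Synthetic periodic signal standing in for the HALE time series (assumption).
    x = np.linspace(0.0, 10.0, 50)
    y = np.sin(2.0 * np.pi * x) + 0.1 * x

    sm = KRG(corr=k, noise0=[1e-6], hyper_opt="Cobyla", n_start=15)
    sm.set_training_values(x, y)
    sm.train()
    y_new = sm.predict_values(np.linspace(0.0, 10.0, 200))

The split-conformal notebook's quantile_cp uses the standard (n + 1) finite-sample correction: with n calibration scores, the interval half-width multiplier is the ceil((1 - alpha) * (n + 1))-th smallest score. A quick numeric check of the function as defined in the patch; the score values and alpha = 0.1 here are illustrative assumptions:

    import numpy as np

    def quantile_cp(z, alpha):
        n = len(z)
        return np.sort(z)[int(np.ceil((1 - alpha) * (n + 1)) - 1)]

    # e.g. normalized calibration residuals |y - mu| / sigma (illustrative values)
    scores = np.abs(np.random.default_rng(0).normal(size=99))
    q = quantile_cp(scores, alpha=0.1)  # ceil(0.9 * 100) = 90 -> 90th smallest of 99
    # Prediction interval mu +/- sigma * q then covers ~90% of fresh points.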