Skip to content

Commit

Permalink
ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul-Saves committed Oct 18, 2024
1 parent 0d3df8f commit fb79589
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 108 deletions.
121 changes: 68 additions & 53 deletions tutorial/Kernels/hale_periodic_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,15 @@
"""

from sklearn.datasets import fetch_openml
from smt.kernels import Product
from scipy.io import loadmat
from smt.kernels import SquarSinExp
from smt.kernels import Kernel
from smt.kernels import Matern32
from smt.kernels import Matern52
from smt.kernels import Constant

from smt.kernels import PowExp
import numpy as np
from smt.surrogate_models import KRG

import pickle
import os
import matplotlib.pyplot as plt

co2 = fetch_openml(data_id=41187, as_frame=True)

import polars as pl
Expand All @@ -30,7 +24,7 @@
pl.date("year", "month", "day"), "co2"
)

co2_data = (
co2_data = (
co2_data.sort(by="date")
.group_by_dynamic("date", every="1mo")
.agg(pl.col("co2").mean())
Expand All @@ -41,72 +35,93 @@
pl.col("date").dt.year() + pl.col("date").dt.month() / 12
).to_numpy()[:100]
y = co2_data["co2"].to_numpy()[:100]
X_smt=X.reshape(-1)[:100]
X_smt = X.reshape(-1)[:100]

with open("./hale.pickle", "rb") as pickle_file:
(
nodes_tr,
edges_in_tr,
edges_tar_tr,
globals_tr,
senders,
receivers,
list_tse_raw,
) = pickle.load(pickle_file)

with open('./hale.pickle', 'rb') as pickle_file:
nodes_tr, edges_in_tr, edges_tar_tr, globals_tr, senders, receivers, list_tse_raw = pickle.load(pickle_file)



class Period(Kernel):
def __call__(self,d,grad_ind=None,hess_ind=None,derivative_params=None):
n=self.theta.shape[0]
theta2=self.theta[:n//2]
theta3=self.theta[n//2:]
return np.atleast_2d(np.exp(-np.sum(theta2*np.sin(theta3*d)**2,axis=1))).T
def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None):
n = self.theta.shape[0]
theta2 = self.theta[: n // 2]
theta3 = self.theta[n // 2 :]
return np.atleast_2d(
np.exp(-np.sum(theta2 * np.sin(theta3 * d) ** 2, axis=1))
).T


class RBF(Kernel):
def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None):
theta=self.theta.reshape(1, d.shape[1])
#r=np.zeros((d.shape[0],1))
r=np.exp(-np.sum(theta*d**2,axis=1))
theta = self.theta.reshape(1, d.shape[1])
# r=np.zeros((d.shape[0],1))
r = np.exp(-np.sum(theta * d**2, axis=1))
return np.atleast_2d(r).T


class Rat_quad(Kernel):
def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None):
n=self.theta.shape[0]
theta4=self.theta[:n//2]
theta5=self.theta[n//2:]
r3=(1+d**2/(2*theta4*theta5))**(-theta4)
n = self.theta.shape[0]
theta4 = self.theta[: n // 2]
theta5 = self.theta[n // 2 :]
r3 = (1 + d**2 / (2 * theta4 * theta5)) ** (-theta4)
return r3


class LocalPeriod(Kernel):
def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None):
n=self.theta.shape[0]
theta1=self.theta[:n//6]
theta2=self.theta[n//6:2*n//6]
theta3=self.theta[2*n//6:3*n//6]
theta4=self.theta[3*n//6:4*n//6]
theta5=self.theta[4*n//6:5*n//6]
theta6=self.theta[5*n//6:]
r1=np.exp(-np.sum(theta1*d**2,axis=1))
r2=np.exp(-np.sum(theta2*np.sin(theta3*d)**2,axis=1))
r3=(1+d**2/(2*theta4*theta5))**(-theta4)
r4=np.exp(-np.sum(theta6*d**2,axis=1))
r=(np.atleast_2d(r4).T+r3+np.atleast_2d(r2).T*np.atleast_2d(r1).T)/3
n = self.theta.shape[0]
theta1 = self.theta[: n // 6]
theta2 = self.theta[n // 6 : 2 * n // 6]
theta3 = self.theta[2 * n // 6 : 3 * n // 6]
theta4 = self.theta[3 * n // 6 : 4 * n // 6]
theta5 = self.theta[4 * n // 6 : 5 * n // 6]
theta6 = self.theta[5 * n // 6 :]
r1 = np.exp(-np.sum(theta1 * d**2, axis=1))
r2 = np.exp(-np.sum(theta2 * np.sin(theta3 * d) ** 2, axis=1))
r3 = (1 + d**2 / (2 * theta4 * theta5)) ** (-theta4)
r4 = np.exp(-np.sum(theta6 * d**2, axis=1))
r = (np.atleast_2d(r4).T + r3 + np.atleast_2d(r2).T * np.atleast_2d(r1).T) / 3
return np.atleast_2d(r).T


k_test=LocalPeriod([0.01,0.01,0.01,0.01,0.01,0.01])
k_test = LocalPeriod([0.01, 0.01, 0.01, 0.01, 0.01, 0.01])

k=Period([0.01,0.01])*Period([0.01,0.01])+Matern32([0.01])
k = Period([0.01, 0.01]) * Period([0.01, 0.01]) + Matern32([0.01])


for i in range(1,2):
for j in range(10,11):

x_Hale=np.arange(0,nodes_tr.shape[2]//84,7)
y_Hale=nodes_tr[i,j,x_Hale]
sm=KRG(corr=k, noise0=[1e-6], hyper_opt="Cobyla",n_start=15)
for i in range(1, 2):
for j in range(10, 11):
x_Hale = np.arange(0, nodes_tr.shape[2] // 84, 7)
y_Hale = nodes_tr[i, j, x_Hale]
sm = KRG(corr=k, noise0=[1e-6], hyper_opt="Cobyla", n_start=15)
sm.set_training_values(x_Hale, y_Hale)
sm.train()
print(k.theta)

X_test = np.arange(0,nodes_tr.shape[2],1)
X_test= X_test[:1125]
mean_y_pred=sm.predict_values(X_test)
X_test = np.arange(0, nodes_tr.shape[2], 1)
X_test = X_test[:1125]
mean_y_pred = sm.predict_values(X_test)

plt.plot(X_test, mean_y_pred, color="tab:blue", alpha=0.5, label="Periodic GP")

plt.scatter(x_Hale,y_Hale,color="g",label="data")
plt.plot(X_test,nodes_tr[i,j,X_test],color="black",linestyle="dashed",alpha=0.3,label=f"ref {i} {j}")
plt.legend(loc='upper left')

plt.show()
plt.scatter(x_Hale, y_Hale, color="g", label="data")
plt.plot(
X_test,
nodes_tr[i, j, X_test],
color="black",
linestyle="dashed",
alpha=0.3,
label=f"ref {i} {j}",
)
plt.legend(loc="upper left")

plt.show()
113 changes: 59 additions & 54 deletions tutorial/Misc/Split_Conformal_prediction_SMT.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,12 @@
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.gaussian_process import GaussianProcessRegressor\n",
"from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C\n",
"from sklearn.preprocessing import StandardScaler\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"#from tqdm import tqdm\n",
"from scipy.stats import norm\n",
"from smt.kernels import Product\n",
"from scipy.io import loadmat\n",
"from smt.kernels import SquarSinExp\n",
"from smt.kernels import Kernel\n",
"from smt.kernels import Matern32\n",
"from smt.kernels import Matern52\n",
"\n",
"# from tqdm import tqdm\n",
"from smt.kernels import Constant\n",
"from smt.kernels import PowExp\n",
"import numpy as np\n",
"from smt.surrogate_models import KRG"
]
},
Expand All @@ -47,12 +37,13 @@
"# Quantile functions\n",
"def quantile_cp(z, alpha):\n",
" n = len(z)\n",
" q = np.sort(z)[int(np.ceil((1-alpha)*(n+1))-1)]\n",
" q = np.sort(z)[int(np.ceil((1 - alpha) * (n + 1)) - 1)]\n",
" return q\n",
"\n",
"\n",
"def quantile_cp_minus(z, alpha):\n",
" n = len(z)\n",
" q = np.sort(z)[int(np.floor(alpha*(n+1))-1)]\n",
" q = np.sort(z)[int(np.floor(alpha * (n + 1)) - 1)]\n",
" return q"
]
},
Expand Down Expand Up @@ -88,19 +79,20 @@
"# Function to generate random heteroskedastic data\n",
"def make_random_data(n, std_dev):\n",
" x = np.random.uniform(-1, 1, n)\n",
" y = x**3 + 2 * np.exp(-6 * (x - 0.3)**2)\n",
" y = x**3 + 2 * np.exp(-6 * (x - 0.3) ** 2)\n",
" y = y + np.random.normal(0, std_dev * np.abs(x), n)\n",
" return pd.DataFrame({'x': x, 'y': y})\n",
" return pd.DataFrame({\"x\": x, \"y\": y})\n",
"\n",
"\n",
"# Generate train and test data\n",
"np.random.seed(12345)\n",
"ntrain = 1000\n",
"ntest = 1000\n",
"nvisu = 1000\n",
"std_dev = 1/5\n",
"std_dev = 1 / 5\n",
"train_data = make_random_data(ntrain, std_dev)\n",
"test_data = make_random_data(ntest, std_dev)\n",
"visu_data = pd.DataFrame({'x': np.linspace(-1, 1, nvisu)})"
"visu_data = pd.DataFrame({\"x\": np.linspace(-1, 1, nvisu)})"
]
},
{
Expand Down Expand Up @@ -233,48 +225,50 @@
}
],
"source": [
"k=PowExp([0.01])*Constant([0.01])\n",
"k = PowExp([0.01]) * Constant([0.01])\n",
"\n",
"\n",
"gp = KRG(corr=\"squar_exp\",noise0=[1e-6], hyper_opt=\"Cobyla\",n_start=20)\n",
"gp = KRG(corr=\"squar_exp\", noise0=[1e-6], hyper_opt=\"Cobyla\", n_start=20)\n",
"\n",
"# Pretraining Gaussian Process\n",
"X_pretrain = pretrain_data['x'].values.reshape(-1, 1)\n",
"y_pretrain = pretrain_data['y'].values\n",
"X_pretrain = pretrain_data[\"x\"].values.reshape(-1, 1)\n",
"y_pretrain = pretrain_data[\"y\"].values\n",
"gp.set_training_values(X_pretrain, y_pretrain)\n",
"gp.train()\n",
"gp_pred_pretrain = gp.predict_values(X_pretrain)[:,0]\n",
"gp_std_pretrain = np.sqrt(gp.predict_variances(X_pretrain))[:,0]\n",
"gp_pred_pretrain = gp.predict_values(X_pretrain)[:, 0]\n",
"gp_std_pretrain = np.sqrt(gp.predict_variances(X_pretrain))[:, 0]\n",
"\n",
"# Compute residuals and fit another GP on residuals\n",
"res_gp_pred_pretrain = np.abs(y_pretrain - gp_pred_pretrain)\n",
"gp_res = KRG(corr=\"squar_exp\", noise0=[1e-6],hyper_opt=\"Cobyla\",n_start=20)\n",
"gp_res = KRG(corr=\"squar_exp\", noise0=[1e-6], hyper_opt=\"Cobyla\", n_start=20)\n",
"gp_res.set_training_values(X_pretrain, res_gp_pred_pretrain)\n",
"gp_res.train()\n",
"\n",
"# Predictions on calibration data\n",
"X_cal = cal_data['x'].values.reshape(-1, 1)\n",
"y_cal = cal_data['y'].values\n",
"gp_pred_cal = gp.predict_values(X_cal)[:,0]\n",
"gp_std_cal = np.sqrt(gp.predict_variances(X_cal))[:,0]\n",
"gp_res_pred_cal = gp_res.predict_values(X_cal)[:,0]\n",
"X_cal = cal_data[\"x\"].values.reshape(-1, 1)\n",
"y_cal = cal_data[\"y\"].values\n",
"gp_pred_cal = gp.predict_values(X_cal)[:, 0]\n",
"gp_std_cal = np.sqrt(gp.predict_variances(X_cal))[:, 0]\n",
"gp_res_pred_cal = gp_res.predict_values(X_cal)[:, 0]\n",
"\n",
"res_gp_pred_cal = np.abs(y_cal - gp_pred_cal) / gp_res_pred_cal\n",
"gp_q_cal = quantile_cp(res_gp_pred_cal, alpha)\n",
"\n",
"# Predictions for visualization data\n",
"X_visu = visu_data['x'].values.reshape(-1, 1)\n",
"gp_pred_visu = gp.predict_values(X_visu)[:,0]\n",
"gp_std_visu = np.sqrt(gp.predict_variances(X_visu))[:,0]\n",
"gp_res_pred_visu = gp_res.predict_values(X_visu)[:,0]\n",
"X_visu = visu_data[\"x\"].values.reshape(-1, 1)\n",
"gp_pred_visu = gp.predict_values(X_visu)[:, 0]\n",
"gp_std_visu = np.sqrt(gp.predict_variances(X_visu))[:, 0]\n",
"gp_res_pred_visu = gp_res.predict_values(X_visu)[:, 0]\n",
"\n",
"# Compute lower and upper bounds for the GP with rescaling based on residuals\n",
"gp_pred = pd.DataFrame({\n",
" 'x': visu_data['x'],\n",
" '.pred': gp_pred_visu,\n",
" '.pred_lower': gp_pred_visu - gp_res_pred_visu * gp_q_cal,\n",
" '.pred_upper': gp_pred_visu + gp_res_pred_visu * gp_q_cal\n",
"})"
"gp_pred = pd.DataFrame(\n",
" {\n",
" \"x\": visu_data[\"x\"],\n",
" \".pred\": gp_pred_visu,\n",
" \".pred_lower\": gp_pred_visu - gp_res_pred_visu * gp_q_cal,\n",
" \".pred_upper\": gp_pred_visu + gp_res_pred_visu * gp_q_cal,\n",
" }\n",
")"
]
},
{
Expand Down Expand Up @@ -316,20 +310,22 @@
],
"source": [
"# GP with rescaling from posterior standard deviation\n",
"gp_pred_cal2 = gp.predict_values(X_cal)[:,0]\n",
"gp_std_cal2 = np.sqrt(gp.predict_variances(X_cal))[:,0]\n",
"gp_pred_cal2 = gp.predict_values(X_cal)[:, 0]\n",
"gp_std_cal2 = np.sqrt(gp.predict_variances(X_cal))[:, 0]\n",
"res_gp_pred_cal2 = np.abs(y_cal - gp_pred_cal2) / gp_std_cal2\n",
"gp_q_cal2 = quantile_cp(res_gp_pred_cal2, alpha)\n",
"\n",
"gp_pred_visu2 = gp.predict_values(X_visu)[:,0]\n",
"gp_std_visu2= np.sqrt(gp.predict_variances(X_visu))[:,0]\n",
"gp_pred_visu2 = gp.predict_values(X_visu)[:, 0]\n",
"gp_std_visu2 = np.sqrt(gp.predict_variances(X_visu))[:, 0]\n",
"\n",
"gp_pred2 = pd.DataFrame({\n",
" 'x': visu_data['x'],\n",
" '.pred': gp_pred_visu2,\n",
" '.pred_lower': gp_pred_visu2 - gp_std_visu2* gp_q_cal2,\n",
" '.pred_upper': gp_pred_visu2+ gp_std_visu2 * gp_q_cal2\n",
"})"
"gp_pred2 = pd.DataFrame(\n",
" {\n",
" \"x\": visu_data[\"x\"],\n",
" \".pred\": gp_pred_visu2,\n",
" \".pred_lower\": gp_pred_visu2 - gp_std_visu2 * gp_q_cal2,\n",
" \".pred_upper\": gp_pred_visu2 + gp_std_visu2 * gp_q_cal2,\n",
" }\n",
")"
]
},
{
Expand All @@ -354,17 +350,26 @@
],
"source": [
"# Combine and visualize\n",
"model_list = {'Gaussian Process MAD': gp_pred, 'Gaussian Process Posterior sd': gp_pred2}\n",
"all_pred = pd.concat(model_list, names=[\"id\"]).reset_index(level=0).rename(columns={'id': 'model'})\n",
"model_list = {\n",
" \"Gaussian Process MAD\": gp_pred,\n",
" \"Gaussian Process Posterior sd\": gp_pred2,\n",
"}\n",
"all_pred = (\n",
" pd.concat(model_list, names=[\"id\"])\n",
" .reset_index(level=0)\n",
" .rename(columns={\"id\": \"model\"})\n",
")\n",
"\n",
"sns.set(style=\"whitegrid\")\n",
"g = sns.FacetGrid(all_pred, col=\"model\", height=5, aspect=1.75)\n",
"g.map(plt.plot, \"x\", \".pred\", color=\"blue\", lw=1)\n",
"g.map(plt.plot, \"x\", \".pred_lower\", color=\"orange\", lw=1.75)\n",
"g.map(plt.plot, \"x\", \".pred_upper\", color=\"orange\", lw=1.75)\n",
"for ax in g.axes.flat:\n",
" sns.scatterplot(x=pretrain_data['x'], y=pretrain_data['y'], color='blue', alpha=0.05, ax=ax)\n",
" sns.scatterplot(x=cal_data['x'], y=cal_data['y'], color='orange', alpha=0.25, ax=ax)\n",
" sns.scatterplot(\n",
" x=pretrain_data[\"x\"], y=pretrain_data[\"y\"], color=\"blue\", alpha=0.05, ax=ax\n",
" )\n",
" sns.scatterplot(x=cal_data[\"x\"], y=cal_data[\"y\"], color=\"orange\", alpha=0.25, ax=ax)\n",
"\n",
"g.add_legend()\n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion tutorial/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ These tutorials introduce to use the opensource Surrogate Modeling Toolbox where

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SMTorg/smt/blob/master/tutorial/Misc/SMT_SGP_analytic.ipynb)

### Conformal_prediction
### Conformal prediction

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SMTorg/smt/blob/master/tutorial/Misc/Split_Conformal_prediction_SMT.ipynb)

Expand Down

0 comments on commit fb79589

Please sign in to comment.