From 3cfc4974a9460d8f7c6df1ac789dc0953c1018fb Mon Sep 17 00:00:00 2001 From: RY4GIT <400mhs2@gmail.com> Date: Wed, 18 Dec 2024 21:17:05 -0800 Subject: [PATCH] add: aic and other perf metrics --- analysis/DrydownModel.py | 41 +++++++++++---- analysis/Event.py | 12 +++++ notebooks/fig_variable_labels.json | 14 +++++ notebooks/figs_stats.py | 4 +- notebooks/figs_stats_datapreprocess.py | 53 +++++++++++++++++-- notebooks/figs_stats_rev.py | 73 ++++++++++++++++++++++++-- 6 files changed, 178 insertions(+), 19 deletions(-) diff --git a/analysis/DrydownModel.py b/analysis/DrydownModel.py index 170f493..d9fda41 100644 --- a/analysis/DrydownModel.py +++ b/analysis/DrydownModel.py @@ -265,10 +265,12 @@ def fit_one_event(self, event): # Fit tau exponential model if self.run_tau_exp_model: try: - popt, pcov, y_opt, r_squared, aic, bic, _ = self.fit_tau_exp_model( - event + popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, _ = ( + self.fit_tau_exp_model(event) + ) + event.add_attributes( + "tau_exp", popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot ) - event.add_attributes("tau_exp", popt, pcov, y_opt, r_squared, aic, bic) except Exception as e: log.debug(f"Exception raised in the thread {self.thread_name}: {e}") return None @@ -277,7 +279,9 @@ def fit_one_event(self, event): # Fit tau exponential model if self.run_exp_model: try: - popt, pcov, y_opt, r_squared, aic, bic, _ = self.fit_exp_model(event) + popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, _ = self.fit_exp_model( + event + ) if self.is_stage1ET_active: est_theta_star = popt[2] @@ -292,7 +296,10 @@ def fit_one_event(self, event): y_opt, r_squared, aic, + aicc, bic, + ss_res, + ss_tot, np.nan, est_theta_star, est_theta_w, @@ -305,8 +312,8 @@ def fit_one_event(self, event): # Fit q model if self.run_q_model: try: - popt, pcov, y_opt, r_squared, aic, bic, p_value = self.fit_q_model( - event + popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, p_value = ( + self.fit_q_model(event) ) if self.is_stage1ET_active: @@ -322,7 +329,10 @@ def fit_one_event(self, event): y_opt, r_squared, aic, + aicc, bic, + ss_res, + ss_tot, p_value, est_theta_star, est_theta_w, @@ -372,7 +382,7 @@ def fit_model(self, event, model, bounds, p0, param_names): y_opt = model(event.x, *popt) # Calculate the residuals - r_squared, aic, bic, p_value = self.calc_performance_metrics( + r_squared, aic, aicc, bic, ss_res, ss_tot, p_value = self.calc_performance_metrics( y_obs=event.y, y_pred=y_opt, popt=popt, @@ -380,7 +390,7 @@ def fit_model(self, event, model, bounds, p0, param_names): param_names=param_names, ) - return popt, pcov, y_opt, r_squared, aic, bic, p_value + return popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, p_value except Exception as e: log.debug(f"Exception raised in the thread {self.thread_name}: {e}") @@ -407,13 +417,15 @@ def calc_performance_metrics(self, y_obs, y_pred, popt, pcov, param_names): # AIC and BIC try: aic = n * np.log(ss_res / n) + 2 * k + aicc = aic + (2 * k * (k + 1)) / (n - k - 1) except: aic = np.nan + aicc = np.nan try: bic = n * np.log(ss_res / n) + k * np.log(n) except: - aic = np.nan + bic = np.nan ############################ if "q" in param_names: @@ -437,7 +449,7 @@ def calc_performance_metrics(self, y_obs, y_pred, popt, pcov, param_names): else: p_value = np.nan - return r_squared, aic, bic, p_value + return r_squared, aic, aicc, bic, ss_res, ss_tot, p_value def fit_tau_exp_model(self, event): """Fits an exponential model to the given event data and returns the fitted parameters. @@ -780,7 +792,10 @@ def return_result_df(self): "tauexp_cov_theta_w_tau": event.tau_exp["cov_theta_w_tau"], "tauexp_r_squared": event.tau_exp["r_squared"], "tauexp_aic": event.tau_exp["aic"], + "tauexp_aicc": event.tau_exp["aicc"], "tauexp_bic": event.tau_exp["bic"], + "tauexp_ss_res": event.tau_exp["ss_res"], + "tauexp_ss_tot": event.tau_exp["ss_tot"], "tauexp_y_opt": event.tau_exp["y_opt"], } ) @@ -804,7 +819,10 @@ def return_result_df(self): ], "exp_r_squared": event.exp["r_squared"], "exp_aic": event.exp["aic"], + "exp_aicc": event.exp["aicc"], "exp_bic": event.exp["bic"], + "exp_ss_res": event.exp["ss_res"], + "exp_ss_tot": event.exp["ss_tot"], "exp_y_opt": event.exp["y_opt"], } ) @@ -831,7 +849,10 @@ def return_result_df(self): ], "q_r_squared": event.q["r_squared"], "q_aic": event.q["aic"], + "q_aicc": event.q["aicc"], "q_bic": event.q["bic"], + "q_ss_res": event.q["ss_res"], + "q_ss_tot": event.q["ss_tot"], "q_eq_1_p": event.q["q_eq_1_p"], "q_y_opt": event.q["y_opt"], } diff --git a/analysis/Event.py b/analysis/Event.py index cdd876f..c726ff9 100644 --- a/analysis/Event.py +++ b/analysis/Event.py @@ -46,7 +46,10 @@ def add_attributes( y_opt=[], r_squared=np.nan, aic=np.nan, + aicc=np.nan, bic=np.nan, + ss_res=np.nan, + ss_tot=np.nan, p_value=np.nan, est_theta_star=np.nan, est_theta_w=np.nan, @@ -83,7 +86,10 @@ def add_attributes( "y_opt": y_opt.tolist(), "r_squared": r_squared, "aic": aic, + "aicc": aicc, "bic": bic, + "ss_res": ss_res, + "ss_tot": ss_tot } if model_type == "exp": @@ -130,7 +136,10 @@ def add_attributes( "y_opt": y_opt.tolist(), "r_squared": r_squared, "aic": aic, + "aicc": aicc, "bic": bic, + "ss_res": ss_res, + "ss_tot": ss_tot, } if model_type == "q": @@ -189,7 +198,10 @@ def add_attributes( "y_opt": y_opt.tolist(), "r_squared": r_squared, "aic": aic, + "aicc": aicc, "bic": bic, + "ss_res": ss_res, + "ss_tot": ss_tot, "q_eq_1_p": p_value, } diff --git a/notebooks/fig_variable_labels.json b/notebooks/fig_variable_labels.json index 24e2d29..9913761 100644 --- a/notebooks/fig_variable_labels.json +++ b/notebooks/fig_variable_labels.json @@ -188,6 +188,20 @@ "unit": "", "lim": [-5, 5] }, + "diff_aicc_q_exp": { + "column_name": "diff_aicc_q_exp", + "symbol": "", + "label": "$AIC_c$ (Nonlinear - linear)", + "unit": "", + "lim": [-5, 5] + }, + "diff_aicc_q_tauexp": { + "column_name": "diff_aicc_q_tauexp", + "symbol": "", + "label": "$AIC_c$ (Nonlinear - $\\tau$-based linear)", + "unit": "", + "lim": [-5, 5] + }, "diff_bic_q_exp": { "column_name": "diff_aic_q_exp", "symbol": "", diff --git a/notebooks/figs_stats.py b/notebooks/figs_stats.py index a4013e6..a08a036 100644 --- a/notebooks/figs_stats.py +++ b/notebooks/figs_stats.py @@ -49,11 +49,11 @@ ################ CHANGE HERE FOR PATH CONFIG ################################## ############ CHANGE HERE FOR CHECKING DIFFERENT RESULTS ################### -dir_name = f"raraki_2024-05-13_global_piecewise" # "raraki_2024-02-02" # f"raraki_2023-11-25_global_95asmax" +dir_name = f"raraki_2024-12-18_init_q_0p5" # f"raraki_2024-05-13_global_piecewise" # "raraki_2024-02-02" # f"raraki_2023-11-25_global_95asmax" ########################################################################### # f"raraki_2024-05-13_global_piecewise" was used for the 1st version of the manuscript -# Ryoko Araki, Bryn Morgan, Hilary K McMillan, Kelly K Caylor. +# Ryoko Araki, Bryn Morgan, Hilary K McMillan, Kelly K Caylor. # Nonlinear Soil Moisture Loss Function Reveals Vegetation Responses to Water Availability. ESS Open Archive . August 01, 2024. # DOI: 10.22541/essoar.172251989.99347091/v1 diff --git a/notebooks/figs_stats_datapreprocess.py b/notebooks/figs_stats_datapreprocess.py index d02a797..eca190a 100644 --- a/notebooks/figs_stats_datapreprocess.py +++ b/notebooks/figs_stats_datapreprocess.py @@ -33,7 +33,7 @@ # %% Plot config ############ CHANGE HERE FOR CHECKING DIFFERENT RESULTS ################### -dir_name = f"raraki_2024-12-03_revision" # "raraki_2024-02-02" # f"raraki_2023-11-25_global_95asmax" +dir_name = f"raraki_2024-12-03_revision" # f"raraki_2024-12-03_revision" # "raraki_2024-02-02" # f"raraki_2023-11-25_global_95asmax" ############################|############################################### # f"raraki_2024-05-13_global_piecewise" was used for the 1st version of the manuscript @@ -125,7 +125,8 @@ # Get the binned ancillary information # %% -df.columns +print(df.columns) + # %% ############################################################################ # Calculate some stats for evaluation @@ -206,6 +207,32 @@ def check_1ts_range(row, verbose=False): return (dsdt_0 - dsdt_1) / (row["q_ETmax"] / z_mm) * (-1) +import numpy as np + + +def count_nonnan_sm(row): + input_string = row.sm + + # Processing the string + input_string = input_string.replace("\n", " np.nan") + input_string = input_string.replace(" nan", " np.nan") + input_string = input_string.strip("[]") + + # Converting to numpy array and handling np.nan + sm = np.array( + [ + float(value) if value != "np.nan" else np.nan + for value in input_string.split() + ] + ) + + # Counting the number of non-NaN values + count_non_nan = np.sum(~np.isnan(sm)) + + # Return the sm_range and the count of non-NaN values + return count_non_nan + + # Create new columns df["first3_avail2"] = df.apply(df_availability, axis=1) df["sm_range"] = df.apply(calculate_sm_range, axis=1) @@ -213,8 +240,26 @@ def check_1ts_range(row, verbose=False): pd.to_datetime(df["event_end"]) - pd.to_datetime(df["event_start"]) ).dt.days + 1 df["large_q_criteria"] = df.apply(check_1ts_range, axis=1) - - +df["event_ndays"] = df.apply(count_nonnan_sm, axis=1) + + +# # %% +# def calculate_aicc(row, k, aic_col): +# n = row["event_ndays"] +# aic = row[aic_col] +# if n > k + 1: # Avoid division by zero or invalid cases +# aicc = aic + (2 * k * (k + 1)) / (n - k - 1) +# else: +# aicc = np.nan # AICc is undefined if n <= k + 1 +# return aicc + + +# # Apply the calculation to each row in the DataFrame +# df["tauexp_aicc"] = df.apply(lambda row: calculate_aicc(row, 4, "tauexp_aic"), axis=1) +# df["exp_aicc"] = df.apply(lambda row: calculate_aicc(row, 3, "tauexp_aic"), axis=1) +# df["q_aicc"] = df.apply(lambda row: calculate_aicc(row, 4, "q_aic"), axis=1) +df = df.assign(diff_aicc_q_tauexp=df["q_aicc"] - df["tauexp_aicc"]) +df = df.assign(diff_aicc_q_exp=df["q_aicc"] - df["exp_aicc"]) # %% output_path = os.path.join(output_dir, dir_name, "all_results_processed.csv") df.to_csv(output_path) diff --git a/notebooks/figs_stats_rev.py b/notebooks/figs_stats_rev.py index e2005cc..2f5b3e1 100644 --- a/notebooks/figs_stats_rev.py +++ b/notebooks/figs_stats_rev.py @@ -49,7 +49,7 @@ ################ CHANGE HERE FOR PATH CONFIG ################################## ############ CHANGE HERE FOR CHECKING DIFFERENT RESULTS ################### -dir_name = f"raraki_2024-12-03_revision" # "raraki_2024-02-02" # f"raraki_2023-11-25_global_95asmax" +dir_name = f"raraki_2024-12-03_revision" # f"raraki_2024-12-03_revision" # "raraki_2024-02-02" # f"raraki_2023-11-25_global_95asmax" ########################################################################### # f"raraki_2024-05-13_global_piecewise" was used for the 1st version of the manuscript # f"raraki_2024-12-03_revision" was used for the revised version of the manuscript @@ -308,18 +308,26 @@ def count_model_performance(df, varname): print("linear vs nonlinear") count_model_performance(df_filt_q_and_exp, "diff_R2_q_exp") +print("\n") print(r"In terms of $AIC$") print("tau-linear vs nonlinear") count_model_performance(df_filt_q_and_tauexp, "diff_aic_q_tauexp") print("linear vs nonlinear") count_model_performance(df_filt_q_and_exp, "diff_aic_q_exp") +print("\n") print(r"In terms of $BIC$") print("tau-linear vs nonlinear") count_model_performance(df_filt_q_and_tauexp, "diff_bic_q_tauexp") print("linear vs nonlinear") count_model_performance(df_filt_q_and_exp, "diff_bic_q_exp") +print("\n") +print(r"In terms of $AICc$") +print("tau-linear vs nonlinear") +count_model_performance(df_filt_q_and_tauexp, "diff_aicc_q_tauexp") +print("linear vs nonlinear") +count_model_performance(df_filt_q_and_exp, "diff_aicc_q_exp") # %% ############################################################################ @@ -356,8 +364,14 @@ def plot_model_metrics(df, linearmodel, metric_name, threshold=None, save=False) plt.rcParams.update({"font.size": 30}) # Read df - x = df[f"{linearmodel}_{metric_name}"].values - y = df[f"q_{metric_name}"].values + x_varname = f"{linearmodel}_{metric_name}" + y_varname = f"q_{metric_name}" + + df = df[df[x_varname].notna() & np.isfinite(df[x_varname])] + df = df[df[y_varname].notna() & np.isfinite(df[y_varname])] + + x = df[x_varname].values + y = df[y_varname].values # Create a scatter plot # $ fig, ax = plt.subplots(figsize=(4.5 * 1.2, 4 * 1.2),) @@ -459,6 +473,13 @@ def plot_model_metrics(df, linearmodel, metric_name, threshold=None, save=False) df=df_filt_q_and_tauexp, linearmodel="tauexp", metric_name="bic", save=save ) +plot_model_metrics( + df=df_filt_q_and_exp, linearmodel="exp", metric_name="aicc", save=save +) +plot_model_metrics( + df=df_filt_q_and_tauexp, linearmodel="tauexp", metric_name="aicc", save=save +) + # %% Define plot_map ############################################################################ @@ -612,6 +633,52 @@ def print_global_stats(df, diff_var, model_desc): df_filt_q_and_tauexp, "diff_aic_q_exp", "nonlinear - tau-based linear" ) +# %% +var_key_exp = "diff_aicc_q_exp" +var_key_tauexp = "diff_aicc_q_tauexp" + +# Plot and save maps for exp model +plt.rcParams.update({"font.size": 12}) +fig_map_aicc, ax = plt.subplots( + figsize=(9, 9), subplot_kw={"projection": ccrs.Robinson()} +) +plot_map( + ax=ax, + df=df_filt_q_and_exp, + coord_info=coord_info, + cmap="RdBu_r", + norm=norm_exp, + var_item=var_dict[var_key_exp], + stat_type=stat_type, + bar_label=var_dict[var_key_exp]["label"], +) +if save: + save_figure(fig_map_aicc, fig_dir, f"aicc_map_{stat_type}_and_exp", "png", 900) + +# Print statistical summaries for exp model +print_global_stats(df_filt_q_and_exp, "diff_aicc_q_exp", "nonlinear - linear") + +# Plot and save maps for tauexp model +fig_map_aicc, ax = plt.subplots( + figsize=(9, 9), subplot_kw={"projection": ccrs.Robinson()} +) +plot_map( + ax=ax, + df=df_filt_q_and_tauexp, + coord_info=coord_info, + cmap="RdBu_r", + norm=norm_tauexp, + var_item=var_dict[var_key_tauexp], + stat_type=stat_type, + bar_label=var_dict[var_key_tauexp]["label"], +) +if save: + save_figure(fig_map_aicc, fig_dir, f"aicc_map_{stat_type}_and_tauexp", "png", 900) + +# Print statistical summaries for tauexp model +print_global_stats( + df_filt_q_and_tauexp, "diff_aicc_q_exp", "nonlinear - tau-based linear" +) # %% # Setup common variables var_key_exp = "diff_bic_q_exp"