From 3cfc4974a9460d8f7c6df1ac789dc0953c1018fb Mon Sep 17 00:00:00 2001
From: RY4GIT <400mhs2@gmail.com>
Date: Wed, 18 Dec 2024 21:17:05 -0800
Subject: [PATCH] add: aic and other perf metrics

---
 analysis/DrydownModel.py               | 41 +++++++++++----
 analysis/Event.py                      | 12 +++++
 notebooks/fig_variable_labels.json     | 14 +++++
 notebooks/figs_stats.py                |  4 +-
 notebooks/figs_stats_datapreprocess.py | 53 +++++++++++++++++--
 notebooks/figs_stats_rev.py            | 73 ++++++++++++++++++++++++--
 6 files changed, 178 insertions(+), 19 deletions(-)

diff --git a/analysis/DrydownModel.py b/analysis/DrydownModel.py
index 170f493..d9fda41 100644
--- a/analysis/DrydownModel.py
+++ b/analysis/DrydownModel.py
@@ -265,10 +265,12 @@ def fit_one_event(self, event):
         # Fit tau exponential model
         if self.run_tau_exp_model:
             try:
-                popt, pcov, y_opt, r_squared, aic, bic, _ = self.fit_tau_exp_model(
-                    event
+                popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, _ = (
+                    self.fit_tau_exp_model(event)
+                )
+                event.add_attributes(
+                    "tau_exp", popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot
                 )
-                event.add_attributes("tau_exp", popt, pcov, y_opt, r_squared, aic, bic)
             except Exception as e:
                 log.debug(f"Exception raised in the thread {self.thread_name}: {e}")
                 return None
@@ -277,7 +279,9 @@ def fit_one_event(self, event):
         # Fit tau exponential model
         if self.run_exp_model:
             try:
-                popt, pcov, y_opt, r_squared, aic, bic, _ = self.fit_exp_model(event)
+                popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, _ = self.fit_exp_model(
+                    event
+                )
 
                 if self.is_stage1ET_active:
                     est_theta_star = popt[2]
@@ -292,7 +296,10 @@ def fit_one_event(self, event):
                     y_opt,
                     r_squared,
                     aic,
+                    aicc,
                     bic,
+                    ss_res,
+                    ss_tot, 
                     np.nan,
                     est_theta_star,
                     est_theta_w,
@@ -305,8 +312,8 @@ def fit_one_event(self, event):
         # Fit q model
         if self.run_q_model:
             try:
-                popt, pcov, y_opt, r_squared, aic, bic, p_value = self.fit_q_model(
-                    event
+                popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, p_value = (
+                    self.fit_q_model(event)
                 )
 
                 if self.is_stage1ET_active:
@@ -322,7 +329,10 @@ def fit_one_event(self, event):
                     y_opt,
                     r_squared,
                     aic,
+                    aicc,
                     bic,
+                    ss_res, 
+                    ss_tot,
                     p_value,
                     est_theta_star,
                     est_theta_w,
@@ -372,7 +382,7 @@ def fit_model(self, event, model, bounds, p0, param_names):
             y_opt = model(event.x, *popt)
 
             # Calculate the residuals
-            r_squared, aic, bic, p_value = self.calc_performance_metrics(
+            r_squared, aic, aicc, bic, ss_res, ss_tot, p_value = self.calc_performance_metrics(
                 y_obs=event.y,
                 y_pred=y_opt,
                 popt=popt,
@@ -380,7 +390,7 @@ def fit_model(self, event, model, bounds, p0, param_names):
                 param_names=param_names,
             )
 
-            return popt, pcov, y_opt, r_squared, aic, bic, p_value
+            return popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, p_value
 
         except Exception as e:
             log.debug(f"Exception raised in the thread {self.thread_name}: {e}")
@@ -407,13 +417,15 @@ def calc_performance_metrics(self, y_obs, y_pred, popt, pcov, param_names):
         # AIC and BIC
         try:
             aic = n * np.log(ss_res / n) + 2 * k
+            aicc = aic + (2 * k * (k + 1)) / (n - k - 1)
         except:
             aic = np.nan
+            aicc = np.nan
 
         try:
             bic = n * np.log(ss_res / n) + k * np.log(n)
         except:
-            aic = np.nan
+            bic = np.nan
         ############################
 
         if "q" in param_names:
@@ -437,7 +449,7 @@ def calc_performance_metrics(self, y_obs, y_pred, popt, pcov, param_names):
         else:
             p_value = np.nan
 
-        return r_squared, aic, bic, p_value
+        return r_squared, aic, aicc, bic, ss_res, ss_tot, p_value
 
     def fit_tau_exp_model(self, event):
         """Fits an exponential model to the given event data and returns the fitted parameters.
@@ -780,7 +792,10 @@ def return_result_df(self):
                             "tauexp_cov_theta_w_tau": event.tau_exp["cov_theta_w_tau"],
                             "tauexp_r_squared": event.tau_exp["r_squared"],
                             "tauexp_aic": event.tau_exp["aic"],
+                            "tauexp_aicc": event.tau_exp["aicc"],
                             "tauexp_bic": event.tau_exp["bic"],
+                            "tauexp_ss_res": event.tau_exp["ss_res"],
+                            "tauexp_ss_tot": event.tau_exp["ss_tot"],
                             "tauexp_y_opt": event.tau_exp["y_opt"],
                         }
                     )
@@ -804,7 +819,10 @@ def return_result_df(self):
                             ],
                             "exp_r_squared": event.exp["r_squared"],
                             "exp_aic": event.exp["aic"],
+                            "exp_aicc": event.exp["aicc"],
                             "exp_bic": event.exp["bic"],
+                            "exp_ss_res": event.exp["ss_res"],
+                            "exp_ss_tot": event.exp["ss_tot"],
                             "exp_y_opt": event.exp["y_opt"],
                         }
                     )
@@ -831,7 +849,10 @@ def return_result_df(self):
                             ],
                             "q_r_squared": event.q["r_squared"],
                             "q_aic": event.q["aic"],
+                            "q_aicc": event.q["aicc"],
                             "q_bic": event.q["bic"],
+                            "q_ss_res": event.q["ss_res"],
+                            "q_ss_tot": event.q["ss_tot"],
                             "q_eq_1_p": event.q["q_eq_1_p"],
                             "q_y_opt": event.q["y_opt"],
                         }
diff --git a/analysis/Event.py b/analysis/Event.py
index cdd876f..c726ff9 100644
--- a/analysis/Event.py
+++ b/analysis/Event.py
@@ -46,7 +46,10 @@ def add_attributes(
         y_opt=[],
         r_squared=np.nan,
         aic=np.nan,
+        aicc=np.nan,
         bic=np.nan,
+        ss_res=np.nan,
+        ss_tot=np.nan,
         p_value=np.nan,
         est_theta_star=np.nan,
         est_theta_w=np.nan,
@@ -83,7 +86,10 @@ def add_attributes(
                 "y_opt": y_opt.tolist(),
                 "r_squared": r_squared,
                 "aic": aic,
+                "aicc": aicc,
                 "bic": bic,
+                "ss_res": ss_res,
+                "ss_tot": ss_tot
             }
 
         if model_type == "exp":
@@ -130,7 +136,10 @@ def add_attributes(
                 "y_opt": y_opt.tolist(),
                 "r_squared": r_squared,
                 "aic": aic,
+                "aicc": aicc,
                 "bic": bic,
+                "ss_res": ss_res,
+                "ss_tot": ss_tot,
             }
 
         if model_type == "q":
@@ -189,7 +198,10 @@ def add_attributes(
                 "y_opt": y_opt.tolist(),
                 "r_squared": r_squared,
                 "aic": aic,
+                "aicc": aicc,
                 "bic": bic,
+                "ss_res": ss_res,
+                "ss_tot": ss_tot,
                 "q_eq_1_p": p_value,
             }
 
diff --git a/notebooks/fig_variable_labels.json b/notebooks/fig_variable_labels.json
index 24e2d29..9913761 100644
--- a/notebooks/fig_variable_labels.json
+++ b/notebooks/fig_variable_labels.json
@@ -188,6 +188,20 @@
         "unit": "",
         "lim": [-5, 5]
     },
+    "diff_aicc_q_exp": {
+        "column_name": "diff_aicc_q_exp",
+        "symbol": "",
+        "label": "$AIC_c$ (Nonlinear - linear)",
+        "unit": "",
+        "lim": [-5, 5]
+    },
+    "diff_aicc_q_tauexp": {
+        "column_name": "diff_aicc_q_tauexp",
+        "symbol": "",
+        "label": "$AIC_c$ (Nonlinear - $\\tau$-based linear)",
+        "unit": "",
+        "lim": [-5, 5]
+    },
     "diff_bic_q_exp": {
         "column_name": "diff_aic_q_exp",
         "symbol": "",
diff --git a/notebooks/figs_stats.py b/notebooks/figs_stats.py
index a4013e6..a08a036 100644
--- a/notebooks/figs_stats.py
+++ b/notebooks/figs_stats.py
@@ -49,11 +49,11 @@
 
 ################ CHANGE HERE FOR PATH CONFIG ##################################
 ############ CHANGE HERE FOR CHECKING DIFFERENT RESULTS ###################
-dir_name = f"raraki_2024-05-13_global_piecewise"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
+dir_name = f"raraki_2024-12-18_init_q_0p5"  # f"raraki_2024-05-13_global_piecewise"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
 ###########################################################################
 # f"raraki_2024-05-13_global_piecewise" was used for the 1st version of the manuscript
 
-# Ryoko Araki, Bryn Morgan, Hilary K McMillan, Kelly K Caylor. 
+# Ryoko Araki, Bryn Morgan, Hilary K McMillan, Kelly K Caylor.
 # Nonlinear Soil Moisture Loss Function Reveals Vegetation Responses to Water Availability. ESS Open Archive . August 01, 2024.
 # DOI: 10.22541/essoar.172251989.99347091/v1
 
diff --git a/notebooks/figs_stats_datapreprocess.py b/notebooks/figs_stats_datapreprocess.py
index d02a797..eca190a 100644
--- a/notebooks/figs_stats_datapreprocess.py
+++ b/notebooks/figs_stats_datapreprocess.py
@@ -33,7 +33,7 @@
 # %% Plot config
 
 ############ CHANGE HERE FOR CHECKING DIFFERENT RESULTS ###################
-dir_name = f"raraki_2024-12-03_revision"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
+dir_name = f"raraki_2024-12-03_revision"  # f"raraki_2024-12-03_revision"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
 ############################|###############################################
 # f"raraki_2024-05-13_global_piecewise" was used for the 1st version of the manuscript
 
@@ -125,7 +125,8 @@
 # Get the binned ancillary information
 
 # %%
-df.columns
+print(df.columns)
+
 # %% ############################################################################
 # Calculate some stats for evaluation
 
@@ -206,6 +207,32 @@ def check_1ts_range(row, verbose=False):
     return (dsdt_0 - dsdt_1) / (row["q_ETmax"] / z_mm) * (-1)
 
 
+import numpy as np
+
+
+def count_nonnan_sm(row):
+    input_string = row.sm
+
+    # Processing the string
+    input_string = input_string.replace("\n", " np.nan")
+    input_string = input_string.replace(" nan", " np.nan")
+    input_string = input_string.strip("[]")
+
+    # Converting to numpy array and handling np.nan
+    sm = np.array(
+        [
+            float(value) if value != "np.nan" else np.nan
+            for value in input_string.split()
+        ]
+    )
+
+    # Counting the number of non-NaN values
+    count_non_nan = np.sum(~np.isnan(sm))
+
+    # Return the sm_range and the count of non-NaN values
+    return count_non_nan
+
+
 # Create new columns
 df["first3_avail2"] = df.apply(df_availability, axis=1)
 df["sm_range"] = df.apply(calculate_sm_range, axis=1)
@@ -213,8 +240,26 @@ def check_1ts_range(row, verbose=False):
     pd.to_datetime(df["event_end"]) - pd.to_datetime(df["event_start"])
 ).dt.days + 1
 df["large_q_criteria"] = df.apply(check_1ts_range, axis=1)
-
-
+df["event_ndays"] = df.apply(count_nonnan_sm, axis=1)
+
+
+# # %%
+# def calculate_aicc(row, k, aic_col):
+#     n = row["event_ndays"]
+#     aic = row[aic_col]
+#     if n > k + 1:  # Avoid division by zero or invalid cases
+#         aicc = aic + (2 * k * (k + 1)) / (n - k - 1)
+#     else:
+#         aicc = np.nan  # AICc is undefined if n <= k + 1
+#     return aicc
+
+
+# # Apply the calculation to each row in the DataFrame
+# df["tauexp_aicc"] = df.apply(lambda row: calculate_aicc(row, 4, "tauexp_aic"), axis=1)
+# df["exp_aicc"] = df.apply(lambda row: calculate_aicc(row, 3, "tauexp_aic"), axis=1)
+# df["q_aicc"] = df.apply(lambda row: calculate_aicc(row, 4, "q_aic"), axis=1)
+df = df.assign(diff_aicc_q_tauexp=df["q_aicc"] - df["tauexp_aicc"])
+df = df.assign(diff_aicc_q_exp=df["q_aicc"] - df["exp_aicc"])
 # %%
 output_path = os.path.join(output_dir, dir_name, "all_results_processed.csv")
 df.to_csv(output_path)
diff --git a/notebooks/figs_stats_rev.py b/notebooks/figs_stats_rev.py
index e2005cc..2f5b3e1 100644
--- a/notebooks/figs_stats_rev.py
+++ b/notebooks/figs_stats_rev.py
@@ -49,7 +49,7 @@
 
 ################ CHANGE HERE FOR PATH CONFIG ##################################
 ############ CHANGE HERE FOR CHECKING DIFFERENT RESULTS ###################
-dir_name = f"raraki_2024-12-03_revision"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
+dir_name = f"raraki_2024-12-03_revision"  # f"raraki_2024-12-03_revision"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
 ###########################################################################
 # f"raraki_2024-05-13_global_piecewise" was used for the 1st version of the manuscript
 # f"raraki_2024-12-03_revision" was used for the revised version of the manuscript
@@ -308,18 +308,26 @@ def count_model_performance(df, varname):
 print("linear vs nonlinear")
 count_model_performance(df_filt_q_and_exp, "diff_R2_q_exp")
 
+print("\n")
 print(r"In terms of $AIC$")
 print("tau-linear vs nonlinear")
 count_model_performance(df_filt_q_and_tauexp, "diff_aic_q_tauexp")
 print("linear vs nonlinear")
 count_model_performance(df_filt_q_and_exp, "diff_aic_q_exp")
 
+print("\n")
 print(r"In terms of $BIC$")
 print("tau-linear vs nonlinear")
 count_model_performance(df_filt_q_and_tauexp, "diff_bic_q_tauexp")
 print("linear vs nonlinear")
 count_model_performance(df_filt_q_and_exp, "diff_bic_q_exp")
 
+print("\n")
+print(r"In terms of $AICc$")
+print("tau-linear vs nonlinear")
+count_model_performance(df_filt_q_and_tauexp, "diff_aicc_q_tauexp")
+print("linear vs nonlinear")
+count_model_performance(df_filt_q_and_exp, "diff_aicc_q_exp")
 
 # %%
 ############################################################################
@@ -356,8 +364,14 @@ def plot_model_metrics(df, linearmodel, metric_name, threshold=None, save=False)
     plt.rcParams.update({"font.size": 30})
 
     # Read df
-    x = df[f"{linearmodel}_{metric_name}"].values
-    y = df[f"q_{metric_name}"].values
+    x_varname = f"{linearmodel}_{metric_name}"
+    y_varname = f"q_{metric_name}"
+
+    df = df[df[x_varname].notna() & np.isfinite(df[x_varname])]
+    df = df[df[y_varname].notna() & np.isfinite(df[y_varname])]
+
+    x = df[x_varname].values
+    y = df[y_varname].values
 
     # Create a scatter plot
     # $ fig, ax = plt.subplots(figsize=(4.5 * 1.2, 4 * 1.2),)
@@ -459,6 +473,13 @@ def plot_model_metrics(df, linearmodel, metric_name, threshold=None, save=False)
     df=df_filt_q_and_tauexp, linearmodel="tauexp", metric_name="bic", save=save
 )
 
+plot_model_metrics(
+    df=df_filt_q_and_exp, linearmodel="exp", metric_name="aicc", save=save
+)
+plot_model_metrics(
+    df=df_filt_q_and_tauexp, linearmodel="tauexp", metric_name="aicc", save=save
+)
+
 
 # %% Define plot_map
 ############################################################################
@@ -612,6 +633,52 @@ def print_global_stats(df, diff_var, model_desc):
     df_filt_q_and_tauexp, "diff_aic_q_exp", "nonlinear - tau-based linear"
 )
 
+# %%
+var_key_exp = "diff_aicc_q_exp"
+var_key_tauexp = "diff_aicc_q_tauexp"
+
+# Plot and save maps for exp model
+plt.rcParams.update({"font.size": 12})
+fig_map_aicc, ax = plt.subplots(
+    figsize=(9, 9), subplot_kw={"projection": ccrs.Robinson()}
+)
+plot_map(
+    ax=ax,
+    df=df_filt_q_and_exp,
+    coord_info=coord_info,
+    cmap="RdBu_r",
+    norm=norm_exp,
+    var_item=var_dict[var_key_exp],
+    stat_type=stat_type,
+    bar_label=var_dict[var_key_exp]["label"],
+)
+if save:
+    save_figure(fig_map_aicc, fig_dir, f"aicc_map_{stat_type}_and_exp", "png", 900)
+
+# Print statistical summaries for exp model
+print_global_stats(df_filt_q_and_exp, "diff_aicc_q_exp", "nonlinear - linear")
+
+# Plot and save maps for tauexp model
+fig_map_aicc, ax = plt.subplots(
+    figsize=(9, 9), subplot_kw={"projection": ccrs.Robinson()}
+)
+plot_map(
+    ax=ax,
+    df=df_filt_q_and_tauexp,
+    coord_info=coord_info,
+    cmap="RdBu_r",
+    norm=norm_tauexp,
+    var_item=var_dict[var_key_tauexp],
+    stat_type=stat_type,
+    bar_label=var_dict[var_key_tauexp]["label"],
+)
+if save:
+    save_figure(fig_map_aicc, fig_dir, f"aicc_map_{stat_type}_and_tauexp", "png", 900)
+
+# Print statistical summaries for tauexp model
+print_global_stats(
+    df_filt_q_and_tauexp, "diff_aicc_q_exp", "nonlinear - tau-based linear"
+)
 # %%
 # Setup common variables
 var_key_exp = "diff_bic_q_exp"