add: aic and other perf metrics

RY4GIT · Dec 19, 2024 · 3cfc497 · 3cfc497
1 parent b060d6f
commit 3cfc497
Show file tree

Hide file tree

Showing 6 changed files with 178 additions and 19 deletions.
diff --git a/analysis/DrydownModel.py b/analysis/DrydownModel.py
@@ -265,10 +265,12 @@ def fit_one_event(self, event):
         # Fit tau exponential model
         if self.run_tau_exp_model:
             try:
-                popt, pcov, y_opt, r_squared, aic, bic, _ = self.fit_tau_exp_model(
-                    event
+                popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, _ = (
+                    self.fit_tau_exp_model(event)
+                )
+                event.add_attributes(
+                    "tau_exp", popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot
                 )
-                event.add_attributes("tau_exp", popt, pcov, y_opt, r_squared, aic, bic)
             except Exception as e:
                 log.debug(f"Exception raised in the thread {self.thread_name}: {e}")
                 return None
@@ -277,7 +279,9 @@ def fit_one_event(self, event):
         # Fit tau exponential model
         if self.run_exp_model:
             try:
-                popt, pcov, y_opt, r_squared, aic, bic, _ = self.fit_exp_model(event)
+                popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, _ = self.fit_exp_model(
+                    event
+                )
 
                 if self.is_stage1ET_active:
                     est_theta_star = popt[2]
@@ -292,7 +296,10 @@ def fit_one_event(self, event):
                     y_opt,
                     r_squared,
                     aic,
+                    aicc,
                     bic,
+                    ss_res,
+                    ss_tot, 
                     np.nan,
                     est_theta_star,
                     est_theta_w,
@@ -305,8 +312,8 @@ def fit_one_event(self, event):
         # Fit q model
         if self.run_q_model:
             try:
-                popt, pcov, y_opt, r_squared, aic, bic, p_value = self.fit_q_model(
-                    event
+                popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, p_value = (
+                    self.fit_q_model(event)
                 )
 
                 if self.is_stage1ET_active:
@@ -322,7 +329,10 @@ def fit_one_event(self, event):
                     y_opt,
                     r_squared,
                     aic,
+                    aicc,
                     bic,
+                    ss_res, 
+                    ss_tot,
                     p_value,
                     est_theta_star,
                     est_theta_w,
@@ -372,15 +382,15 @@ def fit_model(self, event, model, bounds, p0, param_names):
             y_opt = model(event.x, *popt)
 
             # Calculate the residuals
-            r_squared, aic, bic, p_value = self.calc_performance_metrics(
+            r_squared, aic, aicc, bic, ss_res, ss_tot, p_value = self.calc_performance_metrics(
                 y_obs=event.y,
                 y_pred=y_opt,
                 popt=popt,
                 pcov=pcov,
                 param_names=param_names,
             )
 
-            return popt, pcov, y_opt, r_squared, aic, bic, p_value
+            return popt, pcov, y_opt, r_squared, aic, aicc, bic, ss_res, ss_tot, p_value
 
         except Exception as e:
             log.debug(f"Exception raised in the thread {self.thread_name}: {e}")
@@ -407,13 +417,15 @@ def calc_performance_metrics(self, y_obs, y_pred, popt, pcov, param_names):
         # AIC and BIC
         try:
             aic = n * np.log(ss_res / n) + 2 * k
+            aicc = aic + (2 * k * (k + 1)) / (n - k - 1)
         except:
             aic = np.nan
+            aicc = np.nan
 
         try:
             bic = n * np.log(ss_res / n) + k * np.log(n)
         except:
-            aic = np.nan
+            bic = np.nan
         ############################
 
         if "q" in param_names:
@@ -437,7 +449,7 @@ def calc_performance_metrics(self, y_obs, y_pred, popt, pcov, param_names):
         else:
             p_value = np.nan
 
-        return r_squared, aic, bic, p_value
+        return r_squared, aic, aicc, bic, ss_res, ss_tot, p_value
 
     def fit_tau_exp_model(self, event):
         """Fits an exponential model to the given event data and returns the fitted parameters.
@@ -780,7 +792,10 @@ def return_result_df(self):
                             "tauexp_cov_theta_w_tau": event.tau_exp["cov_theta_w_tau"],
                             "tauexp_r_squared": event.tau_exp["r_squared"],
                             "tauexp_aic": event.tau_exp["aic"],
+                            "tauexp_aicc": event.tau_exp["aicc"],
                             "tauexp_bic": event.tau_exp["bic"],
+                            "tauexp_ss_res": event.tau_exp["ss_res"],
+                            "tauexp_ss_tot": event.tau_exp["ss_tot"],
                             "tauexp_y_opt": event.tau_exp["y_opt"],
                         }
                     )
@@ -804,7 +819,10 @@ def return_result_df(self):
                             ],
                             "exp_r_squared": event.exp["r_squared"],
                             "exp_aic": event.exp["aic"],
+                            "exp_aicc": event.exp["aicc"],
                             "exp_bic": event.exp["bic"],
+                            "exp_ss_res": event.exp["ss_res"],
+                            "exp_ss_tot": event.exp["ss_tot"],
                             "exp_y_opt": event.exp["y_opt"],
                         }
                     )
@@ -831,7 +849,10 @@ def return_result_df(self):
                             ],
                             "q_r_squared": event.q["r_squared"],
                             "q_aic": event.q["aic"],
+                            "q_aicc": event.q["aicc"],
                             "q_bic": event.q["bic"],
+                            "q_ss_res": event.q["ss_res"],
+                            "q_ss_tot": event.q["ss_tot"],
                             "q_eq_1_p": event.q["q_eq_1_p"],
                             "q_y_opt": event.q["y_opt"],
                         }

diff --git a/analysis/Event.py b/analysis/Event.py
@@ -46,7 +46,10 @@ def add_attributes(
         y_opt=[],
         r_squared=np.nan,
         aic=np.nan,
+        aicc=np.nan,
         bic=np.nan,
+        ss_res=np.nan,
+        ss_tot=np.nan,
         p_value=np.nan,
         est_theta_star=np.nan,
         est_theta_w=np.nan,
@@ -83,7 +86,10 @@ def add_attributes(
                 "y_opt": y_opt.tolist(),
                 "r_squared": r_squared,
                 "aic": aic,
+                "aicc": aicc,
                 "bic": bic,
+                "ss_res": ss_res,
+                "ss_tot": ss_tot
             }
 
         if model_type == "exp":
@@ -130,7 +136,10 @@ def add_attributes(
                 "y_opt": y_opt.tolist(),
                 "r_squared": r_squared,
                 "aic": aic,
+                "aicc": aicc,
                 "bic": bic,
+                "ss_res": ss_res,
+                "ss_tot": ss_tot,
             }
 
         if model_type == "q":
@@ -189,7 +198,10 @@ def add_attributes(
                 "y_opt": y_opt.tolist(),
                 "r_squared": r_squared,
                 "aic": aic,
+                "aicc": aicc,
                 "bic": bic,
+                "ss_res": ss_res,
+                "ss_tot": ss_tot,
                 "q_eq_1_p": p_value,
             }
 

diff --git a/notebooks/fig_variable_labels.json b/notebooks/fig_variable_labels.json
@@ -188,6 +188,20 @@
         "unit": "",
         "lim": [-5, 5]
     },
+    "diff_aicc_q_exp": {
+        "column_name": "diff_aicc_q_exp",
+        "symbol": "",
+        "label": "$AIC_c$ (Nonlinear - linear)",
+        "unit": "",
+        "lim": [-5, 5]
+    },
+    "diff_aicc_q_tauexp": {
+        "column_name": "diff_aicc_q_tauexp",
+        "symbol": "",
+        "label": "$AIC_c$ (Nonlinear - $\\tau$-based linear)",
+        "unit": "",
+        "lim": [-5, 5]
+    },
     "diff_bic_q_exp": {
         "column_name": "diff_aic_q_exp",
         "symbol": "",

diff --git a/notebooks/figs_stats.py b/notebooks/figs_stats.py
@@ -49,11 +49,11 @@
 
 ################ CHANGE HERE FOR PATH CONFIG ##################################
 ############ CHANGE HERE FOR CHECKING DIFFERENT RESULTS ###################
-dir_name = f"raraki_2024-05-13_global_piecewise"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
+dir_name = f"raraki_2024-12-18_init_q_0p5"  # f"raraki_2024-05-13_global_piecewise"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
 ###########################################################################
 # f"raraki_2024-05-13_global_piecewise" was used for the 1st version of the manuscript
 
-# Ryoko Araki, Bryn Morgan, Hilary K McMillan, Kelly K Caylor. 
+# Ryoko Araki, Bryn Morgan, Hilary K McMillan, Kelly K Caylor.
 # Nonlinear Soil Moisture Loss Function Reveals Vegetation Responses to Water Availability. ESS Open Archive . August 01, 2024.
 # DOI: 10.22541/essoar.172251989.99347091/v1
 

diff --git a/notebooks/figs_stats_datapreprocess.py b/notebooks/figs_stats_datapreprocess.py
@@ -33,7 +33,7 @@
 # %% Plot config
 
 ############ CHANGE HERE FOR CHECKING DIFFERENT RESULTS ###################
-dir_name = f"raraki_2024-12-03_revision"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
+dir_name = f"raraki_2024-12-03_revision"  # f"raraki_2024-12-03_revision"  # "raraki_2024-02-02"  # f"raraki_2023-11-25_global_95asmax"
 ############################|###############################################
 # f"raraki_2024-05-13_global_piecewise" was used for the 1st version of the manuscript
 
@@ -125,7 +125,8 @@
 # Get the binned ancillary information
 
 # %%
-df.columns
+print(df.columns)
+
 # %% ############################################################################
 # Calculate some stats for evaluation
 
@@ -206,15 +207,59 @@ def check_1ts_range(row, verbose=False):
     return (dsdt_0 - dsdt_1) / (row["q_ETmax"] / z_mm) * (-1)
 
 
+import numpy as np
+
+
+def count_nonnan_sm(row):
+    input_string = row.sm
+
+    # Processing the string
+    input_string = input_string.replace("\n", " np.nan")
+    input_string = input_string.replace(" nan", " np.nan")
+    input_string = input_string.strip("[]")
+
+    # Converting to numpy array and handling np.nan
+    sm = np.array(
+        [
+            float(value) if value != "np.nan" else np.nan
+            for value in input_string.split()
+        ]
+    )
+
+    # Counting the number of non-NaN values
+    count_non_nan = np.sum(~np.isnan(sm))
+
+    # Return the sm_range and the count of non-NaN values
+    return count_non_nan
+
+
 # Create new columns
 df["first3_avail2"] = df.apply(df_availability, axis=1)
 df["sm_range"] = df.apply(calculate_sm_range, axis=1)
 df["event_length"] = (
     pd.to_datetime(df["event_end"]) - pd.to_datetime(df["event_start"])
 ).dt.days + 1
 df["large_q_criteria"] = df.apply(check_1ts_range, axis=1)
-
-
+df["event_ndays"] = df.apply(count_nonnan_sm, axis=1)
+
+
+# # %%
+# def calculate_aicc(row, k, aic_col):
+#     n = row["event_ndays"]
+#     aic = row[aic_col]
+#     if n > k + 1:  # Avoid division by zero or invalid cases
+#         aicc = aic + (2 * k * (k + 1)) / (n - k - 1)
+#     else:
+#         aicc = np.nan  # AICc is undefined if n <= k + 1
+#     return aicc
+
+
+# # Apply the calculation to each row in the DataFrame
+# df["tauexp_aicc"] = df.apply(lambda row: calculate_aicc(row, 4, "tauexp_aic"), axis=1)
+# df["exp_aicc"] = df.apply(lambda row: calculate_aicc(row, 3, "tauexp_aic"), axis=1)
+# df["q_aicc"] = df.apply(lambda row: calculate_aicc(row, 4, "q_aic"), axis=1)
+df = df.assign(diff_aicc_q_tauexp=df["q_aicc"] - df["tauexp_aicc"])
+df = df.assign(diff_aicc_q_exp=df["q_aicc"] - df["exp_aicc"])
 # %%
 output_path = os.path.join(output_dir, dir_name, "all_results_processed.csv")
 df.to_csv(output_path)