add normalized by capacity metrics

openclimatefix · Dec 19, 2023 · 3edd3bc · 3edd3bc
1 parent 8c74d41
commit 3edd3bc
Show file tree

Hide file tree

Showing 3 changed files with 53 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -66,16 +66,16 @@ This contains 50 PV sites, with 50 unique timestamps. The data is from 2021.
 The results of the evaluation are as follows
 The MAE is 0.1906 kw across all horizons. 
 
-| Horizons | MAE [kw]      |
-|----------|---------------|
-| 0        | 0.202 +- 0.03 |
-| 1        | 0.211 +- 0.03 |
-| 2        | 0.216 +- 0.03 |
-| 3-4      | 0.211 +- 0.02 |
-| 5-8      | 0.191 +- 0.01 |
-| 9-16     | 0.161 +- 0.01 |
-| 17-24    | 0.173 +- 0.01 |
-| 24-48    | 0.201 +- 0.01 |
+| Horizons | MAE [kw]      | MAE [%] |
+|----------|---------------|---------|
+| 0        | 0.202 +- 0.03 | 6.2     |
+| 1        | 0.211 +- 0.03 | 6.4     |
+| 2        | 0.216 +- 0.03 | 6.5     |
+| 3 - 4    | 0.211 +- 0.02 | 6.3     |
+| 5 - 8    | 0.191 +- 0.01 | 6       |
+| 9 - 16   | 0.161 +- 0.01 | 5       |
+| 17 - 24  | 0.173 +- 0.01 | 5.3     |
+| 24 - 48  | 0.201 +- 0.01 | 6.1     |
 
 
 

diff --git a/quartz_solar_forecast/eval/metrics.py b/quartz_solar_forecast/eval/metrics.py
@@ -2,7 +2,7 @@
 import pandas as pd
 
 
-def metrics(results_df: pd.DataFrame):
+def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame):
     """
     Calculate and print metrics: MAE
 
@@ -13,10 +13,23 @@ def metrics(results_df: pd.DataFrame):
     - forecast_power
     - generation_power
 
+    pv_metadata is a dataframe with the following columns
+    - pv_id
+    - capacity
+
     """
 
+    # merge pv_metadata with results_df
+    results_df = pd.merge(results_df, pv_metadata, on="pv_id")
+
     mae = np.round((results_df["forecast_power"] - results_df["generation_power"]).abs().mean(), 4)
-    print(f"MAE: {mae}")
+    mae_normalized = np.round(
+        ((results_df["forecast_power"] - results_df["generation_power"]) / results_df["capacity"])
+        .abs()
+        .mean(),
+        4,
+    )
+    print(f"MAE: {mae} kw, normalized {mae_normalized} %")
 
     # calculate metrics over the different horizons hours
     # find all unique horizon_hours
@@ -35,12 +48,23 @@ def metrics(results_df: pd.DataFrame):
                 (results_df_horizon["forecast_power"] - results_df_horizon["generation_power"])
                 .abs()
                 .std()
-                / len(results_df_horizon) ** 0.5
+                / 50 ** 0.5
             ),
             3,
         )
+        mae_normalized = np.round(
+            (
+                (results_df_horizon["forecast_power"] - results_df_horizon["generation_power"])
+                / results_df_horizon["capacity"]
+            )
+            .abs()
+            .mean(),
+            3,
+        )
 
-        print(f"MAE for horizon {horizon_hour}: {mae} +- {1.96*sem}")
+        print(
+            f"MAE for horizon {horizon_hour}: {mae} +- {1.96*sem}. Normalized MAE: {mae_normalized} %"
+        )
 
     # calculate metrics over the different horizon groups
     horizon_groups = [[0, 0], [1, 1], [2, 2], [3, 4], [5, 8], [9, 16], [17, 24], [24, 48]]
@@ -59,11 +83,23 @@ def metrics(results_df: pd.DataFrame):
                 (horizon_group_df["forecast_power"] - horizon_group_df["generation_power"])
                 .abs()
                 .std()
-                / len(horizon_group_df) ** 0.5
+                / 50 ** 0.5
             ),
             3,
         )
 
-        print(f"MAE for horizon {horizon_group}: {mae} +- {1.96*sem}")
+        mae_normalized = np.round(
+            (
+                (horizon_group_df["forecast_power"] - horizon_group_df["generation_power"])
+                / horizon_group_df["capacity"]
+            )
+            .abs()
+            .mean(),
+            3,
+        )
+
+        print(
+            f"MAE for horizon {horizon_group}: {mae} +- {1.96*sem}. mae_normalized: {mae_normalized} %"
+        )
 
         # TODO add more metrics using ocf_ml_metrics
diff --git a/quartz_solar_forecast/evaluation.py b/quartz_solar_forecast/evaluation.py
@@ -57,7 +57,7 @@ def run_eval(testset_path: str = "quartz_solar_forecast/dataset/testset.csv"):
     results_df.to_csv("results.csv")
 
     # Calculate and print metrics: MAE
-    metrics(results_df)
+    metrics(results_df, pv_metadata)
 
     # Visualizations
     # TODO