Skip to content

Commit

Permalink
Merge pull request #33 from openclimatefix/add-night-time-metrics
Browse files Browse the repository at this point in the history
add day time metrics also
  • Loading branch information
peterdudfield authored Dec 22, 2023
2 parents e1a6b1a + 3fe96e4 commit 6ed43b2
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 36 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ The MAE is 0.1906 kw across all horizons.
| 17 - 24 | 0.173 +- 0.01 | 5.3 |
| 24 - 48 | 0.201 +- 0.01 | 6.1 |

If we exclude nighttime, then the average MAE [%] from 0 to 36 forecast hours is 13.0%.

Notes:
- The MAE in % is the MAE divided by the capacity of the PV site. We acknowledge there are a number of different ways to do this.
Expand Down
45 changes: 10 additions & 35 deletions quartz_solar_forecast/eval/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
import pandas as pd


def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame):
def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame, include_night: bool = False):
"""
Calculate and print metrics: MAE
Set include_night=True to include nighttime in the calculation of the MAE; by default, rows with generation_power of 0.1 or less are treated as nighttime and excluded.
results_df dataframe with the following columns
- timestamp
- pv_id
Expand All @@ -19,6 +21,10 @@ def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame):
"""

# remove night time
if not include_night:
results_df = results_df[results_df["generation_power"] > 0.1]

# merge pv_metadata with results_df
results_df = pd.merge(results_df, pv_metadata, on="pv_id")

Expand All @@ -34,40 +40,9 @@ def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame):
# calculate metrics over the different horizon hours
# find all unique horizon_hours
horizon_hours = results_df["horizon_hour"].unique()
for horizon_hour in horizon_hours:
# filter results_df to only include the horizon_hour
results_df_horizon = results_df[results_df["horizon_hour"] == horizon_hour]
mae = np.round(
(results_df_horizon["forecast_power"] - results_df_horizon["generation_power"])
.abs()
.mean(),
3,
)
sem = np.round(
(
(results_df_horizon["forecast_power"] - results_df_horizon["generation_power"])
.abs()
.std()
/ 50 ** 0.5
),
3,
)
mae_normalized = np.round(
(
(results_df_horizon["forecast_power"] - results_df_horizon["generation_power"])
/ results_df_horizon["capacity"]
)
.abs()
.mean(),
3,
)

print(
f"MAE for horizon {horizon_hour}: {mae} +- {1.96*sem}. Normalized MAE: {mae_normalized} %"
)
horizon_groups = [[x, x] for x in horizon_hours]
horizon_groups += [[3, 4], [5, 8], [9, 16], [17, 24], [24, 48], [0, 36]]

# calculate metrics over the different horizon groups
horizon_groups = [[0, 0], [1, 1], [2, 2], [3, 4], [5, 8], [9, 16], [17, 24], [24, 48]]
for horizon_group in horizon_groups:
horizon_group_df = results_df[
results_df["horizon_hour"].between(horizon_group[0], horizon_group[1])
Expand Down Expand Up @@ -99,7 +74,7 @@ def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame):
)

print(
f"MAE for horizon {horizon_group}: {mae} +- {1.96*sem}. mae_normalized: {mae_normalized} %"
f"MAE for horizon {horizon_group}: {mae} +- {1.96*sem}. mae_normalized: {100*mae_normalized} %"
)

# TODO add more metrics using ocf_ml_metrics
3 changes: 2 additions & 1 deletion quartz_solar_forecast/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def run_eval(testset_path: str = "quartz_solar_forecast/dataset/testset.csv"):
results_df.to_csv("results.csv")

# Calculate and print metrics: MAE
metrics(results_df, pv_metadata)
metrics(results_df, pv_metadata, include_night=True)
metrics(results_df, pv_metadata, include_night=False)

# Visualizations
# TODO
Expand Down

0 comments on commit 6ed43b2

Please sign in to comment.