diff --git a/README.md b/README.md index 11a27f60..61819889 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ The MAE is 0.1906 kw across all horizons. | 17 - 24 | 0.173 +- 0.01 | 5.3 | | 24 - 48 | 0.201 +- 0.01 | 6.1 | +If we exclude nighttime (rows where the generation power is 0.1 kW or less), the average MAE [%] over forecast horizons of 0 to 36 hours is 13.0%. Notes: - THe MAE in % is the MAE divided by the capacity of the PV site. We acknowledge there are a number of different ways to do this. diff --git a/quartz_solar_forecast/eval/metrics.py b/quartz_solar_forecast/eval/metrics.py index a80fa943..fedd2220 100644 --- a/quartz_solar_forecast/eval/metrics.py +++ b/quartz_solar_forecast/eval/metrics.py @@ -2,10 +2,12 @@ import pandas as pd -def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame): +def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame, include_night: bool = False): """ Calculate and print metrics: MAE + There is an option to include nighttime in the calculation of the MAE. + results_df dataframe with the following columns - timestamp - pv_id @@ -19,6 +21,10 @@ def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame): """ + # remove night time + if not include_night: + results_df = results_df[results_df["generation_power"] > 0.1] + # merge pv_metadata with results_df results_df = pd.merge(results_df, pv_metadata, on="pv_id") @@ -34,40 +40,9 @@ def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame): # calculate metrics over the different horizons hours # find all unique horizon_hours horizon_hours = results_df["horizon_hour"].unique() - for horizon_hour in horizon_hours: - # filter results_df to only include the horizon_hour - results_df_horizon = results_df[results_df["horizon_hour"] == horizon_hour] - mae = np.round( - (results_df_horizon["forecast_power"] - results_df_horizon["generation_power"]) - .abs() - .mean(), - 3, - ) - sem = np.round( - ( - (results_df_horizon["forecast_power"] - results_df_horizon["generation_power"]) - .abs() - .std() - / 50 ** 0.5 
- ), - 3, - ) - mae_normalized = np.round( - ( - (results_df_horizon["forecast_power"] - results_df_horizon["generation_power"]) - / results_df_horizon["capacity"] - ) - .abs() - .mean(), - 3, - ) - - print( - f"MAE for horizon {horizon_hour}: {mae} +- {1.96*sem}. Normalized MAE: {mae_normalized} %" - ) + horizon_groups = [[x, x] for x in horizon_hours] + horizon_groups += [[3, 4], [5, 8], [9, 16], [17, 24], [24, 48], [0, 36]] - # calculate metrics over the different horizon groups - horizon_groups = [[0, 0], [1, 1], [2, 2], [3, 4], [5, 8], [9, 16], [17, 24], [24, 48]] for horizon_group in horizon_groups: horizon_group_df = results_df[ results_df["horizon_hour"].between(horizon_group[0], horizon_group[1]) @@ -99,7 +74,7 @@ def metrics(results_df: pd.DataFrame, pv_metadata: pd.DataFrame): ) print( - f"MAE for horizon {horizon_group}: {mae} +- {1.96*sem}. mae_normalized: {mae_normalized} %" + f"MAE for horizon {horizon_group}: {mae} +- {1.96*sem}. mae_normalized: {100*mae_normalized} %" ) # TODO add more metrics using ocf_ml_metrics diff --git a/quartz_solar_forecast/evaluation.py b/quartz_solar_forecast/evaluation.py index b455db72..ee3d6f87 100644 --- a/quartz_solar_forecast/evaluation.py +++ b/quartz_solar_forecast/evaluation.py @@ -57,7 +57,8 @@ def run_eval(testset_path: str = "quartz_solar_forecast/dataset/testset.csv"): results_df.to_csv("results.csv") # Calculate and print metrics: MAE - metrics(results_df, pv_metadata) + metrics(results_df, pv_metadata, include_night=True) + metrics(results_df, pv_metadata, include_night=False) # Visualizations # TODO