diff --git a/india_forecast_app/adjuster.py b/india_forecast_app/adjuster.py index 192d242..059d5d6 100644 --- a/india_forecast_app/adjuster.py +++ b/india_forecast_app/adjuster.py @@ -45,6 +45,7 @@ def get_me_values( site_uuid: str, start_datetime: Optional[datetime] = None, ml_model_name: Optional[str] = None, + average_minutes: Optional[int] = 60, ) -> pd.DataFrame: """ Get the ME values for the last 7 days for a given hour, for a given hour creation time @@ -55,15 +56,22 @@ def get_me_values( start_datetime:: the start datetime to filter on. This defaults to 7 days before now. session: sqlalchemy session ml_model_name: the name of the model to filter on, this is optional. + average_minutes: the average minutes for the adjuster to group results by, this defaults to 60. + For solar data this should be 15, because of the sunrise and sunset, + for wind data this should be 60. """ + assert average_minutes <= 60, "Average minutes for adjuster should be <= 60" + if start_datetime is None: start_datetime = datetime.now() - timedelta(days=7) query = session.query( func.avg(ForecastValueSQL.forecast_power_kw - GenerationSQL.generation_power_kw), # create hour column - (cast(ForecastValueSQL.horizon_minutes / 60, INT)).label("horizon_hour"), + (cast(ForecastValueSQL.horizon_minutes / average_minutes, INT)).label( + "horizon_div_average_minutes" + ), ) # join @@ -72,8 +80,8 @@ def get_me_values( # round Generation start_utc and join to forecast start_utc start_utc_minute_rounded = ( cast(func.date_part("minute", GenerationSQL.start_utc), INT) - / 30 - * text("interval '30 min'") + / 15 + * text("interval '15 min'") ) start_utc_hour = func.date_trunc("hour", GenerationSQL.start_utc) generation_start_utc = start_utc_hour + start_utc_minute_rounded @@ -95,18 +103,18 @@ def get_me_values( query = query.filter(MLModelSQL.name == ml_model_name) # group by forecast horizon - query = query.group_by('horizon_hour') + query = query.group_by("horizon_div_average_minutes") # order by forecast horizon - query = query.order_by('horizon_hour') + query = query.order_by("horizon_div_average_minutes") me = query.all() - me_df = pd.DataFrame(me, columns=["me_kw", "horizon_hour"]) - me_df["horizon_minutes"] = me_df["horizon_hour"] * 60 + me_df = pd.DataFrame(me, columns=["me_kw", "horizon_div_average_minutes"]) + me_df["horizon_minutes"] = me_df["horizon_div_average_minutes"] * average_minutes # drop the hour column - me_df.drop(columns=["horizon_hour"], inplace=True) + me_df.drop(columns=["horizon_div_average_minutes"], inplace=True) if len(me_df) == 0: return me_df @@ -115,11 +123,16 @@ def get_me_values( # currently in 0, 60, 120,... # change to 0, 15, 30, 45, 60, 75, 90, 105, 120, ... me_df = me_df.set_index("horizon_minutes") - me_df = me_df.reindex(range(0, max(me_df.index)+15, 15)).interpolate(limit=3) + me_df = me_df.reindex(range(0, max(me_df.index) + 15, 15)).interpolate(limit=3) # reset index me_df = me_df.reset_index() + # smooth by a few blocks, 30 minutes hour either side, and keep 0 values 0 + idx = me_df["me_kw"] == 0 + me_df["me_kw"] = me_df["me_kw"].rolling(window=5, min_periods=1, center=True).mean() + me_df.loc[idx, "me_kw"] = 0 + # log the maximum and minimum adjuster results log.info(f"ME results: max={me_df['me_kw'].max()}, min={me_df['me_kw'].min()}") @@ -127,7 +140,11 @@ def get_me_values( def adjust_forecast_with_adjuster( - db_session, forecast_meta: dict, forecast_values_df: pd.DataFrame, ml_model_name: str + db_session, + forecast_meta: dict, + forecast_values_df: pd.DataFrame, + ml_model_name: str, + average_minutes: Optional[int] = 60, ): """ Adjust forecast values with ME values @@ -137,6 +154,7 @@ def adjust_forecast_with_adjuster( forecast_meta: forecast metadata forecast_values_df: forecast values dataframe ml_model_name: the ml model name + average_minutes: the average minutes for the adjuster to group results by, this defaults to 60. """ # get the ME values @@ -145,6 +163,7 @@ def adjust_forecast_with_adjuster( forecast_meta["timestamp_utc"].hour, site_uuid=forecast_meta["site_uuid"], ml_model_name=ml_model_name, + average_minutes=average_minutes, ) log.debug(f"ME values: {me_values}") @@ -173,6 +192,7 @@ def adjust_forecast_with_adjuster( forecast_values_df_adjust["me_kw"].fillna(0, inplace=True) # adjust forecast_power_kw by ME values + log.info(forecast_values_df_adjust["me_kw"]) forecast_values_df_adjust["forecast_power_kw"] = ( forecast_values_df_adjust["forecast_power_kw"] - forecast_values_df_adjust["me_kw"] ) diff --git a/india_forecast_app/app.py b/india_forecast_app/app.py index 3424330..9889e4f 100644 --- a/india_forecast_app/app.py +++ b/india_forecast_app/app.py @@ -206,6 +206,7 @@ def save_forecast( ml_model_name: Optional[str] = None, ml_model_version: Optional[str] = None, use_adjuster: bool = True, + adjuster_average_minutes: Optional[int] = 60, ): """ Saves a forecast for a given site & timestamp @@ -217,6 +218,8 @@ def save_forecast( ml_model_name: Name of the ML model used for the forecast ml_model_version: Version of the ML model used for the forecast use_adjuster: Make new model, adjusted by last 7 days of ME values + adjuster_average_minutes: The number of minutes that results are average over + when calculating adjuster values Raises: IOError: An error if database save fails @@ -244,7 +247,11 @@ def save_forecast( if use_adjuster: log.info(f"Adjusting forecast for site_id={forecast_meta['site_uuid']}...") forecast_values_df_adjust = adjust_forecast_with_adjuster( - db_session, forecast_meta, forecast_values_df, ml_model_name=ml_model_name + db_session, + forecast_meta, + forecast_values_df, + ml_model_name=ml_model_name, + average_minutes=adjuster_average_minutes, ) log.info(forecast_values_df_adjust) @@ -386,11 +393,14 @@ def app_run(timestamp: dt.datetime | None, write_to_db: bool = False, log_level: write_to_db=write_to_db, ml_model_name=ml_model.name, ml_model_version=version, + adjuster_average_minutes=model_config.adjuster_average_minutes, ) successful_runs += 1 - log.info(f"Completed forecasts for {successful_runs} runs for " - f"{runs} model runs. This was for {len(sites)} sites") + log.info( + f"Completed forecasts for {successful_runs} runs for " + f"{runs} model runs. This was for {len(sites)} sites" + ) if successful_runs == runs: log.info("All forecasts completed successfully") elif 0 < successful_runs < runs: diff --git a/india_forecast_app/models/all_models.yaml b/india_forecast_app/models/all_models.yaml index e1ce259..053224c 100644 --- a/india_forecast_app/models/all_models.yaml +++ b/india_forecast_app/models/all_models.yaml @@ -39,6 +39,7 @@ models: version: 20e7c3af76664ee2ac0e1502801749825ab8ed1f client: ad asset_type: pv + adjuster_average_minutes: 15 # this is just a dummy one - name: dummy type: dummy diff --git a/india_forecast_app/models/pydantic_models.py b/india_forecast_app/models/pydantic_models.py index c30d3a8..1de3a50 100644 --- a/india_forecast_app/models/pydantic_models.py +++ b/india_forecast_app/models/pydantic_models.py @@ -28,6 +28,14 @@ class Model(BaseModel): asset_type: str = Field( "pv", title="Asset Type", description="The type of asset the model is for (pv or wind)" ) + adjuster_average_minutes: int = Field( + 60, + title="Average Minutes", + description="The number of minutes that results are average over when " + "calculating adjuster values. " + "For solar site with regular data, 15 should be used. " + "For wind sites, 60 minutes should be used.", + ) class Models(BaseModel): diff --git a/tests/test_adjuster.py b/tests/test_adjuster.py index 5c614c7..1da99e0 100644 --- a/tests/test_adjuster.py +++ b/tests/test_adjuster.py @@ -25,6 +25,29 @@ def test_get_me_values(db_session, sites, generation_db_values, forecasts): assert me_df["me_kw"].sum() != 0 assert me_df["horizon_minutes"][0] == 0 assert me_df["horizon_minutes"][1] == 15 + assert me_df["me_kw"][90] != 0 + + +def test_get_me_values_15(db_session, sites, generation_db_values, forecasts): + """Check ME results are found""" + + hour = pd.Timestamp(datetime.now()).hour + me_df_15 = get_me_values( + db_session, hour, site_uuid=sites[0].site_uuid, ml_model_name="test", average_minutes=15 + ) + me_df_60 = get_me_values( + db_session, hour, site_uuid=sites[0].site_uuid, ml_model_name="test", average_minutes=60 + ) + + assert len(me_df_15) != 0 + assert len(me_df_15) == 96 + assert me_df_15["me_kw"].sum() != 0 + assert me_df_15["horizon_minutes"][0] == 0 + assert me_df_15["horizon_minutes"][1] == 15 + + # make sure the 15 and 60 are differnet values + assert me_df_15["horizon_minutes"][90] == me_df_60["horizon_minutes"][90] + assert me_df_15["me_kw"][90] != me_df_60["me_kw"][90] def test_get_me_values_no_generation(db_session, sites, forecasts):