Skip to content

Commit

Permalink
Issue/adjuster minutes (#138)
Browse files Browse the repository at this point in the history
* add different grouping for the adjuster depending on config

* lint

* lint
  • Loading branch information
peterdudfield authored Nov 26, 2024
1 parent e50f9c7 commit fbd3f04
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 13 deletions.
40 changes: 30 additions & 10 deletions india_forecast_app/adjuster.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def get_me_values(
site_uuid: str,
start_datetime: Optional[datetime] = None,
ml_model_name: Optional[str] = None,
average_minutes: Optional[int] = 60,
) -> pd.DataFrame:
"""
Get the ME values for the last 7 days for a given hour, for a given hour creation time
Expand All @@ -55,15 +56,22 @@ def get_me_values(
start_datetime:: the start datetime to filter on. This defaults to 7 days before now.
session: sqlalchemy session
ml_model_name: the name of the model to filter on, this is optional.
average_minutes: the average minutes for the adjuster to group results by, this defaults to 60.
For solar data this should be 15, because of the sunrise and sunset,
for wind data this should be 60.
"""

assert average_minutes <= 60, "Average minutes for adjuster should be <= 60"

if start_datetime is None:
start_datetime = datetime.now() - timedelta(days=7)

query = session.query(
func.avg(ForecastValueSQL.forecast_power_kw - GenerationSQL.generation_power_kw),
# create hour column
(cast(ForecastValueSQL.horizon_minutes / 60, INT)).label("horizon_hour"),
(cast(ForecastValueSQL.horizon_minutes / average_minutes, INT)).label(
"horizon_div_average_minutes"
),
)

# join
Expand All @@ -72,8 +80,8 @@ def get_me_values(
# round Generation start_utc and join to forecast start_utc
start_utc_minute_rounded = (
cast(func.date_part("minute", GenerationSQL.start_utc), INT)
/ 30
* text("interval '30 min'")
/ 15
* text("interval '15 min'")
)
start_utc_hour = func.date_trunc("hour", GenerationSQL.start_utc)
generation_start_utc = start_utc_hour + start_utc_minute_rounded
Expand All @@ -95,18 +103,18 @@ def get_me_values(
query = query.filter(MLModelSQL.name == ml_model_name)

# group by forecast horizon
query = query.group_by('horizon_hour')
query = query.group_by("horizon_div_average_minutes")

# order by forecast horizon
query = query.order_by('horizon_hour')
query = query.order_by("horizon_div_average_minutes")

me = query.all()

me_df = pd.DataFrame(me, columns=["me_kw", "horizon_hour"])
me_df["horizon_minutes"] = me_df["horizon_hour"] * 60
me_df = pd.DataFrame(me, columns=["me_kw", "horizon_div_average_minutes"])
me_df["horizon_minutes"] = me_df["horizon_div_average_minutes"] * average_minutes

# drop the hour column
me_df.drop(columns=["horizon_hour"], inplace=True)
me_df.drop(columns=["horizon_div_average_minutes"], inplace=True)

if len(me_df) == 0:
return me_df
Expand All @@ -115,19 +123,28 @@ def get_me_values(
# currently in 0, 60, 120,...
# change to 0, 15, 30, 45, 60, 75, 90, 105, 120, ...
me_df = me_df.set_index("horizon_minutes")
me_df = me_df.reindex(range(0, max(me_df.index)+15, 15)).interpolate(limit=3)
me_df = me_df.reindex(range(0, max(me_df.index) + 15, 15)).interpolate(limit=3)

# reset index
me_df = me_df.reset_index()

# smooth by a few blocks, 30 minutes hour either side, and keep 0 values 0
idx = me_df["me_kw"] == 0
me_df["me_kw"] = me_df["me_kw"].rolling(window=5, min_periods=1, center=True).mean()
me_df.loc[idx, "me_kw"] = 0

# log the maximum and minimum adjuster results
log.info(f"ME results: max={me_df['me_kw'].max()}, min={me_df['me_kw'].min()}")

return me_df


def adjust_forecast_with_adjuster(
db_session, forecast_meta: dict, forecast_values_df: pd.DataFrame, ml_model_name: str
db_session,
forecast_meta: dict,
forecast_values_df: pd.DataFrame,
ml_model_name: str,
average_minutes: Optional[int] = 60,
):
"""
Adjust forecast values with ME values
Expand All @@ -137,6 +154,7 @@ def adjust_forecast_with_adjuster(
forecast_meta: forecast metadata
forecast_values_df: forecast values dataframe
ml_model_name: the ml model name
average_minutes: the average minutes for the adjuster to group results by, this defaults to 60.
"""
# get the ME values
Expand All @@ -145,6 +163,7 @@ def adjust_forecast_with_adjuster(
forecast_meta["timestamp_utc"].hour,
site_uuid=forecast_meta["site_uuid"],
ml_model_name=ml_model_name,
average_minutes=average_minutes,
)
log.debug(f"ME values: {me_values}")

Expand Down Expand Up @@ -173,6 +192,7 @@ def adjust_forecast_with_adjuster(
forecast_values_df_adjust["me_kw"].fillna(0, inplace=True)

# adjust forecast_power_kw by ME values
log.info(forecast_values_df_adjust["me_kw"])
forecast_values_df_adjust["forecast_power_kw"] = (
forecast_values_df_adjust["forecast_power_kw"] - forecast_values_df_adjust["me_kw"]
)
Expand Down
16 changes: 13 additions & 3 deletions india_forecast_app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def save_forecast(
ml_model_name: Optional[str] = None,
ml_model_version: Optional[str] = None,
use_adjuster: bool = True,
adjuster_average_minutes: Optional[int] = 60,
):
"""
Saves a forecast for a given site & timestamp
Expand All @@ -217,6 +218,8 @@ def save_forecast(
ml_model_name: Name of the ML model used for the forecast
ml_model_version: Version of the ML model used for the forecast
use_adjuster: Make new model, adjusted by last 7 days of ME values
adjuster_average_minutes: The number of minutes that results are average over
when calculating adjuster values
Raises:
IOError: An error if database save fails
Expand Down Expand Up @@ -244,7 +247,11 @@ def save_forecast(
if use_adjuster:
log.info(f"Adjusting forecast for site_id={forecast_meta['site_uuid']}...")
forecast_values_df_adjust = adjust_forecast_with_adjuster(
db_session, forecast_meta, forecast_values_df, ml_model_name=ml_model_name
db_session,
forecast_meta,
forecast_values_df,
ml_model_name=ml_model_name,
average_minutes=adjuster_average_minutes,
)

log.info(forecast_values_df_adjust)
Expand Down Expand Up @@ -386,11 +393,14 @@ def app_run(timestamp: dt.datetime | None, write_to_db: bool = False, log_level:
write_to_db=write_to_db,
ml_model_name=ml_model.name,
ml_model_version=version,
adjuster_average_minutes=model_config.adjuster_average_minutes,
)
successful_runs += 1

log.info(f"Completed forecasts for {successful_runs} runs for "
f"{runs} model runs. This was for {len(sites)} sites")
log.info(
f"Completed forecasts for {successful_runs} runs for "
f"{runs} model runs. This was for {len(sites)} sites"
)
if successful_runs == runs:
log.info("All forecasts completed successfully")
elif 0 < successful_runs < runs:
Expand Down
1 change: 1 addition & 0 deletions india_forecast_app/models/all_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ models:
version: 20e7c3af76664ee2ac0e1502801749825ab8ed1f
client: ad
asset_type: pv
adjuster_average_minutes: 15
# this is just a dummy one
- name: dummy
type: dummy
Expand Down
8 changes: 8 additions & 0 deletions india_forecast_app/models/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ class Model(BaseModel):
asset_type: str = Field(
"pv", title="Asset Type", description="The type of asset the model is for (pv or wind)"
)
adjuster_average_minutes: int = Field(
60,
title="Average Minutes",
description="The number of minutes that results are average over when "
"calculating adjuster values. "
"For solar site with regular data, 15 should be used. "
"For wind sites, 60 minutes should be used.",
)


class Models(BaseModel):
Expand Down
23 changes: 23 additions & 0 deletions tests/test_adjuster.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,29 @@ def test_get_me_values(db_session, sites, generation_db_values, forecasts):
assert me_df["me_kw"].sum() != 0
assert me_df["horizon_minutes"][0] == 0
assert me_df["horizon_minutes"][1] == 15
assert me_df["me_kw"][90] != 0


def test_get_me_values_15(db_session, sites, generation_db_values, forecasts):
"""Check ME results are found"""

hour = pd.Timestamp(datetime.now()).hour
me_df_15 = get_me_values(
db_session, hour, site_uuid=sites[0].site_uuid, ml_model_name="test", average_minutes=15
)
me_df_60 = get_me_values(
db_session, hour, site_uuid=sites[0].site_uuid, ml_model_name="test", average_minutes=60
)

assert len(me_df_15) != 0
assert len(me_df_15) == 96
assert me_df_15["me_kw"].sum() != 0
assert me_df_15["horizon_minutes"][0] == 0
assert me_df_15["horizon_minutes"][1] == 15

# make sure the 15 and 60 are differnet values
assert me_df_15["horizon_minutes"][90] == me_df_60["horizon_minutes"][90]
assert me_df_15["me_kw"][90] != me_df_60["me_kw"][90]


def test_get_me_values_no_generation(db_session, sites, forecasts):
Expand Down

0 comments on commit fbd3f04

Please sign in to comment.