Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue/adjuster minutes #138

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 30 additions & 10 deletions india_forecast_app/adjuster.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def get_me_values(
site_uuid: str,
start_datetime: Optional[datetime] = None,
ml_model_name: Optional[str] = None,
average_minutes: Optional[int] = 60,
) -> pd.DataFrame:
"""
Get the ME values for the last 7 days for a given hour, for a given hour creation time
Expand All @@ -55,15 +56,22 @@ def get_me_values(
start_datetime:: the start datetime to filter on. This defaults to 7 days before now.
session: sqlalchemy session
ml_model_name: the name of the model to filter on, this is optional.
average_minutes: the average minutes for the adjuster to group results by, this defaults to 60.
For solar data this should be 15, because of the sunrise and sunset,
for wind data this should be 60.
"""

assert average_minutes <= 60, "Average minutes for adjuster should be <= 60"

if start_datetime is None:
start_datetime = datetime.now() - timedelta(days=7)

query = session.query(
func.avg(ForecastValueSQL.forecast_power_kw - GenerationSQL.generation_power_kw),
# create hour column
(cast(ForecastValueSQL.horizon_minutes / 60, INT)).label("horizon_hour"),
(cast(ForecastValueSQL.horizon_minutes / average_minutes, INT)).label(
"horizon_div_average_minutes"
),
)

# join
Expand All @@ -72,8 +80,8 @@ def get_me_values(
# round Generation start_utc and join to forecast start_utc
start_utc_minute_rounded = (
cast(func.date_part("minute", GenerationSQL.start_utc), INT)
/ 30
* text("interval '30 min'")
/ 15
* text("interval '15 min'")
)
start_utc_hour = func.date_trunc("hour", GenerationSQL.start_utc)
generation_start_utc = start_utc_hour + start_utc_minute_rounded
Expand All @@ -95,18 +103,18 @@ def get_me_values(
query = query.filter(MLModelSQL.name == ml_model_name)

# group by forecast horizon
query = query.group_by('horizon_hour')
query = query.group_by("horizon_div_average_minutes")

# order by forecast horizon
query = query.order_by('horizon_hour')
query = query.order_by("horizon_div_average_minutes")

me = query.all()

me_df = pd.DataFrame(me, columns=["me_kw", "horizon_hour"])
me_df["horizon_minutes"] = me_df["horizon_hour"] * 60
me_df = pd.DataFrame(me, columns=["me_kw", "horizon_div_average_minutes"])
me_df["horizon_minutes"] = me_df["horizon_div_average_minutes"] * average_minutes

# drop the hour column
me_df.drop(columns=["horizon_hour"], inplace=True)
me_df.drop(columns=["horizon_div_average_minutes"], inplace=True)

if len(me_df) == 0:
return me_df
Expand All @@ -115,19 +123,28 @@ def get_me_values(
# currently in 0, 60, 120,...
# change to 0, 15, 30, 45, 60, 75, 90, 105, 120, ...
me_df = me_df.set_index("horizon_minutes")
me_df = me_df.reindex(range(0, max(me_df.index)+15, 15)).interpolate(limit=3)
me_df = me_df.reindex(range(0, max(me_df.index) + 15, 15)).interpolate(limit=3)

# reset index
me_df = me_df.reset_index()

# smooth by a few blocks, 30 minutes hour either side, and keep 0 values 0
idx = me_df["me_kw"] == 0
me_df["me_kw"] = me_df["me_kw"].rolling(window=5, min_periods=1, center=True).mean()
me_df.loc[idx, "me_kw"] = 0

# log the maximum and minimum adjuster results
log.info(f"ME results: max={me_df['me_kw'].max()}, min={me_df['me_kw'].min()}")

return me_df


def adjust_forecast_with_adjuster(
db_session, forecast_meta: dict, forecast_values_df: pd.DataFrame, ml_model_name: str
db_session,
forecast_meta: dict,
forecast_values_df: pd.DataFrame,
ml_model_name: str,
average_minutes: Optional[int] = 60,
):
"""
Adjust forecast values with ME values
Expand All @@ -137,6 +154,7 @@ def adjust_forecast_with_adjuster(
forecast_meta: forecast metadata
forecast_values_df: forecast values dataframe
ml_model_name: the ml model name
average_minutes: the average minutes for the adjuster to group results by, this defaults to 60.

"""
# get the ME values
Expand All @@ -145,6 +163,7 @@ def adjust_forecast_with_adjuster(
forecast_meta["timestamp_utc"].hour,
site_uuid=forecast_meta["site_uuid"],
ml_model_name=ml_model_name,
average_minutes=average_minutes,
)
log.debug(f"ME values: {me_values}")

Expand Down Expand Up @@ -173,6 +192,7 @@ def adjust_forecast_with_adjuster(
forecast_values_df_adjust["me_kw"].fillna(0, inplace=True)

# adjust forecast_power_kw by ME values
log.info(forecast_values_df_adjust["me_kw"])
forecast_values_df_adjust["forecast_power_kw"] = (
forecast_values_df_adjust["forecast_power_kw"] - forecast_values_df_adjust["me_kw"]
)
Expand Down
16 changes: 13 additions & 3 deletions india_forecast_app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def save_forecast(
ml_model_name: Optional[str] = None,
ml_model_version: Optional[str] = None,
use_adjuster: bool = True,
adjuster_average_minutes: Optional[int] = 60,
):
"""
Saves a forecast for a given site & timestamp
Expand All @@ -217,6 +218,8 @@ def save_forecast(
ml_model_name: Name of the ML model used for the forecast
ml_model_version: Version of the ML model used for the forecast
use_adjuster: Make new model, adjusted by last 7 days of ME values
adjuster_average_minutes: The number of minutes that results are average over
when calculating adjuster values

Raises:
IOError: An error if database save fails
Expand Down Expand Up @@ -244,7 +247,11 @@ def save_forecast(
if use_adjuster:
log.info(f"Adjusting forecast for site_id={forecast_meta['site_uuid']}...")
forecast_values_df_adjust = adjust_forecast_with_adjuster(
db_session, forecast_meta, forecast_values_df, ml_model_name=ml_model_name
db_session,
forecast_meta,
forecast_values_df,
ml_model_name=ml_model_name,
average_minutes=adjuster_average_minutes,
)

log.info(forecast_values_df_adjust)
Expand Down Expand Up @@ -386,11 +393,14 @@ def app_run(timestamp: dt.datetime | None, write_to_db: bool = False, log_level:
write_to_db=write_to_db,
ml_model_name=ml_model.name,
ml_model_version=version,
adjuster_average_minutes=model_config.adjuster_average_minutes,
)
successful_runs += 1

log.info(f"Completed forecasts for {successful_runs} runs for "
f"{runs} model runs. This was for {len(sites)} sites")
log.info(
f"Completed forecasts for {successful_runs} runs for "
f"{runs} model runs. This was for {len(sites)} sites"
)
if successful_runs == runs:
log.info("All forecasts completed successfully")
elif 0 < successful_runs < runs:
Expand Down
1 change: 1 addition & 0 deletions india_forecast_app/models/all_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ models:
version: 20e7c3af76664ee2ac0e1502801749825ab8ed1f
client: ad
asset_type: pv
adjuster_average_minutes: 15
# this is just a dummy one
- name: dummy
type: dummy
Expand Down
8 changes: 8 additions & 0 deletions india_forecast_app/models/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ class Model(BaseModel):
asset_type: str = Field(
"pv", title="Asset Type", description="The type of asset the model is for (pv or wind)"
)
adjuster_average_minutes: int = Field(
60,
title="Average Minutes",
description="The number of minutes that results are average over when "
"calculating adjuster values. "
"For solar site with regular data, 15 should be used. "
"For wind sites, 60 minutes should be used.",
)


class Models(BaseModel):
Expand Down
23 changes: 23 additions & 0 deletions tests/test_adjuster.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,29 @@ def test_get_me_values(db_session, sites, generation_db_values, forecasts):
assert me_df["me_kw"].sum() != 0
assert me_df["horizon_minutes"][0] == 0
assert me_df["horizon_minutes"][1] == 15
assert me_df["me_kw"][90] != 0


def test_get_me_values_15(db_session, sites, generation_db_values, forecasts):
"""Check ME results are found"""

hour = pd.Timestamp(datetime.now()).hour
me_df_15 = get_me_values(
db_session, hour, site_uuid=sites[0].site_uuid, ml_model_name="test", average_minutes=15
)
me_df_60 = get_me_values(
db_session, hour, site_uuid=sites[0].site_uuid, ml_model_name="test", average_minutes=60
)

assert len(me_df_15) != 0
assert len(me_df_15) == 96
assert me_df_15["me_kw"].sum() != 0
assert me_df_15["horizon_minutes"][0] == 0
assert me_df_15["horizon_minutes"][1] == 15

# make sure the 15 and 60 are differnet values
assert me_df_15["horizon_minutes"][90] == me_df_60["horizon_minutes"][90]
assert me_df_15["me_kw"][90] != me_df_60["me_kw"][90]


def test_get_me_values_no_generation(db_session, sites, forecasts):
Expand Down
Loading