Skip to content

Commit

Permalink
PR comments + refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
peterdudfield committed Nov 13, 2024
1 parent 9610b4b commit 888186c
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 38 deletions.
84 changes: 77 additions & 7 deletions india_forecast_app/adjuster.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
""" Adjuster code, adjust forecast by last 7 days of ME"""
import logging
from datetime import datetime, timedelta
from typing import Optional

import pandas as pd
from pvsite_datamodel.sqlmodels import ForecastSQL, ForecastValueSQL, GenerationSQL
from pvsite_datamodel.read import get_site_by_uuid
from pvsite_datamodel.sqlmodels import ForecastSQL, ForecastValueSQL, GenerationSQL, MLModelSQL
from sqlalchemy import INT, cast, text
from sqlalchemy.sql import func

log = logging.getLogger(__name__)

# Wad to get all the forecast for the last 7 days made, at this time.
# And find the ME for each forecast horizon

"""
Here is the SQL query that it based off
Here is the SQL query that it based off:
select
AVG(generation.generation_power_kw - forecast_values.forecast_power_kw)
AVG(forecast_values.forecast_power_kw - generation.generation_power_kw)
-- generation.generation_power_kw,
-- forecast_values.forecast_power_kw,
-- forecast_values.start_utc,
Expand All @@ -29,21 +33,28 @@
and extract(hour from forecasts.created_utc) = 7
group by horizon_minutes
-- order by forecast_values.start_utc
I've left some sql comments in, so its easier to remove the group by, when debugging.
The site_uuid is hardcoded, but this should be updated.
"""


def get_me_values(
session, hour: int, site_uuid: str, start_datetime: Optional[datetime] = None
session,
hour: int,
site_uuid: str,
start_datetime: Optional[datetime] = None,
ml_model_name: Optional[str] = None,
) -> pd.DataFrame:
"""
Get the ME values for the last 7 days for a given hour, for a given hour creation time
Args:
hour: the hour of when the forecast is created
site_uuid: the site this is for
start_datetime:: the start datetime to filter on.
start_datetime:: the start datetime to filter on. This defaults to 7 days before now.
session: sqlalchemy session
ml_model_name: the name of the model to filter on, this is optional.
"""

if start_datetime is None:
Expand All @@ -67,7 +78,7 @@ def get_me_values(
generation_start_utc = start_utc_hour + start_utc_minute_rounded
query = query.filter(generation_start_utc == ForecastValueSQL.start_utc)

# only include the last 7 days
# only include the last x days
query = query.filter(ForecastValueSQL.start_utc >= start_datetime)
query = query.filter(GenerationSQL.start_utc >= start_datetime)

Expand All @@ -78,6 +89,10 @@ def get_me_values(
# filter on created_utc
query = query.filter((func.extract("hour", ForecastSQL.created_utc) == hour))

if ml_model_name is not None:
query = query.join(MLModelSQL)
query = query.filter(MLModelSQL.ml_model_name == ml_model_name)

# group by forecast horizon
query = query.group_by(ForecastValueSQL.horizon_minutes)

Expand All @@ -89,3 +104,58 @@ def get_me_values(
me_df = pd.DataFrame(me, columns=["me_kw", "horizon_minutes"])

return me_df


def adjust_forecast_with_adjuster(
db_session, forecast_meta: dict, forecast_values_df: pd.DataFrame, ml_model_name: str
):
"""
Adjust forecast values with ME values
Args:
db_session: sqlalchemy session
forecast_meta: forecast metadata
forecast_values_df: forecast values dataframe
ml_model_name: the ml model name
"""
# get the ME values
me_values = get_me_values(
db_session,
forecast_meta["timestamp_utc"].hour,
site_uuid=forecast_meta["site_uuid"],
ml_model_name=ml_model_name,
)
log.debug(f"ME values: {me_values}")

# clip me values by 10% of the capacity
site = get_site_by_uuid(db_session, forecast_meta["site_uuid"])
capacity = site.capacity_kw
me_kw_limit = 0.1 * capacity
n_values_above_limit = (me_values["me_kw"] > me_kw_limit).sum()
n_values_below_limit = (me_values["me_kw"] > me_kw_limit).sum()
me_values["me_kw"].clip(lower=-0.1 * capacity, upper=0.1 * capacity, inplace=True)
log.debug(
f"ME values clipped: There were {n_values_above_limit} values above the limit and "
f"{n_values_below_limit} values below the limit."
)

# join forecast_values_df with me_values on horizon_minutes
forecast_values_df_adjust = forecast_values_df.copy()
forecast_values_df_adjust = forecast_values_df_adjust.merge(
me_values, on="horizon_minutes", how="left"
)

# if me_kw is null, set to 0
forecast_values_df_adjust["me_kw"].fillna(0, inplace=True)

# adjust forecast_power_kw by ME values
forecast_values_df_adjust["forecast_power_kw"] = (
forecast_values_df_adjust["forecast_power_kw"] - forecast_values_df_adjust["me_kw"]
)
# drop me_kw column
forecast_values_df_adjust.drop(columns=["me_kw"], inplace=True)

# clip negative values to 0
forecast_values_df_adjust["forecast_power_kw"].clip(lower=0, inplace=True)
return forecast_values_df_adjust
35 changes: 4 additions & 31 deletions india_forecast_app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
import pandas as pd
import sentry_sdk
from pvsite_datamodel import DatabaseConnection
from pvsite_datamodel.read import get_pv_generation_by_sites, get_site_by_uuid, get_sites_by_country
from pvsite_datamodel.read import get_pv_generation_by_sites, get_sites_by_country
from pvsite_datamodel.sqlmodels import SiteAssetType, SiteSQL
from pvsite_datamodel.write import insert_forecast_values
from sqlalchemy.orm import Session

import india_forecast_app
from india_forecast_app.adjuster import get_me_values
from india_forecast_app.adjuster import adjust_forecast_with_adjuster
from india_forecast_app.models import PVNetModel, get_all_models
from india_forecast_app.sentry import traces_sampler

Expand Down Expand Up @@ -243,37 +243,10 @@ def save_forecast(

if use_adjuster:
log.info(f"Adjusting forecast for site_id={forecast_meta['site_uuid']}...")
me_values = get_me_values(
db_session, forecast_meta["timestamp_utc"].hour, site_uuid=forecast_meta["site_uuid"]
forecast_values_df_adjust = adjust_forecast_with_adjuster(
db_session, forecast_meta, forecast_values_df, ml_model_name=ml_model_name
)

log.info(f"ME values: {me_values}")

# clip me values by 10% of the capacity
site = get_site_by_uuid(db_session, forecast_meta["site_uuid"])
capacity = site.capacity_kw
me_values["me_kw"].clip(lower=-0.1 * capacity, upper=0.1 * capacity, inplace=True)

# join forecast_values_df with me_values on horizon_minutes
forecast_values_df_adjust = forecast_values_df.copy()
forecast_values_df_adjust = forecast_values_df_adjust.merge(
me_values, on="horizon_minutes", how="left"
)

# if me_kw is null, set to 0
forecast_values_df_adjust["me_kw"].fillna(0, inplace=True)

# adjust forecast_power_kw by ME values
forecast_values_df_adjust["forecast_power_kw"] = (
forecast_values_df_adjust["forecast_power_kw"] - forecast_values_df_adjust["me_kw"]
)

# drop me_kw column
forecast_values_df_adjust.drop(columns=["me_kw"], inplace=True)

# clip negative values to 0
forecast_values_df_adjust["forecast_power_kw"].clip(lower=0, inplace=True)

log.info(forecast_values_df_adjust)

if write_to_db:
Expand Down

0 comments on commit 888186c

Please sign in to comment.