From 888186c43ff49f0a54c8ea6ebd23fd5f1660f3b8 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Wed, 13 Nov 2024 06:37:47 +0000 Subject: [PATCH] PR comments + refactor --- india_forecast_app/adjuster.py | 84 +++++++++++++++++++++++++++++++--- india_forecast_app/app.py | 35 ++------------ 2 files changed, 81 insertions(+), 38 deletions(-) diff --git a/india_forecast_app/adjuster.py b/india_forecast_app/adjuster.py index 96f002d..415da19 100644 --- a/india_forecast_app/adjuster.py +++ b/india_forecast_app/adjuster.py @@ -1,20 +1,24 @@ """ Adjuster code, adjust forecast by last 7 days of ME""" +import logging from datetime import datetime, timedelta from typing import Optional import pandas as pd -from pvsite_datamodel.sqlmodels import ForecastSQL, ForecastValueSQL, GenerationSQL +from pvsite_datamodel.read import get_site_by_uuid +from pvsite_datamodel.sqlmodels import ForecastSQL, ForecastValueSQL, GenerationSQL, MLModelSQL from sqlalchemy import INT, cast, text from sqlalchemy.sql import func +log = logging.getLogger(__name__) + # Wad to get all the forecast for the last 7 days made, at this time. # And find the ME for each forecast horizon """ -Here is the SQL query that it based off +Here is the SQL query that it based off: select -AVG(generation.generation_power_kw - forecast_values.forecast_power_kw) +AVG(forecast_values.forecast_power_kw - generation.generation_power_kw) -- generation.generation_power_kw, -- forecast_values.forecast_power_kw, -- forecast_values.start_utc, @@ -29,11 +33,18 @@ and extract(hour from forecasts.created_utc) = 7 group by horizon_minutes -- order by forecast_values.start_utc + +I've left some sql comments in, so its easier to remove the group by, when debugging. +The site_uuid is hardcoded, but this should be updated. """ def get_me_values( - session, hour: int, site_uuid: str, start_datetime: Optional[datetime] = None + session, + hour: int, + site_uuid: str, + start_datetime: Optional[datetime] = None, + ml_model_name: Optional[str] = None, ) -> pd.DataFrame: """ Get the ME values for the last 7 days for a given hour, for a given hour creation time @@ -41,9 +52,9 @@ def get_me_values( Args: hour: the hour of when the forecast is created site_uuid: the site this is for - start_datetime:: the start datetime to filter on. + start_datetime:: the start datetime to filter on. This defaults to 7 days before now. session: sqlalchemy session - + ml_model_name: the name of the model to filter on, this is optional. """ if start_datetime is None: @@ -67,7 +78,7 @@ def get_me_values( generation_start_utc = start_utc_hour + start_utc_minute_rounded query = query.filter(generation_start_utc == ForecastValueSQL.start_utc) - # only include the last 7 days + # only include the last x days query = query.filter(ForecastValueSQL.start_utc >= start_datetime) query = query.filter(GenerationSQL.start_utc >= start_datetime) @@ -78,6 +89,10 @@ def get_me_values( # filter on created_utc query = query.filter((func.extract("hour", ForecastSQL.created_utc) == hour)) + if ml_model_name is not None: + query = query.join(MLModelSQL) + query = query.filter(MLModelSQL.ml_model_name == ml_model_name) + # group by forecast horizon query = query.group_by(ForecastValueSQL.horizon_minutes) @@ -89,3 +104,58 @@ def get_me_values( me_df = pd.DataFrame(me, columns=["me_kw", "horizon_minutes"]) return me_df + + +def adjust_forecast_with_adjuster( + db_session, forecast_meta: dict, forecast_values_df: pd.DataFrame, ml_model_name: str +): + """ + Adjust forecast values with ME values + + Args: + db_session: sqlalchemy session + forecast_meta: forecast metadata + forecast_values_df: forecast values dataframe + ml_model_name: the ml model name + + """ + # get the ME values + me_values = get_me_values( + db_session, + forecast_meta["timestamp_utc"].hour, + site_uuid=forecast_meta["site_uuid"], + ml_model_name=ml_model_name, + ) + log.debug(f"ME values: {me_values}") + + # clip me values by 10% of the capacity + site = get_site_by_uuid(db_session, forecast_meta["site_uuid"]) + capacity = site.capacity_kw + me_kw_limit = 0.1 * capacity + n_values_above_limit = (me_values["me_kw"] > me_kw_limit).sum() + n_values_below_limit = (me_values["me_kw"] > me_kw_limit).sum() + me_values["me_kw"].clip(lower=-0.1 * capacity, upper=0.1 * capacity, inplace=True) + log.debug( + f"ME values clipped: There were {n_values_above_limit} values above the limit and " + f"{n_values_below_limit} values below the limit." + ) + + # join forecast_values_df with me_values on horizon_minutes + forecast_values_df_adjust = forecast_values_df.copy() + forecast_values_df_adjust = forecast_values_df_adjust.merge( + me_values, on="horizon_minutes", how="left" + ) + + # if me_kw is null, set to 0 + forecast_values_df_adjust["me_kw"].fillna(0, inplace=True) + + # adjust forecast_power_kw by ME values + forecast_values_df_adjust["forecast_power_kw"] = ( + forecast_values_df_adjust["forecast_power_kw"] - forecast_values_df_adjust["me_kw"] + ) + # drop me_kw column + forecast_values_df_adjust.drop(columns=["me_kw"], inplace=True) + + # clip negative values to 0 + forecast_values_df_adjust["forecast_power_kw"].clip(lower=0, inplace=True) + return forecast_values_df_adjust diff --git a/india_forecast_app/app.py b/india_forecast_app/app.py index 4e99401..a418b27 100644 --- a/india_forecast_app/app.py +++ b/india_forecast_app/app.py @@ -13,13 +13,13 @@ import pandas as pd import sentry_sdk from pvsite_datamodel import DatabaseConnection -from pvsite_datamodel.read import get_pv_generation_by_sites, get_site_by_uuid, get_sites_by_country +from pvsite_datamodel.read import get_pv_generation_by_sites, get_sites_by_country from pvsite_datamodel.sqlmodels import SiteAssetType, SiteSQL from pvsite_datamodel.write import insert_forecast_values from sqlalchemy.orm import Session import india_forecast_app -from india_forecast_app.adjuster import get_me_values +from india_forecast_app.adjuster import adjust_forecast_with_adjuster from india_forecast_app.models import PVNetModel, get_all_models from india_forecast_app.sentry import traces_sampler @@ -243,37 +243,10 @@ def save_forecast( if use_adjuster: log.info(f"Adjusting forecast for site_id={forecast_meta['site_uuid']}...") - me_values = get_me_values( - db_session, forecast_meta["timestamp_utc"].hour, site_uuid=forecast_meta["site_uuid"] + forecast_values_df_adjust = adjust_forecast_with_adjuster( + db_session, forecast_meta, forecast_values_df, ml_model_name=ml_model_name ) - log.info(f"ME values: {me_values}") - - # clip me values by 10% of the capacity - site = get_site_by_uuid(db_session, forecast_meta["site_uuid"]) - capacity = site.capacity_kw - me_values["me_kw"].clip(lower=-0.1 * capacity, upper=0.1 * capacity, inplace=True) - - # join forecast_values_df with me_values on horizon_minutes - forecast_values_df_adjust = forecast_values_df.copy() - forecast_values_df_adjust = forecast_values_df_adjust.merge( - me_values, on="horizon_minutes", how="left" - ) - - # if me_kw is null, set to 0 - forecast_values_df_adjust["me_kw"].fillna(0, inplace=True) - - # adjust forecast_power_kw by ME values - forecast_values_df_adjust["forecast_power_kw"] = ( - forecast_values_df_adjust["forecast_power_kw"] - forecast_values_df_adjust["me_kw"] - ) - - # drop me_kw column - forecast_values_df_adjust.drop(columns=["me_kw"], inplace=True) - - # clip negative values to 0 - forecast_values_df_adjust["forecast_power_kw"].clip(lower=0, inplace=True) - log.info(forecast_values_df_adjust) if write_to_db: