From 888186c43ff49f0a54c8ea6ebd23fd5f1660f3b8 Mon Sep 17 00:00:00 2001
From: peterdudfield <peter.dudfield@hotmail.com>
Date: Wed, 13 Nov 2024 06:37:47 +0000
Subject: [PATCH] PR comments + refactor

---
 india_forecast_app/adjuster.py | 84 +++++++++++++++++++++++++++++++---
 india_forecast_app/app.py      | 35 ++------------
 2 files changed, 81 insertions(+), 38 deletions(-)

diff --git a/india_forecast_app/adjuster.py b/india_forecast_app/adjuster.py
index 96f002d..415da19 100644
--- a/india_forecast_app/adjuster.py
+++ b/india_forecast_app/adjuster.py
@@ -1,20 +1,24 @@
 """ Adjuster code, adjust forecast by last 7 days of ME"""
+import logging
 from datetime import datetime, timedelta
 from typing import Optional
 
 import pandas as pd
-from pvsite_datamodel.sqlmodels import ForecastSQL, ForecastValueSQL, GenerationSQL
+from pvsite_datamodel.read import get_site_by_uuid
+from pvsite_datamodel.sqlmodels import ForecastSQL, ForecastValueSQL, GenerationSQL, MLModelSQL
 from sqlalchemy import INT, cast, text
 from sqlalchemy.sql import func
 
+log = logging.getLogger(__name__)
+
 # Wad to get all the forecast for the last 7 days made, at this time.
 # And find the ME for each forecast horizon
 
 """
-Here is the SQL query that it based off
+Here is the SQL query that it based off:
 
 select 
-AVG(generation.generation_power_kw - forecast_values.forecast_power_kw)
+AVG(forecast_values.forecast_power_kw - generation.generation_power_kw)
 -- generation.generation_power_kw,
 -- forecast_values.forecast_power_kw,
 -- forecast_values.start_utc,
@@ -29,11 +33,18 @@
 and extract(hour from forecasts.created_utc) = 7
 group by horizon_minutes
 -- order by forecast_values.start_utc
+
+I've left some sql comments in, so its easier to remove the group by, when debugging. 
+The site_uuid is hardcoded, but this should be updated.
 """
 
 
 def get_me_values(
-    session, hour: int, site_uuid: str, start_datetime: Optional[datetime] = None
+    session,
+    hour: int,
+    site_uuid: str,
+    start_datetime: Optional[datetime] = None,
+    ml_model_name: Optional[str] = None,
 ) -> pd.DataFrame:
     """
     Get the ME values for the last 7 days for a given hour, for a given hour creation time
@@ -41,9 +52,9 @@ def get_me_values(
     Args:
     hour: the hour of when the forecast is created
     site_uuid: the site this is for
-    start_datetime:: the start datetime to filter on.
+    start_datetime:: the start datetime to filter on. This defaults to 7 days before now.
     session: sqlalchemy session
-
+    ml_model_name: the name of the model to filter on, this is optional.
     """
 
     if start_datetime is None:
@@ -67,7 +78,7 @@ def get_me_values(
     generation_start_utc = start_utc_hour + start_utc_minute_rounded
     query = query.filter(generation_start_utc == ForecastValueSQL.start_utc)
 
-    # only include the last 7 days
+    # only include the last x days
     query = query.filter(ForecastValueSQL.start_utc >= start_datetime)
     query = query.filter(GenerationSQL.start_utc >= start_datetime)
 
@@ -78,6 +89,10 @@ def get_me_values(
     # filter on created_utc
     query = query.filter((func.extract("hour", ForecastSQL.created_utc) == hour))
 
+    if ml_model_name is not None:
+        query = query.join(MLModelSQL)
+        query = query.filter(MLModelSQL.ml_model_name == ml_model_name)
+
     # group by forecast horizon
     query = query.group_by(ForecastValueSQL.horizon_minutes)
 
@@ -89,3 +104,58 @@ def get_me_values(
     me_df = pd.DataFrame(me, columns=["me_kw", "horizon_minutes"])
 
     return me_df
+
+
+def adjust_forecast_with_adjuster(
+    db_session, forecast_meta: dict, forecast_values_df: pd.DataFrame, ml_model_name: str
+):
+    """
+    Adjust forecast values with ME values
+
+    Args:
+    db_session: sqlalchemy session
+    forecast_meta: forecast metadata
+    forecast_values_df: forecast values dataframe
+    ml_model_name: the ml model name
+
+    """
+    # get the ME values
+    me_values = get_me_values(
+        db_session,
+        forecast_meta["timestamp_utc"].hour,
+        site_uuid=forecast_meta["site_uuid"],
+        ml_model_name=ml_model_name,
+    )
+    log.debug(f"ME values: {me_values}")
+
+    # clip me values by 10% of the capacity
+    site = get_site_by_uuid(db_session, forecast_meta["site_uuid"])
+    capacity = site.capacity_kw
+    me_kw_limit = 0.1 * capacity
+    n_values_above_limit = (me_values["me_kw"] > me_kw_limit).sum()
+    n_values_below_limit = (me_values["me_kw"] > me_kw_limit).sum()
+    me_values["me_kw"].clip(lower=-0.1 * capacity, upper=0.1 * capacity, inplace=True)
+    log.debug(
+        f"ME values clipped: There were {n_values_above_limit} values above the limit and "
+        f"{n_values_below_limit} values below the limit."
+    )
+
+    # join forecast_values_df with me_values on horizon_minutes
+    forecast_values_df_adjust = forecast_values_df.copy()
+    forecast_values_df_adjust = forecast_values_df_adjust.merge(
+        me_values, on="horizon_minutes", how="left"
+    )
+
+    # if me_kw is null, set to 0
+    forecast_values_df_adjust["me_kw"].fillna(0, inplace=True)
+
+    # adjust forecast_power_kw by ME values
+    forecast_values_df_adjust["forecast_power_kw"] = (
+        forecast_values_df_adjust["forecast_power_kw"] - forecast_values_df_adjust["me_kw"]
+    )
+    # drop me_kw column
+    forecast_values_df_adjust.drop(columns=["me_kw"], inplace=True)
+
+    # clip negative values to 0
+    forecast_values_df_adjust["forecast_power_kw"].clip(lower=0, inplace=True)
+    return forecast_values_df_adjust
diff --git a/india_forecast_app/app.py b/india_forecast_app/app.py
index 4e99401..a418b27 100644
--- a/india_forecast_app/app.py
+++ b/india_forecast_app/app.py
@@ -13,13 +13,13 @@
 import pandas as pd
 import sentry_sdk
 from pvsite_datamodel import DatabaseConnection
-from pvsite_datamodel.read import get_pv_generation_by_sites, get_site_by_uuid, get_sites_by_country
+from pvsite_datamodel.read import get_pv_generation_by_sites, get_sites_by_country
 from pvsite_datamodel.sqlmodels import SiteAssetType, SiteSQL
 from pvsite_datamodel.write import insert_forecast_values
 from sqlalchemy.orm import Session
 
 import india_forecast_app
-from india_forecast_app.adjuster import get_me_values
+from india_forecast_app.adjuster import adjust_forecast_with_adjuster
 from india_forecast_app.models import PVNetModel, get_all_models
 from india_forecast_app.sentry import traces_sampler
 
@@ -243,37 +243,10 @@ def save_forecast(
 
     if use_adjuster:
         log.info(f"Adjusting forecast for site_id={forecast_meta['site_uuid']}...")
-        me_values = get_me_values(
-            db_session, forecast_meta["timestamp_utc"].hour, site_uuid=forecast_meta["site_uuid"]
+        forecast_values_df_adjust = adjust_forecast_with_adjuster(
+            db_session, forecast_meta, forecast_values_df, ml_model_name=ml_model_name
         )
 
-        log.info(f"ME values: {me_values}")
-
-        # clip me values by 10% of the capacity
-        site = get_site_by_uuid(db_session, forecast_meta["site_uuid"])
-        capacity = site.capacity_kw
-        me_values["me_kw"].clip(lower=-0.1 * capacity, upper=0.1 * capacity, inplace=True)
-
-        # join forecast_values_df with me_values on horizon_minutes
-        forecast_values_df_adjust = forecast_values_df.copy()
-        forecast_values_df_adjust = forecast_values_df_adjust.merge(
-            me_values, on="horizon_minutes", how="left"
-        )
-
-        # if me_kw is null, set to 0
-        forecast_values_df_adjust["me_kw"].fillna(0, inplace=True)
-
-        # adjust forecast_power_kw by ME values
-        forecast_values_df_adjust["forecast_power_kw"] = (
-            forecast_values_df_adjust["forecast_power_kw"] - forecast_values_df_adjust["me_kw"]
-        )
-
-        # drop me_kw column
-        forecast_values_df_adjust.drop(columns=["me_kw"], inplace=True)
-
-        # clip negative values to 0
-        forecast_values_df_adjust["forecast_power_kw"].clip(lower=0, inplace=True)
-
         log.info(forecast_values_df_adjust)
 
         if write_to_db: