Skip to content

Commit

Permalink
remove no distinct forecast values last seven days
Browse files Browse the repository at this point in the history
  • Loading branch information
peterdudfield committed Jul 9, 2024
1 parent 72e62cf commit 0d5c526
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 22 deletions.
31 changes: 11 additions & 20 deletions nowcasting_datamodel/save/save.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def save(
update_gsp: Optional[bool] = True,
apply_adjuster: Optional[bool] = True,
save_to_last_seven_days: Optional[bool] = True,
save_distinct_last_seven_days: bool = True,
remove_non_distinct_last_seven_days: bool = True,
):
"""
Save forecast to database
Expand All @@ -41,7 +41,7 @@ def save(
:param update_gsp: Optional (default true), to update all the GSP forecasts
:param apply_adjuster: Optional (default true), to apply the adjuster
:param save_to_last_seven_days: Optional (default true), to save to the last seven days table
:param save_distinct_last_seven_days: Optional (default True), to only save distinct
:param remove_non_distinct_last_seven_days: Optional (default True), to only keep distinct
forecast values in the forecast_value_last_seven_days table
"""

Expand Down Expand Up @@ -71,7 +71,9 @@ def save(
if save_to_last_seven_days:
logger.debug("Saving to last seven days table")
save_all_forecast_values_seven_days(
session=session, forecasts=forecasts, save_distinct=save_distinct_last_seven_days
session=session,
forecasts=forecasts,
remove_non_distinct=remove_non_distinct_last_seven_days,
)
session.commit()

Expand Down Expand Up @@ -117,14 +119,14 @@ def save_pv_system(session: Session, pv_system: PVSystem) -> PVSystemSQL:


def save_all_forecast_values_seven_days(
session: Session, forecasts: List[ForecastSQL], save_distinct: bool = True
session: Session, forecasts: List[ForecastSQL], remove_non_distinct: bool = True
):
"""
Save all the forecast values in the last seven days table
:param session: database sessions
:param forecasts: list of forecasts
:param save_distinct: Optional (default True), to only save distinct forecast values
:param remove_non_distinct: Optional (default True), to only keep distinct forecast values
If the last saved forecast value is the same as the current one, it will not be saved
"""

Expand All @@ -135,24 +137,13 @@ def save_all_forecast_values_seven_days(
forecast_value_last_7_days = change_forecast_value_to_forecast_last_7_days(
forecast_value
)

if save_distinct:
# only keep it if its unique
# this does take extra time, but it keeps the database smaller
forecast_is_new = is_new_forecast_values_distinct(
forecast_value=forecast_value_last_7_days,
session=session,
location_id=forecast.location_id,
model_name=forecast.model.name,
)
if forecast_is_new:
forecast_values_last_7_days.append(forecast_value_last_7_days)
else:
forecast_values_last_7_days.append(forecast_value_last_7_days)
forecast_values_last_7_days.append(forecast_value_last_7_days)

# add them to the database
add_forecast_last_7_days_and_remove_old_data(
session=session, forecast_values=forecast_values_last_7_days
session=session,
forecast_values=forecast_values_last_7_days,
remove_non_distinct=remove_non_distinct,
)


Expand Down
83 changes: 81 additions & 2 deletions nowcasting_datamodel/save/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sqlalchemy.orm.session import Session

from nowcasting_datamodel import N_GSP
from nowcasting_datamodel.models import ForecastValueSevenDaysSQL
from nowcasting_datamodel.models import ForecastValueSevenDaysSQL, LocationSQL, MLModelSQL
from nowcasting_datamodel.models.forecast import (
ForecastSQL,
ForecastValueLatestSQL,
Expand Down Expand Up @@ -296,14 +296,84 @@ def change_forecast_value_to_forecast_last_7_days(
return forecast_new


def remove_non_distinct_forecast_values(
session: Session, start_datetime: datetime, end_datetime: datetime
):
"""
Remove any non-distinct forecast values
We remove any non-distinct values from ForecastValueSevenDaysSQL. The values are distinct on
- target_time
- location_id
- model_name
- expected_power_generation_megawatts
We keep the first value, and remove and values from later on.
:param session: database session
:param start_datetime: start datetime
:param end_datetime: end_dateimte
"""

logger.debug(f"Removing non distinct forecast values for {start_datetime} to {end_datetime}")

# 1. sub query, these are the values we want to keep
sub_query = session.query(ForecastValueSevenDaysSQL.uuid)

# distinct
sub_query = sub_query.distinct(
ForecastValueSevenDaysSQL.target_time,
ForecastSQL.location_id,
MLModelSQL.name,
ForecastValueSevenDaysSQL.expected_power_generation_megawatts,
)

# join
sub_query = sub_query.join(ForecastSQL)
sub_query = sub_query.join(LocationSQL)
sub_query = sub_query.join(MLModelSQL)

# filter on time
sub_query = sub_query.filter(ForecastValueSevenDaysSQL.target_time >= start_datetime)
sub_query = sub_query.filter(ForecastValueSevenDaysSQL.target_time < end_datetime)

# order by
sub_query = sub_query.order_by(
ForecastValueSevenDaysSQL.target_time,
ForecastSQL.location_id,
MLModelSQL.name,
ForecastValueSevenDaysSQL.expected_power_generation_megawatts,
ForecastValueSevenDaysSQL.created_utc, # get the first one
)

sub_query = sub_query.subquery()

# 2. main query
query = session.query(ForecastValueSevenDaysSQL)

# filter on tiem
query = query.filter(ForecastValueSevenDaysSQL.target_time >= start_datetime)
query = query.filter(ForecastValueSevenDaysSQL.target_time < end_datetime)

# select uuid not in subquery
query = query.filter(ForecastValueSevenDaysSQL.uuid.notin_(sub_query))

# delete all results
query.delete()


def add_forecast_last_7_days_and_remove_old_data(
forecast_values: List[ForecastValueSevenDaysSQL], session: Session
forecast_values: List[ForecastValueSevenDaysSQL],
session: Session,
remove_non_distinct: bool = True,
):
"""
Add forecast values and delete old values
:param forecast_values:
:param session:
:param remove_non_distinct: Optional (default True), to only keep distinct forecast values
If the last saved forecast value is the same as the current one, it will not be saved
:return:
"""

Expand All @@ -313,7 +383,16 @@ def add_forecast_last_7_days_and_remove_old_data(
# remove old data
now_minus_7_days = datetime.now(tz=timezone.utc) - timedelta(days=7)

# remove any duplicate forecast values
if remove_non_distinct:
for i in range(0, 24 * 10):
start_datetime = now_minus_7_days + timedelta(hours=i)
end_datetime = start_datetime + timedelta(hours=1)
remove_non_distinct_forecast_values(session, start_datetime, end_datetime)

logger.debug(f"Removing data before {now_minus_7_days}")
query = session.query(ForecastValueSevenDaysSQL)
query = query.where(ForecastValueSevenDaysSQL.target_time < now_minus_7_days)
query.delete()

session.commit()
4 changes: 4 additions & 0 deletions tests/save/test_save.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ def test_save_all_forecast_values_seven_days(db_session):
now = datetime.now(tz=timezone.utc)
forecasts = make_fake_forecasts(gsp_ids=range(0, 3), session=db_session, t0_datetime_utc=now)

# need to commit the forecasts to the database
db_session.add_all(forecasts)
db_session.commit()

save_all_forecast_values_seven_days(session=db_session, forecasts=forecasts)

assert len(db_session.query(ForecastValueSevenDaysSQL).all()) == 3 * 112
Expand Down

0 comments on commit 0d5c526

Please sign in to comment.