Skip to content

Commit

Permalink
Merge pull request #141 from openclimatefix/issue/use-legacy
Browse files Browse the repository at this point in the history
add back in pvnet ecmwf old model
  • Loading branch information
peterdudfield authored Oct 15, 2024
2 parents a01a60d + 1b2a45a commit fb3b6c4
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 7 deletions.
7 changes: 5 additions & 2 deletions pvnet_app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def app(
- SENTRY_DSN, optional link to sentry
- ENVIRONMENT, the environment this is running in, defaults to local
- USE_ECMWF_ONLY, option to use ecmwf only model, defaults to false
- USE_OCF_DATA_SAMPLER, option to use ocf_data_sampler, defaults to true
Args:
t0 (datetime): Datetime at which forecast is made
Expand All @@ -127,18 +128,20 @@ def app(
use_day_ahead_model = os.getenv("DAY_AHEAD_MODEL", "false").lower() == "true"
use_ecmwf_only = os.getenv("USE_ECMWF_ONLY", "false").lower() == "true"
run_extra_models = os.getenv("RUN_EXTRA_MODELS", "false").lower() == "true"
use_ocf_data_sampler = os.getenv("USE_OCF_DATA_SAMPLER", "true").lower() == "true"

logger.info(f"Using `pvnet` library version: {pvnet.__version__}")
logger.info(f"Using `pvnet_app` library version: {pvnet_app.__version__}")
logger.info(f"Using {num_workers} workers")
logger.info(f"Using day ahead model: {use_day_ahead_model}")
logger.info(f"Using ecwmwf only: {use_ecmwf_only}")
logger.info(f"Using ecmwf only: {use_ecmwf_only}")
logger.info(f"Running extra models: {run_extra_models}")

# load models
model_configs = get_all_models(get_ecmwf_only=use_ecmwf_only,
get_day_ahead_only=use_day_ahead_model,
run_extra_models=run_extra_models)
run_extra_models=run_extra_models,
use_ocf_data_sampler=use_ocf_data_sampler)

logger.info(f"Using adjuster: {model_configs[0].use_adjuster}")
logger.info(f"Saving GSP sum: {model_configs[0].save_gsp_sum}")
Expand Down
24 changes: 24 additions & 0 deletions pvnet_app/model_configs/all_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,29 @@ models:
version: 4fe6b1441b6dd549292c201ed85eee156ecc220c
ecmwf_only: True
uses_satellite_data: False
# This is the old model for pvnet and pvnet_ecmwf
- name: pvnet_v2
pvnet:
repo: openclimatefix/pvnet_uk_region
version: 62e5e20ab793cee7cf94eadac870d2199501a730
summation:
repo: openclimatefix/pvnet_v2_summation
version: ffac655f9650b81865d96023baa15839f3ce26ec
use_adjuster: True
save_gsp_sum: False
verbose: True
save_gsp_to_recent: True
uses_ocf_data_sampler: False
- name: pvnet_ecmwf # this name is important as it is used for blending
pvnet:
repo: openclimatefix/pvnet_uk_region
version: 35d55181a82440bdd087f380d650bfd0b64bd322
summation:
repo: openclimatefix/pvnet_v2_summation
version: 9002baf1e9dc1ec141f3c4a1fa8447b6316a4558
ecmwf_only: True
uses_satellite_data: False
uses_ocf_data_sampler: False
# The day ahead model has not yet been re-trained with data-sampler.
# It will be run with the legacy dataloader using ocf_datapipes
- name: pvnet_day_ahead
Expand All @@ -61,4 +84,5 @@ models:
verbose: True
save_gsp_to_recent: True
day_ahead: True
uses_ocf_data_sampler: False

19 changes: 17 additions & 2 deletions pvnet_app/model_configs/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ class Model(BaseModel):
True, title="Uses Satellite Data", description="If this model uses satellite data"
)

uses_ocf_data_sampler: Optional[bool] = Field(
True, title="Uses OCF Data Sampler", description="If this model uses data sampler, old one uses ocf_datapipes"
)




class Models(BaseModel):
"""A group of ml models"""
Expand All @@ -60,8 +66,8 @@ class Models(BaseModel):
@field_validator("models")
@classmethod
def name_must_be_unique(cls, v: List[Model]) -> List[Model]:
"""Ensure that all model names are unique"""
names = [model.name for model in v]
"""Ensure that all model names are unique, respect to using ocf_data_sampler or not"""
names = [(model.name,model.uses_ocf_data_sampler) for model in v]
unique_names = set(names)

if len(names) != len(unique_names):
Expand All @@ -73,6 +79,7 @@ def get_all_models(
get_ecmwf_only: Optional[bool] = False,
get_day_ahead_only: Optional[bool] = False,
run_extra_models: Optional[bool] = False,
use_ocf_data_sampler: Optional[bool] = True,
) -> List[Model]:
"""
Returns all the models for a given client
Expand All @@ -81,6 +88,7 @@ def get_all_models(
get_ecmwf_only: If only the ECMWF model should be returned
get_day_ahead_only: If only the day ahead model should be returned
run_extra_models: If extra models should be run
use_ocf_data_sampler: If the OCF Data Sampler should be used
"""

# load models from yaml file
Expand All @@ -107,6 +115,13 @@ def get_all_models(
log.info("Not running extra models")
models.models = [model for model in models.models if model.name == "pvnet_v2"]

if use_ocf_data_sampler:
log.info("Using OCF Data Sampler")
models.models = [model for model in models.models if model.uses_ocf_data_sampler]
else:
log.info("Not using OCF Data Sampler, using ocf_datapipes")
models.models = [model for model in models.models if not model.uses_ocf_data_sampler]

return models.models


Expand Down
14 changes: 13 additions & 1 deletion tests/model_configs/test_pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_get_all_models_get_ecmwf_only():

def test_get_all_models_get_day_ahead_only():
    """Test for getting all models with get_day_ahead_only"""
    # NOTE(review): this result is overwritten by the very next line - it looks like the
    # pre-change line of the diff sitting next to its replacement; confirm and drop it
    models = get_all_models(get_day_ahead_only=True)
    # The day ahead model is requested with use_ocf_data_sampler=False
    models = get_all_models(get_day_ahead_only=True, use_ocf_data_sampler=False)
    # Exactly one model should come back, and it must be flagged as day ahead
    assert len(models) == 1
    assert models[0].day_ahead

Expand All @@ -28,3 +28,15 @@ def test_get_all_models_run_extra_models():
models = get_all_models(run_extra_models=True)
assert len(models) == 5


def test_get_all_models_ocf_data_sampler():
    """Check how many models are returned with and without use_ocf_data_sampler"""
    # Each case is (keyword arguments for get_all_models, expected number of models)
    cases = [
        ({"use_ocf_data_sampler": True, "run_extra_models": True}, 5),
        ({"use_ocf_data_sampler": False, "run_extra_models": True}, 2),
        (
            {
                "use_ocf_data_sampler": False,
                "run_extra_models": True,
                "get_day_ahead_only": True,
            },
            1,
        ),
    ]

    for kwargs, expected_count in cases:
        selected = get_all_models(**kwargs)
        assert len(selected) == expected_count

74 changes: 72 additions & 2 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,15 @@ def test_app_day_ahead_model(

os.environ["DAY_AHEAD_MODEL"] = "True"
os.environ["RUN_EXTRA_MODELS"] = "False"
os.environ["USE_OCF_DATA_SAMPLER"] = "False"

# Run prediction
# This import needs to come after the environ vars have been set
from pvnet_app.app import app

app(gsp_ids=list(range(1, 318)), num_workers=2)

all_models = get_all_models(get_day_ahead_only=True)
all_models = get_all_models(get_day_ahead_only=True, use_ocf_data_sampler=False)

# Check correct number of forecasts have been made
# (317 GSPs + 1 National + maybe GSP-sum) = 318 or 319 forecasts
Expand Down Expand Up @@ -174,6 +175,7 @@ def test_app_no_sat(
os.environ["RUN_EXTRA_MODELS"] = "True"
os.environ["SAVE_GSP_SUM"] = "True"
os.environ["DAY_AHEAD_MODEL"] = "False"
os.environ["USE_OCF_DATA_SAMPLER"] = "True"

# Run prediction
# This import needs to come after the environ vars have been set
Expand Down Expand Up @@ -213,4 +215,72 @@ def test_app_no_sat(
expected_forecast_results += 317 * model_config.save_gsp_to_recent
expected_forecast_results += model_config.save_gsp_sum # optional Sum of GSPs

assert len(db_session.query(ForecastValueSevenDaysSQL).all()) == expected_forecast_results * 16
assert len(db_session.query(ForecastValueSevenDaysSQL).all()) == expected_forecast_results * 16


# Test legacy models
# It's nice to have this here, so we can run the latest version in production, but still use
# the old models. Once we have retrained the PVNet summation models we can remove this.
def test_app_ocf_datapipes(
    db_session, nwp_ukv_data, nwp_ecmwf_data, sat_5_data, gsp_yields_and_systems, me_latest
):
    """Test the app running the legacy models, with USE_OCF_DATA_SAMPLER set to False"""

    with tempfile.TemporaryDirectory() as tmpdirname:
        # NOTE(review): the working directory is not changed back in the visible code, so it
        # may point at a removed directory once the temp dir is cleaned up - confirm whether
        # any later test relies on the current working directory
        os.chdir(tmpdirname)

        # Save each input dataset to a relative path (i.e. inside the temp dir) and set the
        # matching environment variable to that path
        temp_nwp_path = "temp_nwp_ukv.zarr"
        os.environ["NWP_UKV_ZARR_PATH"] = temp_nwp_path
        nwp_ukv_data.to_zarr(temp_nwp_path)

        temp_nwp_path = "temp_nwp_ecmwf.zarr"
        os.environ["NWP_ECMWF_ZARR_PATH"] = temp_nwp_path
        nwp_ecmwf_data.to_zarr(temp_nwp_path)

        # The satellite data is written into a zipped zarr store
        temp_sat_path = "temp_sat.zarr.zip"
        os.environ["SATELLITE_ZARR_PATH"] = temp_sat_path
        with zarr.storage.ZipStore(temp_sat_path, mode="x") as store:
            sat_5_data.to_zarr(store)

        # Not the day ahead model, no extra models, and not the OCF data sampler
        # NOTE(review): these environment variables are not reset afterwards in the visible code
        os.environ["DAY_AHEAD_MODEL"] = "False"
        os.environ["RUN_EXTRA_MODELS"] = "False"
        os.environ["USE_OCF_DATA_SAMPLER"] = "False"

        # Run prediction
        # This import needs to come after the environ vars have been set
        from pvnet_app.app import app

        app(gsp_ids=list(range(1, 318)), num_workers=2)

    # The model configs that are expected to have been run with the settings above
    all_models = get_all_models(use_ocf_data_sampler=False)

    # Check correct number of forecasts have been made
    # (317 GSPs + 1 National + maybe GSP-sum) = 318 or 319 forecasts
    # Forecast made with multiple models
    expected_forecast_results = 0
    for model_config in all_models:
        expected_forecast_results += 318 + model_config.save_gsp_sum

    forecasts = db_session.query(ForecastSQL).all()
    # Doubled for historic and forecast
    assert len(forecasts) == expected_forecast_results * 2

    # Check probabilistic added
    assert "90" in forecasts[0].forecast_values[0].properties
    assert "10" in forecasts[0].forecast_values[0].properties

    # 16 time steps expected in each forecast
    expected_forecast_timesteps = 16

    # Every forecast values table should hold one row per forecast per time step
    assert (
        len(db_session.query(ForecastValueSQL).all())
        == expected_forecast_results * expected_forecast_timesteps
    )
    assert (
        len(db_session.query(ForecastValueLatestSQL).all())
        == expected_forecast_results * expected_forecast_timesteps
    )
    assert (
        len(db_session.query(ForecastValueSevenDaysSQL).all())
        == expected_forecast_results * expected_forecast_timesteps
    )

0 comments on commit fb3b6c4

Please sign in to comment.