From 31581ac901dda0c66ee91b0bdba94003c66386cd Mon Sep 17 00:00:00 2001 From: AUdaltsova Date: Thu, 17 Oct 2024 15:48:46 +0100 Subject: [PATCH 01/11] added pv padding and hf model handling to backtest_sites.py --- scripts/backtest_sites.py | 84 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 6 deletions(-) diff --git a/scripts/backtest_sites.py b/scripts/backtest_sites.py index e764abf8..b2482466 100644 --- a/scripts/backtest_sites.py +++ b/scripts/backtest_sites.py @@ -50,13 +50,17 @@ ) from ocf_datapipes.utils.consts import ELEVATION_MEAN, ELEVATION_STD from omegaconf import DictConfig -from torch.utils.data import DataLoader +from torch.utils.data import DataLoader, IterDataPipe, functional_datapipe from torch.utils.data.datapipes.iter import IterableWrapper from tqdm import tqdm from pvnet.load_model import get_model_from_checkpoints from pvnet.utils import SiteLocationLookup +import json +from huggingface_hub import hf_hub_download +from huggingface_hub.constants import CONFIG_NAME, PYTORCH_WEIGHTS_NAME + # ------------------------------------------------------------------ # USER CONFIGURED VARIABLES TO RUN THE SCRIPT @@ -67,6 +71,10 @@ # checkpoint on the val set model_chckpoint_dir = "PLACEHOLDER" +revision = None +token = None +model_id = None + # Forecasts will be made for all available init times between these start_datetime = "2022-05-08 00:00" end_datetime = "2022-05-08 00:30" @@ -101,11 +109,64 @@ # FUNCTIONS +@functional_datapipe('pad_forward_pv') +class PadForwardPVIterDataPipe(IterDataPipe): + """ + Pads forecast pv. Sun position is calculated based off of pv time index + and for t0's close to end of pv data can have wrong shape as pv starts + to run out of data to slice for the forecast part. + """ + + def __init__(self, pv_dp: IterDataPipe, forecast_duration: np.timedelta64): + """Init""" + + super().__init__() + self.pv_dp = pv_dp + self.forecast_duration = forecast_duration + + def __iter__(self): + """Iter""" + + for xr_data in self.pv_dp: + t0 = xr_data.time_utc.data[int(xr_data.attrs['t0_idx'])] + pv_step = np.timedelta64(xr_data.attrs['sample_period_duration']) + t_end = t0 + self.forecast_duration + pv_step + time_idx = np.arange(xr_data.time_utc.data[0], t_end, pv_step) + yield xr_data.reindex(time_utc=time_idx, fill_value=-1) + + +def load_model_from_hf(model_id: str, revision: str, token: str): + model_file = hf_hub_download( + repo_id=model_id, + filename=PYTORCH_WEIGHTS_NAME, + revision=revision, + token=token, + ) + + # load config file + config_file = hf_hub_download( + repo_id=model_id, + filename=CONFIG_NAME, + revision=revision, + token=token, + ) + + with open(config_file, "r", encoding="utf-8") as f: + config = json.load(f) + + model = hydra.utils.instantiate(config) + + state_dict = torch.load(model_file, map_location=torch.device("cuda")) + model.load_state_dict(state_dict) # type: ignore + model.eval() # type: ignore + + return model + + def preds_to_dataarray(preds, model, valid_times, site_ids): """Put numpy array of predictions into a dataarray""" if model.use_quantile_regression: - output_labels = model.output_quantiles output_labels = [f"forecast_mw_plevel_{int(q*100):02}" for q in model.output_quantiles] output_labels[output_labels.index("forecast_mw_plevel_50")] = "forecast_mw" else: @@ -333,7 +394,7 @@ def predict_batch(self, batch: NumpyBatch) -> xr.Dataset: da_abs_site = da_abs_site.where(~da_sundown_mask).fillna(0.0) da_abs_site = da_abs_site.expand_dims(dim="init_time_utc", axis=0).assign_coords( - init_time_utc=[t0] + init_time_utc=np.array([t0], dtype="datetime64[ns]") ) return da_abs_site @@ -362,6 +423,11 @@ def get_datapipe(config_path: str) -> NumpyBatch: t0_datapipe, ) + config = load_yaml_configuration(config_path) + data_pipeline['pv'] = data_pipeline['pv'].pad_forward_pv( + forecast_duration=np.timedelta64(config.input_data.pv.forecast_minutes, 'm') + ) + data_pipeline = DictDatasetIterDataPipe( {k: v for k, v in data_pipeline.items() if k != "config"}, ).map(split_dataset_dict_dp) @@ -412,7 +478,13 @@ def main(config: DictConfig): # Create a dataloader for the concurrent batches and use multiprocessing dataloader = DataLoader(batch_pipe, **dataloader_kwargs) # Load the PVNet model - model, *_ = get_model_from_checkpoints([model_chckpoint_dir], val_best=True) + if model_chckpoint_dir is not None: + model, *_ = get_model_from_checkpoints([model_chckpoint_dir], val_best=True) + elif model_id is not None: + model = load_model_from_hf(model_id, revision, token) + else: + raise ValueError("Provide a model checkpoint or a HuggingFace model") + model = model.eval().to(device) # Create object to make predictions for each input batch @@ -426,13 +498,13 @@ def main(config: DictConfig): t0 = ds_abs_all.init_time_utc.values[0] - # Save the predictioons + # Save the predictions filename = f"{output_dir}/{t0}.nc" ds_abs_all.to_netcdf(filename) pbar.update() except Exception as e: - print(f"Exception {e} at {i}") + print(f"Exception {e} at batch {i}") pass # Close down From 3acf2671ee431305360fd5402aa8f9c1db7e86dd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:38:53 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/backtest_sites.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/scripts/backtest_sites.py b/scripts/backtest_sites.py index b2482466..ca6c5313 100644 --- a/scripts/backtest_sites.py +++ b/scripts/backtest_sites.py @@ -23,6 +23,7 @@ except RuntimeError: pass +import json import logging import os import sys @@ -32,6 +33,8 @@ import pandas as pd import torch import xarray as xr +from huggingface_hub import hf_hub_download +from huggingface_hub.constants import CONFIG_NAME, PYTORCH_WEIGHTS_NAME from ocf_datapipes.batch import ( BatchKey, NumpyBatch, @@ -57,10 +60,6 @@ from pvnet.load_model import get_model_from_checkpoints from pvnet.utils import SiteLocationLookup -import json -from huggingface_hub import hf_hub_download -from huggingface_hub.constants import CONFIG_NAME, PYTORCH_WEIGHTS_NAME - # ------------------------------------------------------------------ # USER CONFIGURED VARIABLES TO RUN THE SCRIPT @@ -109,7 +108,7 @@ # FUNCTIONS -@functional_datapipe('pad_forward_pv') +@functional_datapipe("pad_forward_pv") class PadForwardPVIterDataPipe(IterDataPipe): """ Pads forecast pv. Sun position is calculated based off of pv time index @@ -128,8 +127,8 @@ def __iter__(self): """Iter""" for xr_data in self.pv_dp: - t0 = xr_data.time_utc.data[int(xr_data.attrs['t0_idx'])] - pv_step = np.timedelta64(xr_data.attrs['sample_period_duration']) + t0 = xr_data.time_utc.data[int(xr_data.attrs["t0_idx"])] + pv_step = np.timedelta64(xr_data.attrs["sample_period_duration"]) t_end = t0 + self.forecast_duration + pv_step time_idx = np.arange(xr_data.time_utc.data[0], t_end, pv_step) yield xr_data.reindex(time_utc=time_idx, fill_value=-1) @@ -424,8 +423,8 @@ def get_datapipe(config_path: str) -> NumpyBatch: ) config = load_yaml_configuration(config_path) - data_pipeline['pv'] = data_pipeline['pv'].pad_forward_pv( - forecast_duration=np.timedelta64(config.input_data.pv.forecast_minutes, 'm') + data_pipeline["pv"] = data_pipeline["pv"].pad_forward_pv( + forecast_duration=np.timedelta64(config.input_data.pv.forecast_minutes, "m") ) data_pipeline = DictDatasetIterDataPipe( From ac9eb36db95c06d364242af2ea6e1ae5ec959198 Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:38:53 +0100 Subject: [PATCH 03/11] Apply suggestions from code review Co-authored-by: Sukhil Patel <42407101+Sukh-P@users.noreply.github.com> --- scripts/backtest_sites.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/backtest_sites.py b/scripts/backtest_sites.py index ca6c5313..87048b9c 100644 --- a/scripts/backtest_sites.py +++ b/scripts/backtest_sites.py @@ -70,9 +70,9 @@ # checkpoint on the val set model_chckpoint_dir = "PLACEHOLDER" -revision = None -token = None -model_id = None +hf_revision = None +hf_token = None +hf_model_id = None # Forecasts will be made for all available init times between these start_datetime = "2022-05-08 00:00" @@ -477,9 +477,9 @@ def main(config: DictConfig): # Create a dataloader for the concurrent batches and use multiprocessing dataloader = DataLoader(batch_pipe, **dataloader_kwargs) # Load the PVNet model - if model_chckpoint_dir is not None: + if model_chckpoint_dir: model, *_ = get_model_from_checkpoints([model_chckpoint_dir], val_best=True) - elif model_id is not None: + elif model_id: model = load_model_from_hf(model_id, revision, token) else: raise ValueError("Provide a model checkpoint or a HuggingFace model") From 2284eb2d5739d8c8148926bda552a86562d9422c Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:40:55 +0100 Subject: [PATCH 04/11] Update scripts/backtest_sites.py --- scripts/backtest_sites.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/backtest_sites.py b/scripts/backtest_sites.py index 87048b9c..82128851 100644 --- a/scripts/backtest_sites.py +++ b/scripts/backtest_sites.py @@ -479,8 +479,8 @@ def main(config: DictConfig): # Load the PVNet model if model_chckpoint_dir: model, *_ = get_model_from_checkpoints([model_chckpoint_dir], val_best=True) - elif model_id: - model = load_model_from_hf(model_id, revision, token) + elif hf_model_id: + model = load_model_from_hf(hf_model_id, hf_revision, hf_token) else: raise ValueError("Provide a model checkpoint or a HuggingFace model") From 76496f2a661b9fe985fbe43a8d2feec7126fe6e9 Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:58:09 +0100 Subject: [PATCH 05/11] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0a066326..b53b8131 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ dependencies = [ "xarray", "ipykernel", "h5netcdf", - "torch>=2.0.0", + "torch>=2.0.0, <2.5.0", "lightning", "torchvision", "pytest", From b927f483207ebeaf56018ce764786dc81e90d82c Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Fri, 18 Oct 2024 12:44:50 +0100 Subject: [PATCH 06/11] undo Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b53b8131..0a066326 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ dependencies = [ "xarray", "ipykernel", "h5netcdf", - "torch>=2.0.0, <2.5.0", + "torch>=2.0.0", "lightning", "torchvision", "pytest", From fade9f30194624a0303dc8e0511ac48c065a2ce6 Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:37:37 +0100 Subject: [PATCH 07/11] linting --- scripts/backtest_sites.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/backtest_sites.py b/scripts/backtest_sites.py index 82128851..5cf0aeb9 100644 --- a/scripts/backtest_sites.py +++ b/scripts/backtest_sites.py @@ -111,7 +111,9 @@ @functional_datapipe("pad_forward_pv") class PadForwardPVIterDataPipe(IterDataPipe): """ - Pads forecast pv. Sun position is calculated based off of pv time index + Pads forecast pv. + + Sun position is calculated based off of pv time index and for t0's close to end of pv data can have wrong shape as pv starts to run out of data to slice for the forecast part. """ @@ -135,6 +137,10 @@ def __iter__(self): def load_model_from_hf(model_id: str, revision: str, token: str): + +""" +Loads model from HuggingFace +""" model_file = hf_hub_download( repo_id=model_id, filename=PYTORCH_WEIGHTS_NAME, From 0d45dfae1c4acfe823e76bb31d10cff36594e9d2 Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:38:31 +0100 Subject: [PATCH 08/11] docstring scripts/backtest_sites.py --- scripts/backtest_sites.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/backtest_sites.py b/scripts/backtest_sites.py index 5cf0aeb9..d14d7082 100644 --- a/scripts/backtest_sites.py +++ b/scripts/backtest_sites.py @@ -141,6 +141,7 @@ def load_model_from_hf(model_id: str, revision: str, token: str): """ Loads model from HuggingFace """ + model_file = hf_hub_download( repo_id=model_id, filename=PYTORCH_WEIGHTS_NAME, From dd1b1e2a3b7962a74c2e06b7fc637f4340438d06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Oct 2024 12:40:16 +0000 Subject: [PATCH 09/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/backtest_sites.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/backtest_sites.py b/scripts/backtest_sites.py index d14d7082..c653b385 100644 --- a/scripts/backtest_sites.py +++ b/scripts/backtest_sites.py @@ -111,8 +111,8 @@ @functional_datapipe("pad_forward_pv") class PadForwardPVIterDataPipe(IterDataPipe): """ - Pads forecast pv. - + Pads forecast pv. + Sun position is calculated based off of pv time index and for t0's close to end of pv data can have wrong shape as pv starts to run out of data to slice for the forecast part. From d70081bf8d1033c293270a6f59708d72215be48f Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Fri, 18 Oct 2024 14:06:35 +0100 Subject: [PATCH 10/11] docstring scripts/backtest_sites.py --- scripts/backtest_sites.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/backtest_sites.py b/scripts/backtest_sites.py index c653b385..2cf179a3 100644 --- a/scripts/backtest_sites.py +++ b/scripts/backtest_sites.py @@ -138,9 +138,9 @@ def __iter__(self): def load_model_from_hf(model_id: str, revision: str, token: str): -""" -Loads model from HuggingFace -""" + """ + Loads model from HuggingFace + """ model_file = hf_hub_download( repo_id=model_id, From 6f88cafe6ad5e5cf04333a420c5546489f3b0f43 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:06:45 +0000 Subject: [PATCH 11/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/backtest_sites.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/backtest_sites.py b/scripts/backtest_sites.py index 2cf179a3..3572daa3 100644 --- a/scripts/backtest_sites.py +++ b/scripts/backtest_sites.py @@ -137,7 +137,6 @@ def __iter__(self): def load_model_from_hf(model_id: str, revision: str, token: str): - """ Loads model from HuggingFace """