-
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ad23fd9
commit 7de8328
Showing
28 changed files
with
5,192 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
"""Available models for India forecast""" | ||
|
||
from .dummy import DummyModel | ||
from .pvnet.model import PVNetModel | ||
|
||
__all__ = ['DummyModel', 'PVNetModel'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
""" | ||
PVNet model class | ||
""" | ||
|
||
import datetime as dt | ||
import logging | ||
import os | ||
import tempfile | ||
|
||
import fsspec | ||
import torch | ||
from ocf_datapipes.batch import stack_np_examples_into_batch | ||
from ocf_datapipes.training.pvnet import construct_sliced_data_pipeline as pv_base_pipeline | ||
from ocf_datapipes.training.windnet import construct_sliced_data_pipeline as wind_base_pipeline | ||
from pvnet.models.base_model import BaseModel as PVNetBaseModel | ||
from torch.utils.data import DataLoader | ||
from torch.utils.data.datapipes.iter import IterableWrapper | ||
|
||
from .utils import populate_data_config_sources, worker_init_fn | ||
|
||
# Global settings for running the model

# Model will use GPU if available
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Name and revision of the wind model, overridable through the environment
WIND_MODEL_NAME = os.getenv("WIND_MODEL_NAME", default="openclimatefix/windnet_india")
WIND_MODEL_VERSION = os.getenv(
    "WIND_MODEL_VERSION",
    default="c6af802823edc5e87b22df680b41b0dcdb4869e1",
)

# Name and revision of the PV model. These read their own PV_* variables so that
# overriding the wind model does not also replace the PV model (and vice versa)
PV_MODEL_NAME = os.getenv("PV_MODEL_NAME", default="openclimatefix/pvnet_india")
PV_MODEL_VERSION = os.getenv(
    "PV_MODEL_VERSION",
    default="d194488203375e766253f0d2961010356de52eb9",
)

# Number of examples grouped into one batch by the datapipe
BATCH_SIZE = 10

log = logging.getLogger(__name__)
|
||
|
||
class PVNetModel:
    """
    Instantiates a PVNet model for inference

    Creating an instance immediately runs `setup()`, which copies the NWP data
    locally, builds the dataloader and loads the pretrained model.

    Attributes:
        asset_type: "wind" selects the wind model and pipeline, any other value selects PV
        t0: Timestamp passed to the t0 datapipe
        dataloader: DataLoader wrapping the batched datapipe (set by `setup()`)
        model: Pretrained model moved to DEVICE (set by `setup()`)
    """

    def __init__(self, asset_type: str, timestamp: dt.datetime):
        """Initializer for the model

        Args:
            asset_type: "wind" for the wind model, anything else is treated as PV
            timestamp: Forecast init time, used as t0 for the datapipe
        """

        self.asset_type = asset_type
        self.t0 = timestamp
        self.setup()

    @property
    def name(self):
        """Model name"""

        return WIND_MODEL_NAME if self.asset_type == "wind" else PV_MODEL_NAME

    @property
    def version(self):
        """Model version"""

        return WIND_MODEL_VERSION if self.asset_type == "wind" else PV_MODEL_VERSION

    def setup(self):
        """Sets up the model ready for inference"""

        self._prepare_data_sources()
        self._create_dataloader()
        self._load_model()

    def predict(self, site_id: str, timestamp: dt.datetime):
        """Make a prediction for the model

        Currently a placeholder: the arguments are not used and an empty list is returned.
        """

        return []

    @staticmethod
    def _worker_count() -> int:
        """Number of dataloader workers: one fewer than the CPU count, never negative"""

        # os.cpu_count() returns None when the number of CPUs cannot be determined
        return max((os.cpu_count() or 1) - 1, 0)

    def _prepare_data_sources(self):
        """Pull and prepare data sources required for inference"""

        log.info("Preparing data sources")

        nwp_source_file_path = os.getenv("NWP_ZARR_PATH", default="")
        if not nwp_source_file_path:
            # The copy is still attempted exactly as before; this only makes the
            # misconfiguration visible in the logs
            log.warning("NWP_ZARR_PATH is not set; copying NWP data is unlikely to succeed")

        # Copy the (possibly remote) zarr store to the local path "nwp.zarr"
        fs = fsspec.open(nwp_source_file_path).fs
        fs.get(nwp_source_file_path, "nwp.zarr", recursive=True)

        # TODO load historic wind data

    def _create_dataloader(self):
        """Setup dataloader with prepared data sources"""

        log.info("Creating dataloader")

        # Pull the data config from huggingface
        data_config_filename = PVNetBaseModel.get_data_config(
            self.name,
            revision=self.version,
        )

        # Populate the data config with production data paths.
        # The TemporaryDirectory is kept on the instance: held only in a local it would be
        # deleted once garbage collected after this method returns, while the datapipe
        # may still need to read the populated config file
        self._temp_dir = tempfile.TemporaryDirectory()
        populated_data_config_filename = f"{self._temp_dir.name}/data_config.yaml"

        populate_data_config_sources(data_config_filename, populated_data_config_filename)

        # Location and time datapipes
        # TODO not sure what to use here for the location pipe - site uuid/location?
        location_pipe = IterableWrapper([1])
        t0_datapipe = IterableWrapper([self.t0])
        # t0_datapipe = IterableWrapper([self.t0]).repeat(len(location_pipe))

        location_pipe = location_pipe.sharding_filter()
        t0_datapipe = t0_datapipe.sharding_filter()

        # Batch datapipe
        base_pipeline = wind_base_pipeline if self.asset_type == "wind" else pv_base_pipeline
        batch_datapipe = (
            # TODO wind return dict, whereas PV returns IterDataPipe - need to resolve this
            # Perhaps see https://github.com/openclimatefix/ocf_datapipes/blob/main/ocf_datapipes/training/windnet.py#L328
            base_pipeline(
                config_filename=populated_data_config_filename,
                location_pipe=location_pipe,
                t0_datapipe=t0_datapipe,
                production=False,  # TODO was True, but threw error as expecting GSP key to be defined
            )
            .batch(BATCH_SIZE)
            .map(stack_np_examples_into_batch)
        )

        n_workers = self._worker_count()

        # Set up dataloader for parallel loading
        dataloader_kwargs = {
            "shuffle": False,
            "batch_size": None,  # batched in datapipe step
            "sampler": None,
            "batch_sampler": None,
            "num_workers": n_workers,
            "collate_fn": None,
            "pin_memory": False,
            "drop_last": False,
            "timeout": 0,
            "worker_init_fn": worker_init_fn,
            # prefetch_factor is only passed as a number when worker processes are used
            "prefetch_factor": None if n_workers == 0 else 2,
            "persistent_workers": False,
        }

        self.dataloader = DataLoader(batch_datapipe, **dataloader_kwargs)

    def _load_model(self):
        """Load model"""

        log.info("Loading model: %s - %s", self.name, self.version)
        self.model = PVNetBaseModel.from_pretrained(
            self.name,
            revision=self.version,
        ).to(DEVICE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
"""Useful functions for setting up PVNet model""" | ||
|
||
import fsspec | ||
import yaml | ||
|
||
|
||
def worker_init_fn(worker_id):
    """
    Clear reference to the loop and thread.

    This is a nasty hack that was suggested but NOT recommended by the lead fsspec developer!
    This appears necessary otherwise gcsfs hangs when used after forking multiple worker processes.
    Only required for fsspec >= 0.9.0

    See:
    - https://github.com/fsspec/gcsfs/issues/379#issuecomment-839929801
    - https://github.com/fsspec/filesystem_spec/pull/963#issuecomment-1131709948

    TODO: Try deleting these two lines to make sure this is still relevant.

    Args:
        worker_id: Worker index passed as the function's only argument; not used here
    """
    asyn_module = fsspec.asyn
    # Reset the first slot of each module-level holder
    asyn_module.iothread[0] = None
    asyn_module.loop[0] = None
|
||
|
||
def populate_data_config_sources(input_path, output_path):
    """Re-save the data config and replace the source filepaths

    Only NWP sources whose `nwp_zarr_path` is non-empty are rewritten; the rest of
    the config is written back as loaded.

    Args:
        input_path: Path to input datapipes configuration file
        output_path: Location to save the output configuration file

    Raises:
        AssertionError: If an NWP source with a non-empty path has no known production path
    """
    with open(input_path, encoding="utf-8") as infile:
        # NOTE(review): FullLoader can construct more than plain data types. If this
        # config only ever holds plain mappings/scalars, consider yaml.safe_load
        config = yaml.load(infile, Loader=yaml.FullLoader)

    production_paths = {
        # "wind": os.environ["DB_URL"],
        "nwp": {"ecmwf": "nwp.zarr"}
    }

    if "nwp" in config["input_data"]:
        for nwp_source, source_config in config["input_data"]["nwp"].items():
            # Sources with an empty path are left untouched
            if source_config["nwp_zarr_path"] == "":
                continue

            # Raised explicitly rather than via `assert` so the checks are not stripped
            # under `python -O`; the exception type and messages are unchanged
            if "nwp" not in production_paths:
                raise AssertionError("Missing production path: nwp")
            if nwp_source not in production_paths["nwp"]:
                raise AssertionError(f"Missing NWP path: {nwp_source}")

            source_config["nwp_zarr_path"] = production_paths["nwp"][nwp_source]

    # We do not need to set wind/PV path right now. This currently done through datapipes
    # TODO - Move the wind/PV path to here?

    with open(output_path, "w", encoding="utf-8") as outfile:
        yaml.dump(config, outfile, default_flow_style=False)
Oops, something went wrong.