Commit 465e518
Merge branch 'data_sampler' of https://github.com/openclimatefix/PVNet into data_sampler

dfulu committed Nov 13, 2024
2 parents 87d5718 + db81147 · commit 465e518
Showing 14 changed files with 294 additions and 254 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,7 +1,7 @@
[bumpversion]
commit = True
tag = True
-current_version = 3.0.53
+current_version = 3.0.63
message = Bump version: {current_version} → {new_version} [skip ci]

[bumpversion:file:pvnet/__init__.py]
5 changes: 4 additions & 1 deletion README.md
@@ -1,6 +1,7 @@
# PVNet 2.1

-[![Python Bump Version & release](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml/badge.svg)](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml)
+[![Python Bump Version & release](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml/badge.svg)](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml) [![ease of contribution: hard](https://img.shields.io/badge/ease%20of%20contribution:%20hard-bb2629)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)


This project is used for training PVNet and running PVNet on live data.

@@ -85,6 +86,8 @@ OCF maintains a Zarr formatted version of the German Weather Service's (DWD)
ICON-EU NWP model here:
https://huggingface.co/datasets/openclimatefix/dwd-icon-eu which includes the UK

+Please note that the current version of the [ICON loader](https://github.com/openclimatefix/ocf_datapipes/blob/9ec252eeee44937c12ab52699579bdcace76e72f/ocf_datapipes/load/nwp/providers/icon.py#L9-L30) supports a different format. If you want to use our ICON-EU dataset or your own NWP source, you can create a loader for it using [the instructions here](https://github.com/openclimatefix/ocf_datapipes/tree/main/ocf_datapipes/load#nwp).
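As a rough sketch of what consuming the Zarr-formatted dataset might look like (the path is illustrative and assumes a locally downloaded copy of the archive):

    import xarray as xr

    # Hypothetical local path to a downloaded snapshot of the ICON-EU archive
    ds = xr.open_zarr("dwd-icon-eu/2024-01-01.zarr")
    print(list(ds.data_vars))  # inspect the available NWP variables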

**PV**\
OCF maintains a dataset of PV generation from 1311 private PV installations
here: https://huggingface.co/datasets/openclimatefix/uk_pv
48 changes: 32 additions & 16 deletions experiments/analysis.py → experiments/mae_analysis.py
@@ -1,5 +1,8 @@
"""
-Script to generate a table comparing two run for MAE values for 48 hour 15 minute forecast
+Script to generate analysis of MAE values for multiple model forecasts
+Does this for 48 hour horizon forecasts with 15 minute granularity
"""

import argparse
@@ -10,16 +13,23 @@
import wandb


-def main(runs: list[str], run_names: list[str]) -> None:
+def main(project: str, runs: list[str], run_names: list[str]) -> None:
    """
-    Compare two runs for MAE values for 48 hour 15 minute forecast
+    Compare MAE values for multiple model forecasts for 48 hour horizon with 15 minute granularity
+
+    Args:
+        project: name of W&B project
+        runs: W&B ids of runs
+        run_names: user specified names for runs
    """
    api = wandb.Api()
    dfs = []
+    epoch_num = []
    for run in runs:
-        run = api.run(f"openclimatefix/india/{run}")
+        run = api.run(f"openclimatefix/{project}/{run}")

-        df = run.history()
+        df = run.history(samples=run.lastHistoryStep + 1)
        # Get the columns that are in the format 'MAE_horizon/step_<number>/val`
        mae_cols = [col for col in df.columns if "MAE_horizon/step_" in col and "val" in col]
        # Sort them
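For context on the `run.history()` change above: the W&B public API returns a sampled subset of a run's logged metrics by default (up to 500 rows), so passing `samples=run.lastHistoryStep + 1` requests every logged step. A minimal sketch, with a placeholder entity/project/run id:

    import wandb

    api = wandb.Api()
    run = api.run("openclimatefix/pvnet/abc123")  # hypothetical <entity>/<project>/<run_id>
    sampled = run.history()  # default: up to 500 sampled rows
    full = run.history(samples=run.lastHistoryStep + 1)  # every logged step
    print(len(sampled), len(full))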
@@ -40,6 +50,7 @@ def main(runs: list[str], run_names: list[str]) -> None:
        # Get the step from the column name
        column_timesteps = [int(col.split("_")[-1].split("/")[0]) * 15 for col in mae_cols]
        dfs.append(df)
+        epoch_num.append(min_row_idx)
    # Get the timedelta for each group
    groupings = [
        [0, 0],
@@ -86,36 +97,41 @@ def main(runs: list[str], run_names: list[str]) -> None:
    for idx, df in enumerate(dfs):
        print(f"{run_names[idx]}: {df.mean()*100:0.3f}")

-    # Plot the error on per timestep, and all timesteps
+    # Plot the error per timestep
    plt.figure()
    for idx, df in enumerate(dfs):
-        plt.plot(column_timesteps, df, label=run_names[idx])
+        plt.plot(
+            column_timesteps, df, label=f"{run_names[idx]}, epoch: {epoch_num[idx]}", linestyle="-"
+        )
    plt.legend()
    plt.xlabel("Timestep (minutes)")
    plt.ylabel("MAE %")
    plt.title("MAE % for each timestep")
    plt.savefig("mae_per_timestep.png")
    plt.show()

-    # Plot the error on per timestep, and grouped timesteps
+    # Plot the error per grouped timestep
    plt.figure()
-    for run_name in run_names:
-        plt.plot(groups_df[run_name], label=run_name)
+    for idx, run_name in enumerate(run_names):
+        plt.plot(
+            groups_df[run_name],
+            label=f"{run_name}, epoch: {epoch_num[idx]}",
+            marker="o",
+            linestyle="-",
+        )
    plt.legend()
    plt.xlabel("Timestep (minutes)")
    plt.ylabel("MAE %")
-    plt.title("MAE % for each timestep")
-    plt.savefig("mae_per_timestep.png")
+    plt.title("MAE % for each grouped timestep")
+    plt.savefig("mae_per_grouped_timestep.png")
    plt.show()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
-    "5llq8iw6"
-    parser.add_argument("--first_run", type=str, default="xdlew7ib")
-    parser.add_argument("--second_run", type=str, default="v3mja33d")
+    parser.add_argument("--project", type=str, default="")
    # Add arguments that are lists of strings
    parser.add_argument("--list_of_runs", nargs="+")
    parser.add_argument("--run_names", nargs="+")
    args = parser.parse_args()
-    main(args.list_of_runs, args.run_names)
+    main(args.project, args.list_of_runs, args.run_names)
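A hypothetical invocation of the renamed script, with illustrative project name, run ids, and run names:

    python experiments/mae_analysis.py --project pvnet --list_of_runs 5llq8iw6 v3mja33d --run_names baseline candidate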
2 changes: 1 addition & 1 deletion pvnet/__init__.py
@@ -1,2 +1,2 @@
"""PVNet"""
__version__ = "3.0.53"
__version__ = "3.0.63"
35 changes: 13 additions & 22 deletions pvnet/data/datamodule.py
@@ -1,40 +1,35 @@
""" Data module for pytorch lightning """
from datetime import datetime
from glob import glob

from lightning.pytorch import LightningDataModule
from torch.utils.data import Dataset, DataLoader
import torch

from ocf_datapipes.batch import batch_to_tensor, stack_np_examples_into_batch, NumpyBatch
from ocf_data_sampler.torch_datasets.pvnet_uk_regional import (
PVNetUKRegionalDataset
)
from lightning.pytorch import LightningDataModule
from ocf_data_sampler.torch_datasets.pvnet_uk_regional import PVNetUKRegionalDataset
from ocf_datapipes.batch import NumpyBatch, batch_to_tensor, stack_np_examples_into_batch
from torch.utils.data import DataLoader, Dataset


class NumpybatchPremadeSamplesDataset(Dataset):
"""Dataset to load NumpyBatch samples"""

def __init__(self, sample_dir):
"""Dataset to load NumpyBatch samples
Args:
sample_dir: Path to the directory of pre-saved samples.
"""
self.sample_paths = glob(f"{sample_dir}/*.pt")



def __len__(self):
return len(self.sample_paths)

def __getitem__(self, idx):
return torch.load(self.sample_paths[idx])


def collate_fn(samples: list[NumpyBatch]):
"""Convert a list of NumpyBatch samples to a tensor batch"""
return batch_to_tensor(stack_np_examples_into_batch(samples))
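A minimal usage sketch of how the pieces above combine, assuming a directory of pre-saved `*.pt` samples (the path and batch size are illustrative):

    dataset = NumpybatchPremadeSamplesDataset("samples/train")
    loader = DataLoader(dataset, batch_size=16, collate_fn=collate_fn)
    batch = next(iter(loader))  # one stacked tensor batch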


class DataModule(LightningDataModule):
    """Datamodule for training pvnet and using pvnet pipeline in `ocf_datapipes`."""
@@ -46,9 +41,8 @@ def __init__(
        batch_size: int = 16,
        num_workers: int = 0,
        prefetch_factor: int | None = None,
-        train_period: list[str|None] = [None, None],
-        val_period: list[str|None] = [None, None],
-
+        train_period: list[str | None] = [None, None],
+        val_period: list[str | None] = [None, None],
    ):
        """Datamodule for training pvnet architecture.
@@ -67,7 +61,6 @@ """
"""
super().__init__()


if not ((sample_dir is not None) ^ (configuration is not None)):
raise ValueError("Exactly one of `sample_dir` or `configuration` must be set.")

@@ -100,21 +93,19 @@ def _get_streamed_samples_dataset(self, start_time, end_time) -> Dataset:
    def _get_premade_samples_dataset(self, subdir) -> Dataset:
        split_dir = f"{self.sample_dir}/{subdir}"
        return NumpybatchPremadeSamplesDataset(split_dir)

    def train_dataloader(self) -> DataLoader:
        """Construct train dataloader"""
        if self.sample_dir is not None:
            dataset = self._get_premade_samples_dataset("train")
        else:
            dataset = self._get_streamed_samples_dataset(*self.train_period)
        return DataLoader(dataset, shuffle=True, **self._common_dataloader_kwargs)

    def val_dataloader(self) -> DataLoader:
        """Construct val dataloader"""
        if self.sample_dir is not None:
            dataset = self._get_premade_samples_dataset("val")
        else:
            dataset = self._get_streamed_samples_dataset(*self.val_period)
        return DataLoader(dataset, shuffle=False, **self._common_dataloader_kwargs)
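To make the two modes concrete, a hypothetical instantiation (paths and dates are placeholders; per the check above, exactly one of `sample_dir` or `configuration` may be set):

    # Streamed samples, built on the fly from a data configuration
    datamodule = DataModule(
        configuration="configs/data_config.yaml",  # placeholder path
        batch_size=16,
        num_workers=2,
        train_period=["2020-01-01", "2022-12-31"],
        val_period=["2023-01-01", "2023-06-30"],
    )
    train_loader = datamodule.train_dataloader()

    # Pre-made samples previously saved to disk
    datamodule = DataModule(sample_dir="path/to/samples", batch_size=16, num_workers=2)
    val_loader = datamodule.val_dataloader()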

