Skip to content

Commit

Permalink
add test nwp
Browse files Browse the repository at this point in the history
  • Loading branch information
peterdudfield committed Dec 12, 2023
1 parent 1d5052c commit 8f134e8
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 53 deletions.
103 changes: 50 additions & 53 deletions quartz_solar_forecast/eval/nwp.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
""" Get nwp data from HF"""
import os
import pandas as pd

import ocf_blosc2 # noqa
import ocf_blosc2 # noqa
import xarray as xr
from huggingface_hub import HfFileSystem


def get_nwp(time_locations:pd.DataFrame):
def get_nwp(time_locations: pd.DataFrame):
"""
Get all the nwp data for the time locations
Expand All @@ -19,13 +20,15 @@ def get_nwp(time_locations:pd.DataFrame):

all_nwp_dfs = []
for i, row in time_locations.iterrows():
print(f'{i} of {len(time_locations)}')
one_nwp_df = get_nwp_for_one_timestamp_one_location(row['timestamp'], row['latitude'], row['longitude'])
print(f"{i} of {len(time_locations)}")
one_nwp_df = get_nwp_for_one_timestamp_one_location(
row["timestamp"], row["latitude"], row["longitude"]
)

one_nwp_df['timestamp'] = row['timestamp']
one_nwp_df['pv_id'] = row['pv_id']
one_nwp_df['latitude'] = row['latitude']
one_nwp_df['longitude'] = row['longitude']
one_nwp_df["timestamp"] = row["timestamp"]
one_nwp_df["pv_id"] = row["pv_id"]
one_nwp_df["latitude"] = row["latitude"]
one_nwp_df["longitude"] = row["longitude"]

all_nwp_dfs.append(one_nwp_df)

Expand All @@ -34,14 +37,14 @@ def get_nwp(time_locations:pd.DataFrame):
return all_nwp_df


def get_nwp_for_one_timestamp_one_location(
timestamp: pd.Timestamp, latitude, longitude
):
def get_nwp_for_one_timestamp_one_location(timestamp: pd.Timestamp, latitude, longitude):
"""
Get NWP data from Hugging Face for one timestamp and one location
:param timestamp: the timestamp for when you want the forecast for
:param location: the location for when you want the forecast for
:param latitude: the latitude of the location
:param longitude: the longitude of the location
:return: nwp forecast in xarray
"""

Expand All @@ -59,57 +62,51 @@ def get_nwp_for_one_timestamp_one_location(
date_and_hour = timestamp.strftime("%Y%m%d_%H")

date = f"{year}/{month}/{day}"
file_location = f"{date}/{date_and_hour}.zarr.zip"
file_location = f"{date}/{date_and_hour}"
huggingface_route = "zip:///::hf://datasets/openclimatefix/dwd-icon-eu/data"
#huggingface_route = "datasets/openclimatefix/dwd-icon-eu/data"
huggingface_file = f"{huggingface_route}/{file_location}"

# TODO add cache so we only need to download this file once
# see if this file exists in the cache
# cache_dir = 'data/nwp'
# cache_file = f"{cache_dir}/{file_location}"
# if not os.path.exists(cache_file):
# # use fsspec to copy file
# print('copying file { from HF to local')
# os.makedirs(f'{cache_dir}/{date}', exist_ok=True)
# fs.get(f"{huggingface_route}/{file_location}", f"{cache_file}")

data = xr.open_zarr(
f"{huggingface_file}",
chunks="auto",
)
# huggingface_route = "datasets/openclimatefix/dwd-icon-eu/data"
huggingface_file = f"{huggingface_route}/{file_location}.zarr.zip"

# take nearest location
data_at_location = data.sel(latitude=latitude, longitude=longitude, method="nearest")

# select the following variables
# "visibility": "vis",
# "windspeed_10m": "si10", from u and v
# "temperature_2m": "t_2m",
# "precipitation": "tot_prec",
# "shortwave_radiation": "aswdifd_s",
# "direct_radiation": "aswdir_s",
# "cloudcover_low": "clcl",
# "cloudcover_mid": "clcm",
# "cloudcover_high": "clch",
# dataset variables
variables = ["t_2m", "tot_prec", "clch", "clcm", "clcl", "u", "v", "aswdir_s", "aswdifd_s"]
data_at_location = data_at_location[variables]

# choise the first isobaricInhPa
data_at_location = data_at_location.isel(isobaricInhPa=-1)
cache_dir = "data/nwp"
cache_file = f"{cache_dir}/{file_location}_{latitude}_{longitude}.zarr"
if not os.path.exists(cache_file):
# use fsspec to copy file
print(f"Opening file {huggingface_file} from HF to local")

data = xr.open_zarr(
f"{huggingface_file}",
chunks="auto",
)

# take nearest location and only select the variables we want
data_at_location = data.sel(latitude=latitude, longitude=longitude, method="nearest")
data_at_location = data_at_location[variables]

# reduce to 54 hours timestaps, this means there is at least a 48 hours forecast
data_at_location = data_at_location.isel(step=slice(0, 54))
# choose the last isobaricInhPa level (index -1)
data_at_location = data_at_location.isel(isobaricInhPa=-1)

# matke times from the init time + steps
# reduce to 54 hours timesteps, this means there is at least a 48 hours forecast
data_at_location = data_at_location.isel(step=slice(0, 54))

# load all the data, this can take about ~1 minute
print(f"Loading dataset for {timestamp=} {longitude=} {latitude=}")
data_at_location.load()

# save to cache
data_at_location.to_zarr(cache_file)
else:
# load from cache
print("loading from cache")
data_at_location = xr.open_zarr(cache_file)

# make times from the init time + steps
times = pd.to_datetime(data_at_location.time.values) + pd.to_timedelta(
data_at_location.step.values, unit="h"
)

# load all the data, this can take about ~1 minute seconds
print(f"Loading dataset for {timestamp=} {longitude=} {latitude=}")
data_at_location.load()

# convert to pandas dataframe
df = pd.DataFrame(times, columns=["timestamp"])
for variable in variables:
Expand Down
20 changes: 20 additions & 0 deletions tests/eval/test_nwp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from quartz_solar_forecast.eval.nwp import get_nwp
import pandas as pd


# can take ~ 1 minute to run
def test_get_nwp():
    """Smoke-test ``get_nwp`` for a single timestamp and location.

    Builds a one-row frame with ``timestamp``, ``latitude``, ``longitude``
    and ``pv_id`` columns and passes it to ``get_nwp``. The return value is
    discarded, so this only checks that the call completes without raising.
    """
    # one site, described column by column
    site_columns = {
        "timestamp": [pd.Timestamp("2021-01-26 01:15:00")],
        "latitude": [51.5],
        "longitude": [0.0],
        "pv_id": [0],
    }
    test_set_df = pd.DataFrame(site_columns)

    # Collect NWP data from Hugging Face (ICON); the result is not inspected
    get_nwp(test_set_df)

0 comments on commit 8f134e8

Please sign in to comment.