Skip to content

Commit

Permalink
Issue/sat scale (#127)
Browse files Browse the repository at this point in the history
* scale sat data by 1024

* only scale for new dataloader

* clean up unused params

---------

Co-authored-by: James Fulton <[email protected]>
  • Loading branch information
peterdudfield and dfulu authored Sep 17, 2024
1 parent 159e38b commit 17d9362
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 15 deletions.
5 changes: 4 additions & 1 deletion pvnet_app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,10 @@ def app(
download_all_sat_data()

# Preprocess the satellite data and record the delay of the most recent non-nan timestep
all_satellite_datetimes, data_freq_minutes = preprocess_sat_data(t0)
all_satellite_datetimes, data_freq_minutes = preprocess_sat_data(
t0,
use_legacy=use_day_ahead_model
)
else:
all_satellite_datetimes = []
data_freq_minutes = None
Expand Down
39 changes: 25 additions & 14 deletions pvnet_app/data/satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ def combine_5_and_15_sat_data(t0) -> [datetime, int, int, [datetime]]:
"""Select and/or combine the 5 and 15-minutely satellite data
The return is
- the most recent timestamp of the data
- the delay in minutes of the most recent timestamp from t0
- the data frequency, 5 or 15
- all the datetimes
"""
Expand All @@ -74,44 +72,40 @@ def combine_5_and_15_sat_data(t0) -> [datetime, int, int, [datetime]]:

# Find the delay in the 5- and 15-minutely data
if exists_5_minute:
latest_time_5, delay_mins_5, all_datetimes_5 = _get_latest_time_and_mins_delay(
latest_time_5, _, all_datetimes_5 = _get_latest_time_and_mins_delay(
sat_5_path, t0
)
logger.info(
f"Latest 5-minute timestamp is {latest_time_5} for t0 time {t0}. All the datetimes are {all_datetimes_5}"
)
else:
latest_time_5, delay_mins_5, all_datetimes_5 = datetime.min, np.inf, []
latest_time_5, all_datetimes_5 = datetime.min, []
logger.info(f"No 5-minute data was found.")

if exists_15_minute:
latest_time_15, delay_mins_15, all_datetimes_15 = _get_latest_time_and_mins_delay(
latest_time_15, _, all_datetimes_15 = _get_latest_time_and_mins_delay(
sat_15_path, t0
)
logger.info(
f"Latest 5-minute timestamp is {latest_time_15} for t0 time {t0}. All the datetimes are {all_datetimes_15}"
)
else:
latest_time_15, delay_mins_15, all_datetimes_15 = datetime.min, np.inf, []
latest_time_15 = datetime.min
logger.info(f"No 15-minute data was found.")

# Move the data with the most recent timestamp to the expected path
if latest_time_5 >= latest_time_15:
logger.info(f"Using 5-minutely data.")
os.system(f"mv {sat_5_path} {sat_path}")
latest_time = latest_time_5
delay_mins = delay_mins_5
data_freq_minutes = 5
all_datetimes = all_datetimes_5
else:
logger.info(f"Using 15-minutely data.")
os.system(f"mv {sat_15_path} {sat_path}")
latest_time = latest_time_15
delay_mins = delay_mins_15
data_freq_minutes = 15
all_datetimes = all_datetimes_15

return latest_time, delay_mins, data_freq_minutes, all_datetimes
return data_freq_minutes, all_datetimes


def extend_satellite_data_with_nans(t0):
Expand Down Expand Up @@ -199,14 +193,31 @@ def check_model_inputs_available(
return available


def preprocess_sat_data(t0):
def preprocess_sat_data(t0, use_legacy=False):
"""Combine and 5- and 15-minutely satellite data and extend to t0 if required"""

# Deal with switching between the 5 and 15 minutely satellite data
_, _, data_freq_minutes, all_datetimes = combine_5_and_15_sat_data(t0)
data_freq_minutes, all_datetimes = combine_5_and_15_sat_data(t0)

# Extend the satellite data with NaNs if needed by the model and record the delay of most recent
# non-nan timestamp
extend_satellite_data_with_nans(t0)

if not use_legacy:
# scale the satellite data if not legacy. The legacy dataloader does production data scaling
# inside it. The new dataloader does not
scale_satellite_data()

return all_datetimes, data_freq_minutes
return all_datetimes, data_freq_minutes


def scale_satellite_data():
"""Scale the satellite data to be between 0 and 1"""

ds_sat = xr.open_zarr(sat_path)
ds_sat = ds_sat / 1024
ds_sat = ds_sat.compute()

# save
os.system(f"rm -rf {sat_path}")
ds_sat.to_zarr(sat_path)

0 comments on commit 17d9362

Please sign in to comment.