
Commit

allow user to specify file save and storage location
zakwatts committed Sep 25, 2023
1 parent 74ced2d commit 6ee195d
Showing 2 changed files with 54 additions and 55 deletions.
2 changes: 1 addition & 1 deletion reports/pvnet_comp.md
@@ -150,7 +150,7 @@ Next this report will look at the performance of PVNet compared to PVLive update

* 2022-01-01 to 2022-11-19

The results shown in the table below are from taking that 0th hour forecast (now), which is the same that was used for the PVLive Intraday comparison above, and comparing the results to PVLive Updated. This section also contains graphs which illustrate the performance of PVNet across its different forecasting horizons (0-8 hours).
The results shown in the table below are from taking the PVNet 30 minute forecast, which is the same that was used for the PVLive Intraday comparison above, and comparing the results to PVLive Updated. This section also contains graphs which illustrate the performance of PVNet across its different forecasting horizons (0-8 hours).

### PVNet Results Table

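The comparison described in the report change above, matching each PVNet forecast value to PVLive Updated at the same target time and aggregating the error by forecast horizon, can be sketched roughly as follows. The column names and values here are illustrative assumptions only, not code or data from the repository.

import pandas as pd

# Illustrative data only: the real backtest columns and values will differ.
forecasts = pd.DataFrame(
    {
        "forecast_init_time": pd.to_datetime(["2022-01-01 03:00", "2022-01-01 03:00"]),
        "target_datetime_utc": pd.to_datetime(["2022-01-01 03:30", "2022-01-01 04:00"]),
        "pvnet_mw": [310.0, 405.0],
    }
)
pvlive_updated = pd.DataFrame(
    {
        "target_datetime_utc": pd.to_datetime(["2022-01-01 03:30", "2022-01-01 04:00"]),
        "pvlive_mw": [300.0, 420.0],
    }
)

# Join on target time, derive the forecast horizon, and compute MAE per horizon.
df = forecasts.merge(pvlive_updated, on="target_datetime_utc")
df["horizon"] = df["target_datetime_utc"] - df["forecast_init_time"]
mae_by_horizon = (df["pvnet_mw"] - df["pvlive_mw"]).abs().groupby(df["horizon"]).mean()
print(mae_by_horizon)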
107 changes: 53 additions & 54 deletions scripts/join_backtest.py
@@ -2,62 +2,61 @@
to output the PV data saved as Zarr. Currently set up for the 2022 backtest used for
the PVLive and PVNet evaluation.
"""
import xarray as xr
import fsspec

dir = "gs://solar-pv-nowcasting-data/backtest/pvnet_v2_2022/hindcasts/"
# dir = "gs://solar-pv-nowcasting-data/backtest/pvnet_v2_2022/hindcasts/"
# filename = 'gs://solar-pv-nowcasting-data/backtest/pvnet_v2_2022/hindcasts/2022-01-01T03:00:00.nc'
# save_to = "/home/zak/data/fc_bt_comp/pvnet_backtest_2022.zarr"

# get all the files
fs = fsspec.open(dir).fs
files = fs.ls(dir)


N_start = 10080
# N_end = 10580
N_end = 10100
N = N_end - N_start
N_files = len(files)
all_dataset_xr = []
for i, filename in enumerate(files): # [N_start:N_end]):
print(f"{round(i/N_files*100)}%")

## get all files in a directory
with fsspec.open(f"gs://{filename}", mode="rb") as file:
dataset = xr.open_dataset(file, engine="h5netcdf")

# just select national
national = dataset.sel(gsp_id=0)

# assign forecast_init_time as coordinate
national = national.assign_coords(
forecast_init_time=national.forecast_init_time.values
)

# drop target_time
idx = range(0, len(national.target_datetime_utc.values))
national = national.assign_coords(target_datetime_utc=idx)

# load the data
national = national.load()

all_dataset_xr.append(national)

print(all_dataset_xr[0])

print("next")

# print(all_dataset_xr)

# join datasets together
all_dataset_xr = xr.concat(all_dataset_xr, dim="forecast_init_time")

print("PROCESSED")

print(all_dataset_xr)

# dnetcdf = all_dataset_xr.to_netcdf()

# print(dnetcdf)

all_dataset_xr.to_zarr("/home/zak/data/fc_bt_comp/pvnet_backtest_2022.zarr")


import click
import xarray as xr
import fsspec

@click.command()
@click.option(
"--dir",
prompt="Directory to process",
type=click.STRING,
help="The directory to process.",
)
@click.option(
"--save_to",
prompt="Location to save to with filename",
type=click.Path(),
help="The location to save the processed data including the filename.",
)
def main(dir, save_to):
    """Join the per-init-time backtest files in `dir` and save them as a single Zarr."""
    # get all the files
fs = fsspec.open(dir).fs
files = fs.ls(dir)

    # Can select a proportion of the data to process as a test
N_start = 10080
N_end = 10100
N_files = len(files)
all_dataset_xr = []
    # Change to iterate over files[N_start:N_end] to process only a sample
for i, filename in enumerate(files):
print(f"{round(i/N_files*100)}%")

        # Open each forecast file from the bucket
with fsspec.open(f"gs://{filename}", mode="rb") as file:
dataset = xr.open_dataset(file, engine="h5netcdf")
            # Select the national total (gsp_id=0)
            national = dataset.sel(gsp_id=0)
            # Assign forecast_init_time as a coordinate, used later to concatenate
            national = national.assign_coords(
                forecast_init_time=national.forecast_init_time.values
            )
            # Replace the target_datetime_utc coordinate with an integer index
            idx = range(0, len(national.target_datetime_utc.values))
            national = national.assign_coords(target_datetime_utc=idx)
            # Load the data into memory
            national = national.load()
all_dataset_xr.append(national)

print("Merging data")
all_dataset_xr = xr.concat(all_dataset_xr, dim="forecast_init_time")
print("Data merged, now saving")
all_dataset_xr.to_zarr(save_to)
print(f"Saved Zarr to {save_to}")


if __name__ == "__main__":
main()
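
For reference, the refactored script can now be run with the save and storage locations passed on the command line; the paths below are the values that were previously hard-coded and are removed in this commit:

python scripts/join_backtest.py \
    --dir "gs://solar-pv-nowcasting-data/backtest/pvnet_v2_2022/hindcasts/" \
    --save_to "/home/zak/data/fc_bt_comp/pvnet_backtest_2022.zarr"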
