diff --git a/reports/pvnet_comp.md b/reports/pvnet_comp.md
index e2299b12..b5222bea 100644
--- a/reports/pvnet_comp.md
+++ b/reports/pvnet_comp.md
@@ -150,7 +150,7 @@ Next this report will look at the performance of PVNet compared to PVLive update
 
 * 2022-01-01 to 2022-11-19
 
-The results shown in the table below are from taking that 0th hour forecast (now), which is the same that was used for the PVLive Intraday comparison above, and comparing the results to PVLive Updated. This section also contains graphs which illustrate the performance of PVNet across its different forecasting horizons (0-8hours).
+The results shown in the table below are from taking the PVNet 30 minute forecast, which is the same that was used for the PVLive Intraday comparison above, and comparing the results to PVLive Updated. This section also contains graphs which illustrate the performance of PVNet across its different forecasting horizons (0-8hours).
 
 ### PVNet Results Table
 
diff --git a/scripts/join_backtest.py b/scripts/join_backtest.py
index e1daa4d7..af241c52 100644
--- a/scripts/join_backtest.py
+++ b/scripts/join_backtest.py
@@ -2,62 +2,61 @@ to ouput the pv data saved as Zarr. Currently set up for the 2022 backtest
 used for the PVLive and PVNet evaluation.
 """
-import xarray as xr
-import fsspec
-dir = "gs://solar-pv-nowcasting-data/backtest/pvnet_v2_2022/hindcasts/"
+# dir = "gs://solar-pv-nowcasting-data/backtest/pvnet_v2_2022/hindcasts/"
 # filename = 'gs://solar-pv-nowcasting-data/backtest/pvnet_v2_2022/hindcasts/2022-01-01T03:00:00.nc'
+# save_to = "/home/zak/data/fc_bt_comp/pvnet_backtest_2022.zarr"
 
-# get all the files
-fs = fsspec.open(dir).fs
-files = fs.ls(dir)
-
-
-N_start = 10080
-# N_end = 10580
-N_end = 10100
-N = N_end - N_start
-N_files = len(files)
-all_dataset_xr = []
-for i, filename in enumerate(files):  # [N_start:N_end]):
-    print(f"{round(i/N_files*100)}%")
-
-    ## get all files in a directory
-    with fsspec.open(f"gs://{filename}", mode="rb") as file:
-        dataset = xr.open_dataset(file, engine="h5netcdf")
-
-    # just select national
-    national = dataset.sel(gsp_id=0)
-
-    # assign forecast_init_time as coordinate
-    national = national.assign_coords(
-        forecast_init_time=national.forecast_init_time.values
-    )
-
-    # drop target_time
-    idx = range(0, len(national.target_datetime_utc.values))
-    national = national.assign_coords(target_datetime_utc=idx)
-
-    # load the data
-    national = national.load()
-
-    all_dataset_xr.append(national)
-
-print(all_dataset_xr[0])
-
-print("next")
-
-# print(all_dataset_xr)
-
-# join datasets together
-all_dataset_xr = xr.concat(all_dataset_xr, dim="forecast_init_time")
-
-print("PROCESSED")
-
-print(all_dataset_xr)
-
-# dnetcdf = all_dataset_xr.to_netcdf()
+import click
+import xarray as xr
+import fsspec
 
-# print(dnetcdf)
-all_dataset_xr.to_zarr("/home/zak/data/fc_bt_comp/pvnet_backtest_2022.zarr")
+
+@click.command()
+@click.option(
+    "--dir",
+    prompt="Directory to process",
+    type=click.STRING,
+    help="The directory to process.",
+)
+@click.option(
+    "--save_to",
+    prompt="Location to save to with filename",
+    type=click.Path(),
+    help="The location to save the processed data including the filename.",
+)
+def main(dir, save_to):
+    # get all the files
+    fs = fsspec.open(dir).fs
+    files = fs.ls(dir)
+
+    # Can select a proportion of the data to process as a test
+    N_start = 10080
+    N_end = 10100
+    N_files = len(files)
+    all_dataset_xr = []
+    # Change to iterate through N_start to N_end if wanting to use a sample
+    for i, filename in enumerate(files):
+        print(f"{round(i/N_files*100)}%")
+
+        # open the forecast file for this init time
+        with fsspec.open(f"gs://{filename}", mode="rb") as file:
+            dataset = xr.open_dataset(file, engine="h5netcdf")
+        national = dataset.sel(gsp_id=0)
+        national = national.assign_coords(
+            forecast_init_time=national.forecast_init_time.values
+        )
+        idx = range(0, len(national.target_datetime_utc.values))
+        national = national.assign_coords(target_datetime_utc=idx)
+        national = national.load()
+        all_dataset_xr.append(national)
+
+    print("Merging data")
+    all_dataset_xr = xr.concat(all_dataset_xr, dim="forecast_init_time")
+    print("Data merged, now saving")
+    all_dataset_xr.to_zarr(save_to)
+    print(f"Saved Zarr to {save_to}")
+
+
+if __name__ == "__main__":
+    main()
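After the refactor above, a quick way to sanity-check the output is to reopen the joined backtest with xarray. The snippet below is a minimal sketch and not part of the PR: the path is the example `save_to` value from the commented-out default in the script, and the `forecast_init_time` / `target_datetime_utc` coordinate names follow the script; adjust both to match an actual run.

```python
# Minimal sketch (assumed path): reopen the joined PVNet backtest written by
# scripts/join_backtest.py and inspect the dimensions it was concatenated along.
import xarray as xr

# Example save_to location taken from the commented-out default in the script.
backtest = xr.open_zarr("/home/zak/data/fc_bt_comp/pvnet_backtest_2022.zarr")

# Each forecast_init_time holds one national (gsp_id=0) hindcast, indexed by the
# integer target_datetime_utc positions assigned before concatenation.
print(backtest)
print(dict(backtest.sizes))
```

Storing the merged hindcasts as a single Zarr means the 2022 backtest can be opened lazily and sliced by forecast init time for the evaluation, rather than re-reading each per-init-time NetCDF file from the bucket.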