Skip to content

Commit

Permalink
Publish forecast to Hugging Face at 9am every day (#163) (#176)
Browse files Browse the repository at this point in the history
* new workflow

* rename workflow

* install

* disable

* no cd

* publish from workflow

* add env vars

* requirements

* loosen requirements

* datasets only

* datasets

* lower datasets version

* remove

* wip

* update

* bump python version

* pv-site-prediction

* seems to be working

* update

* use filesystem interface

* use UTC

* set up cron schedule

* don't need new requirements any more

* tidy up

* add lat/long to file name

* loop through models

* add capacity

* add test for file path

* new line at end of file

* use named arg

* change to HF_TOKEN_PUSH

* wrong workflow

* move to utils module

* fix test

* fix test

* __init__.py

* empty

* update lat/long and capacity

Co-authored-by: Matthew Duffin <[email protected]>
  • Loading branch information
peterdudfield and mduffin95 authored Aug 8, 2024
1 parent a401577 commit 50c6c6c
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 16 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Scheduled workflow: runs the forecast for a fixed site and uploads the
# resulting CSV files to a Hugging Face dataset repository.
name: Run forecast

on:
  schedule:
    # Every day at 09:00 (GitHub Actions evaluates cron schedules in UTC).
    - cron: "0 9 * * *"

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: checkout repo content
        uses: actions/checkout@v4

      - name: setup python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: install python packages
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -e .

      - name: upload forecast to Hugging Face
        env:
          # Read by hf_upload.py via os.getenv("HF_TOKEN") / os.getenv("HF_REPO").
          HF_TOKEN: ${{ secrets.HF_TOKEN_PUSH }}
          HF_REPO: ${{ secrets.HF_REPO }}
        run: |
          # hf_upload.py lives in quartz_solar_forecast/utils, and the package
          # is installed by `pip install -e .` above, so run it as a module.
          python -m quartz_solar_forecast.utils.hf_upload
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,42 @@
import unittest
from unittest.mock import patch

def generate_forecast(init_time_freq, start_datetime, end_datetime, site_name, latitude, longitude, capacity_kwp):

def generate_all_forecasts(
        init_time_freq: int,
        start: datetime,
        end: datetime,
        latitude: float,
        longitude: float,
        capacity_kwp: float) -> pd.DataFrame:
    """
    Run a forecast for every initialization time between start and end.

    Initialization times begin at ``start`` and advance in steps of
    ``init_time_freq`` hours while they are less than or equal to ``end``.
    Each forecast is produced by ``forecast_for_site`` and tagged with a
    ``forecast_init_time`` column holding its initialization time.

    :param init_time_freq: Step between initialization times, in hours.
    :param start: First initialization time.
    :param end: Last initialization time (inclusive).
    :param latitude: Latitude passed through to ``forecast_for_site``.
    :param longitude: Longitude passed through to ``forecast_for_site``.
    :param capacity_kwp: Capacity passed through to ``forecast_for_site``.
    :return: All forecasts concatenated into one DataFrame; an empty
        DataFrame when ``start`` is later than ``end``.
    :raises ValueError: If ``init_time_freq`` is not positive, because the
        loop below would then never reach ``end``.
    """
    if init_time_freq <= 0:
        raise ValueError("init_time_freq must be a positive number of hours")

    step = timedelta(hours=init_time_freq)
    forecasts = []

    init_time = start
    while init_time <= end:
        print(f"Running forecast for initialization time: {init_time}")
        predictions_df = forecast_for_site(latitude, longitude, capacity_kwp, init_time=init_time)
        predictions_df['forecast_init_time'] = init_time
        forecasts.append(predictions_df)
        init_time += step

    # pd.concat raises on an empty list, so keep the empty-frame result
    # for the case where no initialization time falls in the range.
    if not forecasts:
        return pd.DataFrame()

    # Concatenate once instead of re-copying the accumulated frame on
    # every iteration.
    return pd.concat(forecasts)


def forecast_for_site(latitude: float,
                      longitude: float,
                      capacity_kwp: float,
                      model: str = "gb",
                      init_time: datetime = None) -> pd.DataFrame:
    """
    Run a single forecast for a site described by its location and capacity.

    A ``PVSite`` is built from the given latitude, longitude and capacity and
    handed to ``run_forecast`` together with ``model`` and ``init_time``
    (passed as ``ts``). The index of the result is moved into a column, and a
    column named ``index`` is renamed to ``datetime``.

    :param latitude: Latitude used to build the ``PVSite``.
    :param longitude: Longitude used to build the ``PVSite``.
    :param capacity_kwp: Capacity used to build the ``PVSite``.
    :param model: Model name forwarded to ``run_forecast``.
    :param init_time: Initialization time forwarded to ``run_forecast`` as ``ts``.
    :return: The forecast DataFrame with its former index as a column.
    """
    forecast_df = run_forecast(
        site=PVSite(latitude=latitude, longitude=longitude, capacity_kwp=capacity_kwp),
        model=model,
        ts=init_time,
    )
    # Both operations modify the frame returned by run_forecast in place.
    forecast_df.reset_index(inplace=True)
    forecast_df.rename(columns={'index': 'datetime'}, inplace=True)
    return forecast_df


def write_out_forecasts(init_time_freq, start_datetime, end_datetime, site_name, latitude, longitude, capacity_kwp):
"""
Generates forecasts at specified intervals and saves them into a CSV file.
Expand All @@ -23,30 +58,19 @@ def generate_forecast(init_time_freq, start_datetime, end_datetime, site_name, l
start_date = start.date()
end = datetime.strptime(end_datetime, "%Y-%m-%d %H:%M:%S")
end_date = end.date()
all_forecasts = pd.DataFrame()
site = PVSite(latitude=latitude, longitude=longitude, capacity_kwp=capacity_kwp)

init_time = start
while init_time <= end:
print(f"Running forecast for initialization time: {init_time}")
predictions_df = run_forecast(site=site, ts=init_time.strftime("%Y-%m-%d %H:%M:%S"))
predictions_df.reset_index(inplace=True)
predictions_df.rename(columns={'index': 'datetime'}, inplace=True)
predictions_df['forecast_init_time'] = init_time
all_forecasts = pd.concat([all_forecasts, predictions_df])
init_time += timedelta(hours=init_time_freq)
all_forecasts = generate_all_forecasts(init_time_freq, start, end, latitude, longitude, capacity_kwp)

output_dir = os.path.join(os.getcwd(), 'csv_forecasts')
if not os.path.exists(output_dir):
os.makedirs(output_dir)
output_file_name = f"forecast_{site_name}_{start_date}_{end_date}.csv"
output_file_path = os.path.join(output_dir, output_file_name)
all_forecasts.to_csv(output_file_path, index=False)
all_forecasts.to_csv(output_file_path, index=False)
print(f"Forecasts saved to {output_file_path}")

if __name__ == "__main__":
# please change the site name, start_datetime and end_datetime, latitude, longitude and capacity_kwp as per your requirement
generate_forecast(
write_out_forecasts(
init_time_freq=6,
start_datetime="2024-03-10 00:00:00",
end_datetime="2024-03-11 00:00:00",
Expand Down Expand Up @@ -81,7 +105,7 @@ def test_generate_forecast(self, mock_run_forecast):
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)

generate_forecast(self.init_time_freq,
write_out_forecasts(self.init_time_freq,
self.start_datetime,
self.end_datetime,
self.site_name,
Expand Down
32 changes: 32 additions & 0 deletions quartz_solar_forecast/utils/hf_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
from datetime import datetime
from huggingface_hub import login, HfFileSystem
from quartz_solar_forecast.utils.forecast_csv import forecast_for_site


def get_file_path(latitude: float,
                  longitude: float,
                  capacity_kwp: float,
                  model: str = "gb",
                  time: datetime = None) -> str:
    """
    Build the relative path of the forecast CSV for a site, model and time.

    The path has the form
    ``data/<year>/<month>/<day>/<model>_<lat>_<lon>_<capacity>_<YYYYMMDD>_<HH>.csv``
    where the month and day directory names are not zero-padded.

    :param latitude: Latitude embedded in the file name.
    :param longitude: Longitude embedded in the file name.
    :param capacity_kwp: Capacity embedded in the file name.
    :param model: Model name used as the file name prefix.
    :param time: Time the forecast refers to; required despite the default.
    :return: The relative file path as a string.
    :raises ValueError: If ``time`` is not provided.
    """
    if time is None:
        raise ValueError("time must be provided to build the forecast file path")

    # Use the datetime attributes rather than "%-m"/"%-d": the "-" flag is a
    # platform-specific strftime extension and is not available everywhere.
    directory = f"data/{time.year}/{time.month}/{time.day}"

    # Only the fixed timestamp pattern goes through strftime, so a "%" in the
    # model name or the numbers can never be interpreted as a directive.
    timestamp = time.strftime("%Y%m%d_%H")
    return f"{directory}/{model}_{latitude}_{longitude}_{capacity_kwp}_{timestamp}.csv"


if __name__ == "__main__":
    # Script entry point: run a forecast for one fixed site with each model
    # and write the results as CSV files into a Hugging Face dataset repo.

    hf_token = os.getenv("HF_TOKEN")
    hf_repo = os.getenv("HF_REPO")
    if not hf_token or not hf_repo:
        # Without these the upload would target "datasets/None/..." or try
        # to log in without a token, so stop with a clear message instead.
        raise SystemExit("HF_TOKEN and HF_REPO environment variables must be set")

    login(hf_token)
    fs = HfFileSystem()
    now = datetime.utcnow()
    # No trailing comma here: `51.59,` would make latitude a one-element
    # tuple, which breaks both the site definition and the file name.
    latitude = 51.59
    longitude = -1.89
    capacity_kwp = 4

    for model in ["gb", "xgb"]:
        forecast = forecast_for_site(latitude, longitude, capacity_kwp, model, now)

        path = get_file_path(latitude, longitude, capacity_kwp, model, now)
        with fs.open(f"datasets/{hf_repo}/{path}", "w") as f:
            forecast.to_csv(path_or_buf=f)
12 changes: 12 additions & 0 deletions tests/utils/test_hf_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from quartz_solar_forecast.utils.hf_upload import get_file_path
from datetime import datetime


def test_get_file_path():
    """Check the path built by get_file_path for a known site, model and time."""
    forecast_time = datetime(2024, 7, 26, 12, 0, 0)
    expected_path = "data/2024/7/26/gb_51.75_-1.25_1.25_20240726_12.csv"

    # Arguments in order: latitude, longitude, capacity_kwp, model, time.
    actual_path = get_file_path(51.75, -1.25, 1.25, "gb", forecast_time)

    assert actual_path == expected_path

0 comments on commit 50c6c6c

Please sign in to comment.