diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
new file mode 100644
index 00000000..775ace75
--- /dev/null
+++ b/.github/workflows/publish.yaml
@@ -0,0 +1,32 @@
+name: Run forecast
+
+on:
+  schedule:
+    - cron: "0 9 * * *"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout repo content
+        uses: actions/checkout@v2
+
+      - name: setup python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
+      - name: install python packages
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -e .
+
+      - name: upload forecast to Hugging Face
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN_PUSH }}
+          HF_REPO: ${{ secrets.HF_REPO }}
+        run: |
+          cd quartz_solar_forecast/utils
+          python hf_upload.py
+
diff --git a/quartz_solar_forecast/utils/__init__.py b/quartz_solar_forecast/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/scripts/forecast_csv.py b/quartz_solar_forecast/utils/forecast_csv.py
similarity index 75%
rename from scripts/forecast_csv.py
rename to quartz_solar_forecast/utils/forecast_csv.py
index d134b40b..7bfdf1be 100644
--- a/scripts/forecast_csv.py
+++ b/quartz_solar_forecast/utils/forecast_csv.py
@@ -6,7 +6,42 @@
 import unittest
 from unittest.mock import patch
 
 
-def generate_forecast(init_time_freq, start_datetime, end_datetime, site_name, latitude, longitude, capacity_kwp):
+
+def generate_all_forecasts(
+        init_time_freq: int,
+        start: datetime,
+        end: datetime,
+        latitude: float,
+        longitude: float,
+        capacity_kwp: float) -> pd.DataFrame:
+
+    all_forecasts = pd.DataFrame()
+
+    init_time = start
+    while init_time <= end:
+        print(f"Running forecast for initialization time: {init_time}")
+        predictions_df = forecast_for_site(latitude, longitude, capacity_kwp, init_time=init_time)
+        predictions_df['forecast_init_time'] = init_time
+        all_forecasts = pd.concat([all_forecasts, predictions_df])
+        init_time += timedelta(hours=init_time_freq)
+
+    return all_forecasts
+
+
+def forecast_for_site(latitude: float,
+                      longitude: float,
+                      capacity_kwp: float,
+                      model: str = "gb",
+                      init_time: datetime = None) -> pd.DataFrame:
+
+    site = PVSite(latitude=latitude, longitude=longitude, capacity_kwp=capacity_kwp)
+    predictions_df = run_forecast(site=site, model=model, ts=init_time)
+    predictions_df.reset_index(inplace=True)
+    predictions_df.rename(columns={'index': 'datetime'}, inplace=True)
+    return predictions_df
+
+
+def write_out_forecasts(init_time_freq, start_datetime, end_datetime, site_name, latitude, longitude, capacity_kwp):
     """
     Generates forecasts at specified intervals and saves them into a CSV file.
@@ -23,30 +58,19 @@ def generate_forecast(init_time_freq, start_datetime, end_datetime, site_name, l
     start_date = start.date()
     end = datetime.strptime(end_datetime, "%Y-%m-%d %H:%M:%S")
     end_date = end.date()
-    all_forecasts = pd.DataFrame()
-    site = PVSite(latitude=latitude, longitude=longitude, capacity_kwp=capacity_kwp)
-
-    init_time = start
-    while init_time <= end:
-        print(f"Running forecast for initialization time: {init_time}")
-        predictions_df = run_forecast(site=site, ts=init_time.strftime("%Y-%m-%d %H:%M:%S"))
-        predictions_df.reset_index(inplace=True)
-        predictions_df.rename(columns={'index': 'datetime'}, inplace=True)
-        predictions_df['forecast_init_time'] = init_time
-        all_forecasts = pd.concat([all_forecasts, predictions_df])
-        init_time += timedelta(hours=init_time_freq)
+    all_forecasts = generate_all_forecasts(init_time_freq, start, end, latitude, longitude, capacity_kwp)
 
     output_dir = os.path.join(os.getcwd(), 'csv_forecasts')
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
     output_file_name = f"forecast_{site_name}_{start_date}_{end_date}.csv"
     output_file_path = os.path.join(output_dir, output_file_name)
-    all_forecasts.to_csv(output_file_path, index=False)
+    all_forecasts.to_csv(output_file_path, index=False)
 
     print(f"Forecasts saved to {output_file_path}")
 
 if __name__ == "__main__":
     # please change the site name, start_datetime and end_datetime, latitude, longitude and capacity_kwp as per your requirement
-    generate_forecast(
+    write_out_forecasts(
         init_time_freq=6,
         start_datetime="2024-03-10 00:00:00",
         end_datetime="2024-03-11 00:00:00",
@@ -81,7 +105,7 @@ def test_generate_forecast(self, mock_run_forecast):
         if not os.path.exists(self.output_dir):
             os.makedirs(self.output_dir)
 
-        generate_forecast(self.init_time_freq,
+        write_out_forecasts(self.init_time_freq,
                             self.start_datetime,
                             self.end_datetime,
                             self.site_name,
diff --git a/quartz_solar_forecast/utils/hf_upload.py b/quartz_solar_forecast/utils/hf_upload.py
new file mode 100644
index 00000000..7fcf4a0e
--- /dev/null
+++ b/quartz_solar_forecast/utils/hf_upload.py
@@ -0,0 +1,32 @@
+import os
+from datetime import datetime
+from huggingface_hub import login, HfFileSystem
+from quartz_solar_forecast.utils.forecast_csv import forecast_for_site
+
+
+def get_file_path(latitude: float,
+                  longitude: float,
+                  capacity_kwp: float,
+                  model: str = "gb",
+                  time: datetime = None) -> str:
+    return time.strftime(f"data/%Y/%-m/%-d/{model}_{latitude}_{longitude}_{capacity_kwp}_%Y%m%d_%H.csv")
+
+
+if __name__ == "__main__":
+
+    hf_token = os.getenv("HF_TOKEN")
+    hf_repo = os.getenv("HF_REPO")
+
+    login(hf_token)
+    fs = HfFileSystem()
+    now = datetime.utcnow()
+    latitude = 51.59
+    longitude = -1.89
+    capacity_kwp = 4
+
+    for model in ["gb", "xgb"]:
+        forecast = forecast_for_site(latitude, longitude, capacity_kwp, model, now)
+
+        path = get_file_path(latitude, longitude, capacity_kwp, model, now)
+        with fs.open(f"datasets/{hf_repo}/{path}", "w") as f:
+            forecast.to_csv(path_or_buf=f)
diff --git a/tests/utils/test_hf_upload.py b/tests/utils/test_hf_upload.py
new file mode 100644
index 00000000..d266c17e
--- /dev/null
+++ b/tests/utils/test_hf_upload.py
@@ -0,0 +1,12 @@
+from quartz_solar_forecast.utils.hf_upload import get_file_path
+from datetime import datetime
+
+
+def test_get_file_path():
+    latitude = 51.75
+    longitude = -1.25
+    capacity_kwp = 1.25
+    date = datetime(2024, 7, 26, 12, 0, 0)
+    path = get_file_path(latitude, longitude, capacity_kwp, "gb", date)
+
+    assert path == "data/2024/7/26/gb_51.75_-1.25_1.25_20240726_12.csv"
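
For reference, not part of the patch above: a minimal local sketch of how the new helpers compose, assuming the package has been installed with pip install -e . and the Hugging Face upload step is skipped. The coordinates, capacity and timestamp are the same illustrative values used in test_get_file_path; the output file name local_forecast.csv is hypothetical.

from datetime import datetime

from quartz_solar_forecast.utils.forecast_csv import forecast_for_site
from quartz_solar_forecast.utils.hf_upload import get_file_path

init_time = datetime(2024, 7, 26, 12, 0, 0)

# run a single forecast for one site (model defaults to "gb")
forecast = forecast_for_site(51.75, -1.25, 1.25, model="gb", init_time=init_time)

# build the dataset path the upload script would write to
path = get_file_path(51.75, -1.25, 1.25, "gb", init_time)
print(path)  # data/2024/7/26/gb_51.75_-1.25_1.25_20240726_12.csv

# write the forecast locally instead of to the Hugging Face dataset repo
forecast.to_csv("local_forecast.csv", index=False)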