diff --git a/README.md b/README.md index bbcf78f5..54295e22 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ # Quartz Solar Forecast + [![All Contributors](https://img.shields.io/badge/all_contributors-17-orange.svg?style=flat-square)](#contributors-) + The aim of the project is to build an open source PV forecast that is free and easy to use. @@ -131,7 +133,7 @@ To use this model specify `model="xgb"` in `run_forecast(site=site, model="xgb", The following plot shows example predictions of both models for the same time period. Additionally for the Gradient Boosting model (default) the results from the two different data sources are shown. -![model comparison](images/model_data_comparison.png) +![model comparison](images/model_data_comparison_hr.png) _Predictions using the two different models and different data sources._ ## Known restrictions @@ -139,7 +141,7 @@ _Predictions using the two different models and different data sources._ - The model is trained on [UK MetOffice](https://www.metoffice.gov.uk/services/data/met-office-weather-datahub) NWPs, but when running inference we use [GFS](https://www.ncei.noaa.gov/products/weather-climate-models/global-forecast) data from [Open-meteo](https://open-meteo.com/). The differences between GFS and UK MetOffice could led to some odd behaviours. - Depending, whether the timestamp for the prediction lays more than 90 days in the past or not, different data sources for the NWP are used. If we predict within the last 90 days, we can use ICON or GFS from the open-meteo Weather Forecast API. Since ICON doesn't provide visibility, this parameter is queried from GFS in any case. If the date for the prediction is further back in time, a reanalysis model of historical data is used (open-meteo | Historical Weather API). The historical weather API doesn't't provide visibility at all, that's why it's set to a maximum of 24000 meter in this case. This can lead to some loss of precision. - The model was trained and tested only over the UK, applying it to other geographical regions should be done with caution. -- When using the XGBoost model, only predictions within the last 90 days are available for data consistency. +- When using the XGBoost model, only hourly predictions within the last 90 days are available for data consistency. ## Evaluation diff --git a/images/model_data_comparison_hr.png b/images/model_data_comparison_hr.png new file mode 100644 index 00000000..d10b1341 Binary files /dev/null and b/images/model_data_comparison_hr.png differ diff --git a/quartz_solar_forecast/forecast.py b/quartz_solar_forecast/forecast.py index 6c923c80..6b4f389a 100644 --- a/quartz_solar_forecast/forecast.py +++ b/quartz_solar_forecast/forecast.py @@ -51,10 +51,10 @@ def predict_tryolabs( # set start and end time, if no time is given use current time if ts is None: start_date = pd.Timestamp.now().strftime("%Y-%m-%d") - start_time = pd.Timestamp.now().round("15min") + start_time = pd.Timestamp.now().round(freq='h') else: start_date = pd.Timestamp(ts).strftime("%Y-%m-%d") - start_time = pd.Timestamp(ts).round("15min") + start_time = pd.Timestamp(ts).round(freq='h') end_time = start_time + pd.Timedelta(hours=48) start_date_datetime = datetime.strptime(start_date, "%Y-%m-%d") diff --git a/quartz_solar_forecast/forecasts/v2.py b/quartz_solar_forecast/forecasts/v2.py index 20b59d78..f3643271 100644 --- a/quartz_solar_forecast/forecasts/v2.py +++ b/quartz_solar_forecast/forecasts/v2.py @@ -124,7 +124,7 @@ def get_data( weather_service = WeatherService() - weather_data = weather_service.get_15_minutely_weather( + weather_data = weather_service.get_hourly_weather( latitude, longitude, start_date, end_date ) @@ -220,7 +220,9 @@ def predict_power_output( predictions_df = pd.DataFrame(predictions, columns=["prediction"]) final_data = cleaned_data.join(predictions_df) # set night predictions to 0 - final_data.loc[final_data["is_day"]==0, "prediction"] = 0 + final_data.loc[final_data["is_day"] == 0, "prediction"] = 0 + # set negative output to 0 + final_data.loc[final_data["prediction"] < 0, "prediction"] = 0 df = final_data[[self.DATE_COLUMN, "prediction"]] df = df.rename(columns={"prediction": "power_wh"}) return df diff --git a/quartz_solar_forecast/weather/open_meteo.py b/quartz_solar_forecast/weather/open_meteo.py index d3c743ec..f5082440 100644 --- a/quartz_solar_forecast/weather/open_meteo.py +++ b/quartz_solar_forecast/weather/open_meteo.py @@ -43,7 +43,7 @@ def _build_url( str The URL for the OpenMeteo API. """ - url = "https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}&minutely_15={variables}&start_date={start_date}&end_date={end_date}&timezone=GMT".format( + url = "https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}&hourly={variables}&start_date={start_date}&end_date={end_date}&timezone=GMT".format( latitude=latitude, longitude=longitude, variables=",".join(variables), @@ -99,11 +99,11 @@ def _validate_date_format(self, start_date: str, end_date: str) -> None: f"Invalid date format or range. Please use YYYY-MM-DD and ensure end_date is greater than start_date. Error: {str(e)}" ) - def get_15_minutely_weather( + def get_hourly_weather( self, latitude: float, longitude: float, start_date: str, end_date: str ) -> pd.DataFrame: """ - Get 15 minutely weather data ranging from 3 months ago up to 15 days ahead (forecast). + Get hourly weather data ranging from 3 months ago up to 15 days ahead (forecast). Parameters ---------- @@ -150,7 +150,7 @@ def get_15_minutely_weather( ] url = self._build_url(latitude, longitude, start_date, end_date, variables) response = requests.get(url) - data = response.json()["minutely_15"] + data = response.json()["hourly"] df = pd.DataFrame(data) df["time"] = pd.to_datetime(df["time"]) diff --git a/tests/test_forecast_no_ts.py b/tests/test_forecast_no_ts.py index 49adfc00..241db1f6 100644 --- a/tests/test_forecast_no_ts.py +++ b/tests/test_forecast_no_ts.py @@ -8,8 +8,9 @@ def test_run_forecast_no_ts(): site = PVSite(latitude=51.75, longitude=-1.25, capacity_kwp=1.25) current_ts = pd.Timestamp.now().round("15min") + current_hr = pd.Timestamp.now().round(freq='h') - # run ocf model with no ts + # run gradient boosting model with no ts predications_df = run_forecast(site=site, model="gb") # check current ts agrees with dataset assert predications_df.index.min() == current_ts @@ -18,10 +19,10 @@ def test_run_forecast_no_ts(): print(f"Current time: {current_ts}") print(f"Max: {predications_df['power_wh'].max()}") - # run tryolabs model with no ts + # run xgb model with no ts predications_df = run_forecast(site=site, model="xgb") # check current ts agrees with dataset - assert predications_df.index.min() == current_ts + assert predications_df.index.min() == current_hr print(predications_df) print(f"Current time: {current_ts}")