Skip to content

Commit

Permalink
Merge pull request #129 from aryanbhosale/main
Browse files Browse the repository at this point in the history
add live data integration
  • Loading branch information
aryanbhosale authored Jun 14, 2024
2 parents 62b28d8 + 125976a commit 3ee6457
Show file tree
Hide file tree
Showing 13 changed files with 164 additions and 60 deletions.
2 changes: 1 addition & 1 deletion examples/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def main():
predictions_df = run_forecast(site=site, ts=ts, nwp_source="icon")

print(predictions_df)
print(f"Max: {predictions_df['power_wh'].max()}")
print(f"Max: {predictions_df['power_kw'].max()}")


if __name__ == "__main__":
Expand Down
16 changes: 8 additions & 8 deletions examples/example_notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@
"source": [
"# Create an interactive plot of the forecast using plotly.\n",
"fig = px.line(predictions_df.reset_index().rename(columns={\"index\": \"date\"}),\n",
" x=\"date\", y=\"power_wh\",\n",
" labels={\"power_wh\": \"Power (wh)\"},\n",
" x=\"date\", y=\"power_kw\",\n",
" labels={\"power_kw\": \"Power (kw)\"},\n",
" title=\"Solar Energy Prediction\")\n",
"fig.show()"
]
Expand Down Expand Up @@ -154,8 +154,8 @@
"source": [
"# Create an interactive plot of the forecast using plotly.\n",
"fig = px.line(predictions_df.reset_index().rename(columns={\"index\": \"date\"}),\n",
" x=\"date\", y=\"power_wh\",\n",
" labels={\"power_wh\": \"Power (wh)\"},\n",
" x=\"date\", y=\"power_kw\",\n",
" labels={\"power_kw\": \"Power (kw)\"},\n",
" title=\"Solar Energy Prediction\")\n",
"fig.show()"
]
Expand Down Expand Up @@ -267,7 +267,7 @@
"outputs": [],
"source": [
"figures = [px.line(predictions_df,\n",
" x=\"date\", y=\"power_wh\"),\n",
" x=\"date\", y=\"power_kw\"),\n",
" px.line(predictions_df,\n",
" x=\"date\", y=\"temperature_2m\"),\n",
" px.line(predictions_df,\n",
Expand All @@ -288,7 +288,7 @@
" ]\n",
"\n",
"fig = make_subplots(rows=len(figures), cols=1,\n",
" subplot_titles=(\"Power (Wh)\",\n",
" subplot_titles=(\"Power (kw)\",\n",
" \"Temperature\", \"Precipitation\",\n",
" \"Cloud Cover (Low)\", \"Cloud Cover (Mid)\",\n",
" \"Cloud Cover (High)\", \"Wind Speed (10m)\",\n",
Expand Down Expand Up @@ -339,8 +339,8 @@
"source": [
"# Create an interactive plot of the forecast using plotly.\n",
"fig = px.line(predictions_df.reset_index().rename(columns={\"index\": \"date\"}),\n",
" x=\"date\", y=\"power_wh\",\n",
" labels={\"power_wh\": \"Power (wh)\"},\n",
" x=\"date\", y=\"power_kw\",\n",
" labels={\"power_kw\": \"Power (kw)\"},\n",
" title=\"Solar Energy Prediction\")\n",
"fig.show()"
]
Expand Down
37 changes: 29 additions & 8 deletions examples/inverter_example.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,39 @@
""" Example code to run the forecast"""
import pandas as pd
from datetime import datetime
from quartz_solar_forecast.forecast import run_forecast
from quartz_solar_forecast.pydantic_models import PVSite
from datetime import datetime, timedelta
from datetime import datetime, timezone

# Set plotly backend to be plotly, you might have to install plotly
pd.options.plotting.backend = "plotly"

def main():
# make input data
site = PVSite(latitude=51.75, longitude=-1.25, capacity_kwp=1.25, inverter_type="enphase")

ts = datetime.today() - timedelta(weeks=1)
predictions_df = run_forecast(site=site, ts=ts, nwp_source="icon")

print(predictions_df)
print(f"Max: {predictions_df['power_wh'].max()}")
timestamp = datetime.now().timestamp()
timestamp_str = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
ts = pd.to_datetime(timestamp_str)

# make input data with live enphase data
site_live = PVSite(latitude=51.75, longitude=-1.25, capacity_kwp=1.25, inverter_type="enphase")

# make input data without live inverter data (recent PV generation is NaN)
site_no_live = PVSite(latitude=51.75, longitude=-1.25, capacity_kwp=1.25)

# run model, with and without recent pv data
predictions_with_recent_pv_df = run_forecast(site=site_live, ts=ts)
predictions_df = run_forecast(site=site_no_live, ts=ts)

predictions_with_recent_pv_df["power_kw_no_live_pv"] = predictions_df["power_kw"]

# plot
fig = predictions_with_recent_pv_df.plot(
title="PV Forecast",
template="plotly_dark",
y=["power_kw", "power_kw_no_live_pv"],
labels={"value": "Power (kW)", "index": "Time"},
)
fig.show(renderer="browser")

if __name__ == "__main__":
main()
40 changes: 23 additions & 17 deletions quartz_solar_forecast/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,11 @@ def make_pv_data(site: PVSite, ts: pd.Timestamp) -> xr.Dataset:
:param ts: the timestamp of the site
:return: The combined PV dataset in xarray form
"""
live_generation_kw = None # Initialize live_generation_kw to None

# Check if the site has an inverter type specified
if site.inverter_type == 'solaredge':
# Fetch the list of site IDs associated with the account
# Fetch the list of site IDs associated with the SolarEdge account
site_ids = get_site_list()
# Find the site ID that matches the site's latitude and longitude
matching_site_ids = [s_id for s_id in site_ids if abs(site.latitude - lat) < 1e-6 and abs(site.longitude - lon) < 1e-6 for lat, lon in get_site_coordinates(s_id)]
Expand All @@ -163,32 +165,36 @@ def make_pv_data(site: PVSite, ts: pd.Timestamp) -> xr.Dataset:
raise ValueError("Multiple sites found matching the given latitude and longitude.")
else:
site_id = matching_site_ids[0]
live_generation_wh = get_solaredge_data(site_id)
live_generation_kw = get_solaredge_data(site_id)
elif site.inverter_type == 'enphase':
live_generation_wh = get_enphase_data(system_id)
live_generation_kw = get_enphase_data(system_id)
else:
# If no inverter type is specified, use the default value
live_generation_wh = np.nan

# Combine live data with fake PV data, this is where we could add history of a PV system
generation_wh = [[live_generation_wh]]
lon = [site.longitude]
lat = [site.latitude]
timestamp = [ts]
pv_id = [1]
# If no inverter type is specified or not recognized, set live_generation_kw to None
live_generation_kw = None

if live_generation_kw is not None:
# get the most recent data
recent_pv_data = live_generation_kw[live_generation_kw['timestamp'] <= ts]
power_kw = np.array([np.array(recent_pv_data["power_kw"].values, dtype=np.float64)])
timestamp = recent_pv_data['timestamp'].values
else:
# make fake pv data, this is where we could add history of a pv system
power_kw = [[np.nan]]
timestamp = [ts]

da = xr.DataArray(
data=generation_wh,
data=power_kw,
dims=["pv_id", "timestamp"],
coords=dict(
longitude=(["pv_id"], lon),
latitude=(["pv_id"], lat),
longitude=(["pv_id"], [site.longitude]),
latitude=(["pv_id"], [site.latitude]),
timestamp=timestamp,
pv_id=pv_id,
pv_id=[1],
kwp=(["pv_id"], [site.capacity_kwp]),
tilt=(["pv_id"], [site.tilt]),
orientation=(["pv_id"], [site.orientation]),
),
)
da = da.to_dataset(name="generation_wh")
da = da.to_dataset(name="generation_kw")

return da
2 changes: 1 addition & 1 deletion quartz_solar_forecast/eval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def combine_forecast_ground_truth(forecast_df: pd.DataFrame, ground_truth_df: pd
"""

# rename power to forecast_power
forecast_df = forecast_df.rename(columns={"power_wh": "forecast_power"})
forecast_df = forecast_df.rename(columns={"power_kw": "forecast_power"})

# rename power to ground_truth_power
ground_truth_df = ground_truth_df.rename(columns={"value": "generation_power"})
Expand Down
4 changes: 2 additions & 2 deletions quartz_solar_forecast/forecasts/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def forecast_v1(nwp_source:str, nwp_xr:xr.Dataset, pv_xr:xr.Dataset, ts:pd.Times
pv_xr,
id_dim_name="pv_id",
timestamp_dim_name="timestamp",
rename={"generation_wh": "power", "kwp": "capacity"},
rename={"generation_kw": "power", "kwp": "capacity"},
ignore_pv_ids=[],
)
# make NwpDataSource
Expand All @@ -38,6 +38,6 @@ def forecast_v1(nwp_source:str, nwp_xr:xr.Dataset, pv_xr:xr.Dataset, ts:pd.Times

# format into timerange and put into pd dataframe
times = pd.date_range(start=x.ts, periods=len(pred.powers), freq="15min")
pred_df = pd.DataFrame({"power_wh": pred.powers}, index=times)
pred_df = pd.DataFrame({"power_kw": pred.powers}, index=times)

return pred_df
4 changes: 2 additions & 2 deletions quartz_solar_forecast/forecasts/v1_tilt_orientation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def forecast_v1_tilt_orientation(nwp_source:str, nwp_xr:xr.Dataset, pv_xr:xr.Dat
pv_xr,
id_dim_name="pv_id",
timestamp_dim_name="timestamp",
rename={"generation_wh": "power", "kwp": "capacity"},
rename={"generation_kw": "power", "kwp": "capacity"},
ignore_pv_ids=[],
)
# make NwpDataSource
Expand All @@ -38,6 +38,6 @@ def forecast_v1_tilt_orientation(nwp_source:str, nwp_xr:xr.Dataset, pv_xr:xr.Dat

# format into timerange and put into pd dataframe
times = pd.date_range(start=x.ts, periods=len(pred.powers), freq="15min")
pred_df = pd.DataFrame({"power_wh": pred.powers}, index=times)
pred_df = pd.DataFrame({"power_kw": pred.powers}, index=times)

return pred_df
2 changes: 1 addition & 1 deletion quartz_solar_forecast/forecasts/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,5 +224,5 @@ def predict_power_output(
# set negative output to 0
final_data.loc[final_data["prediction"] < 0, "prediction"] = 0
df = final_data[[self.DATE_COLUMN, "prediction"]]
df = df.rename(columns={"prediction": "power_wh"})
df = df.rename(columns={"prediction": "power_kw"})
return df
38 changes: 26 additions & 12 deletions quartz_solar_forecast/inverters/enphase.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import requests
import http.client
import os
import pandas as pd
import json
import base64
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone

from dotenv import load_dotenv

import os
from urllib.parse import urlencode

def get_enphase_auth_url():
Expand Down Expand Up @@ -99,7 +98,7 @@ def get_enphase_access_token():
return access_token


def get_enphase_data(enphase_system_id: str) -> float:
def get_enphase_data(enphase_system_id: str) -> pd.DataFrame:
"""
Get live PV generation data from Enphase API v4
:param enphase_system_id: System ID for Enphase API
Expand All @@ -108,11 +107,11 @@ def get_enphase_data(enphase_system_id: str) -> float:
api_key = os.getenv('ENPHASE_API_KEY')
access_token = get_enphase_access_token()

# Set the start time to 30mins from now
start_at = int((datetime.now() - timedelta(minutes=30)).timestamp())
# Set the start time to 1 week ago
start_at = int((datetime.now() - timedelta(weeks=1)).timestamp())

# Set the granularity to day
granularity = "day"
# Set the granularity to week
granularity = "week"

conn = http.client.HTTPSConnection("api.enphaseenergy.com")
headers = {
Expand All @@ -132,8 +131,23 @@ def get_enphase_data(enphase_system_id: str) -> float:

# Convert the decoded data into JSON format
data_json = json.loads(decoded_data)

# Initialize an empty list to store the data
data_list = []

# Loop through the intervals and keep those ending at or after start_at (i.e. within the last week)
for interval in data_json['intervals']:
end_at = interval['end_at']
if end_at >= start_at:
timestamp = datetime.fromtimestamp(end_at, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

# Append the data to the list
data_list.append({"timestamp": timestamp, "power_kw": interval['powr']/1000})

# Convert the list to a DataFrame
live_generation_kw = pd.DataFrame(data_list)

# Parse the UTC-formatted timestamp strings into (timezone-naive) pandas datetimes
live_generation_kw["timestamp"] = pd.to_datetime(live_generation_kw["timestamp"])

### TO-DO
# Extracting live generation data assuming it's in Watt-hours
live_generation_wh = data_json['current_power']['power']
return live_generation_wh
return live_generation_kw
2 changes: 1 addition & 1 deletion quartz_solar_forecast/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ class PVSite(BaseModel):
inverter_type: str = Field(
default=None,
description="The type of inverter used, either 'solaredge' or 'enphase'",
choices=["solaredge", "enphase", None],
json_schema_extra=["solaredge", "enphase", None],
)
63 changes: 63 additions & 0 deletions tests/data/test_make_pv_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import pandas as pd
import numpy as np
import xarray as xr
import pytest
from unittest.mock import patch
from datetime import datetime
from quartz_solar_forecast.pydantic_models import PVSite

def mock_enphase_data(*args, **kwargs):
    """Return a fixed three-row frame that stands in for live Enphase data.

    Any positional or keyword arguments are accepted and ignored, so the
    function can be used directly as the ``side_effect`` of a patched call.

    :return: DataFrame with a ``timestamp`` column (11:25, 11:30 and 11:35 on
        2024-06-05) and a ``power_kw`` column (0.5, 0.6 and 0.7)
    """
    sample_times = [datetime(2024, 6, 5, 11, minute) for minute in (25, 30, 35)]
    sample_power = [0.5, 0.6, 0.7]
    return pd.DataFrame({'timestamp': sample_times, 'power_kw': sample_power})

@pytest.mark.parametrize("site, expected_data", [
    (PVSite(latitude=40.7128, longitude=-74.0059, capacity_kwp=8.5, inverter_type='enphase'), mock_enphase_data()),
])
@patch('quartz_solar_forecast.inverters.enphase.get_enphase_data', side_effect=mock_enphase_data)
def test_make_pv_data_enphase(mock_get_enphase, site, expected_data, ts=pd.Timestamp('2024-06-05 11:30:00')):
    """Check that make_pv_data returns the mocked Enphase samples at or before ``ts``.

    ``ts`` sits inside the mocked sample window (11:25, 11:30, 11:35 on
    2024-06-05), so the ``<= ts`` filter keeps the first two samples and drops
    the last one. A ``ts`` earlier than every sample would leave nothing to
    compare and the test would only check empty arrays.

    :param mock_get_enphase: the mock injected by ``patch``
    :param site: the Enphase site under test
    :param expected_data: the frame the mocked Enphase call returns
    :param ts: the forecast timestamp used to filter the live data
    """
    # NOTE(review): the import is local, presumably so that it runs while the
    # patch above is active - confirm against how data.py imports
    # get_enphase_data.
    from quartz_solar_forecast.data import make_pv_data

    result = make_pv_data(site, ts)

    expected = expected_data[expected_data['timestamp'] <= ts]
    # Guard against a vacuous comparison of two empty datasets.
    assert not expected.empty

    expected_xr = xr.DataArray(
        data=expected['power_kw'].values.reshape(1, -1),
        dims=['pv_id', 'timestamp'],
        coords={
            'longitude': (['pv_id'], [site.longitude]),
            'latitude': (['pv_id'], [site.latitude]),
            'timestamp': (['timestamp'], expected['timestamp'].values.astype('datetime64[ns]')),
            'pv_id': [1],
            'kwp': (['pv_id'], [site.capacity_kwp]),
            'tilt': (["pv_id"], [site.tilt]),
            'orientation': (["pv_id"], [site.orientation]),
        }
    ).to_dataset(name='generation_kw')

    assert result.equals(expected_xr)

@pytest.mark.parametrize("site, expected_data", [
    (PVSite(latitude=40.7128, longitude=-74.0059, capacity_kwp=8.5, inverter_type='unknown'), np.array([[np.nan]])),
])
def test_make_pv_data_no_live(site, expected_data, ts=pd.Timestamp('2023-06-14 12:15:00')):
    """Check make_pv_data for a site whose inverter type is not a known one.

    The expected dataset holds ``expected_data`` (a single NaN) under
    ``generation_kw`` at timestamp ``ts``, with the site's metadata as
    coordinates.

    :param site: the site under test
    :param expected_data: the array expected under ``generation_kw``
    :param ts: the forecast timestamp
    """
    from quartz_solar_forecast.data import make_pv_data

    # Build the reference dataset first; it does not depend on the call under test.
    reference_coords = {
        'longitude': (['pv_id'], [site.longitude]),
        'latitude': (['pv_id'], [site.latitude]),
        'timestamp': (['timestamp'], [ts]),
        'pv_id': [1],
        'kwp': (['pv_id'], [site.capacity_kwp]),
        'tilt': (["pv_id"], [site.tilt]),
        'orientation': (["pv_id"], [site.orientation]),
    }
    reference_array = xr.DataArray(
        data=expected_data,
        dims=['pv_id', 'timestamp'],
        coords=reference_coords,
    )
    reference_dataset = reference_array.to_dataset(name='generation_kw')

    actual_dataset = make_pv_data(site, ts)

    assert actual_dataset.equals(reference_dataset)
10 changes: 5 additions & 5 deletions tests/test_forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ def test_run_forecast():

print("\n Prediction based on GFS NWP\n")
print(predications_df_gfs)
print(f" Max: {predications_df_gfs['power_wh'].max()}")
print(f" Max: {predications_df_gfs['power_kw'].max()}")

print("\n Prediction based on ICON NWP\n")
print(predications_df_icon)
print(f" Max: {predications_df_icon['power_wh'].max()}")
print(f" Max: {predications_df_icon['power_kw'].max()}")

print("\n Prediction based on XGB\n")
print(predications_df_xgb)
print(f" Max: {predications_df_xgb['power_wh'].max()}")
print(f" Max: {predications_df_xgb['power_kw'].max()}")


def test_run_forecast_historical():
Expand All @@ -40,11 +40,11 @@ def test_run_forecast_historical():

print("\n Prediction based on GFS NWP\n")
print(predications_df_gfs)
print(f" Max: {predications_df_gfs['power_wh'].max()}")
print(f" Max: {predications_df_gfs['power_kw'].max()}")

print("\n Prediction based on ICON NWP\n")
print(predications_df_icon)
print(f" Max: {predications_df_icon['power_wh'].max()}")
print(f" Max: {predications_df_icon['power_kw'].max()}")

print("\n Prediction based on XGB\n")
print(predications_df_xgb)
Expand Down
Loading

0 comments on commit 3ee6457

Please sign in to comment.