diff --git a/ocf_datapipes/convert/numpy/batch/pv.py b/ocf_datapipes/convert/numpy/batch/pv.py index b5efea683..f3ab4d3d6 100644 --- a/ocf_datapipes/convert/numpy/batch/pv.py +++ b/ocf_datapipes/convert/numpy/batch/pv.py @@ -35,11 +35,11 @@ def __iter__(self) -> NumpyBatch: BatchKey.pv_t0_idx: xr_data.attrs["t0_idx"], BatchKey.pv_ml_id: xr_data["ml_id"].values, BatchKey.pv_id: xr_data["pv_system_id"].values.astype(np.float32), - BatchKey.pv_observed_capacity_watt_power: ( - xr_data["observed_capacity_watt_power"].values + BatchKey.pv_observed_capacity_wp: ( + xr_data["observed_capacity_wp"].values ), - BatchKey.pv_metadata_capacity_watt_power: ( - xr_data["metadata_capacity_watt_power"].values + BatchKey.pv_nominal_capacity_wp: ( + xr_data["nominal_capacity_wp"].values ), BatchKey.pv_time_utc: datetime64_to_float(xr_data["time_utc"].values), BatchKey.pv_latitude: xr_data["latitude"].values, diff --git a/ocf_datapipes/load/pv/database.py b/ocf_datapipes/load/pv/database.py index 6510df344..307b17fed 100644 --- a/ocf_datapipes/load/pv/database.py +++ b/ocf_datapipes/load/pv/database.py @@ -96,7 +96,7 @@ def __iter__(self): data_xr = put_pv_data_into_an_xr_dataarray( df_gen=pv_power, observed_system_capacities=pv_metadata.observed_capacity_watt_power, - metadata_system_capacities=pv_metadata.capacity_watt_power, + nominal_system_capacities=pv_metadata.capacity_watt_power, ml_id=pv_metadata.ml_id, latitude=pv_metadata.latitude, longitude=pv_metadata.longitude, @@ -371,7 +371,7 @@ def __iter__(self): df_gen = get_pv_power_from_pvsites_database(history_duration=self.history_duration) # Database record is very short. Set observed max to NaN - df_metadata["observed_capacity_watt_power"] = np.nan + df_metadata["observed_capacity_wp"] = np.nan # Ensure systems are consistant between generation data, and metadata common_systems = list(np.intersect1d(df_metadata.index, df_gen.columns)) @@ -381,8 +381,8 @@ def __iter__(self): # Compile data into an xarray DataArray xr_array = put_pv_data_into_an_xr_dataarray( df_gen=df_gen, - observed_system_capacities=df_metadata.observed_capacity_watt_power, - metadata_system_capacities=df_metadata.capacity_watts, + observed_system_capacities=df_metadata.observed_capacity_wp, + nominal_system_capacities=df_metadata.nominal_capacity_wp, ml_id=df_metadata.ml_id, latitude=df_metadata.latitude, longitude=df_metadata.longitude, @@ -405,7 +405,7 @@ def get_metadata_from_pvsites_database() -> pd.DataFrame: with db_connection.engine.connect() as conn: df_sites_metadata = pd.DataFrame(conn.execute(text("SELECT * FROM sites")).fetchall()) - df_sites_metadata["capacity_watts"] = df_sites_metadata["capacity_kw"] * 1000 + df_sites_metadata["nominal_capacity_wp"] = df_sites_metadata["capacity_kw"] * 1000 df_sites_metadata = df_sites_metadata.set_index("site_uuid") diff --git a/ocf_datapipes/load/pv/pv.py b/ocf_datapipes/load/pv/pv.py index 06c35f118..bee2538f9 100644 --- a/ocf_datapipes/load/pv/pv.py +++ b/ocf_datapipes/load/pv/pv.py @@ -111,7 +111,7 @@ def load_everything_into_ram( xr_array = put_pv_data_into_an_xr_dataarray( df_gen=df_gen, observed_system_capacities=estimated_capacities, - metadata_system_capacities=df_metadata.capacity_watts, + nominal_system_capacities=df_metadata.capacity_watts, ml_id=df_metadata.ml_id, latitude=df_metadata.latitude, longitude=df_metadata.longitude, diff --git a/ocf_datapipes/load/pv/utils.py b/ocf_datapipes/load/pv/utils.py index 574d56233..78369c252 100644 --- a/ocf_datapipes/load/pv/utils.py +++ b/ocf_datapipes/load/pv/utils.py @@ -13,7 +13,7 @@ def put_pv_data_into_an_xr_dataarray( df_gen: pd.DataFrame, observed_system_capacities: pd.Series, - metadata_system_capacities: pd.Series, + nominal_system_capacities: pd.Series, ml_id: pd.Series, longitude: pd.Series, latitude: pd.Series, @@ -27,7 +27,7 @@ def put_pv_data_into_an_xr_dataarray( the index is UTC datetime observed_system_capacities: The max power output observed in the time series for PV system in watts. Index is PV system IDs - metadata_system_capacities: The metadata value for each PV system capacities in watts + nominal_system_capacities: The metadata value for each PV system capacities in watts ml_id: The `ml_id` used to identify each PV system longitude: longitude of the locations latitude: latitude of the locations @@ -38,7 +38,7 @@ def put_pv_data_into_an_xr_dataarray( system_ids = df_gen.columns for name, series in ( ("observed_system_capacities", observed_system_capacities), - ("metadata_system_capacities", metadata_system_capacities), + ("nominal_system_capacities", nominal_system_capacities), ("ml_id", ml_id), ("longitude", longitude), ("latitude", latitude), @@ -60,8 +60,8 @@ def put_pv_data_into_an_xr_dataarray( ).astype(np.float32) data_array = data_array.assign_coords( - observed_capacity_watt_power=("pv_system_id", observed_system_capacities), - metadata_capacity_watt_power=("pv_system_id", metadata_system_capacities), + observed_capacity_wp=("pv_system_id", observed_system_capacities), + nominal_capacity_wp=("pv_system_id", nominal_system_capacities), ml_id=("pv_system_id", ml_id), longitude=("pv_system_id", longitude), latitude=("pv_system_id", latitude), diff --git a/ocf_datapipes/select/drop_pv_sys_generating_overnight.py b/ocf_datapipes/select/drop_pv_sys_generating_overnight.py index 25f126786..20a5844bd 100644 --- a/ocf_datapipes/select/drop_pv_sys_generating_overnight.py +++ b/ocf_datapipes/select/drop_pv_sys_generating_overnight.py @@ -44,7 +44,7 @@ def __iter__(self) -> xr.DataArray(): ds_night = ds.where(ds.status_daynight == "night", drop=True) # Find relative maximum night-time generation for each system - night_time_max_gen = (ds_night / ds_night.observed_capacity_watt_power).max( + night_time_max_gen = (ds_night / ds_night.observed_capacity_wp).max( dim="time_utc" ) diff --git a/ocf_datapipes/select/select_pv_systems_on_capacity.py b/ocf_datapipes/select/select_pv_systems_on_capacity.py index 432fbf5d0..ebf71fb45 100644 --- a/ocf_datapipes/select/select_pv_systems_on_capacity.py +++ b/ocf_datapipes/select/select_pv_systems_on_capacity.py @@ -33,7 +33,7 @@ def __init__( def __iter__(self) -> Union[xr.DataArray, xr.Dataset]: for ds in self.source_datapipe: - too_low = ds.observed_capacity_watt_power < self.min_capacity_watts - too_high = ds.observed_capacity_watt_power > self.max_capacity_watts + too_low = ds.observed_capacity_wp < self.min_capacity_watts + too_high = ds.observed_capacity_wp > self.max_capacity_watts mask = np.logical_or(too_low, too_high) yield ds.where(~mask, drop=True) diff --git a/ocf_datapipes/training/example/nwp_pv.py b/ocf_datapipes/training/example/nwp_pv.py index 8bc3627ba..63db7630a 100644 --- a/ocf_datapipes/training/example/nwp_pv.py +++ b/ocf_datapipes/training/example/nwp_pv.py @@ -65,7 +65,7 @@ def nwp_pv_datapipe( minutes=configuration.input_data.pv.time_resolution_minutes ), history_duration=timedelta(minutes=configuration.input_data.pv.history_minutes), - ).normalize(normalize_fn=lambda x: x / x.observed_capacity_watt_power) + ).normalize(normalize_fn=lambda x: x / x.observed_capacity_wp) nwp_datapipe = nwp_datapipe.add_t0_idx_and_sample_period_duration( sample_period_duration=timedelta( minutes=configuration.input_data.nwp.time_resolution_minutes diff --git a/ocf_datapipes/training/example/simple_pv.py b/ocf_datapipes/training/example/simple_pv.py index 830ac47b8..f5b80b106 100644 --- a/ocf_datapipes/training/example/simple_pv.py +++ b/ocf_datapipes/training/example/simple_pv.py @@ -63,7 +63,7 @@ def simple_pv_datapipe( logger.debug("Making PV space slice") pv_datapipe, pv_t0_datapipe, pv_time_periods_datapipe = ( - pv_datapipe.normalize(normalize_fn=lambda x: x / x.observed_capacity_watt_power) + pv_datapipe.normalize(normalize_fn=lambda x: x / x.observed_capacity_wp) .add_t0_idx_and_sample_period_duration( sample_period_duration=timedelta( minutes=configuration.input_data.pv.time_resolution_minutes diff --git a/ocf_datapipes/training/metnet_gsp_national.py b/ocf_datapipes/training/metnet_gsp_national.py index cdf3fbc41..6d96ee8d8 100644 --- a/ocf_datapipes/training/metnet_gsp_national.py +++ b/ocf_datapipes/training/metnet_gsp_national.py @@ -45,7 +45,7 @@ def normalize_pv(x): # So it can be pickled Returns: Normalized DataArray """ - return x / x.observed_capacity_watt_power + return x / x.observed_capacity_wp def _remove_nans(x): diff --git a/ocf_datapipes/training/metnet_pv_national.py b/ocf_datapipes/training/metnet_pv_national.py index 3a8023f2e..47607c8d1 100644 --- a/ocf_datapipes/training/metnet_pv_national.py +++ b/ocf_datapipes/training/metnet_pv_national.py @@ -50,7 +50,7 @@ def normalize_pv(x): # So it can be pickled Returns: Normalized DataArray """ - return x / x.observed_capacity_watt_power + return x / x.observed_capacity_wp def _remove_nans(x): diff --git a/ocf_datapipes/training/metnet_pv_site.py b/ocf_datapipes/training/metnet_pv_site.py index c075a0667..05b10af18 100644 --- a/ocf_datapipes/training/metnet_pv_site.py +++ b/ocf_datapipes/training/metnet_pv_site.py @@ -33,7 +33,7 @@ def normalize_pv(x): # So it can be pickled Returns: Normalized DataArray """ - return x / x.observed_capacity_watt_power + return x / x.observed_capacity_wp def _remove_nans(x): diff --git a/ocf_datapipes/training/pseudo_irradience.py b/ocf_datapipes/training/pseudo_irradience.py index e4b4caadb..00f7c0d1b 100644 --- a/ocf_datapipes/training/pseudo_irradience.py +++ b/ocf_datapipes/training/pseudo_irradience.py @@ -36,7 +36,7 @@ def normalize_pv(x): # So it can be pickled Returns: Normalized DataArray """ - return x / x.observed_capacity_watt_power + return x / x.observed_capacity_wp def _remove_nans(x): @@ -111,7 +111,7 @@ def _normalize_by_pvlib(pv_system): clear_sky["dni"] + clear_sky["dhi"] + clear_sky["ghi"] ) print(fraction_clear_sky) - pv_system /= pv_system.observed_capacity_watt_power + pv_system /= pv_system.observed_capacity_wp print(pv_system) pv_system *= fraction_clear_sky print(pv_system) diff --git a/ocf_datapipes/training/pvnet.py b/ocf_datapipes/training/pvnet.py index 23f227ae0..b1b75b032 100644 --- a/ocf_datapipes/training/pvnet.py +++ b/ocf_datapipes/training/pvnet.py @@ -49,7 +49,7 @@ def normalize_pv(x): Returns: Normalized DataArray """ - return (x / x.metadata_capacity_watt_power).clip(None, 5) + return (x / x.nominal_capacity_wp).clip(None, 5) def production_sat_scale(x): diff --git a/ocf_datapipes/transform/xarray/pv/create_pv_image.py b/ocf_datapipes/transform/xarray/pv/create_pv_image.py index ce80f2b29..9d976c433 100644 --- a/ocf_datapipes/transform/xarray/pv/create_pv_image.py +++ b/ocf_datapipes/transform/xarray/pv/create_pv_image.py @@ -209,6 +209,6 @@ def _normalize_by_pvlib(pv_system): fraction_clear_sky = total_irradiance["poa_global"] / ( clear_sky["dni"] + clear_sky["dhi"] + clear_sky["ghi"] ) - pv_system /= pv_system.observed_capacity_watt_power + pv_system /= pv_system.observed_capacity_wp pv_system *= fraction_clear_sky return pv_system diff --git a/ocf_datapipes/utils/consts.py b/ocf_datapipes/utils/consts.py index 7c3e3f653..c3f743425 100644 --- a/ocf_datapipes/utils/consts.py +++ b/ocf_datapipes/utils/consts.py @@ -164,6 +164,10 @@ class BatchKey(Enum): pv_id = auto() # shape: (batch_size, n_pv_systems) pv_observed_capacity_wp = auto() # shape: (batch_size, n_pv_systems) pv_nominal_capacity_wp = auto() # shape: (batch_size, n_pv_systems) + #: pv_mask is True for good PV systems in each example. + # The RawPVDataSource doesn't use pv_mask. Instead is sets missing PV systems to NaN + # across all PV batch keys. + pv_mask = auto() # shape: (batch_size, n_pv_systems) # PV coordinates: # Each has shape: (batch_size, n_pv_systems), will be NaN for missing PV systems. diff --git a/tests/load/pv/test_pv_database.py b/tests/load/pv/test_pv_database.py index 26fbc65bb..b2d204a0e 100644 --- a/tests/load/pv/test_pv_database.py +++ b/tests/load/pv/test_pv_database.py @@ -152,8 +152,8 @@ def test_open_pv_from_pvsites_db(): for variable in [ "time_utc", "pv_system_id", - "observed_capacity_watt_power", - "metadata_capacity_watt_power", + "observed_capacity_wp", + "nominal_capacity_wp", "orientation", "tilt", "longitude", diff --git a/tests/select/test_drop_pv_generating_overnight.py b/tests/select/test_drop_pv_generating_overnight.py index f3b100caa..3bba1489b 100644 --- a/tests/select/test_drop_pv_generating_overnight.py +++ b/tests/select/test_drop_pv_generating_overnight.py @@ -70,7 +70,7 @@ def test_drop_with_constructed_dataarray(): coords=ALL_COORDS, ) data_array = data_array.assign_coords( - observed_capacity_watt_power=("pv_system_id", np.ones(len(pv_system_id))), + observed_capacity_wp=("pv_system_id", np.ones(len(pv_system_id))), ) # run the function diff --git a/tests/transform/xarray/test_normalize.py b/tests/transform/xarray/test_normalize.py index d2ec1ea37..295963db5 100644 --- a/tests/transform/xarray/test_normalize.py +++ b/tests/transform/xarray/test_normalize.py @@ -38,7 +38,7 @@ def test_normalize_gsp(gsp_datapipe): def test_normalize_passiv(passiv_datapipe): passiv_datapipe = passiv_datapipe.normalize( - normalize_fn=lambda x: x / x.observed_capacity_watt_power + normalize_fn=lambda x: x / x.observed_capacity_wp ) data = next(iter(passiv_datapipe)) assert np.min(data) >= 0.0 @@ -47,7 +47,7 @@ def test_normalize_passiv(passiv_datapipe): def test_normalize_pvoutput(pvoutput_datapipe): pvoutput_datapipe = pvoutput_datapipe.normalize( - normalize_fn=lambda x: x / x.observed_capacity_watt_power + normalize_fn=lambda x: x / x.observed_capacity_wp ) data = next(iter(pvoutput_datapipe)) assert np.min(data) >= 0.0