diff --git a/notebooks/2021-08/2021-08-25/video.py b/notebooks/2021-08/2021-08-25/video.py
index dd4a5173..b76c7450 100644
--- a/notebooks/2021-08/2021-08-25/video.py
+++ b/notebooks/2021-08/2021-08-25/video.py
@@ -68,7 +68,6 @@
 channel_indexes = [1, 8, 9]
 satellite_data = []
 for channel_index in channel_indexes:
-
     # renormalize
     satellite_data.append(
         data["sat_data"][batch_index, :, :, :, channel_index]
         * SAT_STD.values[channel_index]
diff --git a/notebooks/2021-08/2021-08-26/video.py b/notebooks/2021-08/2021-08-26/video.py
index 4ef91de9..9aa5ff61 100644
--- a/notebooks/2021-08/2021-08-26/video.py
+++ b/notebooks/2021-08/2021-08-26/video.py
@@ -88,7 +88,6 @@
 channel_indexes = [1, 9, 8]
 satellite_data = []
 for channel_index in channel_indexes:
-
     # renormalize
     satellite_data.append(
         data["sat_data"][batch_index, :, :, :, channel_index]
         * SAT_STD.values[channel_index]
diff --git a/notebooks/2021-09/2021-09-13/remove_hash.py b/notebooks/2021-09/2021-09-13/remove_hash.py
index aa098838..4126e738 100644
--- a/notebooks/2021-09/2021-09-13/remove_hash.py
+++ b/notebooks/2021-09/2021-09-13/remove_hash.py
@@ -20,7 +20,6 @@ for filenames in [train_filenames, validation_filenames]:
 
 for file in train_filenames:
-
     print(file)
 
     filename = file.split("/")[-1]
diff --git a/notebooks/2021-09/2021-09-14/gsp_centroid.py b/notebooks/2021-09/2021-09-14/gsp_centroid.py
index 583c9999..0c61ce6e 100644
--- a/notebooks/2021-09/2021-09-14/gsp_centroid.py
+++ b/notebooks/2021-09/2021-09-14/gsp_centroid.py
@@ -15,7 +15,6 @@
 # for index in range(0, len(shape_data_raw)):
 for index in range(140, 150):
-
     # just select the first one
     shape_data = shape_data_raw.iloc[index : index + 1]
 
     shapes_dict = json.loads(shape_data["geometry"].to_json())
diff --git a/notebooks/2021-09/2021-09-14/gsp_duplicated.py b/notebooks/2021-09/2021-09-14/gsp_duplicated.py
index 5502a117..1c8531af 100644
--- a/notebooks/2021-09/2021-09-14/gsp_duplicated.py
+++ b/notebooks/2021-09/2021-09-14/gsp_duplicated.py
@@ -12,7 +12,6 @@
 duplicated_raw["Amount"] = range(0, len(duplicated_raw))
 
 for i in range(0, 8, 2):
-
     # just select the first one
     duplicated = duplicated_raw.iloc[i : i + 2]
 
     shapes_dict = json.loads(duplicated["geometry"].to_json())
diff --git a/notebooks/2021-09/2021-09-29/gsp_duplicated.py b/notebooks/2021-09/2021-09-29/gsp_duplicated.py
index adcf9c2b..72c01f14 100644
--- a/notebooks/2021-09/2021-09-29/gsp_duplicated.py
+++ b/notebooks/2021-09/2021-09-29/gsp_duplicated.py
@@ -14,7 +14,6 @@
 duplicated_raw["Amount"] = range(0, len(duplicated_raw))
 
 for i in range(0, 8, 2):
-
     # just select the first one
     duplicated = duplicated_raw.iloc[i : i + 2]
 
     shapes_dict = json.loads(duplicated["geometry"].to_json())
diff --git a/notebooks/2021-09/2021-09-29/video.py b/notebooks/2021-09/2021-09-29/video.py
index 78a4fef8..067644c7 100644
--- a/notebooks/2021-09/2021-09-29/video.py
+++ b/notebooks/2021-09/2021-09-29/video.py
@@ -41,7 +41,6 @@
 
 
 def get_trace(dt):
-
     # plot to check it looks right
     return go.Choroplethmapbox(
         geojson=shapes_dict,
@@ -54,7 +53,6 @@
 
 
 def get_frame(dt):
-
     # plot to check it looks right
     return go.Choroplethmapbox(
         z=gps_data[dt],
diff --git a/notebooks/2021-10/2021-10-01/pydantic.py b/notebooks/2021-10/2021-10-01/pydantic.py
index 3474a74d..6ccb8077 100644
--- a/notebooks/2021-10/2021-10-01/pydantic.py
+++ b/notebooks/2021-10/2021-10-01/pydantic.py
@@ -11,7 +11,6 @@
 
 
 class Satellite(BaseModel):
-
     # width: int = Field(..., g=0, description="The width of the satellite image")
     # height: int = Field(..., g=0, description="The width of the satellite image")
     # num_channels: int = Field(..., g=0, description="The width of the satellite image")
@@ -49,7 +48,6 @@ class Config:
 
 
 class Batch(BaseModel):
-
     batch_size: int = Field(
         ...,
         g=0,
diff --git a/notebooks/2021-10/2021-10-08/xr_compression.py b/notebooks/2021-10/2021-10-08/xr_compression.py
index b835ddf9..72369236 100644
--- a/notebooks/2021-10/2021-10-08/xr_compression.py
+++ b/notebooks/2021-10/2021-10-08/xr_compression.py
@@ -9,7 +9,6 @@
 def get_satellite_xrarray_data_array(
     batch_size, seq_length_5, satellite_image_size_pixels, number_sat_channels=10
 ):
-
     r = np.random.randn(
         # self.batch_size,
         seq_length_5,
diff --git a/notebooks/2021-10/2021-10-08/xr_pydantic.py b/notebooks/2021-10/2021-10-08/xr_pydantic.py
index ff45f9f5..d8ec57e3 100644
--- a/notebooks/2021-10/2021-10-08/xr_pydantic.py
+++ b/notebooks/2021-10/2021-10-08/xr_pydantic.py
@@ -25,7 +25,6 @@ def v_image_data(cls, v):
 
 
 class Batch(BaseModel):
-
     batch_size: int = 0
     satellite: Satellite
diff --git a/nowcasting_dataset/data_sources/data_source.py b/nowcasting_dataset/data_sources/data_source.py
index d4760e70..a23a2783 100644
--- a/nowcasting_dataset/data_sources/data_source.py
+++ b/nowcasting_dataset/data_sources/data_source.py
@@ -82,7 +82,6 @@ def __post_init__(self):
     def _get_start_dt(
         self, t0_datetime_utc: Union[pd.Timestamp, pd.DatetimeIndex]
     ) -> Union[pd.Timestamp, pd.DatetimeIndex]:
-
         return t0_datetime_utc - self.history_duration
 
     def _get_end_dt(
diff --git a/nowcasting_dataset/data_sources/fake/batch.py b/nowcasting_dataset/data_sources/fake/batch.py
index 71407e50..4f3b01e7 100644
--- a/nowcasting_dataset/data_sources/fake/batch.py
+++ b/nowcasting_dataset/data_sources/fake/batch.py
@@ -504,7 +504,6 @@ def topographic_fake(
     # make batch of arrays
     xr_arrays = []
     for i in range(batch_size):
-
         x, y = make_image_coords_osgb(
             size_x=image_size_pixels_width,
             size_y=image_size_pixels_height,
diff --git a/nowcasting_dataset/data_sources/gsp/eso.py b/nowcasting_dataset/data_sources/gsp/eso.py
index f7f464b5..15785c06 100644
--- a/nowcasting_dataset/data_sources/gsp/eso.py
+++ b/nowcasting_dataset/data_sources/gsp/eso.py
@@ -164,7 +164,6 @@ def get_gsp_shape_from_eso(
     shape_gpd["RegionID"] = range(1, len(shape_gpd) + 1)
 
     if save_local_file:
-
         # rename the columns to less than 10 characters
         shape_gpd_to_save = shape_gpd.copy()
         shape_gpd_to_save.rename(columns=rename_save_columns, inplace=True)
diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py
index 8686eaaf..6b39a8da 100644
--- a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py
+++ b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py
@@ -173,7 +173,6 @@ def get_all_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTim
         if total_gsp_nan_count > 0:
             assert Exception("There are nans in the GSP data. Can't get locations for all GSPs")
         else:
-
             t0_datetimes_utc.name = "t0_datetime_utc"
 
             # get all locations
@@ -236,7 +235,6 @@ def get_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTimeLoc
         total_gsp_nan_count = self.gsp_power.isna().sum().sum()
 
         if total_gsp_nan_count == 0:
-
             # get random GSP metadata
             indexes = sorted(
                 list(self.rng.integers(low=0, high=len(self.metadata), size=len(t0_datetimes_utc)))
@@ -249,7 +247,6 @@
             ids = list(metadata.index)
 
         else:
-
             logger.warning(
                 "There are some nans in the gsp data, "
                 "so to get x,y locations we have to do a big loop"
@@ -262,7 +259,6 @@ def get_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTimeLoc
             ids = []
             for t0_dt in t0_datetimes_utc:
-
                 # Choose start and end times
                 start_dt = self._get_start_dt(t0_dt)
                 end_dt = self._get_end_dt(t0_dt)
@@ -290,7 +286,6 @@ def get_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTimeLoc
         locations = []
         for i in range(len(x_centers_osgb)):
-
             locations.append(
                 SpaceTimeLocation(
                     t0_datetime_utc=t0_datetimes_utc[i],
diff --git a/nowcasting_dataset/data_sources/gsp/pvlive.py b/nowcasting_dataset/data_sources/gsp/pvlive.py
index 16a9f82b..c1b4be38 100644
--- a/nowcasting_dataset/data_sources/gsp/pvlive.py
+++ b/nowcasting_dataset/data_sources/gsp/pvlive.py
@@ -89,7 +89,6 @@ def load_pv_gsp_raw_data_from_pvlive(
     future_tasks = []
     with futures.ThreadPoolExecutor(max_workers=1) as executor:
         for gsp_id in gsp_ids:
-
             # set the first chunk start and end times
             start_chunk = first_start_chunk
             end_chunk = first_end_chunk
diff --git a/nowcasting_dataset/data_sources/metadata/metadata_model.py b/nowcasting_dataset/data_sources/metadata/metadata_model.py
index 1acfd99b..114724a9 100644
--- a/nowcasting_dataset/data_sources/metadata/metadata_model.py
+++ b/nowcasting_dataset/data_sources/metadata/metadata_model.py
@@ -114,7 +114,6 @@ def save_to_csv(self, path):
             metadata_df = pd.DataFrame(metadata_dict)
 
         else:
-
             metadata_df = pd.read_csv(filename)
 
             metadata_df_extra = pd.DataFrame(metadata_dict)
diff --git a/nowcasting_dataset/data_sources/pv/live.py b/nowcasting_dataset/data_sources/pv/live.py
index b3c41f7f..cf05440e 100644
--- a/nowcasting_dataset/data_sources/pv/live.py
+++ b/nowcasting_dataset/data_sources/pv/live.py
@@ -43,7 +43,6 @@ def get_metadata_from_database(providers: List[str] = None) -> pd.DataFrame:
     pv_system_all_df = []
     for provider in providers:
-
         logger.debug(f"Get PV systems from database for {provider}")
 
         with db_connection.get_session() as session:
@@ -136,7 +135,6 @@ def get_pv_power_from_database(
     logger.debug(f"Found {len(pv_yields_df)} pv yields")
 
     if len(pv_yields_df) == 0:
-
         data = create_empty_pv_data(end_utc=now, providers=providers, start_utc=start_utc)
 
         return data
diff --git a/nowcasting_dataset/data_sources/pv/pv_data_source.py b/nowcasting_dataset/data_sources/pv/pv_data_source.py
index a27ef4ea..a3bc7d3e 100644
--- a/nowcasting_dataset/data_sources/pv/pv_data_source.py
+++ b/nowcasting_dataset/data_sources/pv/pv_data_source.py
@@ -98,7 +98,6 @@ def get_data_model_for_batch():
         return PV
 
     def _load_metadata(self):
-
         logger.debug(f"Loading PV metadata from {self.files_groups}")
 
         # collect all metadata together
@@ -155,7 +154,6 @@ def _load_metadata(self):
         logger.debug(f"Found {len(pv_metadata)} pv systems")
 
     def _load_pv_power(self):
-
         logger.debug(f"Loading PV Power data from {self.files_groups}")
 
         if not self.is_live:
@@ -453,6 +451,7 @@ def get_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTimeLoc
         Returns:  x_locations, y_locations. Each has one entry per t0_datetime.
             Locations are in OSGB coordinates.
         """
+        # Set this up as a separate function, so we can cache the result!
         @functools.cache  # functools.cache requires Python >= 3.9
         def _get_pv_system_ids(t0_datetime: pd.Timestamp) -> pd.Int64Dtype:
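The pv_data_source.py hunk above adds a comment explaining why `_get_pv_system_ids` is defined as a separate inner function: it gives `functools.cache` a single hashable argument to memoise on. A minimal sketch of that pattern, with hypothetical names and a stand-in lookup (not the repo's actual implementation):

```python
import functools

import pandas as pd


def get_locations(t0_datetimes_utc: pd.DatetimeIndex) -> list:
    """Sketch: cache an expensive per-timestamp lookup via an inner function."""

    # A separate inner function gives functools.cache one hashable argument
    # to key on; duplicate timestamps then reuse the first result.
    @functools.cache  # requires Python >= 3.9; functools.lru_cache() works on 3.8
    def _get_system_ids(t0_datetime: pd.Timestamp) -> tuple:
        # Stand-in for the real (expensive) metadata filtering.
        return tuple(range(5))

    return [_get_system_ids(t0) for t0 in t0_datetimes_utc]
```

Because the inner function is recreated on each call, the cache only lives for one `get_locations` call. That is still a win here, since the incoming index can contain the same timestamp many times, once per location.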
diff --git a/nowcasting_dataset/data_sources/sun/raw_data_load_save.py b/nowcasting_dataset/data_sources/sun/raw_data_load_save.py
index f3ed3b30..8df3dadb 100644
--- a/nowcasting_dataset/data_sources/sun/raw_data_load_save.py
+++ b/nowcasting_dataset/data_sources/sun/raw_data_load_save.py
@@ -49,16 +49,13 @@ def get_azimuth_and_elevation(
     names = []
 
     # loop over locations and find azimuth and elevation angles,
     with futures.ThreadPoolExecutor() as executor:
-
         logger.debug("Setting up jobs")
 
         # Submit tasks to the executor.
         future_azimuth_and_elevation_per_location = []
         for i in tqdm(range(len(x_centers))):
-
             name = x_y_to_name(x_centers[i], y_centers[i])
             if name not in names:
-
                 lat, lon = geospatial.osgb_to_lat_lon(x=x_centers[i], y=y_centers[i])
 
                 future_azimuth_and_elevation = executor.submit(
diff --git a/nowcasting_dataset/data_sources/sun/sun_data_source.py b/nowcasting_dataset/data_sources/sun/sun_data_source.py
index 71b767df..2605af0b 100644
--- a/nowcasting_dataset/data_sources/sun/sun_data_source.py
+++ b/nowcasting_dataset/data_sources/sun/sun_data_source.py
@@ -69,7 +69,6 @@ def get_example(self, location: SpaceTimeLocation) -> xr.Dataset:
         end_dt = self._get_end_dt(t0_datetime_utc)
 
         if not self.load_live:
-
             # The names of the columns get truncated when saving, therefore we need to look for the
             # name of the columns near the location we are looking for
             locations = np.array(
@@ -96,7 +95,6 @@ def get_example(self, location: SpaceTimeLocation) -> xr.Dataset:
             elevation = self.elevation.loc[start_dt:end_dt][name]
 
         else:
-
             latitude, longitude = osgb_to_lat_lon(x=x_center_osgb, y=y_center_osgb)
 
             datestamps = pd.date_range(start=start_dt, end=end_dt, freq="5T").tolist()
@@ -115,7 +113,6 @@
         return sun
 
     def _load(self):
-
         logger.info(f"Loading Sun data from {self.zarr_path}")
 
         if not self.load_live:
diff --git a/nowcasting_dataset/dataset/batch.py b/nowcasting_dataset/dataset/batch.py
index dc7375f5..b066c6f1 100644
--- a/nowcasting_dataset/dataset/batch.py
+++ b/nowcasting_dataset/dataset/batch.py
@@ -137,7 +137,6 @@ def load_netcdf(
 
     # loop over data sources
     for data_source_name in data_sources_names:
-
         local_netcdf_filename = os.path.join(
             local_netcdf_path, data_source_name, get_netcdf_filename(batch_idx)
         )
@@ -193,7 +192,6 @@ def load_netcdf(
 
     # legacy NWP
     if "nwp" in batch_dict.keys():
-
         nwp_rename_dict = {
             "x_index": "x_osgb_index",
             "y_index": "y_osgb_index",
diff --git a/nowcasting_dataset/dataset/split/method.py b/nowcasting_dataset/dataset/split/method.py
index eeb64179..eecbb5c3 100644
--- a/nowcasting_dataset/dataset/split/method.py
+++ b/nowcasting_dataset/dataset/split/method.py
@@ -85,7 +85,6 @@ def split_method(
         test_periods = unique_periods[unique_periods["modulo"].isin(test_indexes)]["period"]
 
     elif method == "random":
-
         # randomly sort indexes
         rng = np.random.default_rng(seed)
         unique_periods_in_dataset = rng.permutation(unique_periods_in_dataset)
@@ -108,7 +107,6 @@
         test_periods = pd.to_datetime(unique_periods_in_dataset[validation_test_split:])
 
     elif method == "specific":
-
         train_periods = unique_periods_in_dataset[
             unique_periods_in_dataset.isin(train_test_validation_specific.train)
        ]
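For orientation, the `random` branch touched above shuffles the unique periods with a seeded `np.random.default_rng` and then slices the permutation into train, validation, and test. A self-contained sketch of that idea; the fractions and seed are invented for illustration, and `split_method` itself takes them as arguments:

```python
import numpy as np
import pandas as pd

TRAIN_FRAC, VALIDATION_FRAC = 0.6, 0.2  # assumed fractions, illustration only

days = pd.date_range("2021-01-01", "2021-12-31", freq="D")
rng = np.random.default_rng(seed=42)  # a fixed seed makes the split reproducible
shuffled = rng.permutation(days)

n_train = int(len(shuffled) * TRAIN_FRAC)
n_validation = int(len(shuffled) * VALIDATION_FRAC)

train = pd.to_datetime(shuffled[:n_train])
validation = pd.to_datetime(shuffled[n_train : n_train + n_validation])
test = pd.to_datetime(shuffled[n_train + n_validation :])
```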
diff --git a/nowcasting_dataset/filesystem/utils.py b/nowcasting_dataset/filesystem/utils.py
index 50def3d5..08a854c1 100644
--- a/nowcasting_dataset/filesystem/utils.py
+++ b/nowcasting_dataset/filesystem/utils.py
@@ -90,7 +90,6 @@ def delete_all_files_in_temp_path(path: Union[Path, str], delete_dirs: bool = Fa
     else:
         # loop over folder structure, but only delete files
         for root, dirs, files in filesystem.walk(path):
-
             for f in files:
                 filesystem.rm(f"{root}/{f}")
diff --git a/nowcasting_dataset/manager/manager.py b/nowcasting_dataset/manager/manager.py
index 3acbcaff..973bd0cd 100644
--- a/nowcasting_dataset/manager/manager.py
+++ b/nowcasting_dataset/manager/manager.py
@@ -273,7 +273,6 @@ def sample_spatial_and_temporal_locations_for_examples(
         shuffled_t0_datetimes = pd.DatetimeIndex(shuffled_t0_datetimes)
 
         if get_all_locations:
-
             # note that the returned 'shuffled_t0_datetimes'
             # has duplicate datetimes for each location
             locations = self.data_source_which_defines_geospatial_locations.get_all_locations(
                 t0_datetimes_utc=shuffled_t0_datetimes
             )
@@ -281,7 +280,6 @@
 
         else:
-
             locations = self.data_source_which_defines_geospatial_locations.get_locations(
                 shuffled_t0_datetimes
             )
@@ -404,7 +402,6 @@ def create_batches(self, overwrite_batches: bool) -> None:
             for worker_id, (data_source_name, data_source) in enumerate(
                 self.data_sources.items()
             ):
-
                 # Get indexes of first batch and example. And subset locations_for_split.
                 idx_of_first_batch = first_batches_to_create[split_name][data_source_name]
                 idx_of_first_example = idx_of_first_batch * self.config.process.batch_size
diff --git a/nowcasting_dataset/manager/manager_live.py b/nowcasting_dataset/manager/manager_live.py
index f608f456..491dc256 100644
--- a/nowcasting_dataset/manager/manager_live.py
+++ b/nowcasting_dataset/manager/manager_live.py
@@ -186,7 +186,6 @@ def create_batches(self, use_async: Optional[bool] = True) -> None:
         async_results_from_create_batches = []
         an_error_has_occured = multiprocessing.Event()
         for worker_id, (data_source_name, data_source) in enumerate(self.data_sources.items()):
-
             # Get indexes of first batch and example. And subset locations_for_split.
             idx_of_first_batch = 0
             locations = locations_for_each_example
@@ -226,7 +225,6 @@ def create_batches(self, use_async: Optional[bool] = True) -> None:
                 # Sometimes when debuggin it is easy to use non async
                 data_source.create_batches(**kwargs_for_create_batches)
             else:
-
                 async_result = pool.apply_async(
                     data_source.create_batches,
                     kwds=kwargs_for_create_batches,
diff --git a/nowcasting_dataset/utils.py b/nowcasting_dataset/utils.py
index 0e173451..e11b9e3f 100644
--- a/nowcasting_dataset/utils.py
+++ b/nowcasting_dataset/utils.py
@@ -180,6 +180,7 @@ def shutdown(self, wait=True):
 
 def arg_logger(func):
     """A function decorator to log all the args and kwargs passed into a function."""
+    # Adapted from https://stackoverflow.com/a/23983263/732596
 
     @wraps(func)
     def inner_func(*args, **kwargs):
@@ -191,6 +192,7 @@ def inner_func(*args, **kwargs):
 
 def exception_logger(func):
     """A function decorator to log exceptions thrown by the inner function."""
+    # Adapted from
     # www.blog.pythonlibrary.org/2016/06/09/python-how-to-create-an-exception-logging-decorator
 
     @wraps(func)
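The two `utils.py` hunks only add attribution comments, but the decorators they annotate follow a standard shape: wrap the target function, log, then delegate, with `functools.wraps` preserving the wrapped function's name and docstring. A generic sketch in the spirit of the cited Stack Overflow answer (not a copy of the repo's code):

```python
import logging
from functools import wraps

logger = logging.getLogger(__name__)


def arg_logger(func):
    """Log all args and kwargs passed into the decorated function."""

    @wraps(func)  # keep func.__name__, __doc__, etc. on the wrapper
    def inner_func(*args, **kwargs):
        logger.debug(f"Calling {func.__name__} with args={args}, kwargs={kwargs}")
        return func(*args, **kwargs)

    return inner_func


@arg_logger
def add(a: int, b: int) -> int:
    return a + b
```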
diff --git a/scripts/generate_raw_data/get_raw_pv_gsp_data.py b/scripts/generate_raw_data/get_raw_pv_gsp_data.py
index 0c7109ea..a7652eaa 100755
--- a/scripts/generate_raw_data/get_raw_pv_gsp_data.py
+++ b/scripts/generate_raw_data/get_raw_pv_gsp_data.py
@@ -48,10 +48,16 @@ def fetch_data():
     data_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end, normalize_data=False)
 
     # pivot to index as datetime_gmt, and columns as gsp_id
-    data_generation_df = data_df.pivot(index="datetime_gmt", columns="gsp_id", values="generation_mw")
-    data_installedcapacity_df = data_df.pivot(index="datetime_gmt", columns="gsp_id", values="installedcapacity_mwp")
+    data_generation_df = data_df.pivot(
+        index="datetime_gmt", columns="gsp_id", values="generation_mw"
+    )
+    data_installedcapacity_df = data_df.pivot(
+        index="datetime_gmt", columns="gsp_id", values="installedcapacity_mwp"
+    )
     data_capacity_df = data_df.pivot(index="datetime_gmt", columns="gsp_id", values="capacity_mwp")
-    data_updated_gmt_df = data_df.pivot(index="datetime_gmt", columns="gsp_id", values="updated_gmt")
+    data_updated_gmt_df = data_df.pivot(
+        index="datetime_gmt", columns="gsp_id", values="updated_gmt"
+    )
     data_xarray = xr.Dataset(
         data_vars={
             "generation_mw": (("datetime_gmt", "gsp_id"), data_generation_df),
@@ -59,10 +65,7 @@
             "capacity_mwp": (("datetime_gmt", "gsp_id"), data_capacity_df),
             "updated_gmt": (("datetime_gmt", "gsp_id"), data_updated_gmt_df),
         },
-        coords={
-            "datetime_gmt": data_generation_df.index,
-            "gsp_id": data_generation_df.columns
-        },
+        coords={"datetime_gmt": data_generation_df.index, "gsp_id": data_generation_df.columns},
     )
 
     # save config to file
@@ -71,7 +74,8 @@
 
     # Make encoding
     encoding = {
-        var: {"compressor": numcodecs.Blosc(cname="zstd", clevel=5)} for var in data_xarray.data_vars
+        var: {"compressor": numcodecs.Blosc(cname="zstd", clevel=5)}
+        for var in data_xarray.data_vars
     }
 
     # save data to file
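The reformatted script above pivots the PV-Live data into an `xarray.Dataset` and compresses every data variable with Blosc/zstd when writing to zarr. A runnable toy version of that save path; the random data and output filename are stand-ins, and writing requires the `zarr` package:

```python
import numcodecs
import numpy as np
import pandas as pd
import xarray as xr

# Toy stand-ins for the pivoted datetime_gmt x gsp_id DataFrames.
datetime_gmt = pd.date_range("2021-01-01", periods=48, freq="30T")
gsp_id = np.arange(1, 4)

data_xarray = xr.Dataset(
    data_vars={
        "generation_mw": (
            ("datetime_gmt", "gsp_id"),
            np.random.rand(len(datetime_gmt), len(gsp_id)),
        )
    },
    coords={"datetime_gmt": datetime_gmt, "gsp_id": gsp_id},
)

# One Blosc/zstd compressor per data variable, as in the hunk above.
encoding = {
    var: {"compressor": numcodecs.Blosc(cname="zstd", clevel=5)}
    for var in data_xarray.data_vars
}
data_xarray.to_zarr("pv_gsp_toy.zarr", mode="w", encoding=encoding)
```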
diff --git a/scripts/generate_topographic_data.py b/scripts/generate_topographic_data.py
index dcffb882..e44fb131 100644
--- a/scripts/generate_topographic_data.py
+++ b/scripts/generate_topographic_data.py
@@ -24,7 +24,6 @@
 upscale_factor = 0.12  # 30m to 250m-ish, just making it small enough files to actually merge
 for f in files:
     with rasterio.open(f) as dataset:
-
         # resample data to target shape
         data = dataset.read(
             out_shape=(
diff --git a/tests/config/test_config.py b/tests/config/test_config.py
index 87faefc5..aaf0000f 100644
--- a/tests/config/test_config.py
+++ b/tests/config/test_config.py
@@ -53,7 +53,6 @@ def test_yaml_save():
     """
 
     with tempfile.NamedTemporaryFile(suffix=".yaml") as fp:
-
         filename = fp.name
 
         # check that temp file cant be loaded
diff --git a/tests/conftest.py b/tests/conftest.py
index 60af5dc6..7a6dea5d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,7 +24,6 @@ def gsp_yields_and_systems(db_session):
     gsp_yield_sqls = []
     locations = []
     for i in range(317):
-
         location_sql_1 = get_location(session=db_session, gsp_id=i + 1, label=f"GSP_{i+1}")
         location_sql_1.installed_capacity_mw = 123.0
diff --git a/tests/data_sources/conftest.py b/tests/data_sources/conftest.py
index e500d714..b23745ff 100644
--- a/tests/data_sources/conftest.py
+++ b/tests/data_sources/conftest.py
@@ -80,7 +80,6 @@ def pv_yields_and_systems(db_session):
 
     # pv system with gaps every 5 mins
     for minutes in [0, 10, 20, 30]:
-
         pv_yield_4 = PVYield(
             datetime_utc=datetime(2022, 1, 1, 4) + timedelta(minutes=minutes), solar_generation_kw=4
         ).to_orm()
diff --git a/tests/data_sources/fake/test_fake.py b/tests/data_sources/fake/test_fake.py
index eaa016ed..d646689a 100644
--- a/tests/data_sources/fake/test_fake.py
+++ b/tests/data_sources/fake/test_fake.py
@@ -38,14 +38,12 @@ def test_metadata_fake_gsp():
 
 
 def test_model_no_opticalflow(configuration):  # noqa: D103
-
     configuration.input_data.opticalflow = None
 
     _ = Batch.fake(configuration=configuration)
 
 
 def test_model(configuration):  # noqa: D103
-
     assert configuration.input_data.opticalflow is not None
 
     batch = Batch.fake(configuration=configuration)
diff --git a/tests/data_sources/gsp/test_gsp_model.py b/tests/data_sources/gsp/test_gsp_model.py
index 0206724d..9543fb8c 100644
--- a/tests/data_sources/gsp/test_gsp_model.py
+++ b/tests/data_sources/gsp/test_gsp_model.py
@@ -10,7 +10,6 @@
 
 
 def test_gsp_init(configuration):  # noqa: D103
-
     configuration.process.batch_size = 4
     configuration.input_data.gsp.history_minutes = 60
     configuration.input_data.gsp.forecast_minutes = 60
@@ -36,7 +35,6 @@ def test_gsp_normalized(configuration):
 
 
 def test_gsp_validation(configuration):  # noqa: D103
-
     configuration.process.batch_size = 4
     configuration.input_data.gsp.history_minutes = 60
     configuration.input_data.gsp.forecast_minutes = 60
@@ -52,7 +50,6 @@
 
 
 def test_gsp_save(configuration):  # noqa: D103
-
     configuration.process.batch_size = 4
     configuration.input_data.gsp.history_minutes = 60
     configuration.input_data.gsp.forecast_minutes = 60
diff --git a/tests/data_sources/optical_flow/test_optical_flow_model.py b/tests/data_sources/optical_flow/test_optical_flow_model.py
index b8e46c09..8f34208a 100644
--- a/tests/data_sources/optical_flow/test_optical_flow_model.py
+++ b/tests/data_sources/optical_flow/test_optical_flow_model.py
@@ -24,7 +24,6 @@ def test_optical_flow_validation():  # noqa: D103
 
 
 def test_optical_flow_save():  # noqa: D103
-
     with tempfile.TemporaryDirectory() as dirpath:
         optical_flow_fake().save_netcdf(path=dirpath, batch_i=0)
diff --git a/tests/data_sources/pv/test_pv_data_source.py b/tests/data_sources/pv/test_pv_data_source.py
index a7a0bae5..d3d1b185 100644
--- a/tests/data_sources/pv/test_pv_data_source.py
+++ b/tests/data_sources/pv/test_pv_data_source.py
@@ -37,7 +37,6 @@ def test_pv_normalized(configuration):
 
 
 def test_get_example_and_batch():  # noqa: D103
-
     path = os.path.dirname(nowcasting_dataset.__file__)
 
     # Solar PV data (test data)
diff --git a/tests/data_sources/satellite/test_satellite_model.py b/tests/data_sources/satellite/test_satellite_model.py
index c995a4d1..7cceee33 100644
--- a/tests/data_sources/satellite/test_satellite_model.py
+++ b/tests/data_sources/satellite/test_satellite_model.py
@@ -35,7 +35,6 @@ def test_satellite_validation():  # noqa: D103
 
 
 def test_satellite_save():  # noqa: D103
-
     with tempfile.TemporaryDirectory() as dirpath:
         satellite_fake().save_netcdf(path=dirpath, batch_i=0)
diff --git a/tests/data_sources/sun/test_load.py b/tests/data_sources/sun/test_load.py
index 558f06b3..92b107ba 100644
--- a/tests/data_sources/sun/test_load.py
+++ b/tests/data_sources/sun/test_load.py
@@ -30,7 +30,6 @@ def test_calculate_azimuth_and_elevation():
 
 
 def test_save():
-
     datestamps = pd.to_datetime(pd.date_range("2010-01-01", "2010-01-02", freq="5 min"))
     N = 100
     metadata = pd.DataFrame(index=range(0, N))
@@ -48,7 +47,6 @@ def test_save():
 
 
 def test_load(test_data_folder):
-
     zarr_path = test_data_folder + "/sun/test.zarr"
 
     azimuth, elevation = load_from_zarr(zarr_path=zarr_path)
diff --git a/tests/data_sources/sun/test_sun_data_source.py b/tests/data_sources/sun/test_sun_data_source.py
index e261724f..14a9bade 100644
--- a/tests/data_sources/sun/test_sun_data_source.py
+++ b/tests/data_sources/sun/test_sun_data_source.py
@@ -47,7 +47,6 @@ def test_get_example_different_year(test_data_folder):  # noqa 103
 
 
 def test_get_load_live():  # noqa 103
-
     sun_data_source = SunDataSource(
         zarr_path="", history_minutes=30, forecast_minutes=60, load_live=True
     )
diff --git a/tests/data_sources/sun/test_sun_model.py b/tests/data_sources/sun/test_sun_model.py
index 7d1cef58..59a2f6d4 100644
--- a/tests/data_sources/sun/test_sun_model.py
+++ b/tests/data_sources/sun/test_sun_model.py
@@ -48,7 +48,6 @@ def test_sun_validation_azimuth(configuration):  # noqa: D103
 
 
 def test_sun_save(configuration):  # noqa: D103
-
     configuration.process.batch_size = 4
     with tempfile.TemporaryDirectory() as dirpath:
         sun = sun_fake(configuration=configuration)
diff --git a/tests/data_sources/test_datasource_output.py b/tests/data_sources/test_datasource_output.py
index 5d8154ba..49a7dbcc 100644
--- a/tests/data_sources/test_datasource_output.py
+++ b/tests/data_sources/test_datasource_output.py
@@ -9,7 +9,6 @@
 
 
 def test_datasource_output_validation(configuration):  # noqa: D103
-
     configuration.process.batch_size = 2
     configuration.input_data.gsp.history_minutes = 60
     configuration.input_data.gsp.forecast_minutes = 60
diff --git a/tests/data_sources/test_metadata.py b/tests/data_sources/test_metadata.py
index 9c87acd5..86658ec7 100644
--- a/tests/data_sources/test_metadata.py
+++ b/tests/data_sources/test_metadata.py
@@ -19,7 +19,6 @@ def test_metadata_save():
     metadata = metadata_fake(10)
 
     with tempfile.TemporaryDirectory() as local_temp_path:
-
         metadata.save_to_csv(path=local_temp_path)
diff --git a/tests/dataset/test_batch.py b/tests/dataset/test_batch.py
index dccf8653..acad1cf3 100644
--- a/tests/dataset/test_batch.py
+++ b/tests/dataset/test_batch.py
@@ -27,7 +27,6 @@ def test_model_align_in_time(configuration):  # noqa: D103
 
 
 def test_model_nwp_channels(configuration):  # noqa: D103
-
     configuration.input_data = configuration.input_data.set_all_to_defaults()
     configuration.process.batch_size = 4
     configuration.input_data.nwp.nwp_channels = ["dlwrf"]
diff --git a/tests/dataset/test_split.py b/tests/dataset/test_split.py
index 36320422..78ab944c 100644
--- a/tests/dataset/test_split.py
+++ b/tests/dataset/test_split.py
@@ -7,7 +7,6 @@
 
 
 def test_split_same():
-
     datetimes = pd.date_range("2021-01-01", "2021-01-02", freq="5T")
 
     train, validation, test = split_data(datetimes=datetimes, method=SplitMethod.SAME)
@@ -18,7 +17,6 @@
 
 
 def test_split_day():
-
     datetimes = pd.date_range("2021-01-01", "2021-02-01", freq="5T")
 
     train, validation, test = split_data(datetimes=datetimes, method=SplitMethod.DAY)
@@ -42,7 +40,6 @@
 
 
 def test_split_day_every_5():
-
     datetimes = pd.date_range("2021-01-01", "2021-01-02", freq="5T")
     datetimes = datetimes.append(pd.date_range("2021-01-06", "2021-01-07", freq="5T"))
     datetimes = datetimes.append(pd.date_range("2021-01-11", "2021-01-12", freq="5T"))
@@ -74,7 +71,6 @@
 
 
 def test_split_day_random():
-
     datetimes = pd.date_range("2021-01-01", "2021-12-31 23:59:00", freq="5T")
 
     train, validation, test = split_data(datetimes=datetimes, method=SplitMethod.DAY_RANDOM)
@@ -103,7 +99,6 @@
 
 
 def test_split_year():
-
     datetimes = pd.date_range("2014-01-01", "2021-01-01", freq="MS")
 
     train, validation, test = split_data(datetimes=datetimes, method=SplitMethod.YEAR_SPECIFIC)
@@ -131,7 +126,6 @@
 
 
 def test_split_day_specific():
-
     datetimes = pd.date_range("2021-01-01", "2021-01-10", freq="D")
 
     train_test_validation_specific = TrainValidationTestSpecific(
@@ -164,7 +158,6 @@
 
 
 def test_split_year_error():
-
     with pytest.raises(Exception):
         TrainValidationTestSpecific(train=[2015, 2016], validation=[2016], test=[2017])
@@ -176,7 +169,6 @@
 
 
 def test_split_week():
-
     datetimes = pd.date_range("2021-01-01", "2021-06-01", freq="30T")
 
     train, validation, test = split_data(datetimes=datetimes, method=SplitMethod.WEEK)
@@ -202,7 +194,6 @@
 
 
 def test_split_week_random():
-
     datetimes = pd.date_range("2021-01-04", "2022-01-02", freq="1D")
 
     train, validation, test = split_data(datetimes=datetimes, method=SplitMethod.WEEK_RANDOM)
@@ -231,7 +222,6 @@
 
 
 def test_split_random_day_test_specific():
-
     datetimes = pd.date_range("2020-01-01", "2022-01-01", freq="1D")
 
     train, validation, test = split_data(
@@ -261,7 +251,6 @@
 
 
 def test_split_date():
-
     datetimes = pd.date_range("2020-01-01", "2022-01-01", freq="1D")
 
     train_validation_test_datetime_split = [pd.Timestamp("2020-07-01"), pd.Timestamp("2021-01-01")]
@@ -296,7 +285,6 @@
 
 
 def test_split_day_random_test_date():
-
     datetimes = pd.date_range("2020-01-01", "2022-01-01", freq="1D")
 
     train_validation_test_datetime_split = [pd.Timestamp("2020-07-01"), pd.Timestamp("2021-07-01")]
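All of the test_split.py hunks above drop the same stray blank line after the `def` line; the API under test is unchanged. For readers skimming the diff, a typical call looks like this, assembled from the calls visible in the tests (the import path is an assumption):

```python
import pandas as pd

# Assumed import path; the tests themselves import split_data and SplitMethod.
from nowcasting_dataset.dataset.split.split import SplitMethod, split_data

datetimes = pd.date_range("2021-01-01", "2021-02-01", freq="5T")
train, validation, test = split_data(datetimes=datetimes, method=SplitMethod.DAY)

# Whole days land in exactly one of the three sets, so they never overlap.
assert set(train).isdisjoint(test)
```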
diff --git a/tests/filesystem/test_aws_gcp.py b/tests/filesystem/test_aws_gcp.py
index 2a30c3c5..58531633 100644
--- a/tests/filesystem/test_aws_gcp.py
+++ b/tests/filesystem/test_aws_gcp.py
@@ -17,7 +17,6 @@
 @pytest.mark.skip("CI does not have access to AWS ro GCP")
 @pytest.mark.parametrize("prefix", ["s3", "gs"])
 def test_aws_upload_and_delete_local_files(prefix):
-
     file1 = "test_file1.txt"
     file2 = "test_dir/test_file2.txt"
@@ -49,7 +48,6 @@
 @pytest.mark.skip("CI does not have access to AWS ro GCP")
 @pytest.mark.parametrize("prefix", ["s3", "gs"])
 def test_upload_one_file(prefix):
-
     file1 = "test_file1.txt"
     now = datetime.now().isoformat()
     dst_path = f"{prefix}://solar-pv-nowcasting-data/temp_dir_for_unit_tests/{now}"
@@ -75,7 +73,6 @@
 @pytest.mark.skip("CI does not have access to AWS ro GCP")
 @pytest.mark.parametrize("prefix", ["s3", "gs"])
 def test_download_file(prefix):
-
     file1 = "test_file1.txt"
     now = datetime.now().isoformat()
     dst_path = f"{prefix}://solar-pv-nowcasting-data/temp_dir_for_unit_tests/{now}"
diff --git a/tests/filesystem/test_local.py b/tests/filesystem/test_local.py
index 92bd2168..4126e1ba 100644
--- a/tests/filesystem/test_local.py
+++ b/tests/filesystem/test_local.py
@@ -19,7 +19,6 @@
 
 
 def test_check_file_exists():  # noqa: D103
-
     file1 = "test_file1.txt"
     file2 = "test_dir/test_file2.txt"
@@ -42,7 +41,6 @@ def test_check_file_exists():  # noqa: D103
 
 
 def test_rename_file():  # noqa: D103
-
     file1 = "test_file1.txt"
     file2 = "test_file2.txt"
@@ -91,7 +89,6 @@ def test_get_maximum_batch_id():
 
 
 def test_check_file_exists_wild_card():  # noqa: D103
-
     file1 = "test_file1.txt"
     file2 = "test_dir/test_file2.txt"
@@ -114,7 +111,6 @@ def test_check_file_exists_wild_card():  # noqa: D103
 
 
 def test_makedirs():  # noqa: D103
-
     folder_1 = "test_dir_1"
     folder_2 = "test_dir_2"
@@ -137,7 +133,6 @@ def test_makedirs():  # noqa: D103
 
 
 def test_delete_local_files():  # noqa: D103
-
     file1 = "test_file1.txt"
     folder1 = "test_dir"
     file2 = "test_dir/test_file2.txt"
@@ -167,7 +162,6 @@ def test_delete_local_files():  # noqa: D103
 
 
 def test_delete_local_files_and_folder():  # noqa: D103
-
     file1 = "test_file1.txt"
     folder1 = "test_dir"
     file2 = "test_dir/test_file2.txt"
@@ -197,7 +191,6 @@ def test_delete_local_files_and_folder():  # noqa: D103
 
 
 def test_download():  # noqa: D103
-
     file1 = "test_file1.txt"
     file2 = "test_dir/test_file2.txt"
     file3 = "test_file3.txt"
@@ -234,7 +227,6 @@ def test_download():  # noqa: D103
 
 
 def test_upload():  # noqa: D103
-
     file1 = "test_file1.txt"
     file2 = "test_dir/test_file2.txt"
     file3 = "test_file3.txt"
diff --git a/tests/manager/test_manager.py b/tests/manager/test_manager.py
index c84a3e0b..e6ddaa22 100644
--- a/tests/manager/test_manager.py
+++ b/tests/manager/test_manager.py
@@ -22,7 +22,6 @@ def test_configure_loggers(test_configuration_filename):
     manager.load_yaml_configuration(filename=test_configuration_filename)
 
     with tempfile.TemporaryDirectory() as dst_path:
-
         filepath = f"{dst_path}/extra_temp_folder"
 
         manager.config.output_data.filepath = Path(filepath)
@@ -30,7 +29,6 @@
 
 
 def test_sample_spatial_and_temporal_locations_for_examples(gsp, sun):  # noqa: D103
-
     manager = Manager()
     manager.data_sources = {"gsp": gsp, "sun": sun}
     manager.data_source_which_defines_geospatial_locations = gsp
@@ -54,7 +52,6 @@ def test_initialize_data_source_with_loggers(test_configuration_filename):
     manager.load_yaml_configuration(filename=test_configuration_filename)
 
     with tempfile.TemporaryDirectory() as dst_path:
-
         manager.config.output_data.filepath = Path(dst_path)
         manager.configure_loggers(log_level="DEBUG")
         manager.initialize_data_sources()
@@ -103,7 +100,6 @@ def test_create_files_specifying_spatial_and_temporal_locations_of_each_example_
     batch_size = manager.config.process.batch_size
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
@@ -141,7 +137,6 @@
     manager.initialize_data_sources()
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
@@ -163,7 +158,6 @@ def test_error_create_files_specifying_spatial_and_temporal_locations_of_each_ex
     manager.initialize_data_sources()
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
         with pytest.raises(RuntimeError):
@@ -177,7 +171,6 @@ def test_batches(test_configuration_filename_no_hrv, sat, gsp):
     manager.load_yaml_configuration(filename=test_configuration_filename_no_hrv)
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         # set local temp path, and dst path
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
@@ -222,7 +215,6 @@ def test_save_config(test_configuration_filename):
     manager.load_yaml_configuration(filename=test_configuration_filename)
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         # set local temp path, and dst path
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
@@ -265,7 +257,6 @@ def test_run(test_configuration_filename_no_hrv):
     manager.initialize_data_sources()
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
@@ -281,7 +272,6 @@ def test_run_overwrite_batches_false(test_configuration_filename_no_hrv):
     manager.initialize_data_sources()
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
diff --git a/tests/manager/test_manager_live.py b/tests/manager/test_manager_live.py
index 7328a987..8da37cfb 100644
--- a/tests/manager/test_manager_live.py
+++ b/tests/manager/test_manager_live.py
@@ -16,7 +16,6 @@
 def test_sample_spatial_and_temporal_locations_for_examples(
     test_configuration_filename, gsp, sun
 ):  # noqa: D103
-
     manager = ManagerLive()
     manager.load_yaml_configuration(filename=test_configuration_filename)
     manager.data_sources = {"gsp": gsp, "sun": sun}
@@ -46,7 +45,6 @@ def test_create_files_specifying_spatial_and_temporal_locations_of_each_example(
     batch_size = manager.config.process.batch_size
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
@@ -79,7 +77,6 @@ def test_create_files_locations_of_each_example_reduced(
     batch_size = manager.config.process.batch_size
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
@@ -109,7 +106,6 @@ def test_batches(test_configuration_filename, sat, gsp):
     gsp.forecast_minutes = 0
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         # set local temp path, and dst path
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
@@ -139,7 +135,6 @@ def test_batches_not_async(test_configuration_filename, sat, gsp):
     manager.load_yaml_configuration(filename=test_configuration_filename)
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         # set local temp path, and dst path
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)
@@ -199,7 +194,6 @@ def test_run_just_gsp(test_configuration_filename, gsp_yields_and_systems):
     manager.initialize_data_sources(names_of_selected_data_sources=["gsp"])
 
     with tempfile.TemporaryDirectory() as local_temp_path, tempfile.TemporaryDirectory() as dst_path:  # noqa 101
-
         manager.config.output_data.filepath = Path(dst_path)
         manager.local_temp_path = Path(local_temp_path)