diff --git a/pvoutput/mapscraper.py b/pvoutput/mapscraper.py
index c64f1f1..9c0c8d1 100644
--- a/pvoutput/mapscraper.py
+++ b/pvoutput/mapscraper.py
@@ -350,6 +350,7 @@ def clean_soup(soup):
     """Function to clean scraped soup object.
 
     Note that the downloaded soup could change over time.
+
     Args:
         soup: bs4.BeautifulSoup
 
diff --git a/pvoutput/pvoutput.py b/pvoutput/pvoutput.py
index 0d3c2fd..8fdf49d 100644
--- a/pvoutput/pvoutput.py
+++ b/pvoutput/pvoutput.py
@@ -161,9 +161,7 @@ def search(
         if lat is not None and lon is not None:
             api_params["ll"] = "{:f},{:f}".format(lat, lon)
 
-        pv_systems_text = self._api_query(
-            service="search", api_params=api_params, **kwargs
-        )
+        pv_systems_text = self._api_query(service="search", api_params=api_params, **kwargs)
 
         pv_systems = pd.read_csv(
             StringIO(pv_systems_text),
@@ -316,9 +314,7 @@ def get_system_status(
             temperature_C,
             voltage,
         """
-        _LOG.info(
-            f"system_ids {pv_system_ids}: Requesting batch system status for %s", date
-        )
+        _LOG.info(f"system_ids {pv_system_ids}: Requesting batch system status for %s", date)
         date = date_to_pvoutput_str(date)
         _check_date(date)
 
@@ -336,9 +332,7 @@ def get_system_status(
                 )
 
         except NoStatusFound:
-            _LOG.info(
-                f"system_id {all_pv_system_id}: No status found for date %s", date
-            )
+            _LOG.info(f"system_id {all_pv_system_id}: No status found for date %s", date)
             pv_system_status_text = "no status found"
 
         # each pv system is on a new line
@@ -443,8 +437,7 @@ def get_batch_status(
                 time.sleep(1)
             else:
                 _print_and_log(
-                    "Call get_batch_status again in a minute to see if"
-                    " results are ready."
+                    "Call get_batch_status again in a minute to see if" " results are ready."
                 )
         else:
             break
@@ -571,12 +564,8 @@ def get_metadata_for_country(
             **kwargs,
         )
 
-        _LOG.debug(
-            f"getting metadata for {country_code} for {start_id_range} to {end_id_range}"
-        )
-        print(
-            f"getting metadata for {country_code} for {start_id_range} to {end_id_range}"
-        )
+        _LOG.debug(f"getting metadata for {country_code} for {start_id_range} to {end_id_range}")
+        print(f"getting metadata for {country_code} for {start_id_range} to {end_id_range}")
 
         pv_metadata_for_country = pd.read_csv(
             StringIO(pv_metadata_text),
@@ -688,12 +677,8 @@ def get_statistic(
         else:
             pv_metadata.index = [pv_system_id]
 
-        pv_metadata["query_date_from"] = (
-            pd.Timestamp(date_from) if date_from else pd.NaT
-        )
-        pv_metadata["query_date_to"] = (
-            pd.Timestamp(date_to) if date_to else pd.Timestamp.now()
-        )
+        pv_metadata["query_date_from"] = pd.Timestamp(date_from) if date_from else pd.NaT
+        pv_metadata["query_date_to"] = pd.Timestamp(date_to) if date_to else pd.Timestamp.now()
         return pv_metadata
 
     def _get_statistic_with_cache(
@@ -740,9 +725,7 @@ def _get_fresh_statistic():
             return stats
 
         try:
-            stats = pd.read_hdf(
-                store_filename, key="statistics", where="index=pv_system_id"
-            )
+            stats = pd.read_hdf(store_filename, key="statistics", where="index=pv_system_id")
         except (FileNotFoundError, KeyError):
             return _get_fresh_statistic()
 
@@ -808,9 +791,7 @@ def download_multiple_systems_to_disk(
         n = len(system_ids)
         for i, pv_system_id in enumerate(system_ids):
             _LOG.info("**********************")
-            msg = "system_id {:d}: {:d} of {:d} ({:%})".format(
-                pv_system_id, i + 1, n, (i + 1) / n
-            )
+            msg = "system_id {:d}: {:d} of {:d} ({:%})".format(pv_system_id, i + 1, n, (i + 1) / n)
             _LOG.info(msg)
             print("\r", msg, end="", flush=True)
 
@@ -944,13 +925,9 @@ def _filter_date_range(
             _LOG.info("system_id %d: Stats say there is no data!", system_id)
             return []
 
-        timeseries_date_range = DateRange(
-            stats["actual_date_from"], stats["actual_date_to"]
-        )
+        timeseries_date_range = DateRange(stats["actual_date_from"], stats["actual_date_to"])
 
-        data_availability = stats["num_outputs"] / (
-            timeseries_date_range.total_days() + 1
-        )
+        data_availability = stats["num_outputs"] / (timeseries_date_range.total_days() + 1)
 
         if data_availability < min_data_availability:
             _LOG.info(
@@ -1091,9 +1068,7 @@ def _api_query(
             RateLimitExceeded
         """
         get_response_func = (
-            self._get_data_service_response
-            if use_data_service
-            else self._get_api_response
+            self._get_data_service_response if use_data_service else self._get_api_response
         )
 
         try:
@@ -1105,16 +1080,13 @@
         try:
             return self._process_api_response(response)
         except RateLimitExceeded:
-            msg = (
-                "PVOutput.org API rate limit exceeded!"
-                " Rate limit will be reset at {}".format(self.rate_limit_reset_time)
+            msg = "PVOutput.org API rate limit exceeded!" " Rate limit will be reset at {}".format(
+                self.rate_limit_reset_time
             )
             _print_and_log(msg)
             if wait_if_rate_limit_exceeded:
                 self.wait_for_rate_limit_reset()
-                return self._api_query(
-                    service, api_params, wait_if_rate_limit_exceeded=False
-                )
+                return self._api_query(service, api_params, wait_if_rate_limit_exceeded=False)
 
             raise RateLimitExceeded(response, msg)
 
@@ -1138,9 +1110,7 @@ def _get_api_response(self, service: str, api_params: Dict) -> requests.Response
 
         return _get_response(api_url, api_params, headers)
 
-    def _get_data_service_response(
-        self, service: str, api_params: Dict
-    ) -> requests.Response:
+    def _get_data_service_response(self, service: str, api_params: Dict) -> requests.Response:
         """
         Get the data service response from pvoutput.org
 
@@ -1172,9 +1142,7 @@ def _set_rate_limit_params(self, headers):
                 header_value = int(headers[header_key])
                 setattr(self, param_name, header_value)
 
-        self.rate_limit_reset_time = pd.Timestamp.utcfromtimestamp(
-            self.rate_limit_reset_time
-        )
+        self.rate_limit_reset_time = pd.Timestamp.utcfromtimestamp(self.rate_limit_reset_time)
         self.rate_limit_reset_time = self.rate_limit_reset_time.tz_convert("utc")
 
         _LOG.debug("%s", self.rate_limit_info())
@@ -1248,9 +1216,7 @@ def wait_for_rate_limit_reset(self, do_sleeping: bool = True) -> int:
         # retry_time_local = retry_time_utc.tz_convert(tz=datetime.now(tzlocal()).tzname())
         retry_time_local = retry_time_utc
         _print_and_log(
-            "Waiting {:.0f} seconds. Will retry at {} UTC".format(
-                secs_to_wait, retry_time_local
-            )
+            "Waiting {:.0f} seconds. Will retry at {} UTC".format(secs_to_wait, retry_time_local)
         )
         if do_sleeping:
             time.sleep(secs_to_wait)
@@ -1339,25 +1305,14 @@ def _append_missing_date_range(
         missing_end_date,
     )
     with pd.HDFStore(output_filename, mode="a", complevel=9) as store:
-        store.append(
-            key="missing_dates", value=new_missing_date_range, data_columns=True
-        )
+        store.append(key="missing_dates", value=new_missing_date_range, data_columns=True)
 
 
-def _record_gaps(
-    output_filename, pv_system_id, date_to, timeseries, datetime_of_api_request
-):
+def _record_gaps(output_filename, pv_system_id, date_to, timeseries, datetime_of_api_request):
     dates_of_data = (
-        timeseries["instantaneous_power_gen_W"]
-        .dropna()
-        .resample("D")
-        .mean()
-        .dropna()
-        .index.date
+        timeseries["instantaneous_power_gen_W"].dropna().resample("D").mean().dropna().index.date
     )
-    dates_requested = pd.date_range(
-        date_to - timedelta(days=365), date_to, freq="D"
-    ).date
+    dates_requested = pd.date_range(date_to - timedelta(days=365), date_to, freq="D").date
     missing_dates = set(dates_requested) - set(dates_of_data)
     missing_date_ranges = _convert_consecutive_dates_to_date_ranges(list(missing_dates))
     _LOG.info(
diff --git a/scripts/fetch_pv_timeseries.py b/scripts/fetch_pv_timeseries.py
index ac7d4a6..39b6eda 100644
--- a/scripts/fetch_pv_timeseries.py
+++ b/scripts/fetch_pv_timeseries.py
@@ -21,14 +21,15 @@ or create and use a ~/.pvoutput.yml file as described in the
 PVOutput library documentation
 """
 
-from pvoutput import *
-
-import click as cl
 import datetime as dt
+import logging
+import pathlib
 import sys
+
+import click as cl
 import pandas as pd
-import pathlib
-import logging
+
+from pvoutput import *
 
 
 @cl.command()