
Commit 7693349

Merge branch 'issue/get-india-data' of https://github.com/openclimatefix/pvoutput into issue/get-india-data
rachel-labri-tipton committed Nov 7, 2023
2 parents d44b86c + ec8cdbf commit 7693349
Showing 2 changed files with 23 additions and 65 deletions.
87 changes: 22 additions & 65 deletions pvoutput/pvoutput.py
@@ -161,9 +161,7 @@ def search(
         if lat is not None and lon is not None:
             api_params["ll"] = "{:f},{:f}".format(lat, lon)
 
-        pv_systems_text = self._api_query(
-            service="search", api_params=api_params, **kwargs
-        )
+        pv_systems_text = self._api_query(service="search", api_params=api_params, **kwargs)
 
         pv_systems = pd.read_csv(
             StringIO(pv_systems_text),
@@ -316,9 +314,7 @@ def get_system_status(
             temperature_C,
             voltage,
         """
-        _LOG.info(
-            f"system_ids {pv_system_ids}: Requesting batch system status for %s", date
-        )
+        _LOG.info(f"system_ids {pv_system_ids}: Requesting batch system status for %s", date)
         date = date_to_pvoutput_str(date)
         _check_date(date)
 
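Note the mixed style in the reflowed call above: the pv_system_ids are interpolated eagerly by the f-string, while the %s placeholder is left for the logging module to fill in lazily. A standalone illustration of why both placeholders end up rendered:

import logging

logging.basicConfig(level=logging.INFO)
_LOG = logging.getLogger("pvoutput")

pv_system_ids, date = [123, 456], "2023-11-07"
# The f-string is rendered immediately; logging substitutes %s when the record is emitted.
_LOG.info(f"system_ids {pv_system_ids}: Requesting batch system status for %s", date)
# INFO:pvoutput:system_ids [123, 456]: Requesting batch system status for 2023-11-07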
@@ -336,9 +332,7 @@ def get_system_status(
                 )
 
             except NoStatusFound:
-                _LOG.info(
-                    f"system_id {all_pv_system_id}: No status found for date %s", date
-                )
+                _LOG.info(f"system_id {all_pv_system_id}: No status found for date %s", date)
                 pv_system_status_text = "no status found"
 
         # each pv system is on a new line
@@ -443,8 +437,7 @@ def get_batch_status(
                 time.sleep(1)
             else:
                 _print_and_log(
-                    "Call get_batch_status again in a minute to see if"
-                    " results are ready."
+                    "Call get_batch_status again in a minute to see if" " results are ready."
                 )
         else:
             break
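The message above reflects that PVOutput prepares batch results asynchronously, so client code ends up polling. A minimal, generic sketch of that polling pattern (the names fetch and ResultsNotReady and the one-minute interval are illustrative, not part of this diff):

import time

class ResultsNotReady(Exception):
    """Illustrative stand-in for 'no results yet' from the server."""

def poll(fetch, interval_secs=60, max_attempts=10):
    # Keep calling `fetch` until it returns, sleeping between attempts.
    for _ in range(max_attempts):
        try:
            return fetch()
        except ResultsNotReady:
            time.sleep(interval_secs)
    raise TimeoutError("Results still not ready after polling.")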
@@ -571,9 +564,7 @@ def get_metadata_for_country(
             **kwargs,
         )
 
-        _LOG.debug(
-            f"getting metadata for {country_code} for {start_id_range} to {end_id_range}"
-        )
+        _LOG.debug(f"getting metadata for {country_code} for {start_id_range} to {end_id_range}")
         print(
             f"Getting metadata for country code: {country_code} for {start_id_range} to {end_id_range}"
         )
@@ -684,12 +675,8 @@ def get_statistic(
         else:
             pv_metadata.index = [pv_system_id]
 
-        pv_metadata["query_date_from"] = (
-            pd.Timestamp(date_from) if date_from else pd.NaT
-        )
-        pv_metadata["query_date_to"] = (
-            pd.Timestamp(date_to) if date_to else pd.Timestamp.now()
-        )
+        pv_metadata["query_date_from"] = pd.Timestamp(date_from) if date_from else pd.NaT
+        pv_metadata["query_date_to"] = pd.Timestamp(date_to) if date_to else pd.Timestamp.now()
         return pv_metadata
 
     def _get_statistic_with_cache(
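The two query-window columns default to pd.NaT for a missing start and to "now" for a missing end. A quick standalone illustration of that fallback pattern:

import pandas as pd

def query_window(date_from=None, date_to=None):
    # Missing start -> NaT (pandas' "not a time"); missing end -> current time.
    start = pd.Timestamp(date_from) if date_from else pd.NaT
    end = pd.Timestamp(date_to) if date_to else pd.Timestamp.now()
    return start, end

print(query_window())              # (NaT, Timestamp('2023-11-07 ...'))
print(query_window("2023-01-01"))  # (Timestamp('2023-01-01 00:00:00'), ...)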
@@ -736,9 +723,7 @@ def _get_fresh_statistic():
             return stats
 
         try:
-            stats = pd.read_hdf(
-                store_filename, key="statistics", where="index=pv_system_id"
-            )
+            stats = pd.read_hdf(store_filename, key="statistics", where="index=pv_system_id")
         except (FileNotFoundError, KeyError):
             return _get_fresh_statistic()
 
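The surrounding cache logic reads per-system statistics back from an HDFStore, using a where clause so only the matching row is loaded, and falls back to a fresh API call if the file or key is missing. A self-contained sketch of that read-or-refresh pattern (store_filename and fetch_fresh are placeholders):

import pandas as pd

def cached_statistics(store_filename, pv_system_id, fetch_fresh):
    # `where` pushes the row filter down into PyTables instead of loading the whole table.
    try:
        stats = pd.read_hdf(store_filename, key="statistics", where="index=pv_system_id")
    except (FileNotFoundError, KeyError):
        return fetch_fresh()
    return fetch_fresh() if stats.empty else stats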
@@ -804,9 +789,7 @@ def download_multiple_systems_to_disk(
         n = len(system_ids)
         for i, pv_system_id in enumerate(system_ids):
             _LOG.info("**********************")
-            msg = "system_id {:d}: {:d} of {:d} ({:%})".format(
-                pv_system_id, i + 1, n, (i + 1) / n
-            )
+            msg = "system_id {:d}: {:d} of {:d} ({:%})".format(pv_system_id, i + 1, n, (i + 1) / n)
             _LOG.info(msg)
             print("\r", msg, end="", flush=True)
 
@@ -940,13 +923,9 @@ def _filter_date_range(
             _LOG.info("system_id %d: Stats say there is no data!", system_id)
             return []
 
-        timeseries_date_range = DateRange(
-            stats["actual_date_from"], stats["actual_date_to"]
-        )
+        timeseries_date_range = DateRange(stats["actual_date_from"], stats["actual_date_to"])
 
-        data_availability = stats["num_outputs"] / (
-            timeseries_date_range.total_days() + 1
-        )
+        data_availability = stats["num_outputs"] / (timeseries_date_range.total_days() + 1)
 
         if data_availability < min_data_availability:
             _LOG.info(
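data_availability here is the fraction of days in the system's active range that actually have an output; the +1 makes the range inclusive. A worked check of the formula:

from datetime import date

# A system active 2020-01-01 .. 2020-01-10 (10 days inclusive) with 8 daily outputs:
num_outputs = 8
total_days = (date(2020, 1, 10) - date(2020, 1, 1)).days  # 9
data_availability = num_outputs / (total_days + 1)
print(data_availability)  # 0.8 -> kept only if min_data_availability <= 0.8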
@@ -1088,9 +1067,7 @@ def _api_query(
             RateLimitExceeded
         """
         get_response_func = (
-            self._get_data_service_response
-            if use_data_service
-            else self._get_api_response
+            self._get_data_service_response if use_data_service else self._get_api_response
         )
 
         try:
@@ -1102,16 +1079,13 @@ def _api_query(
         try:
             return self._process_api_response(response)
         except RateLimitExceeded:
-            msg = (
-                "PVOutput.org API rate limit exceeded!"
-                " Rate limit will be reset at {}".format(self.rate_limit_reset_time)
+            msg = "PVOutput.org API rate limit exceeded!" " Rate limit will be reset at {}".format(
+                self.rate_limit_reset_time
             )
             _print_and_log(msg)
             if wait_if_rate_limit_exceeded:
                 self.wait_for_rate_limit_reset()
-                return self._api_query(
-                    service, api_params, wait_if_rate_limit_exceeded=False
-                )
+                return self._api_query(service, api_params, wait_if_rate_limit_exceeded=False)
 
             raise RateLimitExceeded(response, msg)
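On RateLimitExceeded the client either sleeps until the reset time and retries exactly once (passing wait_if_rate_limit_exceeded=False so the retry cannot loop) or re-raises. A hedged usage sketch; the constructor arguments, the search parameter, and the exception's import path are assumptions, not confirmed by this diff:

from pvoutput import PVOutput  # assumed import
from pvoutput.exceptions import RateLimitExceeded  # assumed import path

pv = PVOutput(api_key="...", system_id="...")  # assumed constructor
try:
    text = pv._api_query(
        service="search", api_params={"q": "india"}, wait_if_rate_limit_exceeded=True
    )
except RateLimitExceeded:
    # Raised when the original call and its single retry both hit the limit.
    pass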

@@ -1135,9 +1109,7 @@ def _get_api_response(self, service: str, api_params: Dict) -> requests.Response
 
         return _get_response(api_url, api_params, headers)
 
-    def _get_data_service_response(
-        self, service: str, api_params: Dict
-    ) -> requests.Response:
+    def _get_data_service_response(self, service: str, api_params: Dict) -> requests.Response:
         """
         Get the data service response from pvoutput.org
@@ -1169,9 +1141,7 @@ def _set_rate_limit_params(self, headers):
                 header_value = int(headers[header_key])
                 setattr(self, param_name, header_value)
 
-        self.rate_limit_reset_time = pd.Timestamp.utcfromtimestamp(
-            self.rate_limit_reset_time
-        )
+        self.rate_limit_reset_time = pd.Timestamp.utcfromtimestamp(self.rate_limit_reset_time)
         if self.rate_limit_reset_time.tzinfo is None:
             self.rate_limit_reset_time = self.rate_limit_reset_time.tz_localize("utc")
         else:
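The tzinfo check exists because pd.Timestamp.utcfromtimestamp has returned a naive timestamp in older pandas releases and a UTC-aware one in newer ones; the branch normalizes both cases to aware-UTC. An illustration (the epoch value is arbitrary):

import pandas as pd

ts = pd.Timestamp.utcfromtimestamp(1699351200)  # reset time in epoch seconds from the API header
if ts.tzinfo is None:
    ts = ts.tz_localize("utc")  # naive -> attach UTC
else:
    ts = ts.tz_convert("utc")   # already aware -> convert to UTC
print(ts)  # 2023-11-07 10:00:00+00:00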
@@ -1248,9 +1218,7 @@ def wait_for_rate_limit_reset(self, do_sleeping: bool = True) -> int:
         # retry_time_local = retry_time_utc.tz_convert(tz=datetime.now(tzlocal()).tzname())
         retry_time_local = retry_time_utc
         _print_and_log(
-            "Waiting {:.0f} seconds. Will retry at {} UTC".format(
-                secs_to_wait, retry_time_local
-            )
+            "Waiting {:.0f} seconds. Will retry at {} UTC".format(secs_to_wait, retry_time_local)
         )
         if do_sleeping:
             time.sleep(secs_to_wait)
@@ -1339,25 +1307,14 @@ def _append_missing_date_range(
         missing_end_date,
     )
     with pd.HDFStore(output_filename, mode="a", complevel=9) as store:
-        store.append(
-            key="missing_dates", value=new_missing_date_range, data_columns=True
-        )
+        store.append(key="missing_dates", value=new_missing_date_range, data_columns=True)
 
 
-def _record_gaps(
-    output_filename, pv_system_id, date_to, timeseries, datetime_of_api_request
-):
+def _record_gaps(output_filename, pv_system_id, date_to, timeseries, datetime_of_api_request):
     dates_of_data = (
-        timeseries["instantaneous_power_gen_W"]
-        .dropna()
-        .resample("D")
-        .mean()
-        .dropna()
-        .index.date
+        timeseries["instantaneous_power_gen_W"].dropna().resample("D").mean().dropna().index.date
     )
-    dates_requested = pd.date_range(
-        date_to - timedelta(days=365), date_to, freq="D"
-    ).date
+    dates_requested = pd.date_range(date_to - timedelta(days=365), date_to, freq="D").date
     missing_dates = set(dates_requested) - set(dates_of_data)
     missing_date_ranges = _convert_consecutive_dates_to_date_ranges(list(missing_dates))
     _LOG.info(
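_record_gaps reduces the timeseries to the set of dates that actually contain data (resample to daily means, drop empty days) and subtracts that from the set of dates requested. The same technique on synthetic data:

import pandas as pd

# Synthetic 5-minute power readings with one whole day missing.
index = pd.date_range("2023-11-01", "2023-11-04", freq="5min")
power = pd.Series(1.0, index=index)
power.loc["2023-11-02"] = float("nan")  # simulate a day with no data

dates_of_data = power.dropna().resample("D").mean().dropna().index.date
dates_requested = pd.date_range("2023-11-01", "2023-11-04", freq="D").date
missing_dates = set(dates_requested) - set(dates_of_data)
print(sorted(missing_dates))  # [datetime.date(2023, 11, 2)]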
1 change: 1 addition & 0 deletions scripts/fetch_pv_timeseries.py
@@ -23,6 +23,7 @@
 
 import datetime as dt
 import logging
+import os
 import pathlib
 import sys
 import os
