From bd6e2aab970fa321d3d723d9ca911762b47dc472 Mon Sep 17 00:00:00 2001 From: jgmill Date: Wed, 5 Jun 2019 00:20:51 +0200 Subject: [PATCH] Correct Error in German wind generation data --- checksums.txt | 22 +++++++++++----------- input/sources.yml | 28 +++++++++++++++++++++------- processing.ipynb | 19 ++++--------------- timeseries_scripts/read.py | 21 ++++++++++----------- 4 files changed, 46 insertions(+), 44 deletions(-) diff --git a/checksums.txt b/checksums.txt index b52028f..b93d942 100644 --- a/checksums.txt +++ b/checksums.txt @@ -1,11 +1,11 @@ -time_series.sqlite,196c91e89a2abc0944de8aca1b00f790ffd20308508e8493b141ecde9a0322f1 -time_series.xlsx,903696ef43fd90bfe9304ad29f730d362424371cc2b0c0768baada30195aa5b8 -time_series_15min_multiindex.csv,ba6d20cebf88e7d18164c72bd25fbc2dba6a518774bf4d4888f42f402e8c5aaa -time_series_15min_singleindex.csv,82ed0a991312981aa6e4787d23ac71e82ad2791a3d9a7de5f098a0f178b03771 -time_series_15min_stacked.csv,06a63afdf42d45490e6c9765bcaf10949739df73cf719a8a78a5a499347522a8 -time_series_30min_multiindex.csv,3b32c52371ec44e60d49dc2543ea15cbf45dd49eb1a85f891b3ad0a53356fa2d -time_series_30min_singleindex.csv,622f610eddb0ad082be298383443a023483827bda4d9014ebfdc993fe2314960 -time_series_30min_stacked.csv,86d843b2ea7a07e65ee0726d71438f29cde52a95ef1e3dbf504b906a7e0efb85 -time_series_60min_multiindex.csv,8f29e5dc1e6fa257addc57e44aa5ca5bb739de840ad1d864312e78dcebf6b938 -time_series_60min_singleindex.csv,02befabf1a7dbae2ab805942ff4338e67a8ec515ca085d0a7c0b0a5a9eb3ce6e -time_series_60min_stacked.csv,d778b58b3e90b1de5f0a1e3bdaec54e8c30dcd0dc6501bd873739970d60a2230 +time_series.sqlite,4cbe3f00c9ab945df92b475cb89e84edd1c03e4ab2de9e13ce10d7428d5ff7fe +time_series.xlsx,f4fed5ba62884f3013fc81ddf28cea942bbf5227ec9f04fd8b6068e99cb5a4fa +time_series_15min_multiindex.csv,1406e4a4551d3e3763338b670ef81448a7b00343eda36d7bcb3bd875ba773cde +time_series_15min_singleindex.csv,9a0d54da4b2898690f6b2917f6feb1b9ba630d9d4e53fd514e29344f8235c107 +time_series_15min_stacked.csv,b21faab1f280a3d65e88697de33fd94384fad47b61a57099736c4119ff932c0b +time_series_30min_multiindex.csv,3b734a43b338e524e0eb0c1f89cf190cddec3102883b8b7c657c71749b6c8ed9 +time_series_30min_singleindex.csv,a9395820af21c9f50582ff031aaf2eadd317749afdd86439d0c627cf982f57f3 +time_series_30min_stacked.csv,d8e31a0fae2962710469f30584ccd4a504a83dca6896d0f6571eafd729db3e8e +time_series_60min_multiindex.csv,38087091a891bea7a65b51b92413549773eebd8f693f066d0ab54de16d20150c +time_series_60min_singleindex.csv,659fe789af2672aabe989aebc8c5c21052a1a96e4da70b0fc941910a1cd4de9d +time_series_60min_stacked.csv,90e2e450e60d9451b551cf9c4fa254a0818642336f9fe3b568dbedb496a714f6 diff --git a/input/sources.yml b/input/sources.yml index cbac907..7e141d9 100644 --- a/input/sources.yml +++ b/input/sources.yml @@ -265,6 +265,13 @@ ENTSO-E Power Statistics: # Automatic download not implemented unit: MW variable: wind web: https://www.50hertz.com/en/Transparency/GridData/Windpower + wind_onshore: + attribute: generation_actual + region: DE_50hertz + source: 50Hertz + unit: MW + variable: wind_onshore + web: https://www.50hertz.com/en/Transparency/GridData/Windpower wind generation_forecast pre-offshore: url_template: http://ws.50hertz.com/web02/api/WindPowerForecast/DownloadFile url_params_template: @@ -298,12 +305,12 @@ ENTSO-E Power Statistics: # Automatic download not implemented - 15min web: https://www.50hertz.com/en/Transparency/GridData/Windpower colmap: - Offshore MW: + MW: attribute: generation_actual region: DE_50hertz source: 50Hertz unit: MW - variable: wind_offshore + variable: wind web: https://www.50hertz.com/en/Transparency/GridData/Windpower Onshore MW: attribute: generation_actual @@ -312,12 +319,12 @@ ENTSO-E Power Statistics: # Automatic download not implemented unit: MW variable: wind_onshore web: https://www.50hertz.com/en/Transparency/GridData/Windpower - wind: + Offshore MW: attribute: generation_actual region: DE_50hertz source: 50Hertz unit: MW - variable: wind + variable: wind_offshore web: https://www.50hertz.com/en/Transparency/GridData/Windpower wind generation_forecast with-offshore: url_template: http://ws.50hertz.com/web02/api/WindPowerForecast/DownloadFile @@ -332,20 +339,27 @@ ENTSO-E Power Statistics: # Automatic download not implemented - 15min web: https://www.50hertz.com/en/Transparency/GridData/Windpower colmap: - Offshore MW: + MW: attribute: generation_forecast region: DE_50hertz source: 50Hertz unit: MW - variable: wind_offshore + variable: wind web: https://www.50hertz.com/en/Transparency/GridData/Windpower Onshore MW: - attribute: generation_actual + attribute: generation_forecast region: DE_50hertz source: 50Hertz unit: MW variable: wind_onshore web: https://www.50hertz.com/en/Transparency/GridData/Windpower + Offshore MW: + attribute: generation_actual + region: DE_50hertz + source: 50Hertz + unit: MW + variable: wind_offshore + web: https://www.50hertz.com/en/Transparency/GridData/Windpower solar generation_actual : url_template: http://ws.50hertz.com/web02/api/PhotovoltaicActual/DownloadFile url_params_template: diff --git a/processing.ipynb b/processing.ipynb index b3974ca..d9a9322 100644 --- a/processing.ipynb +++ b/processing.ipynb @@ -78,8 +78,8 @@ }, "outputs": [], "source": [ - "version = '2019-05-15'\n", - "changes = '''Update with 2018 data'''" + "version = '2019-06-05'\n", + "changes = '''Correct Error in German wind generation data'''" ] }, { @@ -426,8 +426,8 @@ "outputs": [], "source": [ "subset = yaml.load('''\n", - "OPSD:\n", - "- capacity\n", + "50Hertz:\n", + "- wind generation_actual with-offshore\n", "''')\n", "subset = None # to include all sources\n", "\n", @@ -1013,17 +1013,6 @@ "Execute this to see an example of where the data has been patched." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "data_sets['60min'][data_sets['60min']['interpolated_values'].notnull()].tail()" - ] - }, { "cell_type": "markdown", "metadata": { diff --git a/timeseries_scripts/read.py b/timeseries_scripts/read.py index 755ab53..bdc0145 100644 --- a/timeseries_scripts/read.py +++ b/timeseries_scripts/read.py @@ -102,7 +102,7 @@ def read_entso_e_transparency( if dataset_name in ['Actual Total Load', 'Day-ahead Total Load Forecast']: # Zero load is highly unlikely. Such occurences are actually NaNs - df['load'].replace(0, np.nan, inplace=True) + df_raw['load'].replace(0, np.nan, inplace=True) # keep only entries for selected geographic entities as specified in # areas.csv @@ -411,8 +411,6 @@ def read_hertz(filepath, dataset_name): # Wind onshore if dataset_name == 'wind generation_actual pre-offshore': df['wind_onshore'] = df['MW'] - elif dataset_name == 'wind generation_actual with-offshore': - df['wind'] = df['Onshore MW'].add(df['Onshore MW']) # Until 2006, and in 2015 (except for wind_generation_pre-offshore), # during the fall dst-transistion, only the @@ -1167,13 +1165,6 @@ def read_dataset( logger.info('%s | %s | empty DataFrame: ', files[0], res_key) continue - # delete zeros before first/after last non-zero value in each column - for col_name, col in df.iteritems(): - nan_for_zero = col.replace(0, np.nan) - slicer = ((col.index <= nan_for_zero.first_valid_index()) | - (col.index >= nan_for_zero.last_valid_index())) - col.loc[slicer] = np.nan - if cumulated[res_key].empty: cumulated[res_key] = df else: @@ -1225,7 +1216,8 @@ def trim_df( end_from_user=None): ''' Reindex a DataFrame with a new index that is sure to be continuous in order - to expose gaps in the data and + to expose gaps in the data and cut off data outside the required period + Parameters ---------- df : pandas.DataFrame @@ -1272,6 +1264,13 @@ def trim_df( # Then cut off the data df = df.loc[start_from_user:end_from_user, :] + # delete zeros before first/after last non-zero value in each column + for col_name, col in df.iteritems(): + nan_for_zero = col.replace(0, np.nan) + slicer = ((col.index <= nan_for_zero.first_valid_index()) | + (col.index >= nan_for_zero.last_valid_index())) + col.loc[slicer] = np.nan + return df