Skip to content

Commit

Permalink
Support weekly and all other Pandas date intervals (#161)
Browse files Browse the repository at this point in the history
  • Loading branch information
berland authored Sep 7, 2020
1 parent add167a commit 7a4930b
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 44 deletions.
2 changes: 1 addition & 1 deletion src/fmu/ensemble/realization.py
Original file line number Diff line number Diff line change
Expand Up @@ -1230,7 +1230,7 @@ def get_smry_dates(
the returned list of datetime. 'report' will
yield the sorted union of all valid timesteps for
all realizations. Other valid options are
'daily', 'monthly' and 'yearly'.
'daily', 'weekly', 'monthly' and 'yearly'.
'first' will give out the first date (minimum) and
'last' will give out the last date (maximum),
both as lists with one element.
Expand Down
76 changes: 40 additions & 36 deletions src/fmu/ensemble/util/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,35 @@
xfmu = Interaction()
logger = xfmu.functionlogger(__name__)

# Mapping from fmu-ensemble custom offset strings to Pandas DateOffset
# strings. See
# https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
PD_FREQ_MNEMONICS = dict(
    monthly="MS",
    yearly="YS",
    daily="D",
    weekly="W-MON",
)


def date_range(start_date, end_date, freq):
    """Build a date range, accepting fmu-ensemble frequency mnemonics.

    Thin wrapper around pandas.date_range. The fmu-ensemble specific
    strings listed as keys in PD_FREQ_MNEMONICS ('monthly', 'yearly',
    'daily', 'weekly') are translated to the corresponding Pandas
    DateOffset string; any other value is handed to Pandas unchanged.

    Args:
        start_date (datetime.date): Left bound passed to pandas.date_range.
        end_date (datetime.date): Right bound passed to pandas.date_range.
        freq (str): 'monthly', 'daily', 'weekly', 'yearly', or a Pandas
            date offset frequency.

    Returns:
        The result of pandas.date_range (a pandas.DatetimeIndex) for the
        translated frequency.
    """
    # Fall back to the given value when it is not one of our mnemonics.
    pandas_freq = PD_FREQ_MNEMONICS.get(freq, freq)
    return pd.date_range(start_date, end_date, freq=pandas_freq)


def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date=None):
"""
Expand All @@ -21,8 +50,8 @@ def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date
eclsumsdates (list of lists of datetimes)
freq (str): Requested frequency
normalize (bool): Normalize daterange to frequency or not.
start_date (datetime.date or str):
end_date (datetime.date or str)
start_date (datetime.date or str): Overridden if freq=='first'
end_date (datetime.date or str): Overridden if freq=='last'
Return:
list of datetime.date
Expand Down Expand Up @@ -68,8 +97,6 @@ def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date
start_smry = min([min(x) for x in eclsumsdates])
end_smry = max([max(x) for x in eclsumsdates])

pd_freq_mnenomics = {"monthly": "MS", "yearly": "YS", "daily": "D"}

(start_n, end_n) = normalize_dates(start_smry.date(), end_smry.date(), freq)

if not start_date and not normalize:
Expand All @@ -86,11 +113,8 @@ def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date
else:
end_date_range = end_date

if freq not in pd_freq_mnenomics:
raise ValueError("Requested frequency %s not supported" % freq)
datetimes = pd.date_range(
start_date_range, end_date_range, freq=pd_freq_mnenomics[freq]
)
datetimes = date_range(start_date_range, end_date_range, freq)

# Convert from Pandas' datetime64 to datetime.date:
datetimes = [x.date() for x in datetimes]

Expand Down Expand Up @@ -118,33 +142,13 @@ def normalize_dates(start_date, end_date, freq):
Args:
start_date: datetime.date
end_date: datetime.date
freq: string with either 'monthly' or 'yearly'.
Anything else will return the input as is
freq: string with either 'monthly', 'yearly', 'weekly'
or any other frequency offset accepted by Pandas
Return:
Tuple of normalized (start_date, end_date)
"""

if freq == "monthly":
start_date = start_date.replace(day=1)

# Avoid rolling forward if we are already at day 1 in a month
if end_date != end_date.replace(day=1):
end_date = end_date.replace(day=1) + dateutil.relativedelta.relativedelta(
months=1
)
elif freq == "yearly":
start_date = start_date.replace(day=1, month=1)
# Avoid rolling forward if we are already at day 1 in a year
if end_date != end_date.replace(day=1, month=1):
end_date = end_date.replace(
day=1, month=1
) + dateutil.relativedelta.relativedelta(years=1)
elif freq == "daily":
# This we don't need to normalize, but we should not give any warnings
pass
elif freq == "first" or freq == "last":
# This we don't need to normalize, but we should not give any warnings
pass
else:
logger.warning("Unrecognized frequency %s for date normalization", str(freq))
return (start_date, end_date)
if freq in PD_FREQ_MNEMONICS:
freq = PD_FREQ_MNEMONICS[freq]
offset = pd.tseries.frequencies.to_offset(freq)
return (offset.rollback(start_date).date(), offset.rollforward(end_date).date())
10 changes: 3 additions & 7 deletions src/fmu/ensemble/virtualrealization.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .realizationcombination import RealizationCombination
from .util import shortcut2path
from .util.rates import compute_volumetric_rates
from .util.dates import date_range

fmux = Interaction()
logger = fmux.basiclogger(__name__)
Expand Down Expand Up @@ -396,7 +397,7 @@ def get_smry_dates(self, freq="monthly", normalize=False):
Args:
freq: string denoting requested frequency for
the list of datetimes.
'daily', 'monthly' and 'yearly'.
'daily', 'monthly', 'yearly' or 'weekly'
'first' will give out the first date (minimum) and
'last' will give out the last date (maximum),
both as lists with one element.
Expand Down Expand Up @@ -431,14 +432,9 @@ def get_smry_dates(self, freq="monthly", normalize=False):
return [end_date.date()]
if freq in ("custom", "raw"):
return available_dates
pd_freq_mnenomics = {"monthly": "MS", "yearly": "YS", "daily": "D"}
if normalize:
raise NotImplementedError
# (start_date, end_date) = normalize_dates(start_date, end_date,
# freq)
if freq not in pd_freq_mnenomics:
raise ValueError("Requested frequency %s not supported" % freq)
datetimes = pd.date_range(start_date, end_date, freq=pd_freq_mnenomics[freq])
datetimes = date_range(start_date, end_date, freq=freq)
# Convert from Pandas' datetime64 to datetime.date:
return [x.date() for x in datetimes]

Expand Down
6 changes: 6 additions & 0 deletions tests/test_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,12 @@ def test_ensemble_ecl():
assert len(reekensemble.get_smry_dates(freq="yearly")) == 5
assert len(reekensemble.get_smry_dates(freq="monthly")) == 38
assert len(reekensemble.get_smry_dates(freq="daily")) == 1098
assert len(reekensemble.get_smry_dates(freq="D")) == 1098
assert len(reekensemble.get_smry_dates(freq="2D")) == 1098 / 2
assert len(reekensemble.get_smry_dates(freq="weekly")) == 159
assert len(reekensemble.get_smry_dates(freq="W-MON")) == 159
assert len(reekensemble.get_smry_dates(freq="2W-MON")) == 80
assert len(reekensemble.get_smry_dates(freq="W-TUE")) == 159
assert len(reekensemble.get_smry_dates(freq="first")) == 1
assert len(reekensemble.get_smry_dates(freq="last")) == 1
assert reekensemble.get_smry_dates(freq="first") == reekensemble.get_smry_dates(
Expand Down
13 changes: 13 additions & 0 deletions tests/test_realization.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,14 @@ def test_batch():
{"load_scalar": {"localpath": "npv.txt"}},
{"load_smry": {"column_keys": "FOPT", "time_index": "yearly"}},
{"load_smry": {"column_keys": "*", "time_index": "daily"}},
{"load_smry": {"column_keys": "*", "time_index": "weekly"}},
{"illegal-ignoreme": {}},
],
)
assert real.get_df("npv.txt") == 3444
assert len(real.get_df("unsmry--daily")["FOPR"]) > 2
assert len(real.get_df("unsmry--yearly")["FOPT"]) > 2
assert len(real.get_df("unsmry--weekly")["FOPT"]) > 2


def test_volumetric_rates():
Expand Down Expand Up @@ -441,6 +443,14 @@ def test_datenormalization():
assert str(monthly.index[-1]) == "2003-02-01"
yearly = real.get_smry(column_keys="FOPT", time_index="yearly")
assert str(yearly.index[-1]) == "2004-01-01"
weekly = real.get_smry(column_keys="FOPT", time_index="weekly")
assert str(weekly.index[-1]) == "2003-01-06" # First Monday after 2003-01-02
weekly = real.get_smry(column_keys="FOPT", time_index="W-MON")
assert str(weekly.index[-1]) == "2003-01-06" # First Monday after 2003-01-02
weekly = real.get_smry(column_keys="FOPT", time_index="W-TUE")
assert str(weekly.index[-1]) == "2003-01-07" # First Tuesday after 2003-01-02
weekly = real.get_smry(column_keys="FOPT", time_index="W-THU")
assert str(weekly.index[-1]) == "2003-01-02" # 2003-01-02 is itself a Thursday, so no roll-forward

# Check that time_index=None and time_index="raw" behaves like default
raw = real.load_smry(column_keys="FOPT", time_index="raw")
Expand All @@ -461,6 +471,8 @@ def test_datenormalization():
assert str(real.get_df("unsmry--monthly")["DATE"].iloc[-1]) == "2003-02-01"
real.load_smry(column_keys="FOPT", time_index="yearly")
assert str(real.get_df("unsmry--yearly")["DATE"].iloc[-1]) == "2004-01-01"
real.load_smry(column_keys="FOPT", time_index="weekly")
assert str(real.get_df("unsmry--weekly")["DATE"].iloc[-1]) == "2003-01-06"


def test_singlereal_ecl(tmp="TMP"):
Expand Down Expand Up @@ -493,6 +505,7 @@ def test_singlereal_ecl(tmp="TMP"):
assert "FOPT" in real.get_smry(column_keys=["F*"], time_index="monthly")
assert "FOPT" in real.get_smry(column_keys="F*", time_index="yearly")
assert "FOPT" in real.get_smry(column_keys="FOPT", time_index="daily")
assert "FOPT" in real.get_smry(column_keys="FOPT", time_index="weekly")
assert "FOPT" in real.get_smry(column_keys="FOPT", time_index="raw")

# Test date functionality
Expand Down

0 comments on commit 7a4930b

Please sign in to comment.