Support weekly and all other Pandas date intervals (#161)

equinor · Sep 7, 2020 · 7a4930b · 7a4930b
1 parent add167a
commit 7a4930b
Show file tree

Hide file tree

Showing 5 changed files with 63 additions and 44 deletions.
diff --git a/src/fmu/ensemble/realization.py b/src/fmu/ensemble/realization.py
@@ -1230,7 +1230,7 @@ def get_smry_dates(
                 the returned list of datetime. 'report' will
                 yield the sorted union of all valid timesteps for
                 all realizations. Other valid options are
-                'daily', 'monthly' and 'yearly'.
+                'daily', 'weekly', 'monthly' and 'yearly'.
                 'first' will give out the first date (minimum) and
                 'last' will give out the last date (maximum),
                 both as lists with one element.

diff --git a/src/fmu/ensemble/util/dates.py b/src/fmu/ensemble/util/dates.py
@@ -10,6 +10,35 @@
 xfmu = Interaction()
 logger = xfmu.functionlogger(__name__)
 
+"""Mapping from fmu-ensemble custom offset strings to Pandas DateOffset strings.
+See
+https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
+"""
+PD_FREQ_MNEMONICS = {
+    "monthly": "MS",
+    "yearly": "YS",
+    "daily": "D",
+    "weekly": "W-MON",
+}
+
+
+def date_range(start_date, end_date, freq):
+    """Wrapper for pandas.date_range to allow for extra fmu-ensemble specific mnemonics
+    'monthly', 'yearly', 'daily', 'weekly', mapped over to pandas DateOffsets
+
+    Args:
+        start_date (datetime.date)
+        end_date (datetime.date)
+        freq (str): monthly, daily, weekly, yearly, or a Pandas date offset
+            frequency.
+
+    Returns:
+        pandas.DatetimeIndex covering the requested range
+    """
+    if freq in PD_FREQ_MNEMONICS:
+        freq = PD_FREQ_MNEMONICS[freq]
+    return pd.date_range(start_date, end_date, freq=freq)
+
 
 def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date=None):
     """
@@ -21,8 +50,8 @@ def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date
         eclsumsdates (list of lists of datetimes)
         freq (str): Requested frequency
         normalize (bool): Normalize daterange to frequency or not.
-        start_date (datetime.date or str):
-        end_date (datetime.date or str)
+        start_date (datetime.date or str): Overridden if freq=='first'
+        end_date (datetime.date or str): Overridden if freq=='last'
 
     Return:
         list of datetime.date
@@ -68,8 +97,6 @@ def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date
     start_smry = min([min(x) for x in eclsumsdates])
     end_smry = max([max(x) for x in eclsumsdates])
 
-    pd_freq_mnenomics = {"monthly": "MS", "yearly": "YS", "daily": "D"}
-
     (start_n, end_n) = normalize_dates(start_smry.date(), end_smry.date(), freq)
 
     if not start_date and not normalize:
@@ -86,11 +113,8 @@ def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date
     else:
         end_date_range = end_date
 
-    if freq not in pd_freq_mnenomics:
-        raise ValueError("Requested frequency %s not supported" % freq)
-    datetimes = pd.date_range(
-        start_date_range, end_date_range, freq=pd_freq_mnenomics[freq]
-    )
+    datetimes = date_range(start_date_range, end_date_range, freq)
+
     # Convert from Pandas' datetime64 to datetime.date:
     datetimes = [x.date() for x in datetimes]
 
@@ -118,33 +142,13 @@ def normalize_dates(start_date, end_date, freq):
     Args:
         start_date: datetime.date
         end_date: datetime.date
-        freq: string with either 'monthly' or 'yearly'.
-            Anything else will return the input as is
+        freq: string with either 'daily', 'weekly', 'monthly', 'yearly'
+            or any other frequency offset accepted by Pandas
+
     Return:
         Tuple of normalized (start_date, end_date)
     """
-
-    if freq == "monthly":
-        start_date = start_date.replace(day=1)
-
-        # Avoid rolling forward if we are already at day 1 in a month
-        if end_date != end_date.replace(day=1):
-            end_date = end_date.replace(day=1) + dateutil.relativedelta.relativedelta(
-                months=1
-            )
-    elif freq == "yearly":
-        start_date = start_date.replace(day=1, month=1)
-        # Avoid rolling forward if we are already at day 1 in a year
-        if end_date != end_date.replace(day=1, month=1):
-            end_date = end_date.replace(
-                day=1, month=1
-            ) + dateutil.relativedelta.relativedelta(years=1)
-    elif freq == "daily":
-        # This we don't need to normalize, but we should not give any warnings
-        pass
-    elif freq == "first" or freq == "last":
-        # This we don't need to normalize, but we should not give any warnings
-        pass
-    else:
-        logger.warning("Unrecognized frequency %s for date normalization", str(freq))
-    return (start_date, end_date)
+    if freq in PD_FREQ_MNEMONICS:
+        freq = PD_FREQ_MNEMONICS[freq]
+    offset = pd.tseries.frequencies.to_offset(freq)
+    return (offset.rollback(start_date).date(), offset.rollforward(end_date).date())
diff --git a/src/fmu/ensemble/virtualrealization.py b/src/fmu/ensemble/virtualrealization.py
@@ -14,6 +14,7 @@
 from .realizationcombination import RealizationCombination
 from .util import shortcut2path
 from .util.rates import compute_volumetric_rates
+from .util.dates import date_range
 
 fmux = Interaction()
 logger = fmux.basiclogger(__name__)
@@ -396,7 +397,7 @@ def get_smry_dates(self, freq="monthly", normalize=False):
         Args:
             freq: string denoting requested frequency for
                 the list of datetimes.
-                'daily', 'monthly' and 'yearly'.
+                'daily', 'weekly', 'monthly', 'yearly' or a Pandas offset alias.
                 'first' will give out the first date (minimum) and
                 'last' will give out the last date (maximum),
                 both as lists with one element.
@@ -431,14 +432,9 @@ def get_smry_dates(self, freq="monthly", normalize=False):
             return [end_date.date()]
         if freq in ("custom", "raw"):
             return available_dates
-        pd_freq_mnenomics = {"monthly": "MS", "yearly": "YS", "daily": "D"}
         if normalize:
             raise NotImplementedError
-            # (start_date, end_date) = normalize_dates(start_date, end_date,
-            #                                         freq)
-        if freq not in pd_freq_mnenomics:
-            raise ValueError("Requested frequency %s not supported" % freq)
-        datetimes = pd.date_range(start_date, end_date, freq=pd_freq_mnenomics[freq])
+        datetimes = date_range(start_date, end_date, freq=freq)
         # Convert from Pandas' datetime64 to datetime.date:
         return [x.date() for x in datetimes]
 

diff --git a/tests/test_ensemble.py b/tests/test_ensemble.py
@@ -385,6 +385,12 @@ def test_ensemble_ecl():
     assert len(reekensemble.get_smry_dates(freq="yearly")) == 5
     assert len(reekensemble.get_smry_dates(freq="monthly")) == 38
     assert len(reekensemble.get_smry_dates(freq="daily")) == 1098
+    assert len(reekensemble.get_smry_dates(freq="D")) == 1098
+    assert len(reekensemble.get_smry_dates(freq="2D")) == 1098 / 2
+    assert len(reekensemble.get_smry_dates(freq="weekly")) == 159
+    assert len(reekensemble.get_smry_dates(freq="W-MON")) == 159
+    assert len(reekensemble.get_smry_dates(freq="2W-MON")) == 80
+    assert len(reekensemble.get_smry_dates(freq="W-TUE")) == 159
     assert len(reekensemble.get_smry_dates(freq="first")) == 1
     assert len(reekensemble.get_smry_dates(freq="last")) == 1
     assert reekensemble.get_smry_dates(freq="first") == reekensemble.get_smry_dates(

diff --git a/tests/test_realization.py b/tests/test_realization.py
@@ -277,12 +277,14 @@ def test_batch():
             {"load_scalar": {"localpath": "npv.txt"}},
             {"load_smry": {"column_keys": "FOPT", "time_index": "yearly"}},
             {"load_smry": {"column_keys": "*", "time_index": "daily"}},
+            {"load_smry": {"column_keys": "*", "time_index": "weekly"}},
             {"illegal-ignoreme": {}},
         ],
     )
     assert real.get_df("npv.txt") == 3444
     assert len(real.get_df("unsmry--daily")["FOPR"]) > 2
     assert len(real.get_df("unsmry--yearly")["FOPT"]) > 2
+    assert len(real.get_df("unsmry--weekly")["FOPT"]) > 2
 
 
 def test_volumetric_rates():
@@ -441,6 +443,14 @@ def test_datenormalization():
     assert str(monthly.index[-1]) == "2003-02-01"
     yearly = real.get_smry(column_keys="FOPT", time_index="yearly")
     assert str(yearly.index[-1]) == "2004-01-01"
+    weekly = real.get_smry(column_keys="FOPT", time_index="weekly")
+    assert str(weekly.index[-1]) == "2003-01-06"  # First Monday after 2003-01-02
+    weekly = real.get_smry(column_keys="FOPT", time_index="W-MON")
+    assert str(weekly.index[-1]) == "2003-01-06"  # First Monday after 2003-01-02
+    weekly = real.get_smry(column_keys="FOPT", time_index="W-TUE")
+    assert str(weekly.index[-1]) == "2003-01-07"  # First Tuesday after 2003-01-02
+    weekly = real.get_smry(column_keys="FOPT", time_index="W-THU")
+    assert str(weekly.index[-1]) == "2003-01-02"  # 2003-01-02 is itself a Thursday
 
     # Check that time_index=None and time_index="raw" behaves like default
     raw = real.load_smry(column_keys="FOPT", time_index="raw")
@@ -461,6 +471,8 @@ def test_datenormalization():
     assert str(real.get_df("unsmry--monthly")["DATE"].iloc[-1]) == "2003-02-01"
     real.load_smry(column_keys="FOPT", time_index="yearly")
     assert str(real.get_df("unsmry--yearly")["DATE"].iloc[-1]) == "2004-01-01"
+    real.load_smry(column_keys="FOPT", time_index="weekly")
+    assert str(real.get_df("unsmry--weekly")["DATE"].iloc[-1]) == "2003-01-06"
 
 
 def test_singlereal_ecl(tmp="TMP"):
@@ -493,6 +505,7 @@ def test_singlereal_ecl(tmp="TMP"):
     assert "FOPT" in real.get_smry(column_keys=["F*"], time_index="monthly")
     assert "FOPT" in real.get_smry(column_keys="F*", time_index="yearly")
     assert "FOPT" in real.get_smry(column_keys="FOPT", time_index="daily")
+    assert "FOPT" in real.get_smry(column_keys="FOPT", time_index="weekly")
     assert "FOPT" in real.get_smry(column_keys="FOPT", time_index="raw")
 
     # Test date functionality