From bc377a6c3add237f9c45d597eab72b0410e58709 Mon Sep 17 00:00:00 2001 From: "haakon.soehoel" Date: Tue, 3 Dec 2024 14:52:56 +0100 Subject: [PATCH 1/6] Fix summary.pandas_frame time_index TypeError --- python/resdata/summary/rd_sum.py | 2 +- python/tests/rd_tests/test_sum.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/python/resdata/summary/rd_sum.py b/python/resdata/summary/rd_sum.py index 3cc954c69..709b67f2b 100644 --- a/python/resdata/summary/rd_sum.py +++ b/python/resdata/summary/rd_sum.py @@ -615,7 +615,7 @@ def pandas_frame(self, time_index=None, column_keys=None): data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), ) - frame = pd.DataFrame(index=time_index, columns=list(keywords), data=data) + frame = pd.DataFrame(index=list(time_index), columns=list(keywords), data=data) return frame @staticmethod diff --git a/python/tests/rd_tests/test_sum.py b/python/tests/rd_tests/test_sum.py index 198f4627e..4fb455d5d 100644 --- a/python/tests/rd_tests/test_sum.py +++ b/python/tests/rd_tests/test_sum.py @@ -2,6 +2,7 @@ import datetime import os import os.path +import pytest import shutil import stat import datetime @@ -756,6 +757,17 @@ def test_resample_extrapolate(self): rd_sum.get_interp_direct(key_rate, t), ) + def test_pandas2_compatibility_dataframe_index(self): + # regression test to verify that pandas frames in pandas 2 + # does not break due to missing collection for time_index + path = os.path.join(self.TESTDATA_ROOT, "local/ECLIPSE/cp_simple3/SHORT.UNSMRY") + smry = Summary(path) + try: + smry.pandas_frame(time_index=smry.time_range(interval="1Y"), column_keys=["WELL:NAME"]) + except TypeError as err: + pytest.fail(repr(err)) + + def test_t_step(): sum = createSummary( From ceaae7e763bb801dbd963bb21d715d1d4061cafb Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Tue, 3 Dec 2024 14:28:13 +0100 Subject: [PATCH 2/6] Fixes summary.pandas_frame not working for pandas 2 (cherry picked from commit 97e24460b5febbf4f2158fb0bdfe2bf50bec9791) --- python/resdata/summary/rd_sum.py | 15 +++++--- python/tests/rd_tests/test_sum.py | 58 ++++++++++++++++++------------- 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/python/resdata/summary/rd_sum.py b/python/resdata/summary/rd_sum.py index 709b67f2b..b644a0bc6 100644 --- a/python/resdata/summary/rd_sum.py +++ b/python/resdata/summary/rd_sum.py @@ -13,7 +13,7 @@ import ctypes import pandas as pd import re -from typing import Sequence, List, Tuple, Optional +from typing import Sequence, List, Tuple, Optional, Union # Observe that there is some convention conflict with the C code # regarding order of arguments: The C code generally takes the time @@ -477,7 +477,9 @@ def get_values(self, key, report_only=False): else: raise KeyError("Summary object does not have key:%s" % key) - def _make_time_vector(self, time_index): + def _make_time_vector( + self, time_index: Sequence[Union[CTime, datetime.datetime, int, datetime.date]] + ) -> TimeVector: time_points = TimeVector() for t in time_index: time_points.append(t) @@ -558,7 +560,11 @@ def report_dates(self): dates.append(self.get_report_time(report)) return dates - def pandas_frame(self, time_index=None, column_keys=None): + def pandas_frame( + self, + time_index: Optional[Sequence[datetime.datetime]] = None, + column_keys: Optional[Sequence[str]] = None, + ) -> pd.DataFrame: """Will create a pandas frame with summary data. By default you will get all time points in the summary case, but by @@ -615,8 +621,7 @@ def pandas_frame(self, time_index=None, column_keys=None): data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), ) - frame = pd.DataFrame(index=list(time_index), columns=list(keywords), data=data) - return frame + return pd.DataFrame(index=list(time_index), columns=list(keywords), data=data) @staticmethod def _compile_headers_list( diff --git a/python/tests/rd_tests/test_sum.py b/python/tests/rd_tests/test_sum.py index 4fb455d5d..79c6a94d1 100644 --- a/python/tests/rd_tests/test_sum.py +++ b/python/tests/rd_tests/test_sum.py @@ -1,4 +1,5 @@ import csv +import pytest import datetime import os import os.path @@ -27,6 +28,7 @@ def assert_frame_equal(a, b): from resdata.summary import Summary, SummaryKeyWordVector, SummaryVarType from resdata.util.test import TestAreaContext from resdata.util.test.mock import createSummary +from resdata.util.util import CTime, TimeVector from tests import ResdataTest @@ -597,30 +599,6 @@ def test_wells_and_groups(self): self.assertEqual(case.wells(), []) self.assertEqual(case.groups(), []) - def test_pandas(self): - case = create_case() - dates = ( - [datetime.datetime(2000, 1, 1)] - + case.dates - + [datetime.datetime(2020, 1, 1)] - ) - frame = case.pandas_frame(column_keys=["FOPT", "FOPR"], time_index=dates) - - fopr = frame["FOPR"] - fopt = frame["FOPT"] - - self.assertEqual(fopr[0], 0) - self.assertEqual(fopr[-1], 0) - - self.assertEqual(fopt[0], 0) - self.assertEqual(fopt[0], case.first_value("FOPT")) - self.assertEqual(fopt[-1], case.last_value("FOPT")) - - frame = case.pandas_frame() - rows, columns = frame.shape - self.assertEqual(len(case.keys()), columns) - self.assertEqual(len(case), rows) - def test_csv_load(self): case = create_case2() frame = case.pandas_frame() @@ -692,7 +670,6 @@ def test_resample_extrapolate(self): """ Test resampling of summary with extrapolate option of lower and upper boundaries enabled """ - from resdata.util.util import CTime, TimeVector time_points = TimeVector() @@ -769,6 +746,37 @@ def test_pandas2_compatibility_dataframe_index(self): +def create_time_vector(lst): + vec = TimeVector() + for l in lst: + vec.append(l) + return vec + + +@pytest.mark.parametrize("time_index_type", [list, create_time_vector, tuple]) +def test_pandas(time_index_type): + case = create_case() + dates = time_index_type( + [datetime.datetime(2000, 1, 1)] + case.dates + [datetime.datetime(2020, 1, 1)] + ) + frame = case.pandas_frame(column_keys=["FOPT", "FOPR"], time_index=dates) + + fopr = frame["FOPR"] + fopt = frame["FOPT"] + + assert fopr[0] == 0 + assert fopr[-1] == 0 + + assert fopt[0] == 0 + assert fopt[0] == case.first_value("FOPT") + assert fopt[-1] == case.last_value("FOPT") + + frame = case.pandas_frame() + rows, columns = frame.shape + assert len(case.keys()) == columns + assert len(case) == rows + + def test_t_step(): sum = createSummary( "CASE", From 80b670004b58927698c6a4d4d2cb64cb3676577d Mon Sep 17 00:00:00 2001 From: "haakon.soehoel" Date: Tue, 3 Dec 2024 15:08:23 +0100 Subject: [PATCH 3/6] Fix formatting --- python/tests/rd_tests/test_sum.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/tests/rd_tests/test_sum.py b/python/tests/rd_tests/test_sum.py index 79c6a94d1..69a29215e 100644 --- a/python/tests/rd_tests/test_sum.py +++ b/python/tests/rd_tests/test_sum.py @@ -740,12 +740,13 @@ def test_pandas2_compatibility_dataframe_index(self): path = os.path.join(self.TESTDATA_ROOT, "local/ECLIPSE/cp_simple3/SHORT.UNSMRY") smry = Summary(path) try: - smry.pandas_frame(time_index=smry.time_range(interval="1Y"), column_keys=["WELL:NAME"]) + smry.pandas_frame( + time_index=smry.time_range(interval="1Y"), column_keys=["WELL:NAME"] + ) except TypeError as err: pytest.fail(repr(err)) - def create_time_vector(lst): vec = TimeVector() for l in lst: From b844b55a71e6cf4746b6c41c785c743262a2d898 Mon Sep 17 00:00:00 2001 From: "haakon.soehoel" Date: Wed, 4 Dec 2024 10:00:47 +0100 Subject: [PATCH 4/6] Remove duplicate import --- python/tests/rd_tests/test_sum.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tests/rd_tests/test_sum.py b/python/tests/rd_tests/test_sum.py index 69a29215e..f1773c32b 100644 --- a/python/tests/rd_tests/test_sum.py +++ b/python/tests/rd_tests/test_sum.py @@ -1,5 +1,4 @@ import csv -import pytest import datetime import os import os.path From 01402c8e590fcac81fcc1fcd13d3fc1eddc3eedb Mon Sep 17 00:00:00 2001 From: "haakon.soehoel" Date: Wed, 4 Dec 2024 10:01:18 +0100 Subject: [PATCH 5/6] Improve test name --- python/tests/rd_tests/test_sum.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/rd_tests/test_sum.py b/python/tests/rd_tests/test_sum.py index f1773c32b..88cddd370 100644 --- a/python/tests/rd_tests/test_sum.py +++ b/python/tests/rd_tests/test_sum.py @@ -754,7 +754,7 @@ def create_time_vector(lst): @pytest.mark.parametrize("time_index_type", [list, create_time_vector, tuple]) -def test_pandas(time_index_type): +def test_summary_to_pandas_frame(time_index_type): case = create_case() dates = time_index_type( [datetime.datetime(2000, 1, 1)] + case.dates + [datetime.datetime(2020, 1, 1)] From 5126c21bb994f98706b6134e9d4b0aa01eb7e859 Mon Sep 17 00:00:00 2001 From: "haakon.soehoel" Date: Wed, 4 Dec 2024 10:04:53 +0100 Subject: [PATCH 6/6] Remove duplicate test The test_pandas2_compatibility_dataframe_index test scenario is covered by test_summary_to_pandas_frame --- python/tests/rd_tests/test_sum.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/python/tests/rd_tests/test_sum.py b/python/tests/rd_tests/test_sum.py index 88cddd370..28faf6a86 100644 --- a/python/tests/rd_tests/test_sum.py +++ b/python/tests/rd_tests/test_sum.py @@ -733,18 +733,6 @@ def test_resample_extrapolate(self): rd_sum.get_interp_direct(key_rate, t), ) - def test_pandas2_compatibility_dataframe_index(self): - # regression test to verify that pandas frames in pandas 2 - # does not break due to missing collection for time_index - path = os.path.join(self.TESTDATA_ROOT, "local/ECLIPSE/cp_simple3/SHORT.UNSMRY") - smry = Summary(path) - try: - smry.pandas_frame( - time_index=smry.time_range(interval="1Y"), column_keys=["WELL:NAME"] - ) - except TypeError as err: - pytest.fail(repr(err)) - def create_time_vector(lst): vec = TimeVector()