Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix summary.pandas frame time index type error #1030

Merged
merged 6 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions python/resdata/summary/rd_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import ctypes
import pandas as pd
import re
from typing import Sequence, List, Tuple, Optional
from typing import Sequence, List, Tuple, Optional, Union

# Observe that there is some convention conflict with the C code
# regarding order of arguments: The C code generally takes the time
Expand Down Expand Up @@ -477,7 +477,9 @@ def get_values(self, key, report_only=False):
else:
raise KeyError("Summary object does not have key:%s" % key)

def _make_time_vector(self, time_index):
def _make_time_vector(
self, time_index: Sequence[Union[CTime, datetime.datetime, int, datetime.date]]
) -> TimeVector:
time_points = TimeVector()
for t in time_index:
time_points.append(t)
Expand Down Expand Up @@ -558,7 +560,11 @@ def report_dates(self):
dates.append(self.get_report_time(report))
return dates

def pandas_frame(self, time_index=None, column_keys=None):
def pandas_frame(
self,
time_index: Optional[Sequence[datetime.datetime]] = None,
column_keys: Optional[Sequence[str]] = None,
) -> pd.DataFrame:
"""Will create a pandas frame with summary data.

By default you will get all time points in the summary case, but by
Expand Down Expand Up @@ -615,8 +621,7 @@ def pandas_frame(self, time_index=None, column_keys=None):
data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
)

frame = pd.DataFrame(index=time_index, columns=list(keywords), data=data)
return frame
return pd.DataFrame(index=list(time_index), columns=list(keywords), data=data)

@staticmethod
def _compile_headers_list(
Expand Down
71 changes: 46 additions & 25 deletions python/tests/rd_tests/test_sum.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import csv
import pytest
HakonSohoel marked this conversation as resolved.
Show resolved Hide resolved
import datetime
import os
import os.path
import pytest
import shutil
import stat
import datetime
Expand All @@ -26,6 +28,7 @@ def assert_frame_equal(a, b):
from resdata.summary import Summary, SummaryKeyWordVector, SummaryVarType
from resdata.util.test import TestAreaContext
from resdata.util.test.mock import createSummary
from resdata.util.util import CTime, TimeVector
from tests import ResdataTest


Expand Down Expand Up @@ -596,30 +599,6 @@ def test_wells_and_groups(self):
self.assertEqual(case.wells(), [])
self.assertEqual(case.groups(), [])

def test_pandas(self):
    """Check ``pandas_frame`` with an explicit time index and with defaults.

    The explicit index is ``case.dates`` with one extra ``datetime`` added
    at each end.  The test expects FOPR to be 0 at both ends and FOPT to
    match the case's first and last values.  A second call without
    arguments must yield one row per time step in the case and one column
    per summary key.
    """
    case = create_case()
    dates = (
        [datetime.datetime(2000, 1, 1)]
        + case.dates
        + [datetime.datetime(2020, 1, 1)]
    )
    frame = case.pandas_frame(column_keys=["FOPT", "FOPR"], time_index=dates)

    fopr = frame["FOPR"]
    fopt = frame["FOPT"]

    # The frame is indexed by time, not by integers, so first/last rows are
    # read with ``.iloc``.  Plain ``series[0]`` / ``series[-1]`` relies on
    # the positional fallback that is deprecated in pandas 2.
    self.assertEqual(fopr.iloc[0], 0)
    self.assertEqual(fopr.iloc[-1], 0)

    self.assertEqual(fopt.iloc[0], 0)
    self.assertEqual(fopt.iloc[0], case.first_value("FOPT"))
    self.assertEqual(fopt.iloc[-1], case.last_value("FOPT"))

    frame = case.pandas_frame()
    rows, columns = frame.shape
    self.assertEqual(len(case.keys()), columns)
    self.assertEqual(len(case), rows)

def test_csv_load(self):
case = create_case2()
frame = case.pandas_frame()
Expand Down Expand Up @@ -691,7 +670,6 @@ def test_resample_extrapolate(self):
"""
Test resampling of summary with extrapolate option of lower and upper boundaries enabled
"""
from resdata.util.util import CTime, TimeVector

time_points = TimeVector()

Expand Down Expand Up @@ -756,6 +734,49 @@ def test_resample_extrapolate(self):
rd_sum.get_interp_direct(key_rate, t),
)

def test_pandas2_compatibility_dataframe_index(self):
    """Regression test for building a pandas frame under pandas 2.

    Passes the value returned by ``time_range`` straight in as
    ``time_index`` and fails the test if ``pandas_frame`` raises a
    ``TypeError``.
    """
    # Regression test to verify that pandas frames in pandas 2 do not
    # break due to a missing collection for time_index.
    path = os.path.join(self.TESTDATA_ROOT, "local/ECLIPSE/cp_simple3/SHORT.UNSMRY")
    smry = Summary(path)
    try:
        smry.pandas_frame(
            time_index=smry.time_range(interval="1Y"), column_keys=["WELL:NAME"]
        )
    except TypeError as err:
        # Report the TypeError as an explicit test failure with its repr.
        pytest.fail(repr(err))


def create_time_vector(lst):
    """Return a new ``TimeVector`` with every item of *lst* appended in order."""
    time_vector = TimeVector()
    for time_point in lst:
        time_vector.append(time_point)
    return time_vector


@pytest.mark.parametrize("time_index_type", [list, create_time_vector, tuple])
def test_pandas(time_index_type):
    """Check ``pandas_frame`` for several container types of ``time_index``.

    ``time_index_type`` is a callable that turns a list of datetimes into
    the container under test (``list``, ``tuple`` or a ``TimeVector`` built
    by ``create_time_vector``).  The index is ``case.dates`` with one extra
    ``datetime`` added at each end.  The test expects FOPR to be 0 at both
    ends and FOPT to match the case's first and last values.  A second call
    without arguments must yield one row per time step in the case and one
    column per summary key.
    """
    case = create_case()
    dates = time_index_type(
        [datetime.datetime(2000, 1, 1)] + case.dates + [datetime.datetime(2020, 1, 1)]
    )
    frame = case.pandas_frame(column_keys=["FOPT", "FOPR"], time_index=dates)

    fopr = frame["FOPR"]
    fopt = frame["FOPT"]

    # The frame is indexed by time, not by integers, so first/last rows are
    # read with ``.iloc``.  Plain ``series[0]`` / ``series[-1]`` relies on
    # the positional fallback that is deprecated in pandas 2.
    assert fopr.iloc[0] == 0
    assert fopr.iloc[-1] == 0

    assert fopt.iloc[0] == 0
    assert fopt.iloc[0] == case.first_value("FOPT")
    assert fopt.iloc[-1] == case.last_value("FOPT")

    frame = case.pandas_frame()
    rows, columns = frame.shape
    assert len(case.keys()) == columns
    assert len(case) == rows


def test_t_step():
sum = createSummary(
Expand Down
Loading