Skip to content

Commit

Permalink
feat(modflow): support dataframe for pkg data (#2010)
Browse files Browse the repository at this point in the history
* support pandas dataframe for node_data, stress_period_data etc for flopy.modflow.* pkgs
* just convert to recarray, don't reimplement data storage with pandas like for mf6
* add tests with mnw and wel
  • Loading branch information
wpbonelli authored Nov 22, 2023
1 parent d0546ce commit be104c4
Show file tree
Hide file tree
Showing 17 changed files with 185 additions and 61 deletions.
22 changes: 17 additions & 5 deletions autotest/test_mnw.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def mnw1_path(example_data_path):
return example_data_path / "mf2005_test"


def test_load(function_tmpdir, example_data_path, mnw2_examples_path):
def test_load(function_tmpdir, mnw2_examples_path):
"""t027 test load of MNW2 Package"""
# load in the test problem (1 well, 3 stress periods)
m = Modflow.load(
Expand Down Expand Up @@ -94,7 +94,8 @@ def test_mnw1_load_write(function_tmpdir, mnw1_path):
assert np.array_equal(v, m2.mnw1.stress_period_data[k])


def test_make_package(function_tmpdir):
@pytest.mark.parametrize("dataframe", [True, False])
def test_make_package(function_tmpdir, dataframe):
"""t027 test make MNW2 Package"""
ws = function_tmpdir
m4 = Modflow("mnw2example", model_ws=ws)
Expand Down Expand Up @@ -195,6 +196,9 @@ def test_make_package(function_tmpdir):
).view(np.recarray),
}

if dataframe:
node_data = pd.DataFrame(node_data)

mnw2_4 = ModflowMnw2(
model=m4,
mnwmax=2,
Expand Down Expand Up @@ -257,6 +261,9 @@ def test_make_package(function_tmpdir):
).view(np.recarray),
}

if dataframe:
node_data = pd.DataFrame(node_data)

mnw2_4 = ModflowMnw2(
model=m4,
mnwmax=2,
Expand Down Expand Up @@ -294,7 +301,8 @@ def test_make_package(function_tmpdir):
)


def test_mnw2_create_file(function_tmpdir):
@pytest.mark.parametrize("dataframe", [True, False])
def test_mnw2_create_file(function_tmpdir, dataframe):
"""
Test for issue #556, Mnw2 crashed if wells have
multiple node lengths
Expand Down Expand Up @@ -341,8 +349,12 @@ def test_mnw2_create_file(function_tmpdir):
wellids[i],
nnodes=nlayers[i],
nper=len(stress_period_data.index),
node_data=node_data.to_records(index=False),
stress_period_data=stress_period_data.to_records(index=False),
node_data=node_data.to_records(index=False)
if not dataframe
else node_data,
stress_period_data=stress_period_data.to_records(index=False)
if not dataframe
else stress_period_data,
)

wells.append(wl)
Expand Down
96 changes: 91 additions & 5 deletions autotest/test_modflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import os
import shutil
from pathlib import Path
from typing import Dict

import numpy as np
import pandas as pd
import pytest
from autotest.conftest import get_example_data_path
from modflow_devtools.markers import excludes_platform, requires_exe
Expand Down Expand Up @@ -261,7 +263,7 @@ def test_mt_modelgrid(function_tmpdir):
assert np.array_equal(swt.modelgrid.idomain, ml.modelgrid.idomain)


@requires_exe("mp7")
@requires_exe("mp7", "mf2005")
def test_exe_selection(example_data_path, function_tmpdir):
model_path = example_data_path / "freyberg"
namfile_path = model_path / "freyberg.nam"
Expand Down Expand Up @@ -1287,7 +1289,91 @@ def test_load_with_list_reader(function_tmpdir):
assert np.array_equal(originalwelra, m2.wel.stress_period_data[0])


def get_basic_modflow_model(ws, name):
@requires_exe("mf2005")
@pytest.mark.parametrize(
"container",
[
str(np.recarray),
str(pd.DataFrame),
str(Dict[int, np.recarray]),
str(Dict[int, pd.DataFrame]),
],
)
def test_pkg_data_containers(function_tmpdir, container):
"""Test various containers for package data (list, ndarray, recarray, dataframe, dict of such)"""

name = "pkg_data"
ws = function_tmpdir
m = Modflow(name, model_ws=ws)
size = 100
nlay = 10
nper = 1

# grid discretization
dis = ModflowDis(
m,
nper=nper,
nlay=nlay,
nrow=size,
ncol=size,
top=nlay,
botm=list(range(nlay)),
)

# recharge pkg
rch = ModflowRch(
m, rech={k: 0.001 - np.cos(k) * 0.001 for k in range(nper)}
)

# well pkg, setup data per 'container' parameter
# to test support for various container types
ra = ModflowWel.get_empty(size**2)
ra_per = ra.copy()
ra_per["k"] = 1
ra_per["i"] = (
(np.ones((size, size)) * np.arange(size))
.transpose()
.ravel()
.astype(int)
)
ra_per["j"] = list(range(size)) * size
wel_dtype = np.dtype(
[
("k", int),
("i", int),
("j", int),
("flux", np.float32),
]
)
df_per = pd.DataFrame(ra_per)
if "'numpy.recarray'" in container:
well_spd = ra_per
elif "'pandas.core.frame.DataFrame'" in container:
well_spd = df_per
elif "Dict[int, numpy.recarray]" in container:
well_spd = {}
well_spd[0] = ra_per
elif "Dict[int, pandas.core.frame.DataFrame]" in container:
well_spd = {}
well_spd[0] = df_per
wel = ModflowWel(m, stress_period_data=well_spd)

# basic pkg
bas = ModflowBas(m)

# solver
pcg = ModflowPcg(m)

# output control pkg
oc = ModflowOc(m)

# write and run the model
m.write_input()
success, _ = m.run_model(silent=False)
assert success


def get_perftest_model(ws, name):
m = Modflow(name, model_ws=ws)

size = 100
Expand Down Expand Up @@ -1338,20 +1424,20 @@ def get_basic_modflow_model(ws, name):
@pytest.mark.slow
def test_model_init_time(function_tmpdir, benchmark):
name = inspect.getframeinfo(inspect.currentframe()).function
benchmark(lambda: get_basic_modflow_model(ws=function_tmpdir, name=name))
benchmark(lambda: get_perftest_model(ws=function_tmpdir, name=name))


@pytest.mark.slow
def test_model_write_time(function_tmpdir, benchmark):
name = inspect.getframeinfo(inspect.currentframe()).function
model = get_basic_modflow_model(ws=function_tmpdir, name=name)
model = get_perftest_model(ws=function_tmpdir, name=name)
benchmark(lambda: model.write_input())


@pytest.mark.slow
def test_model_load_time(function_tmpdir, benchmark):
name = inspect.getframeinfo(inspect.currentframe()).function
model = get_basic_modflow_model(ws=function_tmpdir, name=name)
model = get_perftest_model(ws=function_tmpdir, name=name)
model.write_input()
benchmark(
lambda: Modflow.load(
Expand Down
17 changes: 13 additions & 4 deletions flopy/modflow/mfag.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import os

import numpy as np
import pandas as pd

from ..pakbase import Package
from ..utils.flopy_io import multi_line_strip
Expand All @@ -29,9 +30,9 @@ class ModflowAg(Package):
model object
options : flopy.utils.OptionBlock object
option block object
time_series : np.recarray
time_series : np.recarray or pd.DataFrame
numpy recarray for the time series block
well_list : np.recarray
well_list : np.recarray or pd.DataFrame
recarray of the well_list block
irrdiversion : dict {per: np.recarray}
dictionary of the irrdiversion block
Expand Down Expand Up @@ -269,8 +270,16 @@ def __init__(
else:
self.options = OptionBlock("", ModflowAg)

self.time_series = time_series
self.well_list = well_list
self.time_series = (
time_series.to_records(index=False)
if isinstance(time_series, pd.DataFrame)
else time_series
)
self.well_list = (
well_list.to_records(index=False)
if isinstance(well_list, pd.DataFrame)
else well_list
)
self.irrdiversion = irrdiversion
self.irrwell = irrwell
self.supwell = supwell
Expand Down
3 changes: 1 addition & 2 deletions flopy/modflow/mfchd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ class ModflowChd(Package):
model : model object
The model object (of type :class:`flopy.modflow.mf.Modflow`) to which
this package will be added.
stress_period_data : list of boundaries, recarrays, or dictionary of
boundaries.
stress_period_data : list, recarray, dataframe, or dictionary of boundaries.
Each chd cell is defined through definition of
layer (int), row (int), column (int), shead (float), ehead (float)
Expand Down
3 changes: 1 addition & 2 deletions flopy/modflow/mfdrn.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ class ModflowDrn(Package):
A flag that is used to determine if cell-by-cell budget data should be
saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
(default is None).
stress_period_data : list of boundaries, recarrays, or dictionary of
boundaries.
stress_period_data : list, recarray, dataframe or dictionary of boundaries.
Each drain cell is defined through definition of
layer(int), row(int), column(int), elevation(float),
conductance(float).
Expand Down
3 changes: 1 addition & 2 deletions flopy/modflow/mfdrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ class ModflowDrt(Package):
A flag that is used to determine if cell-by-cell budget data should be
saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
(default is None).
stress_period_data : list of boundaries, recarrays, or dictionary of
boundaries.
stress_period_data : list, recarray, dataframe or dictionary of boundaries.
Each drain return cell is defined through definition of
layer(int), row(int), column(int), elevation(float),
conductance(float), layerR (int) , rowR (int), colR (int) and rfprop (float).
Expand Down
9 changes: 7 additions & 2 deletions flopy/modflow/mffhb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"""
import numpy as np
import pandas as pd

from ..pakbase import Package
from ..utils import read1d
Expand Down Expand Up @@ -64,7 +65,7 @@ class ModflowFhb(Package):
(default is 0.0)
cnstm5 : float
A constant multiplier for data list flwrat. (default is 1.0)
ds5 : list or numpy array or recarray
ds5 : list or numpy array or recarray or pandas dataframe
Each FHB flwrat cell (dataset 5) is defined through definition of
layer(int), row(int), column(int), iaux(int), flwrat[nbdtime](float).
There should be nflw entries. (default is None)
Expand All @@ -81,7 +82,7 @@ class ModflowFhb(Package):
cnstm7 : float
A constant multiplier for data list sbhedt. (default is 1.0)
ds7 : list or numpy array or recarray
ds7 : list or numpy array or recarray or pandas dataframe
Each FHB sbhed cell (dataset 7) is defined through definition of
layer(int), row(int), column(int), iaux(int), sbhed[nbdtime](float).
There should be nhed entries. (default is None)
Expand Down Expand Up @@ -211,6 +212,8 @@ def __init__(
raise TypeError(msg)
elif isinstance(ds5, list):
ds5 = np.array(ds5)
elif isinstance(ds5, pd.DataFrame):
ds5 = ds5.to_records(index=False)
# convert numpy array to a recarray
if ds5.dtype != dtype:
ds5 = np.core.records.fromarrays(ds5.transpose(), dtype=dtype)
Expand All @@ -228,6 +231,8 @@ def __init__(
raise TypeError(msg)
elif isinstance(ds7, list):
ds7 = np.array(ds7)
elif isinstance(ds7, pd.DataFrame):
ds7 = ds7.to_records(index=False)
# convert numpy array to a recarray
if ds7.dtype != dtype:
ds7 = np.core.records.fromarrays(ds7.transpose(), dtype=dtype)
Expand Down
5 changes: 4 additions & 1 deletion flopy/modflow/mfgage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os

import numpy as np
import pandas as pd

from ..pakbase import Package
from ..utils import read_fixed_var, write_fixed_var
Expand All @@ -27,7 +28,7 @@ class ModflowGage(Package):
this package will be added.
numgage : int
The total number of gages included in the gage file (default is 0).
gage_data : list or numpy array
gage_data : list or numpy array or recarray or pandas dataframe
data for dataset 2a and 2b in the gage package. If a list is provided
then the list includes 2 to 3 entries (LAKE UNIT [OUTTYPE]) for each
LAK Package entry and 4 entries (GAGESEG GAGERCH UNIT OUTTYPE) for
Expand Down Expand Up @@ -132,6 +133,8 @@ def __init__(
gage_data = np.core.records.fromarrays(
gage_data.transpose(), dtype=dtype
)
elif isinstance(gage_data, pd.DataFrame):
gage_data = gage_data.to_records(index=False)
elif isinstance(gage_data, list):
d = ModflowGage.get_empty(ncells=numgage)
for n in range(len(gage_data)):
Expand Down
3 changes: 1 addition & 2 deletions flopy/modflow/mfghb.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ class ModflowGhb(Package):
A flag that is used to determine if cell-by-cell budget data should be
saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
(default is 0).
stress_period_data : list of boundaries, recarray of boundaries or,
dictionary of boundaries.
stress_period_data : list, recarray, dataframe or dictionary of boundaries.
Each ghb cell is defined through definition of
layer(int), row(int), column(int), stage(float), conductance(float)
Expand Down
5 changes: 4 additions & 1 deletion flopy/modflow/mfhyd.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"""
import numpy as np
import pandas as pd

from ..pakbase import Package
from ..utils.recarray_utils import create_empty_recarray
Expand All @@ -31,7 +32,7 @@ class ModflowHyd(Package):
is a user-specified value that is output if a value cannot be computed
at a hydrograph location. For example, the cell in which the hydrograph
is located may be a no-flow cell. (default is -999.)
obsdata : list of lists, numpy array, or numpy recarray (nhyd, 7)
obsdata : list of lists, numpy array or recarray, or pandas dataframe (nhyd, 7)
Each row of obsdata includes data defining pckg (3 character string),
arr (2 character string), intyp (1 character string) klay (int),
xl (float), yl (float), hydlbl (14 character string) for each
Expand Down Expand Up @@ -158,6 +159,8 @@ def __init__(

dtype = ModflowHyd.get_default_dtype()
obs = ModflowHyd.get_empty(nhyd)
if isinstance(obsdata, pd.DataFrame):
obsdata = obsdata.to_records(index=False)
if isinstance(obsdata, list):
if len(obsdata) != nhyd:
raise RuntimeError(
Expand Down
Loading

0 comments on commit be104c4

Please sign in to comment.