[NASA] [Feature] Better translate test (#39) #47

Merged: 1 commit, Jan 24, 2024
`dsl/pace/dsl/dace/utils.py` (4 changes: 2 additions & 2 deletions)

@@ -31,12 +31,12 @@ def default_prefix(cls, config: DaceConfig) -> str:
         return f"[{config.get_orchestrate()}]"

     def __enter__(self):
-        pace_log.debug(self.prefix, f"{self.label}...")
+        pace_log.debug(f"{self.prefix} {self.label}...")
         self.start = time.time()

     def __exit__(self, _type, _val, _traceback):
         elapsed = time.time() - self.start
-        pace_log.debug(self.prefix, f"{self.label}...{elapsed}s.")
+        pace_log.debug(f"{self.prefix} {self.label}...{elapsed}s.")


 def _is_ref(sd: dace.sdfg.SDFG, aname: str):
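A note on the logging fix above: standard `logging.Logger` methods treat extra positional arguments as %-style formatting values, so the old two-argument call never produced the intended prefix-plus-label message. A minimal sketch with the standard library, assuming `pace_log` behaves like a `logging.Logger` (the prefix and label values are illustrative, not from the PR):

```python
import logging

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("pace")

prefix, label = "[DaCeOrchestration.BuildAndRun]", "Simplify"  # illustrative values

# Old pattern: the prefix string is the message and the label becomes a
# %-format argument; with no %s placeholder in the message, formatting
# fails and logging reports an error instead of the intended line.
log.debug(prefix, f"{label}...")

# New pattern: one fully formatted message string.
log.debug(f"{prefix} {label}...")
```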
`fv3core/pace/fv3core/stencils/yppm.py` (4 changes: 3 additions & 1 deletion)

@@ -9,6 +9,7 @@
     region,
 )

+from pace.dsl.dace.orchestration import orchestrate
 from pace.dsl.stencil import StencilFactory
 from pace.dsl.typing import FloatField, FloatFieldIJ, Index3D
 from pace.fv3core.stencils import ppm
@@ -295,7 +296,7 @@ def compute_y_flux(

 class YPiecewiseParabolic:
     """
-    Fortran name is xppm
+    Fortran name is yppm
     """

     def __init__(
@@ -307,6 +308,7 @@ def __init__(
         origin: Index3D,
         domain: Index3D,
     ):
+        orchestrate(obj=self, config=stencil_factory.config.dace_config)
         # Arguments come from:
         # namelist.grid_type
         # grid.dya
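For context on the `orchestrate` call added in `__init__`: as we read the API, it registers the object with the DaCe configuration so that, when orchestration is enabled, the object's `__call__` is captured and compiled as a single DaCe program; with orchestration off it is effectively a no-op. A hypothetical usage sketch (the wrapper class and its methods are illustrative; only the `orchestrate(...)` line comes from this PR):

```python
from pace.dsl.dace.orchestration import orchestrate


class SomeStencilWrapper:  # hypothetical class, for illustration only
    def __init__(self, stencil_factory):
        # Register this object for DaCe orchestration; when the backend is
        # orchestrated, __call__ is traced and compiled as one program.
        orchestrate(obj=self, config=stencil_factory.config.dace_config)

    def __call__(self, q, c, flux):
        ...  # stencil calls issued here are captured by orchestration
```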
`fv3core/tests/savepoint/translate/translate_fvtp2d.py` (5 changes: 3 additions & 2 deletions)

@@ -1,6 +1,7 @@
 import pace.dsl
 import pace.dsl.gt4py_utils as utils
 import pace.util
+from pace.dsl.typing import Float
 from pace.fv3core.stencils.fvtp2d import FiniteVolumeTransport
 from pace.fv3core.testing import TranslateDycoreFortranData2Py

@@ -51,11 +52,11 @@ def compute_from_storage(self, inputs):
             backend=self.stencil_factory.backend,
         )
         nord_col = self.grid.quantity_factory.zeros(
-            dims=[pace.util.Z_DIM], units="unknown"
+            dims=[pace.util.Z_DIM], units="unknown", dtype=Float
         )
         nord_col.data[:] = nord_col.np.asarray(inputs.pop("nord"))
         damp_c = self.grid.quantity_factory.zeros(
-            dims=[pace.util.Z_DIM], units="unknown"
+            dims=[pace.util.Z_DIM], units="unknown", dtype=Float
         )
         damp_c.data[:] = damp_c.np.asarray(inputs.pop("damp_c"))
         for optional_arg in ["mass"]:
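The `dtype=Float` additions here (and the matching ones in `translate_yppm.py` and `grid.py` below) pin test allocations to the project-wide float type instead of whatever default the factory would otherwise pick. A minimal sketch of the idea, with `Float` stood in by a concrete NumPy type (in Pace it is selected at import time; double precision is assumed here):

```python
import numpy as np

# Stand-in for pace.dsl.typing.Float: one project-wide scalar type.
# Assumed float64 here; a single-precision build would use float32.
Float = np.float64

# Passing dtype explicitly keeps test storages at the model's precision
# rather than the allocator's own default.
nord_col = np.zeros((80,), dtype=Float)
assert nord_col.dtype == np.dtype(Float)
```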
`fv3core/tests/savepoint/translate/translate_yppm.py` (5 changes: 4 additions & 1 deletion)

@@ -1,6 +1,7 @@
 import pace.dsl
 import pace.dsl.gt4py_utils as utils
 import pace.util
+from pace.dsl.typing import Float
 from pace.fv3core.stencils import yppm
 from pace.fv3core.testing import TranslateDycoreFortranData2Py
 from pace.stencils.testing import TranslateGrid
@@ -40,7 +41,9 @@ def process_inputs(self, inputs):
         self.ivars(inputs)
         self.make_storage_data_input_vars(inputs)
         inputs["flux"] = utils.make_storage_from_shape(
-            inputs["q"].shape, backend=self.stencil_factory.backend
+            inputs["q"].shape,
+            backend=self.stencil_factory.backend,
+            dtype=Float,
         )

     def compute(self, inputs):
`stencils/pace/stencils/testing/README.md` (10 changes: 10 additions & 0 deletions)

@@ -7,6 +7,7 @@ First, make sure you have followed the instruction in the top level [README](../
 The unit and regression tests of pace require data generated from the Fortran reference implementation, which has to be downloaded from a Google Cloud Platform storage bucket. Since the bucket is set up as "requester pays", you need a valid GCP account to download the test data.

 First, make sure you have configured authentication with user credentials and configured Docker with the following commands:
+
 ```shell
 gcloud auth login
 gcloud auth configure-docker
@@ -74,3 +75,12 @@ DEV=y make savepoint_tests_mpi
 DEV=y make physics_savepoint_tests
 DEV=y make physics_savepoint_tests_mpi
 ```
+
+## Test failure
+
+Tests run for each gridpoint of the domain, unless the Translate class for the test specifically restricts it.
+Upon failure, the test will drop a netCDF file named `translate-TestCase(-Rank).nc` into a `./.translate-errors` directory, containing the inputs, computed outputs, reference data, and errors.
+
+## Environment variables
+
+- `PACE_TEST_N_THRESHOLD_SAMPLES`: upon failure, the system will perturb the output to check for numerical instability, re-running the test for N samples. The default is `10`; `0` or less turns this feature off.
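As a usage sketch for the new variable: `test_translate.py` reads it once at import time with `int(os.getenv(...))`, so it can be set in the environment before the test run starts. The snippet below mirrors that parsing (the assertion is illustrative):

```python
import os

# Disable the instability re-sampling entirely for this run.
os.environ["PACE_TEST_N_THRESHOLD_SAMPLES"] = "0"

# Mirrors the parsing added in test_translate.py: the environment value
# (a string) or the integer default is coerced with int().
n_samples = int(os.getenv("PACE_TEST_N_THRESHOLD_SAMPLES", 10))
assert n_samples == 0
```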
`stencils/pace/stencils/testing/grid.py` (3 changes: 2 additions & 1 deletion)

@@ -6,6 +6,7 @@
 import pace.util
 from pace.dsl import gt4py_utils as utils
 from pace.dsl.stencil import GridIndexing
+from pace.dsl.typing import Float
 from pace.util.grid import (
     AngleGridData,
     ContravariantGridData,
@@ -504,7 +505,7 @@ def grid_data(self) -> "GridData":
             data = getattr(self, name)
             assert data is not None

-            quantity = self.quantity_factory.zeros(dims=dims, units=units)
+            quantity = self.quantity_factory.zeros(dims=dims, units=units, dtype=Float)
             if len(quantity.shape) == 3:
                 quantity.data[:] = data[:, :, : quantity.shape[2]]
             elif len(quantity.shape) == 2:
`stencils/pace/stencils/testing/test_translate.py` (28 changes: 19 additions & 9 deletions)

@@ -19,7 +19,7 @@
 # this only matters for manually-added print statements
 np.set_printoptions(threshold=4096)

-OUTDIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output")
+OUTDIR = "./.translate-errors"
 GPU_MAX_ERR = 1e-10
 GPU_NEAR_ZERO = 1e-15

@@ -171,21 +171,23 @@ def process_override(threshold_overrides, testobj, test_name, backend):
     )


-N_THRESHOLD_SAMPLES = 10
+N_THRESHOLD_SAMPLES = int(os.getenv("PACE_TEST_N_THRESHOLD_SAMPLES", 10))


 def get_thresholds(testobj, input_data):
-    return _get_thresholds(testobj.compute, input_data)
+    _get_thresholds(testobj.compute, input_data)


 def get_thresholds_parallel(testobj, input_data, communicator):
     def compute(input):
         return testobj.compute_parallel(input, communicator)

-    return _get_thresholds(compute, input_data)
+    _get_thresholds(compute, input_data)


-def _get_thresholds(compute_function, input_data):
+def _get_thresholds(compute_function, input_data) -> None:
+    if N_THRESHOLD_SAMPLES <= 0:
+        return
     output_list = []
     for _ in range(N_THRESHOLD_SAMPLES):
         input = copy.deepcopy(input_data)
@@ -289,10 +291,14 @@ def test_sequential_savepoint(
             ref_data_out[varname] = [ref_data]
     if len(failing_names) > 0:
         get_thresholds(case.testobj, input_data=original_input_data)
+        os.makedirs(OUTDIR, exist_ok=True)
+        out_filename = os.path.join(OUTDIR, f"translate-{case.savepoint_name}.nc")
-        out_filename = os.path.join(OUTDIR, f"{case.savepoint_name}.nc")
+        input_data_on_host = {}
+        for key, _input in input_data.items():
+            input_data_on_host[key] = gt_utils.asarray(_input)
         save_netcdf(
             case.testobj,
-            [input_data],
+            [input_data_on_host],
             [output],
             ref_data_out,
             failing_names,
@@ -420,13 +426,17 @@ def test_parallel_savepoint(
             )
         passing_names.append(failing_names.pop())
     if len(failing_names) > 0:
+        os.makedirs(OUTDIR, exist_ok=True)
         out_filename = os.path.join(
-            OUTDIR, f"{case.savepoint_name}-{case.grid.rank}.nc"
+            OUTDIR, f"translate-{case.savepoint_name}-{case.grid.rank}.nc"
         )
         try:
+            input_data_on_host = {}
+            for key, _input in input_data.items():
+                input_data_on_host[key] = gt_utils.asarray(_input)
             save_netcdf(
                 case.testobj,
-                [input_data],
+                [input_data_on_host],
                 [output],
                 ref_data,
                 failing_names,
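Two hunks above repeat one pattern: copying inputs to host before `save_netcdf`, since netCDF writers cannot consume GPU buffers directly. Below is a compact equivalent of the inserted loop, with a minimal stand-in for `gt_utils.asarray` (assumed to move device arrays to host and pass NumPy arrays through):

```python
import numpy as np


def asarray(data):
    # Minimal stand-in for gt_utils.asarray: .get() is the CuPy convention
    # for device-to-host copies; NumPy data passes through unchanged.
    return data.get() if hasattr(data, "get") else np.asarray(data)


input_data = {"q": np.zeros((3, 3)), "c": np.ones((3, 3))}
# Dict-comprehension equivalent of the loop added in the PR:
input_data_on_host = {key: asarray(arr) for key, arr in input_data.items()}
```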