[Feature] Better translate test (#39) (#47)
Translate test: small improvements

- Parametrize perturbation upon failure
- Refactor error folder to be `pwd` based
- Fix GPU translate being unable to dump the error `nc` file
- Fix mismatched precision in the translate test
- Update README.md

Test fix:
- Orchestrate YPPM for translate purposes

Misc:
- Fix bad logger formatting on DaCeProgress
FlorianDeconinck authored Jan 24, 2024
1 parent 735e972 commit 095ec26
Showing 7 changed files with 43 additions and 16 deletions.
4 changes: 2 additions & 2 deletions dsl/pace/dsl/dace/utils.py
@@ -31,12 +31,12 @@ def default_prefix(cls, config: DaceConfig) -> str:
         return f"[{config.get_orchestrate()}]"

     def __enter__(self):
-        pace_log.debug(self.prefix, f"{self.label}...")
+        pace_log.debug(f"{self.prefix} {self.label}...")
         self.start = time.time()

     def __exit__(self, _type, _val, _traceback):
         elapsed = time.time() - self.start
-        pace_log.debug(self.prefix, f"{self.label}...{elapsed}s.")
+        pace_log.debug(f"{self.prefix} {self.label}...{elapsed}s.")


 def _is_ref(sd: dace.sdfg.SDFG, aname: str):
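The two-argument `debug` call removed above misbehaves because the standard library `logging` API treats extra positional arguments as lazy %-format arguments for the message. A minimal sketch, assuming `pace_log` is a plain `logging.Logger` (prefix and label values are made up):

```python
import logging

logging.basicConfig(level=logging.DEBUG)
pace_log = logging.getLogger("pace")  # stand-in for the real pace_log

prefix, label = "[dace:Orchestrated]", "Lowering"

# Old call: the f-string is passed as a %-format argument, but `prefix`
# contains no % placeholders, so formatting fails at emit time and logging
# prints "--- Logging error ---" to stderr instead of the intended message.
pace_log.debug(prefix, f"{label}...")

# Fixed call: a single, fully-formatted message string.
pace_log.debug(f"{prefix} {label}...")
```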
4 changes: 3 additions & 1 deletion fv3core/pace/fv3core/stencils/yppm.py
@@ -9,6 +9,7 @@
     region,
 )

+from pace.dsl.dace.orchestration import orchestrate
 from pace.dsl.stencil import StencilFactory
 from pace.dsl.typing import FloatField, FloatFieldIJ, Index3D
 from pace.fv3core.stencils import ppm
@@ -295,7 +296,7 @@ def compute_y_flux(

 class YPiecewiseParabolic:
     """
-    Fortran name is xppm
+    Fortran name is yppm
     """

     def __init__(
@@ -307,6 +308,7 @@ def __init__(
         origin: Index3D,
         domain: Index3D,
     ):
+        orchestrate(obj=self, config=stencil_factory.config.dace_config)
         # Arguments come from:
         # namelist.grid_type
         # grid.dya
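For context on the "Orchestrate YPPM" item: in pace, calling `orchestrate` in a constructor registers the object so that, when a DaCe-orchestrated backend is active, selected methods (by default `__call__`) are captured and compiled instead of executed eagerly; with other backends the call leaves the object unchanged. A sketch of the pattern with a hypothetical component class (only the `orchestrate(...)` call mirrors the diff):

```python
from pace.dsl.dace.orchestration import orchestrate
from pace.dsl.stencil import StencilFactory


class HypotheticalComponent:
    def __init__(self, stencil_factory: StencilFactory):
        # Register self for orchestration; under a dace:* backend, __call__
        # below is traced and compiled into a single SDFG on first use.
        orchestrate(obj=self, config=stencil_factory.config.dace_config)

    def __call__(self, q, courant, flux):
        ...  # stencil calls here are captured by DaCe when orchestrated
```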
5 changes: 3 additions & 2 deletions fv3core/tests/savepoint/translate/translate_fvtp2d.py
@@ -1,6 +1,7 @@
 import pace.dsl
 import pace.dsl.gt4py_utils as utils
 import pace.util
+from pace.dsl.typing import Float
 from pace.fv3core.stencils.fvtp2d import FiniteVolumeTransport
 from pace.fv3core.testing import TranslateDycoreFortranData2Py

@@ -51,11 +52,11 @@ def compute_from_storage(self, inputs):
             backend=self.stencil_factory.backend,
         )
         nord_col = self.grid.quantity_factory.zeros(
-            dims=[pace.util.Z_DIM], units="unknown"
+            dims=[pace.util.Z_DIM], units="unknown", dtype=Float
         )
         nord_col.data[:] = nord_col.np.asarray(inputs.pop("nord"))
         damp_c = self.grid.quantity_factory.zeros(
-            dims=[pace.util.Z_DIM], units="unknown"
+            dims=[pace.util.Z_DIM], units="unknown", dtype=Float
         )
         damp_c.data[:] = damp_c.np.asarray(inputs.pop("damp_c"))
         for optional_arg in ["mass"]:
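The repeated `dtype=Float` additions in this commit are the "mismatched precision" fix: `pace.dsl.typing.Float` resolves to the build's floating-point precision, while `zeros(...)` without an explicit dtype can default to `float64`. A small NumPy sketch of the failure class, assuming a single-precision build where `Float` is `np.float32`:

```python
import numpy as np

Float = np.float32  # assumption: what pace.dsl.typing.Float resolves to here

computed = np.zeros(3, dtype=Float)  # matches what the stencil computes with
reference = np.zeros(3)              # defaults to float64 without dtype=

computed[:] = 0.1
reference[:] = 0.1

print(computed.dtype, reference.dtype)           # float32 float64
print(float(computed[0]) - float(reference[0]))  # ~1.49e-09, not 0.0
```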
5 changes: 4 additions & 1 deletion fv3core/tests/savepoint/translate/translate_yppm.py
@@ -1,6 +1,7 @@
 import pace.dsl
 import pace.dsl.gt4py_utils as utils
 import pace.util
+from pace.dsl.typing import Float
 from pace.fv3core.stencils import yppm
 from pace.fv3core.testing import TranslateDycoreFortranData2Py
 from pace.stencils.testing import TranslateGrid
@@ -40,7 +41,9 @@ def process_inputs(self, inputs):
         self.ivars(inputs)
         self.make_storage_data_input_vars(inputs)
         inputs["flux"] = utils.make_storage_from_shape(
-            inputs["q"].shape, backend=self.stencil_factory.backend
+            inputs["q"].shape,
+            backend=self.stencil_factory.backend,
+            dtype=Float,
         )

     def compute(self, inputs):
10 changes: 10 additions & 0 deletions stencils/pace/stencils/testing/README.md
@@ -7,6 +7,7 @@ First, make sure you have followed the instruction in the top level [README](../
 The unit and regression tests of pace require data generated from the Fortran reference implementation, which has to be downloaded from a Google Cloud Platform storage bucket. Since the bucket is set up as "requester pays", you need a valid GCP account to download the test data.

 First, make sure you have configured the authentication with user credentials and configured Docker with the following commands:
+
 ```shell
 gcloud auth login
 gcloud auth configure-docker
@@ -74,3 +75,12 @@ DEV=y make savepoint_tests_mpi
 DEV=y make physics_savepoint_tests
 DEV=y make physics_savepoint_tests_mpi
 ```
+
+## Test failure
+
+Tests run for each grid point of the domain, unless the Translate class for the test specifically restricts it.
+Upon failure, the test will drop a `netCDF` file named `translate-TestCase(-Rank).nc` into a `./.translate-errors` directory, containing the input, computed output, reference data, and errors.
+
+## Environment variables
+
+- `PACE_TEST_N_THRESHOLD_SAMPLES`: upon failure, the system will try to perturb the output in an attempt to check for numerical instability, re-running the test for N samples. Defaults to `10`; `0` or less turns this feature off.
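As a usage sketch (assuming the variable propagates through the `make` targets shown above):

```shell
# Turn the perturbation re-runs off entirely (0 or less disables them)
PACE_TEST_N_THRESHOLD_SAMPLES=0 DEV=y make savepoint_tests_mpi

# Take more samples when chasing a suspected numerical instability
PACE_TEST_N_THRESHOLD_SAMPLES=50 DEV=y make savepoint_tests_mpi
```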
3 changes: 2 additions & 1 deletion stencils/pace/stencils/testing/grid.py
@@ -6,6 +6,7 @@
 import pace.util
 from pace.dsl import gt4py_utils as utils
 from pace.dsl.stencil import GridIndexing
+from pace.dsl.typing import Float
 from pace.util.grid import (
     AngleGridData,
     ContravariantGridData,
@@ -504,7 +505,7 @@ def grid_data(self) -> "GridData":
             data = getattr(self, name)
             assert data is not None

-            quantity = self.quantity_factory.zeros(dims=dims, units=units)
+            quantity = self.quantity_factory.zeros(dims=dims, units=units, dtype=Float)
             if len(quantity.shape) == 3:
                 quantity.data[:] = data[:, :, : quantity.shape[2]]
             elif len(quantity.shape) == 2:
28 changes: 19 additions & 9 deletions stencils/pace/stencils/testing/test_translate.py
@@ -19,7 +19,7 @@
 # this only matters for manually-added print statements
 np.set_printoptions(threshold=4096)

-OUTDIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output")
+OUTDIR = "./.translate-errors"
 GPU_MAX_ERR = 1e-10
 GPU_NEAR_ZERO = 1e-15

@@ -171,21 +171,23 @@ def process_override(threshold_overrides, testobj, test_name, backend):
     )


-N_THRESHOLD_SAMPLES = 10
+N_THRESHOLD_SAMPLES = int(os.getenv("PACE_TEST_N_THRESHOLD_SAMPLES", 10))


 def get_thresholds(testobj, input_data):
-    return _get_thresholds(testobj.compute, input_data)
+    _get_thresholds(testobj.compute, input_data)


 def get_thresholds_parallel(testobj, input_data, communicator):
     def compute(input):
         return testobj.compute_parallel(input, communicator)

-    return _get_thresholds(compute, input_data)
+    _get_thresholds(compute, input_data)


-def _get_thresholds(compute_function, input_data):
+def _get_thresholds(compute_function, input_data) -> None:
+    if N_THRESHOLD_SAMPLES <= 0:
+        return
     output_list = []
     for _ in range(N_THRESHOLD_SAMPLES):
         input = copy.deepcopy(input_data)
@@ -289,10 +291,14 @@ def test_sequential_savepoint(
             ref_data_out[varname] = [ref_data]
     if len(failing_names) > 0:
         get_thresholds(case.testobj, input_data=original_input_data)
-        out_filename = os.path.join(OUTDIR, f"{case.savepoint_name}.nc")
+        os.makedirs(OUTDIR, exist_ok=True)
+        out_filename = os.path.join(OUTDIR, f"translate-{case.savepoint_name}.nc")
+        input_data_on_host = {}
+        for key, _input in input_data.items():
+            input_data_on_host[key] = gt_utils.asarray(_input)
         save_netcdf(
             case.testobj,
-            [input_data],
+            [input_data_on_host],
             [output],
             ref_data_out,
             failing_names,
@@ -420,13 +426,17 @@ def test_parallel_savepoint(
         )
         passing_names.append(failing_names.pop())
     if len(failing_names) > 0:
+        os.makedirs(OUTDIR, exist_ok=True)
         out_filename = os.path.join(
-            OUTDIR, f"{case.savepoint_name}-{case.grid.rank}.nc"
+            OUTDIR, f"translate-{case.savepoint_name}-{case.grid.rank}.nc"
         )
         try:
+            input_data_on_host = {}
+            for key, _input in input_data.items():
+                input_data_on_host[key] = gt_utils.asarray(_input)
             save_netcdf(
                 case.testobj,
-                [input_data],
+                [input_data_on_host],
                 [output],
                 ref_data,
                 failing_names,
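The `input_data_on_host` copies above are what make the GPU `nc` dump work: netCDF writers need host (NumPy) arrays, while GPU backends hold device (CuPy) buffers. A sketch of the idea, with a hypothetical `to_host` helper standing in for `gt_utils.asarray` (whose exact signature may differ):

```python
import numpy as np


def to_host(data):
    """Move a possibly device-resident array to host memory."""
    try:
        import cupy as cp  # only present on GPU installs

        if isinstance(data, cp.ndarray):
            return cp.asnumpy(data)  # explicit device-to-host copy
    except ImportError:
        pass
    return np.asarray(data)


# Stand-in inputs; on a GPU backend these would be cupy arrays.
input_data = {"q": np.ones((4, 4, 3))}
input_data_on_host = {key: to_host(value) for key, value in input_data.items()}
```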
