From 095ec265357e8de6cf7940bc44343260fc3dc6ec Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 23 Jan 2024 19:33:26 -0500 Subject: [PATCH] [Feature] Better translate test (#39) (#47) Translate test: small improvements - Parametrize perturbation upon failure - Refactor error folder to be `pwd` based - Fix GPU translate unable to dump error `nc` - Fix precision mismatch on translate test - Update README.md Test fix: - Orchestrate YPPM for translate purposes Misc: - Fix bad logger formatting on DaCeProgress --- dsl/pace/dsl/dace/utils.py | 4 +-- fv3core/pace/fv3core/stencils/yppm.py | 4 ++- .../savepoint/translate/translate_fvtp2d.py | 5 ++-- .../savepoint/translate/translate_yppm.py | 5 +++- stencils/pace/stencils/testing/README.md | 10 +++++++ stencils/pace/stencils/testing/grid.py | 3 +- .../pace/stencils/testing/test_translate.py | 28 +++++++++++++------ 7 files changed, 43 insertions(+), 16 deletions(-) diff --git a/dsl/pace/dsl/dace/utils.py b/dsl/pace/dsl/dace/utils.py index c10ad6ec..28d8abe5 100644 --- a/dsl/pace/dsl/dace/utils.py +++ b/dsl/pace/dsl/dace/utils.py @@ -31,12 +31,12 @@ def default_prefix(cls, config: DaceConfig) -> str: return f"[{config.get_orchestrate()}]" def __enter__(self): - pace_log.debug(self.prefix, f"{self.label}...") + pace_log.debug(f"{self.prefix} {self.label}...") self.start = time.time() def __exit__(self, _type, _val, _traceback): elapsed = time.time() - self.start - pace_log.debug(self.prefix, f"{self.label}...{elapsed}s.") + pace_log.debug(f"{self.prefix} {self.label}...{elapsed}s.") def _is_ref(sd: dace.sdfg.SDFG, aname: str): diff --git a/fv3core/pace/fv3core/stencils/yppm.py b/fv3core/pace/fv3core/stencils/yppm.py index 69389e2b..53dba216 100644 --- a/fv3core/pace/fv3core/stencils/yppm.py +++ b/fv3core/pace/fv3core/stencils/yppm.py @@ -9,6 +9,7 @@ region, ) +from pace.dsl.dace.orchestration import orchestrate from pace.dsl.stencil import StencilFactory from pace.dsl.typing import FloatField, FloatFieldIJ, Index3D 
from pace.fv3core.stencils import ppm @@ -295,7 +296,7 @@ def compute_y_flux( class YPiecewiseParabolic: """ - Fortran name is xppm + Fortran name is yppm """ def __init__( @@ -307,6 +308,7 @@ def __init__( origin: Index3D, domain: Index3D, ): + orchestrate(obj=self, config=stencil_factory.config.dace_config) # Arguments come from: # namelist.grid_type # grid.dya diff --git a/fv3core/tests/savepoint/translate/translate_fvtp2d.py b/fv3core/tests/savepoint/translate/translate_fvtp2d.py index a8315e0b..b807254d 100644 --- a/fv3core/tests/savepoint/translate/translate_fvtp2d.py +++ b/fv3core/tests/savepoint/translate/translate_fvtp2d.py @@ -1,6 +1,7 @@ import pace.dsl import pace.dsl.gt4py_utils as utils import pace.util +from pace.dsl.typing import Float from pace.fv3core.stencils.fvtp2d import FiniteVolumeTransport from pace.fv3core.testing import TranslateDycoreFortranData2Py @@ -51,11 +52,11 @@ def compute_from_storage(self, inputs): backend=self.stencil_factory.backend, ) nord_col = self.grid.quantity_factory.zeros( - dims=[pace.util.Z_DIM], units="unknown" + dims=[pace.util.Z_DIM], units="unknown", dtype=Float ) nord_col.data[:] = nord_col.np.asarray(inputs.pop("nord")) damp_c = self.grid.quantity_factory.zeros( - dims=[pace.util.Z_DIM], units="unknown" + dims=[pace.util.Z_DIM], units="unknown", dtype=Float ) damp_c.data[:] = damp_c.np.asarray(inputs.pop("damp_c")) for optional_arg in ["mass"]: diff --git a/fv3core/tests/savepoint/translate/translate_yppm.py b/fv3core/tests/savepoint/translate/translate_yppm.py index 4fc49c9c..175be9bb 100644 --- a/fv3core/tests/savepoint/translate/translate_yppm.py +++ b/fv3core/tests/savepoint/translate/translate_yppm.py @@ -1,6 +1,7 @@ import pace.dsl import pace.dsl.gt4py_utils as utils import pace.util +from pace.dsl.typing import Float from pace.fv3core.stencils import yppm from pace.fv3core.testing import TranslateDycoreFortranData2Py from pace.stencils.testing import TranslateGrid @@ -40,7 +41,9 @@ def 
process_inputs(self, inputs): self.ivars(inputs) self.make_storage_data_input_vars(inputs) inputs["flux"] = utils.make_storage_from_shape( - inputs["q"].shape, backend=self.stencil_factory.backend + inputs["q"].shape, + backend=self.stencil_factory.backend, + dtype=Float, ) def compute(self, inputs): diff --git a/stencils/pace/stencils/testing/README.md b/stencils/pace/stencils/testing/README.md index a38655d0..f8b0f04f 100644 --- a/stencils/pace/stencils/testing/README.md +++ b/stencils/pace/stencils/testing/README.md @@ -7,6 +7,7 @@ First, make sure you have followed the instruction in the top level [README](../ The unit and regression tests of pace require data generated from the Fortran reference implementation which has to be downloaded from a Google Cloud Platform storage bucket. Since the bucket is setup as "requester pays", you need a valid GCP account to download the test data. First, make sure you have configured the authentication with user credientials and configured Docker with the following commands: + ```shell gcloud auth login gcloud auth configure-docker @@ -74,3 +75,12 @@ DEV=y make savepoint_tests_mpi DEV=y make physics_savepoint_tests DEV=y make physics_savepoint_tests_mpi ``` + +## Test failure + +Tests are run for each gridpoint of the domain, unless the Translate class for the test specifically restricts it. +Upon failure, the test will drop a `netCDF` file in a `./.translate-errors` directory, named `translate-TestCase(-Rank).nc`, containing input, computed output, reference and errors. + +## Environment variables + +- `PACE_TEST_N_THRESHOLD_SAMPLES`: Upon failure the system will try to perturb the output in an attempt to check for numerical instability. This means re-running the test for N samples. Default is `10`; `0` or less turns this feature off. 
diff --git a/stencils/pace/stencils/testing/grid.py b/stencils/pace/stencils/testing/grid.py index 4cf623b1..23d25882 100644 --- a/stencils/pace/stencils/testing/grid.py +++ b/stencils/pace/stencils/testing/grid.py @@ -6,6 +6,7 @@ import pace.util from pace.dsl import gt4py_utils as utils from pace.dsl.stencil import GridIndexing +from pace.dsl.typing import Float from pace.util.grid import ( AngleGridData, ContravariantGridData, @@ -504,7 +505,7 @@ def grid_data(self) -> "GridData": data = getattr(self, name) assert data is not None - quantity = self.quantity_factory.zeros(dims=dims, units=units) + quantity = self.quantity_factory.zeros(dims=dims, units=units, dtype=Float) if len(quantity.shape) == 3: quantity.data[:] = data[:, :, : quantity.shape[2]] elif len(quantity.shape) == 2: diff --git a/stencils/pace/stencils/testing/test_translate.py b/stencils/pace/stencils/testing/test_translate.py index 0d0141d5..796f30c1 100644 --- a/stencils/pace/stencils/testing/test_translate.py +++ b/stencils/pace/stencils/testing/test_translate.py @@ -19,7 +19,7 @@ # this only matters for manually-added print statements np.set_printoptions(threshold=4096) -OUTDIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output") +OUTDIR = "./.translate-errors" GPU_MAX_ERR = 1e-10 GPU_NEAR_ZERO = 1e-15 @@ -171,21 +171,23 @@ def process_override(threshold_overrides, testobj, test_name, backend): ) -N_THRESHOLD_SAMPLES = 10 +N_THRESHOLD_SAMPLES = int(os.getenv("PACE_TEST_N_THRESHOLD_SAMPLES", 10)) def get_thresholds(testobj, input_data): - return _get_thresholds(testobj.compute, input_data) + _get_thresholds(testobj.compute, input_data) def get_thresholds_parallel(testobj, input_data, communicator): def compute(input): return testobj.compute_parallel(input, communicator) - return _get_thresholds(compute, input_data) + _get_thresholds(compute, input_data) -def _get_thresholds(compute_function, input_data): +def _get_thresholds(compute_function, input_data) -> None: + if 
N_THRESHOLD_SAMPLES <= 0: + return output_list = [] for _ in range(N_THRESHOLD_SAMPLES): input = copy.deepcopy(input_data) @@ -289,10 +291,14 @@ def test_sequential_savepoint( ref_data_out[varname] = [ref_data] if len(failing_names) > 0: get_thresholds(case.testobj, input_data=original_input_data) - out_filename = os.path.join(OUTDIR, f"{case.savepoint_name}.nc") + os.makedirs(OUTDIR, exist_ok=True) + out_filename = os.path.join(OUTDIR, f"translate-{case.savepoint_name}.nc") + input_data_on_host = {} + for key, _input in input_data.items(): + input_data_on_host[key] = gt_utils.asarray(_input) save_netcdf( case.testobj, - [input_data], + [input_data_on_host], [output], ref_data_out, failing_names, @@ -420,13 +426,17 @@ def test_parallel_savepoint( ) passing_names.append(failing_names.pop()) if len(failing_names) > 0: + os.makedirs(OUTDIR, exist_ok=True) out_filename = os.path.join( - OUTDIR, f"{case.savepoint_name}-{case.grid.rank}.nc" + OUTDIR, f"translate-{case.savepoint_name}-{case.grid.rank}.nc" ) try: + input_data_on_host = {} + for key, _input in input_data.items(): + input_data_on_host[key] = gt_utils.asarray(_input) save_netcdf( case.testobj, - [input_data], + [input_data_on_host], [output], ref_data, failing_names,