From ead2f1bd36590c595f301cc073cf0a598fabfd2d Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 25 Sep 2023 19:32:38 -0700 Subject: [PATCH] Improve `subsample` across `Coreg` subclasses and pipelines (#436) * Add subsample to subclass init * Progress on subsampling in Coreg classes and pipelines * Finalize subsampling functionality * Add tests and minor fixes * Add test for get_subsample * Linting * Fix other tests * Get geoutils from git to check everything passes * Linting * Add exception in generate pip from conda script when pulling from repo directly * Change geoutils version required in advance * First commit for eriks comments * Finalize eriks comments * Linting --- .../scripts/generate_pip_deps_from_conda.py | 9 +- dev-environment.yml | 4 +- environment.yml | 4 +- requirements.txt | 2 +- tests/test_coreg/test_affine.py | 39 +-- tests/test_coreg/test_base.py | 178 +++++++---- tests/test_coreg/test_biascorr.py | 4 +- tests/test_examples.py | 10 +- tests/test_spatialstats.py | 6 +- xdem/_typing.py | 4 +- xdem/coreg/affine.py | 99 +++++-- xdem/coreg/base.py | 276 +++++++++++++----- xdem/coreg/biascorr.py | 111 +++++-- xdem/examples.py | 2 +- 14 files changed, 542 insertions(+), 206 deletions(-) diff --git a/.github/scripts/generate_pip_deps_from_conda.py b/.github/scripts/generate_pip_deps_from_conda.py index 844b80cd..2f71a0f6 100755 --- a/.github/scripts/generate_pip_deps_from_conda.py +++ b/.github/scripts/generate_pip_deps_from_conda.py @@ -98,7 +98,14 @@ def generate_pip_from_conda(conda_path: pathlib.Path, pip_path: pathlib.Path, co if conda_dep: pip_deps.append(conda_dep) elif isinstance(dep, dict) and len(dep) == 1 and "pip" in dep: - pip_deps.extend(dep["pip"]) + # If pulled directly from GitHub (temporary CI passing), + # such as git+https://github.com/GlacioHack/geoutils.git, + # rename to the package repo name + dep_pips = dep["pip"] + for dep_pip in dep_pips: + if "+" in dep_pip and dep_pip.split("+")[0] == "git": + dep_pip = dep_pip.split("/")[-1].split(".git")[0] + pip_deps.append(dep_pip) else: raise ValueError(f"Unexpected dependency {dep}") diff --git a/dev-environment.yml b/dev-environment.yml index d11fc67c..706f11d1 100644 --- a/dev-environment.yml +++ b/dev-environment.yml @@ -15,7 +15,7 @@ dependencies: - tqdm - scikit-image - scikit-gstat>=1.0 - - geoutils=0.0.13 + - geoutils=0.0.15 # Development-specific, to mirror manually in setup.cfg [options.extras_require]. - pip @@ -52,4 +52,4 @@ dependencies: - noisyopt # To run CI against latest GeoUtils -# - git+https://github.com/GlacioHack/GeoUtils.git +# - git+https://github.com/GlacioHack/geoutils.git diff --git a/environment.yml b/environment.yml index 507289c8..e8182b4d 100644 --- a/environment.yml +++ b/environment.yml @@ -15,8 +15,8 @@ dependencies: - tqdm - scikit-image - scikit-gstat>=1.0 - - geoutils=0.0.13 + - geoutils=0.0.15 - pip # - pip: -# - git+https://github.com/GlacioHack/GeoUtils.git +# - git+https://github.com/GlacioHack/geoutils.git diff --git a/requirements.txt b/requirements.txt index fb928fde..0176ab5e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ scipy tqdm scikit-image scikit-gstat>=1.0 -geoutils==0.0.13 +geoutils==0.0.15 pip setuptools>=42 setuptools_scm[toml]>=6.2 diff --git a/tests/test_coreg/test_affine.py b/tests/test_coreg/test_affine.py index cb9de107..04832ee1 100644 --- a/tests/test_coreg/test_affine.py +++ b/tests/test_coreg/test_affine.py @@ -139,16 +139,14 @@ def test_coreg_example(self, verbose: bool = False) -> None: # Run co-registration nuth_kaab = xdem.coreg.NuthKaab() - nuth_kaab.fit(self.ref, self.tba, inlier_mask=self.inlier_mask, verbose=verbose) + nuth_kaab.fit(self.ref, self.tba, inlier_mask=self.inlier_mask, verbose=verbose, random_state=42) # Check the output metadata is always the same - assert nuth_kaab._meta["offset_east_px"] == pytest.approx(-0.46255704521968716) - assert nuth_kaab._meta["offset_north_px"] == pytest.approx(-0.13618536563846081) - assert nuth_kaab._meta["vshift"] == pytest.approx(-1.9815309753424906) + assert nuth_kaab._meta["offset_east_px"] == pytest.approx(-0.45061858808956284) + assert nuth_kaab._meta["offset_north_px"] == pytest.approx(-0.14225262689582596) + assert nuth_kaab._meta["vshift"] == pytest.approx(-1.987523471566405) - def test_gradientdescending( - self, downsampling: int = 10000, samples: int = 5000, inlier_mask: bool = True, verbose: bool = False - ) -> None: + def test_gradientdescending(self, subsample: int = 10000, inlier_mask: bool = True, verbose: bool = False) -> None: """ Test the co-registration outputs performed on the example are always the same. This overlaps with the test in test_examples.py, but helps identify from where differences arise. @@ -159,9 +157,14 @@ def test_gradientdescending( inlier_mask = self.inlier_mask # Run co-registration - gds = xdem.coreg.GradientDescending(downsampling=downsampling) + gds = xdem.coreg.GradientDescending(subsample=subsample) gds.fit_pts( - self.ref.to_points().ds, self.tba, inlier_mask=inlier_mask, verbose=verbose, samples=samples, z_name="b1" + self.ref.to_points().ds, + self.tba, + inlier_mask=inlier_mask, + verbose=verbose, + subsample=subsample, + z_name="b1", ) assert gds._meta["offset_east_px"] == pytest.approx(-0.496000, rel=1e-1, abs=0.1) assert gds._meta["offset_north_px"] == pytest.approx(-0.1875, rel=1e-1, abs=0.1) @@ -170,7 +173,7 @@ def test_gradientdescending( @pytest.mark.parametrize("shift_px", [(1, 1), (2, 2)]) # type: ignore @pytest.mark.parametrize("coreg_class", [coreg.NuthKaab, coreg.GradientDescending, coreg.ICP]) # type: ignore @pytest.mark.parametrize("points_or_raster", ["raster", "points"]) - def test_coreg_example_shift(self, shift_px, coreg_class, points_or_raster, verbose=False, downsampling=5000): + def test_coreg_example_shift(self, shift_px, coreg_class, points_or_raster, verbose=False, subsample=5000): """ For comparison of coreg algorithms: Shift a ref_dem on purpose, e.g. shift_px = (1,1), and then applying coreg to shift it back. @@ -182,12 +185,12 @@ def test_coreg_example_shift(self, shift_px, coreg_class, points_or_raster, verb shifted_ref = self.ref.copy() shifted_ref.shift(shift_px[0] * res, shift_px[1] * res) - shifted_ref_points = shifted_ref.to_points(as_array=False, subset=downsampling, pixel_offset="center").ds + shifted_ref_points = shifted_ref.to_points(as_array=False, subset=subsample, pixel_offset="center").ds shifted_ref_points["E"] = shifted_ref_points.geometry.x shifted_ref_points["N"] = shifted_ref_points.geometry.y shifted_ref_points.rename(columns={"b1": "z"}, inplace=True) - kwargs = {} if coreg_class.__name__ != "GradientDescending" else {"downsampling": downsampling} + kwargs = {} if coreg_class.__name__ != "GradientDescending" else {"subsample": subsample} coreg_obj = coreg_class(**kwargs) @@ -265,20 +268,20 @@ def test_nuth_kaab(self) -> None: # Check that the z shift is close to the original vertical shift. assert abs((transformed_points[0, 2] - self.points[0, 2]) + vshift) < 0.1 - def test_deramping(self) -> None: + def test_tilt(self) -> None: warnings.simplefilter("error") # Try a 1st degree deramping. - deramp = coreg.Tilt() + tilt = coreg.Tilt() # Fit the data - deramp.fit(**self.fit_params) + tilt.fit(**self.fit_params) # Apply the deramping to a DEM - deramped_dem = deramp.apply(self.tba) + tilted_dem = tilt.apply(self.tba) # Get the periglacial offset after deramping - periglacial_offset = (self.ref - deramped_dem)[self.inlier_mask] + periglacial_offset = (self.ref - tilted_dem)[self.inlier_mask] # Get the periglacial offset before deramping pre_offset = (self.ref - self.tba)[self.inlier_mask] @@ -286,7 +289,7 @@ def test_deramping(self) -> None: assert np.abs(np.mean(periglacial_offset)) < np.abs(np.mean(pre_offset)) # Check that the mean periglacial offset is low - assert np.abs(np.mean(periglacial_offset)) < 1 + assert np.abs(np.mean(periglacial_offset)) < 0.01 def test_icp_opencv(self) -> None: warnings.simplefilter("error") diff --git a/tests/test_coreg/test_base.py b/tests/test_coreg/test_base.py index 5d75658b..b5c1427d 100644 --- a/tests/test_coreg/test_base.py +++ b/tests/test_coreg/test_base.py @@ -2,6 +2,7 @@ from __future__ import annotations +import inspect import re import warnings from typing import Any, Callable @@ -106,66 +107,143 @@ def test_ij_xy(self, i: int = 10, j: int = 20) -> None: assert i == pytest.approx(10) assert j == pytest.approx(20) - def test_subsample(self) -> None: - warnings.simplefilter("error") - - # Test subsampled vertical shift correction - vshift_sub = coreg.VerticalShift() - - # Fit the vertical shift using 50% of the unmasked data using a fraction - vshift_sub.fit(**self.fit_params, subsample=0.5) - # Do the same but specify the pixel count instead. - # They are not perfectly equal (np.count_nonzero(self.mask) // 2 would be exact) - # But this would just repeat the subsample code, so that makes little sense to test. - vshift_sub.fit(**self.fit_params, subsample=self.tba.data.size // 2) - - # Do full vertical shift corr to compare - vshift_full = coreg.VerticalShift() - vshift_full.fit(**self.fit_params) - - # Check that the estimated vertical shifts are similar - assert abs(vshift_sub._meta["vshift"] - vshift_full._meta["vshift"]) < 0.1 - - # Test NuthKaab with subsampling - nuthkaab_full = coreg.NuthKaab() - nuthkaab_sub = coreg.NuthKaab() + @pytest.mark.parametrize("subsample", [10, 10000, 0.5, 1]) # type: ignore + def test_get_subsample_on_valid_mask(self, subsample: float | int) -> None: + """Test the subsampling function called by all subclasses""" + + # Define a valid mask + width = height = 50 + np.random.seed(42) + valid_mask = np.random.randint(low=0, high=2, size=(width, height), dtype=bool) + + # Define a class with a subsample and random_state in the metadata + coreg = Coreg(meta={"subsample": subsample, "random_state": 42}) + subsample_mask = coreg._get_subsample_on_valid_mask(valid_mask=valid_mask) + + # Check that it returns a same-shaped array that is boolean + assert np.shape(valid_mask) == np.shape(subsample_mask) + assert subsample_mask.dtype == bool + # Check that the subsampled values are all within valid values + assert all(valid_mask[subsample_mask]) + # Check that the number of subsampled value is coherent, or the maximum possible + if subsample <= 1: + # If value lower than 1, fraction of valid pixels + subsample_val: float | int = int(subsample * np.count_nonzero(valid_mask)) + else: + # Otherwise the number of pixels + subsample_val = subsample + assert np.count_nonzero(subsample_mask) == min(subsample_val, np.count_nonzero(valid_mask)) - # Measure the start and stop time to get the duration - # start_time = time.time() - nuthkaab_full.fit(**self.fit_params) - # icp_full_duration = time.time() - start_time + # TODO: Activate NuthKaab once subsampling there is made consistent + all_coregs = [ + coreg.VerticalShift, + # coreg.NuthKaab, + coreg.ICP, + coreg.Deramp, + coreg.TerrainBias, + coreg.DirectionalBias, + ] - # Do the same with 50% subsampling - # start_time = time.time() - nuthkaab_sub.fit(**self.fit_params, subsample=0.5) - # icp_sub_duration = time.time() - start_time + @pytest.mark.parametrize("coreg", all_coregs) # type: ignore + def test_subsample(self, coreg: Callable) -> None: # type: ignore + warnings.simplefilter("error") - # Make sure that the subsampling increased performance - # Temporarily add a fallback assertion that if it's slower, it shouldn't be much slower (2021-05-17). - # This doesn't work with GitHub's CI, but it works locally. I'm disabling this for now (2021-05-20). - # assert icp_full_duration > icp_sub_duration or (abs(icp_full_duration - icp_sub_duration) < 1) + # Check that default value is set properly + coreg_full = coreg() + argspec = inspect.getfullargspec(coreg) + assert coreg_full._meta["subsample"] == argspec.defaults[argspec.args.index("subsample") - 1] # type: ignore - # Calculate the difference in the full vs. subsampled matrices - matrix_diff = np.abs(nuthkaab_full.to_matrix() - nuthkaab_sub.to_matrix()) - # Check that the x/y/z differences do not exceed 30cm - assert np.count_nonzero(matrix_diff > 0.5) == 0 + # But can be overridden during fit + coreg_full.fit(**self.fit_params, subsample=10000, random_state=42) + assert coreg_full._meta["subsample"] == 10000 - # Test subsampled deramping - deramp_sub = coreg.Tilt() + # Test subsampled vertical shift correction + coreg_sub = coreg(subsample=0.1) + assert coreg_sub._meta["subsample"] == 0.1 - # Fit the bias using 50% of the unmasked data using a fraction - deramp_sub.fit(**self.fit_params, subsample=0.5) + # Fit the vertical shift using 10% of the unmasked data using a fraction + coreg_sub.fit(**self.fit_params, random_state=42) # Do the same but specify the pixel count instead. # They are not perfectly equal (np.count_nonzero(self.mask) // 2 would be exact) # But this would just repeat the subsample code, so that makes little sense to test. - deramp_sub.fit(**self.fit_params, subsample=self.tba.data.size // 2) + coreg_sub = coreg(subsample=self.tba.data.size // 10) + assert coreg_sub._meta["subsample"] == self.tba.data.size // 10 + coreg_sub.fit(**self.fit_params, random_state=42) + + # Add a few performance checks + coreg_name = coreg.__name__ + if coreg_name == "VerticalShift": + # Check that the estimated vertical shifts are similar + assert abs(coreg_sub._meta["vshift"] - coreg_full._meta["vshift"]) < 0.1 + + elif coreg_name == "NuthKaab": + # Calculate the difference in the full vs. subsampled matrices + matrix_diff = np.abs(coreg_full.to_matrix() - coreg_sub.to_matrix()) + # Check that the x/y/z differences do not exceed 30cm + assert np.count_nonzero(matrix_diff > 0.5) == 0 + + elif coreg_name == "Tilt": + # Check that the estimated biases are similar + assert coreg_sub._meta["coefficients"] == pytest.approx(coreg_full._meta["coefficients"], rel=1e-1) + + def test_subsample__pipeline(self) -> None: + """Test that the subsample argument works as intended for pipelines""" + + # Check definition during instantiation + pipe = coreg.VerticalShift(subsample=200) + coreg.Deramp(subsample=5000) + + # Check the arguments are properly defined + assert pipe.pipeline[0]._meta["subsample"] == 200 + assert pipe.pipeline[1]._meta["subsample"] == 5000 + + # Check definition during fit + pipe = coreg.VerticalShift() + coreg.Deramp() + pipe.fit(**self.fit_params, subsample=1000) + assert pipe.pipeline[0]._meta["subsample"] == 1000 + assert pipe.pipeline[1]._meta["subsample"] == 1000 + + def test_subsample__errors(self) -> None: + """Check proper errors are raised when using the subsample argument""" + + # A warning should be raised when overriding with fit if non-default parameter was passed during instantiation + vshift = coreg.VerticalShift(subsample=100) + + with pytest.warns( + UserWarning, + match=re.escape( + "Subsample argument passed to fit() will override non-default " + "subsample value defined at instantiation. To silence this " + "warning: only define 'subsample' in either fit(subsample=...) " + "or instantiation e.g. VerticalShift(subsample=...)." + ), + ): + vshift.fit(**self.fit_params, subsample=1000) - # Do full bias corr to compare - deramp_full = coreg.Tilt() - deramp_full.fit(**self.fit_params) + # Same for a pipeline + pipe = coreg.VerticalShift(subsample=200) + coreg.Deramp() + with pytest.warns( + UserWarning, + match=re.escape( + "Subsample argument passed to fit() will override non-default " + "subsample values defined for individual steps of the pipeline. " + "To silence this warning: only define 'subsample' in either " + "fit(subsample=...) or instantiation e.g., VerticalShift(subsample=...)." + ), + ): + pipe.fit(**self.fit_params, subsample=1000) - # Check that the estimated biases are similar - assert deramp_sub._meta["coefficients"] == pytest.approx(deramp_full._meta["coefficients"], rel=1e-1) + # Same for a blockwise co-registration + block = coreg.BlockwiseCoreg(coreg.VerticalShift(subsample=200), subdivision=4) + with pytest.warns( + UserWarning, + match=re.escape( + "Subsample argument passed to fit() will override non-default subsample " + "values defined in the step within the blockwise method. To silence this " + "warning: only define 'subsample' in either fit(subsample=...) or " + "instantiation e.g., VerticalShift(subsample=...)." + ), + ): + block.fit(**self.fit_params, subsample=1000) def test_coreg_raster_and_ndarray_args(self) -> None: @@ -423,7 +501,7 @@ class TestCoregPipeline: inlier_mask=inlier_mask, transform=ref.transform, crs=ref.crs, - verbose=False, + verbose=True, ) # Create some 3D coordinates with Z coordinates being 0 to try the apply_pts functions. points = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [0, 0, 0, 0]], dtype="float64").T diff --git a/tests/test_coreg/test_biascorr.py b/tests/test_coreg/test_biascorr.py index f9538fe9..b7a7e6b8 100644 --- a/tests/test_coreg/test_biascorr.py +++ b/tests/test_coreg/test_biascorr.py @@ -39,7 +39,7 @@ class TestBiasCorr: reference_dem=ref, dem_to_be_aligned=tba, inlier_mask=inlier_mask, - verbose=False, + verbose=True, ) # Create some 3D coordinates with Z coordinates being 0 to try the apply_pts functions. points = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [0, 0, 0, 0]], dtype="float64").T @@ -144,7 +144,7 @@ def test_biascorr__errors(self) -> None: scipy.optimize.curve_fit, ], ) # type: ignore - def test_biascorr__fit_1d(self, fit_func, fit_optimizer) -> None: + def test_biascorr__fit_1d(self, fit_func, fit_optimizer, capsys) -> None: """Test the _fit_func and apply_func methods of BiasCorr for the fit case (called by all its subclasses).""" # Create a bias correction object diff --git a/tests/test_examples.py b/tests/test_examples.py index f6e028b4..67df7737 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -34,11 +34,11 @@ class TestExamples: ddem, np.array( [ - -4.669189453125000000e-02, - -7.413940429687500000e-01, - 1.499481201171875000e-01, - 1.095550537109375000e00, - -5.904846191406250000e00, + -0.22833252, + -0.82458496, + 0.18843079, + 1.0004578, + -5.755249, ], dtype=np.float32, ), diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py index 292d808b..6b8b8cef 100644 --- a/tests/test_spatialstats.py +++ b/tests/test_spatialstats.py @@ -494,7 +494,7 @@ def test_sample_multirange_variogram_default(self) -> None: # Check the variogram output is consistent for a random state df = xdem.spatialstats.sample_empirical_variogram(values=self.diff, subsample=10, random_state=42) - assert df["exp"][15] == pytest.approx(5.087792205810548, abs=1e-3) + assert df["exp"][15] == pytest.approx(5.046060943603516, abs=1e-3) assert df["lags"][15] == pytest.approx(5120) assert df["count"][15] == 5 # With a single run, no error can be estimated @@ -1286,7 +1286,7 @@ def test_patches_method_loop_quadrant(self) -> None: assert all(df.columns == ["nmad", "nb_indep_patches", "exact_areas", "areas"]) # Check the sampling is fixed for a random state - assert df["nmad"][0] == pytest.approx(1.8392861049330378, abs=1e-3) + assert df["nmad"][0] == pytest.approx(1.8530521253631773, abs=1e-3) assert df["nb_indep_patches"][0] == 100 assert df["exact_areas"][0] == pytest.approx(df["areas"][0], rel=0.2) @@ -1295,7 +1295,7 @@ def test_patches_method_loop_quadrant(self) -> None: # Check the sampling is always fixed for a random state assert df_full["tile"].values[0] == "8_16" - assert df_full["nanmean"].values[0] == pytest.approx(0.24885657130475025, abs=1e-3) + assert df_full["nanmean"].values[0] == pytest.approx(0.3085488928369729, abs=1e-3) # Check that all counts respect the default minimum percentage of 80% valid pixels assert all(df_full["count"].values > 0.8 * np.max(df_full["count"].values)) diff --git a/xdem/_typing.py b/xdem/_typing.py index b8e20a06..13b89715 100644 --- a/xdem/_typing.py +++ b/xdem/_typing.py @@ -11,8 +11,10 @@ from numpy.typing import NDArray # this syntax works starting on Python 3.9 NDArrayf = NDArray[np.floating[Any]] + NDArrayb = NDArray[np.bool_] MArrayf = np.ma.masked_array[Any, np.dtype[np.floating[Any]]] else: - NDArrayf = np.array # type: ignore + NDArrayf = np.ndarray # type: ignore + NDArrayb = np.ndarray # type: ignore MArrayf = np.ma.masked_array # type: ignore diff --git a/xdem/coreg/affine.py b/xdem/coreg/affine.py index d9f09613..4462a971 100644 --- a/xdem/coreg/affine.py +++ b/xdem/coreg/affine.py @@ -21,7 +21,7 @@ from geoutils.raster import Raster, RasterType, get_array_and_mask from tqdm import trange -from xdem._typing import NDArrayf +from xdem._typing import NDArrayb, NDArrayf from xdem.coreg.base import ( Coreg, CoregDict, @@ -214,11 +214,19 @@ class AffineCoreg(Coreg): _fit_called: bool = False # Flag to check if the .fit() method has been called. _is_affine: bool | None = None - def __init__(self, meta: CoregDict | None = None, matrix: NDArrayf | None = None) -> None: - """Instantiate a generic Coreg method.""" + def __init__( + self, + subsample: float | int = 1.0, + matrix: NDArrayf | None = None, + meta: CoregDict | None = None, + ) -> None: + """Instantiate a generic AffineCoreg method.""" super().__init__(meta=meta) + # Define subsample size + self._meta["subsample"] = subsample + if matrix is not None: with warnings.catch_warnings(): # This error is fixed in the upcoming 1.8 @@ -295,6 +303,7 @@ def _fit_func( self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, weights: NDArrayf | None, @@ -328,21 +337,25 @@ class VerticalShift(AffineCoreg): Estimates the mean (or median, weighted avg., etc.) vertical offset between two DEMs. """ - def __init__(self, vshift_func: Callable[[NDArrayf], np.floating[Any]] = np.average) -> None: # pylint: + def __init__( + self, vshift_func: Callable[[NDArrayf], np.floating[Any]] = np.average, subsample: float | int = 1.0 + ) -> None: # pylint: # disable=super-init-not-called """ Instantiate a vertical shift correction object. :param vshift_func: The function to use for calculating the vertical shift. Default: (weighted) average. + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ self._meta: CoregDict = {} # All __init__ functions should instantiate an empty dict. - super().__init__(meta={"vshift_func": vshift_func}) + super().__init__(meta={"vshift_func": vshift_func}, subsample=subsample) def _fit_func( self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, weights: NDArrayf | None, @@ -351,10 +364,15 @@ def _fit_func( **kwargs: Any, ) -> None: """Estimate the vertical shift using the vshift_func.""" + if verbose: print("Estimating the vertical shift...") diff = ref_dem - tba_dem - diff = diff[np.isfinite(diff)] + + valid_mask = np.logical_and.reduce((inlier_mask, np.isfinite(diff))) + subsample_mask = self._get_subsample_on_valid_mask(valid_mask=valid_mask) + + diff = diff[subsample_mask] if np.count_nonzero(np.isfinite(diff)) == 0: raise ValueError("No finite values in vertical shift comparison.") @@ -412,7 +430,12 @@ class ICP(AffineCoreg): """ def __init__( - self, max_iterations: int = 100, tolerance: float = 0.05, rejection_scale: float = 2.5, num_levels: int = 6 + self, + max_iterations: int = 100, + tolerance: float = 0.05, + rejection_scale: float = 2.5, + num_levels: int = 6, + subsample: float | int = 5e5, ) -> None: """ Instantiate an ICP coregistration object. @@ -421,20 +444,24 @@ def __init__( :param tolerance: The residual change threshold after which to stop the iterations. :param rejection_scale: The threshold (std * rejection_scale) to consider points as outliers. :param num_levels: Number of octree levels to consider. A higher number is faster but may be more inaccurate. + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ if not _has_cv2: raise ValueError("Optional dependency needed. Install 'opencv'") + + # TODO: Move these to _meta? self.max_iterations = max_iterations self.tolerance = tolerance self.rejection_scale = rejection_scale self.num_levels = num_levels - super().__init__() + super().__init__(subsample=subsample) def _fit_func( self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, weights: NDArrayf | None, @@ -456,17 +483,20 @@ def _fit_func( normal_north = np.sin(np.arctan(gradient_x / resolution)) normal_up = 1 - np.linalg.norm([normal_east, normal_north], axis=0) - valid_mask = ~np.isnan(ref_dem) & ~np.isnan(normal_east) & ~np.isnan(normal_north) + valid_mask = np.logical_and.reduce( + (inlier_mask, np.isfinite(ref_dem), np.isfinite(normal_east), np.isfinite(normal_north)) + ) + subsample_mask = self._get_subsample_on_valid_mask(valid_mask=valid_mask) ref_pts = pd.DataFrame( np.dstack( [ - x_coords[valid_mask], - y_coords[valid_mask], - ref_dem[valid_mask], - normal_east[valid_mask], - normal_north[valid_mask], - normal_up[valid_mask], + x_coords[subsample_mask], + y_coords[subsample_mask], + ref_dem[subsample_mask], + normal_east[subsample_mask], + normal_north[subsample_mask], + normal_up[subsample_mask], ] ).squeeze(), columns=["E", "N", "z", "nx", "ny", "nz"], @@ -571,20 +601,17 @@ def __init__(self, subsample: int | float = 5e5) -> None: """ Instantiate a tilt correction object. - :param subsample: Factor for subsampling the input raster for speed-up. - If <= 1, will be considered a fraction of valid pixels to extract. - If > 1 will be considered the number of pixels to extract. - + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ self.poly_order = 1 - self.subsample = subsample - super().__init__() + super().__init__(subsample=subsample) def _fit_func( self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, weights: NDArrayf | None, @@ -594,9 +621,10 @@ def _fit_func( ) -> None: """Fit the dDEM between the DEMs to a least squares polynomial equation.""" ddem = ref_dem - tba_dem + ddem[~inlier_mask] = np.nan x_coords, y_coords = _get_x_and_y_coords(ref_dem.shape, transform) fit_ramp, coefs = deramping( - ddem, x_coords, y_coords, degree=self.poly_order, subsample=self.subsample, verbose=verbose + ddem, x_coords, y_coords, degree=self.poly_order, subsample=self._meta["subsample"], verbose=verbose ) self._meta["coefficients"] = coefs[0] @@ -651,23 +679,25 @@ class NuthKaab(AffineCoreg): https://doi.org/10.5194/tc-5-271-2011 """ - def __init__(self, max_iterations: int = 10, offset_threshold: float = 0.05) -> None: + def __init__(self, max_iterations: int = 10, offset_threshold: float = 0.05, subsample: int | float = 5e5) -> None: """ Instantiate a new Nuth and Kääb (2011) coregistration object. :param max_iterations: The maximum allowed iterations before stopping. :param offset_threshold: The residual offset threshold after which to stop the iterations. + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ self._meta: CoregDict self.max_iterations = max_iterations self.offset_threshold = offset_threshold - super().__init__() + super().__init__(subsample=subsample) def _fit_func( self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, weights: NDArrayf | None, @@ -694,6 +724,11 @@ def _fit_func( if verbose: print(" Calculate slope and aspect") + valid_mask = np.logical_and.reduce((inlier_mask, np.isfinite(ref_dem), np.isfinite(tba_dem))) + subsample_mask = self._get_subsample_on_valid_mask(valid_mask=valid_mask) + # TODO: Make this consistent with other subsampling once NK is updated (work on vector, not 2D with NaN) + ref_dem[~subsample_mask] = np.nan + slope_tan, aspect = _calculate_slope_and_aspect_nuthkaab(ref_dem) # Make index grids for the east and north dimensions @@ -983,36 +1018,35 @@ class GradientDescending(AffineCoreg): def __init__( self, - downsampling: int = 6000, x0: tuple[float, float] = (0, 0), bounds: tuple[float, float] = (-3, 3), deltainit: int = 2, deltatol: float = 0.004, feps: float = 0.0001, + subsample: int | float = 6000, ) -> None: """ Instantiate gradient descending coregistration object. - :param downsampling: The number of points of downsampling the df to run the coreg. Set None to disable it. :param x0: The initial point of gradient descending iteration. :param bounds: The boundary of the maximum shift. :param deltainit: Initial pattern size. :param deltatol: Target pattern size, or the precision you want achieve. :param feps: Parameters for algorithm. Smallest difference in function value to resolve. + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. The algorithm terminates when the iteration is locally optimal at the target pattern size 'deltatol', or when the function value differs by less than the tolerance 'feps' along all directions. """ self._meta: CoregDict - self.downsampling = downsampling self.bounds = bounds self.x0 = x0 self.deltainit = deltainit self.deltatol = deltatol self.feps = feps - super().__init__() + super().__init__(subsample=subsample) def _fit_pts_func( self, @@ -1034,9 +1068,9 @@ def _fit_pts_func( if not _has_noisyopt: raise ValueError("Optional dependency needed. Install 'noisyopt'") - # downsampling if downsampling != None - if self.downsampling and len(ref_dem) > self.downsampling: - ref_dem = ref_dem.sample(frac=self.downsampling / len(ref_dem), random_state=random_state).copy() + # Perform downsampling if subsample != None + if self._meta["subsample"] and len(ref_dem) > self._meta["subsample"]: + ref_dem = ref_dem.sample(frac=self._meta["subsample"] / len(ref_dem), random_state=random_state).copy() else: ref_dem = ref_dem.copy() @@ -1052,7 +1086,7 @@ def _fit_pts_func( if verbose: print("Running Gradient Descending Coreg - Zhihao (in preparation) ") - if self.downsampling: + if self._meta["subsample"]: print("Running on downsampling. The length of the gdf:", len(ref_dem)) elevation_difference = _residuals_df(tba_dem, ref_dem, (0, 0), 0, z_name=z_name) @@ -1104,6 +1138,7 @@ def _fit_func( self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, weights: NDArrayf | None, diff --git a/xdem/coreg/base.py b/xdem/coreg/base.py index 13b3c4f0..e750ea66 100644 --- a/xdem/coreg/base.py +++ b/xdem/coreg/base.py @@ -4,6 +4,7 @@ import concurrent.futures import copy +import inspect import warnings from typing import ( Any, @@ -46,7 +47,7 @@ ) from tqdm import tqdm -from xdem._typing import MArrayf, NDArrayf +from xdem._typing import MArrayf, NDArrayb, NDArrayf from xdem.spatialstats import nmad from xdem.terrain import get_terrain_attribute @@ -143,7 +144,7 @@ def _residuals_df( def _df_sampling_from_dem( - dem: RasterType, tba_dem: RasterType, samples: int = 10000, order: int = 1, offset: str | None = None + dem: RasterType, tba_dem: RasterType, subsample: float | int = 10000, order: int = 1, offset: str | None = None ) -> pd.DataFrame: """ Generate a dataframe from a dem by random sampling. @@ -160,9 +161,18 @@ def _df_sampling_from_dem( else: offset = "center" + # Convert subsample to int + valid_mask = np.logical_and(~dem.mask, ~tba_dem.mask) + if (subsample <= 1) & (subsample > 0): + npoints = int(subsample * np.count_nonzero(valid_mask)) + elif subsample > 1: + npoints = int(subsample) + else: + raise ValueError("`subsample` must be > 0") + # Avoid edge, and mask-out area in sampling width, length = dem.shape - i, j = np.random.randint(10, width - 10, samples), np.random.randint(10, length - 10, samples) + i, j = np.random.randint(10, width - 10, npoints), np.random.randint(10, length - 10, npoints) mask = dem.data.mask # Get value @@ -202,7 +212,7 @@ def _mask_dataframe_by_dem(df: pd.DataFrame | NDArrayf, dem: RasterType) -> pd.D def _calculate_ddem_stats( ddem: NDArrayf | MArrayf, - inlier_mask: NDArrayf | None = None, + inlier_mask: NDArrayb | None = None, stats_list: tuple[Callable[[NDArrayf], Number], ...] | None = None, stats_labels: tuple[str, ...] | None = None, ) -> dict[str, float]: @@ -289,12 +299,10 @@ def _mask_as_array(reference_raster: gu.Raster, mask: str | gu.Vector | gu.Raste def _preprocess_coreg_raster_input( reference_dem: NDArrayf | MArrayf | RasterType, dem_to_be_aligned: NDArrayf | MArrayf | RasterType, - inlier_mask: NDArrayf | Mask | None = None, + inlier_mask: NDArrayb | Mask | None = None, transform: rio.transform.Affine | None = None, crs: rio.crs.CRS | None = None, - subsample: float | int = 1.0, - random_state: None | np.random.RandomState | np.random.Generator | int = None, -) -> tuple[NDArrayf, NDArrayf, affine.Affine, rio.crs.CRS]: +) -> tuple[NDArrayf, NDArrayf, NDArrayb, affine.Affine, rio.crs.CRS]: # Validate that both inputs are valid array-like (or Raster) types. if not all(isinstance(dem, (np.ndarray, gu.Raster)) for dem in (reference_dem, dem_to_be_aligned)): @@ -339,8 +347,10 @@ def _preprocess_coreg_raster_input( raise ValueError("'crs' must be given if both DEMs are array-like.") # Get a NaN array covering nodatas from the raster, masked array or integer-type array - ref_dem, ref_mask = get_array_and_mask(reference_dem) - tba_dem, tba_mask = get_array_and_mask(dem_to_be_aligned) + with warnings.catch_warnings(): + warnings.filterwarnings(action="ignore", category=UserWarning) + ref_dem, ref_mask = get_array_and_mask(reference_dem, copy=True) + tba_dem, tba_mask = get_array_and_mask(dem_to_be_aligned, copy=True) # Make sure that the mask has an expected format. if inlier_mask is not None: @@ -352,29 +362,21 @@ def _preprocess_coreg_raster_input( if np.all(~inlier_mask): raise ValueError("'inlier_mask' had no inliers.") - - ref_dem[~inlier_mask] = np.nan - tba_dem[~inlier_mask] = np.nan + else: + inlier_mask = np.ones(np.shape(ref_dem), dtype=bool) if np.all(ref_mask): raise ValueError("'reference_dem' had only NaNs") if np.all(tba_mask): raise ValueError("'dem_to_be_aligned' had only NaNs") - # If subsample is not equal to one, subsampling should be performed. - if subsample != 1.0: + # Isolate all invalid values + invalid_mask = np.logical_or.reduce((~inlier_mask, ref_mask, tba_mask)) - indices = gu.raster.subsample_array( - ref_dem, subsample=subsample, return_indices=True, random_state=random_state - ) + if np.all(invalid_mask): + raise ValueError("All values of the inlier mask are NaNs in either 'reference_dem' or 'dem_to_be_aligned'.") - mask_subsample = np.zeros(np.shape(ref_dem), dtype=bool) - mask_subsample[indices[0], indices[1]] = True - - ref_dem[~mask_subsample] = np.nan - tba_dem[~mask_subsample] = np.nan - - return ref_dem, tba_dem, transform, crs + return ref_dem, tba_dem, inlier_mask, transform, crs # TODO: Re-structure AffineCoreg apply function and move there? @@ -653,6 +655,10 @@ class CoregDict(TypedDict, total=False): # The pipeline metadata can have any value of the above pipeline: list[Any] + # Affine + BiasCorr classes + subsample: int | float + random_state: np.random.RandomState | np.random.Generator | int | None + # BiasCorr classes generic metadata # 1/ Inputs @@ -725,16 +731,56 @@ def is_affine(self) -> bool: return self._is_affine + def _get_subsample_on_valid_mask(self, valid_mask: NDArrayb, verbose: bool = False) -> NDArrayb: + """ + Get mask of values to subsample on valid mask. + + :param valid_mask: Mask of valid values (inlier and not nodata). + """ + + # This should never happen + if self._meta["subsample"] is None: + raise ValueError("Subsample should have been defined in metadata before reaching this class method.") + + # If subsample is not equal to one, subsampling should be performed. + elif self._meta["subsample"] != 1.0: + + # Build a low memory masked array with invalid values masked to pass to subsampling + ma_valid = np.ma.masked_array(data=np.ones(np.shape(valid_mask), dtype=bool), mask=~valid_mask) + # Take a subsample within the valid values + indices = gu.raster.subsample_array( + ma_valid, + subsample=self._meta["subsample"], + return_indices=True, + random_state=self._meta["random_state"], + ) + + # We return a boolean mask of the subsample within valid values + subsample_mask = np.zeros(np.shape(valid_mask), dtype=bool) + subsample_mask[indices[0], indices[1]] = True + else: + # If no subsample is taken, use all valid values + subsample_mask = valid_mask + + if verbose: + print( + "Using a subsample of {} among {} valid values.".format( + np.count_nonzero(valid_mask), np.count_nonzero(subsample_mask) + ) + ) + + return subsample_mask + def fit( self: CoregType, reference_dem: NDArrayf | MArrayf | RasterType, dem_to_be_aligned: NDArrayf | MArrayf | RasterType, - inlier_mask: NDArrayf | Mask | None = None, + inlier_mask: NDArrayb | Mask | None = None, transform: rio.transform.Affine | None = None, crs: rio.crs.CRS | None = None, bias_vars: dict[str, NDArrayf | MArrayf | RasterType] | None = None, weights: NDArrayf | None = None, - subsample: float | int = 1.0, + subsample: float | int | None = None, verbose: bool = False, random_state: None | np.random.RandomState | np.random.Generator | int = None, **kwargs: Any, @@ -757,25 +803,46 @@ def fit( if weights is not None: raise NotImplementedError("Weights have not yet been implemented") + # Override subsample argument of instantiation if passed to fit + if subsample is not None: + + # Check if subsample argument was also defined at instantiation (not default value), and raise warning + argspec = inspect.getfullargspec(self.__class__) + sub_meta = self._meta["subsample"] + if argspec.defaults is None or "subsample" not in argspec.args: + raise ValueError("The subsample argument and default need to be defined in this Coreg class.") + sub_is_default = argspec.defaults[argspec.args.index("subsample") - 1] == sub_meta # type: ignore + if not sub_is_default: + warnings.warn( + "Subsample argument passed to fit() will override non-default subsample value defined at " + "instantiation. To silence this warning: only define 'subsample' in either fit(subsample=...) or " + "instantiation e.g. VerticalShift(subsample=...)." + ) + + # In any case, override! + self._meta["subsample"] = subsample + + # Save random_state is a subsample is used + if self._meta["subsample"] != 1: + self._meta["random_state"] = random_state + # Pre-process the inputs, by reprojecting and subsampling - ref_dem, tba_dem, transform, crs = _preprocess_coreg_raster_input( + ref_dem, tba_dem, inlier_mask, transform, crs = _preprocess_coreg_raster_input( reference_dem=reference_dem, dem_to_be_aligned=dem_to_be_aligned, inlier_mask=inlier_mask, transform=transform, crs=crs, - subsample=subsample, - random_state=random_state, ) main_args = { "ref_dem": ref_dem, "tba_dem": tba_dem, + "inlier_mask": inlier_mask, "transform": transform, "crs": crs, "weights": weights, "verbose": verbose, - "random_state": random_state, } # If bias_vars are defined, update dictionary content to array @@ -804,7 +871,7 @@ def residuals( self, reference_dem: NDArrayf, dem_to_be_aligned: NDArrayf, - inlier_mask: NDArrayf | None = None, + inlier_mask: NDArrayb | None = None, transform: rio.transform.Affine | None = None, crs: rio.crs.CRS | None = None, subsample: float | int = 1.0, @@ -828,14 +895,12 @@ def residuals( aligned_dem = self.apply(dem_to_be_aligned, transform=transform, crs=crs)[0] # Pre-process the inputs, by reprojecting and subsampling - ref_dem, align_dem, transform, crs = _preprocess_coreg_raster_input( + ref_dem, align_dem, inlier_mask, transform, crs = _preprocess_coreg_raster_input( reference_dem=reference_dem, dem_to_be_aligned=aligned_dem, inlier_mask=inlier_mask, transform=transform, crs=crs, - subsample=subsample, - random_state=random_state, ) # Calculate the DEM difference @@ -853,9 +918,8 @@ def fit_pts( self: CoregType, reference_dem: NDArrayf | MArrayf | RasterType | pd.DataFrame, dem_to_be_aligned: RasterType, - inlier_mask: NDArrayf | Mask | None = None, + inlier_mask: NDArrayb | Mask | None = None, transform: rio.transform.Affine | None = None, - samples: int = 10000, subsample: float | int = 1.0, verbose: bool = False, mask_high_curv: bool = False, @@ -870,7 +934,6 @@ def fit_pts( :param dem_to_be_aligned: 2D array of elevation values to be aligned. :param inlier_mask: Optional. 2D boolean array of areas to include in the analysis (inliers=True). :param transform: Optional. Transform of the reference_dem. Mandatory in some cases. - :param samples: The sample count to run co-registration on. Equivalent to subsample. :param subsample: Subsample the input to increase performance. <1 is parsed as a fraction. >1 is a pixel count. :param verbose: Print progress messages to stdout. :param order: interpolation 0=nearest, 1=linear, 2=cubic. @@ -890,7 +953,7 @@ def fit_pts( # How to make sure sample point locates in stable terrain? if isinstance(reference_dem, (np.ndarray, gu.Raster)): reference_dem = _df_sampling_from_dem( - reference_dem, dem_to_be_aligned, samples=samples, order=1, offset=None + reference_dem, dem_to_be_aligned, subsample=subsample, order=1, offset=None ) # Validate that at least one input is a valid point data type. @@ -1214,7 +1277,7 @@ def error( reference_dem: NDArrayf, dem_to_be_aligned: NDArrayf, error_type: list[str], - inlier_mask: NDArrayf | None = None, + inlier_mask: NDArrayb | None = None, transform: rio.transform.Affine | None = None, crs: rio.crs.CRS | None = None, ) -> list[np.floating[Any] | float | np.integer[Any] | int]: @@ -1226,7 +1289,7 @@ def error( reference_dem: NDArrayf, dem_to_be_aligned: NDArrayf, error_type: str = "nmad", - inlier_mask: NDArrayf | None = None, + inlier_mask: NDArrayb | None = None, transform: rio.transform.Affine | None = None, crs: rio.crs.CRS | None = None, ) -> np.floating[Any] | float | np.integer[Any] | int: @@ -1237,7 +1300,7 @@ def error( reference_dem: NDArrayf, dem_to_be_aligned: NDArrayf, error_type: str | list[str] = "nmad", - inlier_mask: NDArrayf | None = None, + inlier_mask: NDArrayb | None = None, transform: rio.transform.Affine | None = None, crs: rio.crs.CRS | None = None, ) -> np.floating[Any] | float | np.integer[Any] | int | list[np.floating[Any] | float | np.integer[Any] | int]: @@ -1306,6 +1369,7 @@ def _fit_func( self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, weights: NDArrayf | None, @@ -1400,34 +1464,65 @@ def _parse_bias_vars(self, step: int, bias_vars: dict[str, NDArrayf] | None) -> # Add subset dict for this pipeline step to args of fit and apply return {n: bias_vars[n] for n in var_names} - def _fit_func( - self, - ref_dem: NDArrayf, - tba_dem: NDArrayf, - transform: rio.transform.Affine, - crs: rio.crs.CRS, - weights: NDArrayf | None, - bias_vars: dict[str, NDArrayf] | None = None, + def fit( + self: CoregType, + reference_dem: NDArrayf | MArrayf | RasterType, + dem_to_be_aligned: NDArrayf | MArrayf | RasterType, + inlier_mask: NDArrayb | Mask | None = None, + transform: rio.transform.Affine | None = None, + crs: rio.crs.CRS | None = None, + bias_vars: dict[str, NDArrayf | MArrayf | RasterType] | None = None, + weights: NDArrayf | None = None, + subsample: float | int | None = None, verbose: bool = False, + random_state: None | np.random.RandomState | np.random.Generator | int = None, **kwargs: Any, - ) -> None: - """Fit each processing step with the previously transformed DEM.""" + ) -> CoregType: + + # Check if subsample arguments are different from their default value for any of the coreg steps: + # get default value in argument spec and "subsample" stored in meta, and compare both are consistent + argspec = [inspect.getfullargspec(c.__class__) for c in self.pipeline] + sub_meta = [c._meta["subsample"] for c in self.pipeline] + sub_is_default = [ + argspec[i].defaults[argspec[i].args.index("subsample") - 1] == sub_meta[i] # type: ignore + for i in range(len(argspec)) + ] + if subsample is not None and not all(sub_is_default): + warnings.warn( + "Subsample argument passed to fit() will override non-default subsample values defined for" + " individual steps of the pipeline. To silence this warning: only define 'subsample' in " + "either fit(subsample=...) or instantiation e.g., VerticalShift(subsample=...)." + ) + + # Pre-process the inputs, by reprojecting and subsampling, without any subsampling (done in each step) + ref_dem, tba_dem, inlier_mask, transform, crs = _preprocess_coreg_raster_input( + reference_dem=reference_dem, + dem_to_be_aligned=dem_to_be_aligned, + inlier_mask=inlier_mask, + transform=transform, + crs=crs, + ) + tba_dem_mod = tba_dem.copy() + out_transform = transform for i, coreg in enumerate(self.pipeline): if verbose: print(f"Running pipeline step: {i + 1} / {len(self.pipeline)}") main_args_fit = { - "ref_dem": ref_dem, - "tba_dem": tba_dem_mod, - "transform": transform, + "reference_dem": ref_dem, + "dem_to_be_aligned": tba_dem, + "inlier_mask": inlier_mask, + "transform": out_transform, "crs": crs, "weights": weights, "verbose": verbose, + "subsample": subsample, + "random_state": random_state, } - main_args_apply = {"dem": tba_dem_mod, "transform": transform, "crs": crs} + main_args_apply = {"dem": tba_dem_mod, "transform": out_transform, "crs": crs} # If non-affine method that expects a bias_vars argument if coreg._needs_vars: @@ -1436,11 +1531,15 @@ def _fit_func( main_args_fit.update({"bias_vars": step_bias_vars}) main_args_apply.update({"bias_vars": step_bias_vars}) - coreg._fit_func(**main_args_fit) - coreg._fit_called = True + coreg.fit(**main_args_fit) tba_dem_mod, out_transform = coreg.apply(**main_args_apply) + # Flag that the fitting function has been called. + self._fit_called = True + + return self + def _fit_pts_func( self: CoregType, ref_dem: NDArrayf | MArrayf | RasterType | pd.DataFrame, @@ -1575,19 +1674,48 @@ def __init__( self._meta: CoregDict = {"step_meta": []} - def _fit_func( - self, - ref_dem: NDArrayf, - tba_dem: NDArrayf, - transform: rio.transform.Affine, - crs: rio.crs.CRS, - weights: NDArrayf | None, - bias_vars: dict[str, NDArrayf] | None = None, + def fit( + self: CoregType, + reference_dem: NDArrayf | MArrayf | RasterType, + dem_to_be_aligned: NDArrayf | MArrayf | RasterType, + inlier_mask: NDArrayb | Mask | None = None, + transform: rio.transform.Affine | None = None, + crs: rio.crs.CRS | None = None, + bias_vars: dict[str, NDArrayf | MArrayf | RasterType] | None = None, + weights: NDArrayf | None = None, + subsample: float | int | None = None, verbose: bool = False, + random_state: None | np.random.RandomState | np.random.Generator | int = None, **kwargs: Any, - ) -> None: - """Fit the coreg approach for each subdivision.""" + ) -> CoregType: + + # Check if subsample arguments are different from their default value for any of the coreg steps: + # get default value in argument spec and "subsample" stored in meta, and compare both are consistent + if not isinstance(self.procstep, CoregPipeline): + steps = [self.procstep] + else: + steps = list(self.procstep.pipeline) + argspec = [inspect.getfullargspec(s.__class__) for s in steps] + sub_meta = [s._meta["subsample"] for s in steps] + sub_is_default = [ + argspec[i].defaults[argspec[i].args.index("subsample") - 1] == sub_meta[i] # type: ignore + for i in range(len(argspec)) + ] + if subsample is not None and not all(sub_is_default): + warnings.warn( + "Subsample argument passed to fit() will override non-default subsample values defined in the" + " step within the blockwise method. To silence this warning: only define 'subsample' in " + "either fit(subsample=...) or instantiation e.g., VerticalShift(subsample=...)." + ) + # Pre-process the inputs, by reprojecting and subsampling, without any subsampling (done in each step) + ref_dem, tba_dem, inlier_mask, transform, crs = _preprocess_coreg_raster_input( + reference_dem=reference_dem, + dem_to_be_aligned=dem_to_be_aligned, + inlier_mask=inlier_mask, + transform=transform, + crs=crs, + ) groups = self.subdivide_array(tba_dem.shape) indices = np.unique(groups) @@ -1617,7 +1745,7 @@ def process(i: int) -> dict[str, Any] | BaseException | None: return None mask_subset = inlier_mask[arrayslice].copy() west, top = rio.transform.xy(transform, min(rows), min(cols), offset="ul") - transform_subset = rio.transform.from_origin(west, top, transform.a, -transform.e) + transform_subset = rio.transform.from_origin(west, top, transform.a, -transform.e) # type: ignore procstep = self.procstep.copy() # Try to run the coregistration. If it fails for any reason, skip it and save the exception. @@ -1627,7 +1755,12 @@ def process(i: int) -> dict[str, Any] | BaseException | None: dem_to_be_aligned=tba_subset, transform=transform_subset, inlier_mask=mask_subset, + bias_vars=bias_vars, + weights=weights, crs=crs, + subsample=subsample, + random_state=random_state, + verbose=verbose, ) nmad, median = procstep.error( reference_dem=ref_subset, @@ -1726,6 +1859,11 @@ def process(i: int) -> dict[str, Any] | BaseException | None: for step in self.procstep.pipeline: step._fit_called = True + # Flag that the fitting function has been called. + self._fit_called = True + + return self + def _restore_metadata(self, meta: CoregDict) -> None: """ Given some metadata, set it in the right place. diff --git a/xdem/coreg/biascorr.py b/xdem/coreg/biascorr.py index b312800c..4ce6d042 100644 --- a/xdem/coreg/biascorr.py +++ b/xdem/coreg/biascorr.py @@ -11,7 +11,7 @@ import scipy import xdem.spatialstats -from xdem._typing import NDArrayf +from xdem._typing import NDArrayb, NDArrayf from xdem.coreg.base import Coreg from xdem.fit import ( polynomial_1d, @@ -48,6 +48,7 @@ def __init__( bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", bias_var_names: Iterable[str] = None, + subsample: float | int = 1.0, ): """ Instantiate a bias correction object. @@ -130,6 +131,9 @@ def __init__( } super().__init__(meta=meta_bin_and_fit) # type: ignore + # Add subsample attribute + self._meta["subsample"] = subsample + # Update attributes self._fit_or_bin = fit_or_bin self._is_affine = False @@ -139,6 +143,7 @@ def _fit_func( # type: ignore self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, # Never None thanks to Coreg.fit() pre-process crs: rio.crs.CRS, # Never None thanks to Coreg.fit() pre-process bias_vars: None | dict[str, NDArrayf] = None, @@ -149,8 +154,6 @@ def _fit_func( # type: ignore """Should only be called through subclassing.""" # This is called by subclasses, so the bias_var should always be defined - # TODO: Move this up to Coreg class, checking kwargs of fit(), or better to overload function - # description in fit() here? if bias_vars is None: raise ValueError("At least one `bias_var` should be passed to the fitting function, got None.") @@ -166,13 +169,19 @@ def _fit_func( # type: ignore self._meta["bias_var_names"] = list(bias_vars.keys()) # Compute difference and mask of valid data + # TODO: Move the check up to Coreg.fit()? + diff = ref_dem - tba_dem - ind_valid = np.logical_and.reduce((np.isfinite(diff), *(np.isfinite(var) for var in bias_vars.values()))) + valid_mask = np.logical_and.reduce( + (inlier_mask, np.isfinite(diff), *(np.isfinite(var) for var in bias_vars.values())) + ) # Raise errors if all values are NaN after introducing masks from the variables # (Others are already checked in Coreg.fit()) - if np.all(~ind_valid): - raise ValueError("One of the 'bias_vars' had only NaNs.") + if np.all(~valid_mask): + raise ValueError("Some 'bias_vars' have only NaNs in the inlier mask.") + + subsample_mask = self._get_subsample_on_valid_mask(valid_mask=valid_mask, verbose=verbose) # Get number of variables nd = len(bias_vars) @@ -207,9 +216,9 @@ def _fit_func( # type: ignore results = self._meta["fit_optimizer"]( f=self._meta["fit_func"], - xdata=np.array([var[ind_valid].flatten() for var in bias_vars.values()]).squeeze(), - ydata=diff[ind_valid].flatten(), - sigma=weights[ind_valid].flatten() if weights is not None else None, + xdata=np.array([var[subsample_mask].flatten() for var in bias_vars.values()]).squeeze(), + ydata=diff[subsample_mask].flatten(), + sigma=weights[subsample_mask].flatten() if weights is not None else None, absolute_sigma=True, **kwargs, ) @@ -224,8 +233,8 @@ def _fit_func( # type: ignore ) df = xdem.spatialstats.nd_binning( - values=diff[ind_valid], - list_var=[var[ind_valid] for var in bias_vars.values()], + values=diff[subsample_mask], + list_var=[var[subsample_mask] for var in bias_vars.values()], list_var_names=list(bias_vars.keys()), list_var_bins=bin_sizes, statistics=(self._meta["bin_statistic"], "count"), @@ -246,8 +255,8 @@ def _fit_func( # type: ignore ) df = xdem.spatialstats.nd_binning( - values=diff[ind_valid], - list_var=[var[ind_valid] for var in bias_vars.values()], + values=diff[subsample_mask], + list_var=[var[subsample_mask] for var in bias_vars.values()], list_var_names=list(bias_vars.keys()), list_var_bins=bin_sizes, statistics=(self._meta["bin_statistic"], "count"), @@ -263,6 +272,7 @@ def _fit_func( # type: ignore # TODO: pass a new sigma based on "count" and original sigma (and correlation?)? # sigma values would have to be binned above also + # Valid values for the binning output ind_valid = np.logical_and.reduce((np.isfinite(new_diff), *(np.isfinite(var) for var in new_vars))) if np.all(~ind_valid): @@ -375,6 +385,7 @@ def __init__( bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", bias_var_names: Iterable[str] = None, + subsample: float | int = 1.0, ): """ Instantiate a 1D bias correction. @@ -388,15 +399,24 @@ def __init__( :param bin_apply_method: Method to correct with the binned statistics, either "linear" to interpolate linearly between bins, or "per_bin" to apply the statistic for each bin. :param bias_var_names: (Optional) For pipelines, explicitly define bias variables names to use during .fit(). + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ super().__init__( - fit_or_bin, fit_func, fit_optimizer, bin_sizes, bin_statistic, bin_apply_method, bias_var_names + fit_or_bin, + fit_func, + fit_optimizer, + bin_sizes, + bin_statistic, + bin_apply_method, + bias_var_names, + subsample, ) def _fit_func( # type: ignore self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, bias_vars: dict[str, NDArrayf], transform: rio.transform.Affine, # Never None thanks to Coreg.fit() pre-process crs: rio.crs.CRS, # Never None thanks to Coreg.fit() pre-process @@ -416,6 +436,7 @@ def _fit_func( # type: ignore super()._fit_func( ref_dem=ref_dem, tba_dem=tba_dem, + inlier_mask=inlier_mask, bias_vars=bias_vars, transform=transform, crs=crs, @@ -439,6 +460,7 @@ def __init__( bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", bias_var_names: Iterable[str] = None, + subsample: float | int = 1.0, ): """ Instantiate a 2D bias correction. @@ -452,15 +474,24 @@ def __init__( :param bin_apply_method: Method to correct with the binned statistics, either "linear" to interpolate linearly between bins, or "per_bin" to apply the statistic for each bin. :param bias_var_names: (Optional) For pipelines, explicitly define bias variables names to use during .fit(). + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ super().__init__( - fit_or_bin, fit_func, fit_optimizer, bin_sizes, bin_statistic, bin_apply_method, bias_var_names + fit_or_bin, + fit_func, + fit_optimizer, + bin_sizes, + bin_statistic, + bin_apply_method, + bias_var_names, + subsample, ) def _fit_func( # type: ignore self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, bias_vars: dict[str, NDArrayf], transform: rio.transform.Affine, # Never None thanks to Coreg.fit() pre-process crs: rio.crs.CRS, # Never None thanks to Coreg.fit() pre-process @@ -479,6 +510,7 @@ def _fit_func( # type: ignore super()._fit_func( ref_dem=ref_dem, tba_dem=tba_dem, + inlier_mask=inlier_mask, bias_vars=bias_vars, transform=transform, crs=crs, @@ -504,6 +536,7 @@ def __init__( bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", bias_var_names: Iterable[str] = None, + subsample: float | int = 1.0, ): """ Instantiate an N-D bias correction. @@ -517,15 +550,24 @@ def __init__( :param bin_apply_method: Method to correct with the binned statistics, either "linear" to interpolate linearly between bins, or "per_bin" to apply the statistic for each bin. :param bias_var_names: (Optional) For pipelines, explicitly define bias variables names to use during .fit(). + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ super().__init__( - fit_or_bin, fit_func, fit_optimizer, bin_sizes, bin_statistic, bin_apply_method, bias_var_names + fit_or_bin, + fit_func, + fit_optimizer, + bin_sizes, + bin_statistic, + bin_apply_method, + bias_var_names, + subsample, ) def _fit_func( # type: ignore self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, bias_vars: dict[str, NDArrayf], # Never None thanks to BiasCorr.fit() pre-process transform: rio.transform.Affine, # Never None thanks to Coreg.fit() pre-process crs: rio.crs.CRS, # Never None thanks to Coreg.fit() pre-process @@ -541,6 +583,7 @@ def _fit_func( # type: ignore super()._fit_func( ref_dem=ref_dem, tba_dem=tba_dem, + inlier_mask=inlier_mask, bias_vars=bias_vars, transform=transform, crs=crs, @@ -564,6 +607,7 @@ def __init__( bin_sizes: int | dict[str, int | Iterable[float]] = 10, bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", + subsample: float | int = 1.0, ): """ Instantiate a directional bias correction. @@ -577,8 +621,11 @@ def __init__( :param bin_statistic: Statistic of central tendency (e.g., mean) to apply during the binning. :param bin_apply_method: Method to correct with the binned statistics, either "linear" to interpolate linearly between bins, or "per_bin" to apply the statistic for each bin. + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ - super().__init__(fit_or_bin, fit_func, fit_optimizer, bin_sizes, bin_statistic, bin_apply_method, ["angle"]) + super().__init__( + fit_or_bin, fit_func, fit_optimizer, bin_sizes, bin_statistic, bin_apply_method, ["angle"], subsample + ) self._meta["angle"] = angle self._needs_vars = False @@ -586,6 +633,7 @@ def _fit_func( # type: ignore self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, bias_vars: dict[str, NDArrayf] = None, @@ -611,6 +659,7 @@ def _fit_func( # type: ignore super()._fit_func( ref_dem=ref_dem, tba_dem=tba_dem, + inlier_mask=inlier_mask, bias_vars={"angle": x}, transform=transform, crs=crs, @@ -660,6 +709,7 @@ def __init__( bin_sizes: int | dict[str, int | Iterable[float]] = 100, bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", + subsample: float | int = 1.0, ): """ Instantiate a terrain bias correction. @@ -673,10 +723,18 @@ def __init__( :param bin_statistic: Statistic of central tendency (e.g., mean) to apply during the binning. :param bin_apply_method: Method to correct with the binned statistics, either "linear" to interpolate linearly between bins, or "per_bin" to apply the statistic for each bin. + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ super().__init__( - fit_or_bin, fit_func, fit_optimizer, bin_sizes, bin_statistic, bin_apply_method, [terrain_attribute] + fit_or_bin, + fit_func, + fit_optimizer, + bin_sizes, + bin_statistic, + bin_apply_method, + [terrain_attribute], + subsample, ) # This is the same as bias_var_names, but let's leave the duplicate for clarity self._meta["terrain_attribute"] = terrain_attribute @@ -686,6 +744,7 @@ def _fit_func( # type: ignore self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, bias_vars: dict[str, NDArrayf] = None, @@ -706,6 +765,7 @@ def _fit_func( # type: ignore super()._fit_func( ref_dem=ref_dem, tba_dem=tba_dem, + inlier_mask=inlier_mask, bias_vars={self._meta["terrain_attribute"]: attr}, transform=transform, crs=crs, @@ -750,6 +810,7 @@ def __init__( bin_sizes: int | dict[str, int | Iterable[float]] = 10, bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", + subsample: float | int = 1.0, ): """ Instantiate a directional bias correction. @@ -763,8 +824,18 @@ def __init__( :param bin_statistic: Statistic of central tendency (e.g., mean) to apply during the binning. :param bin_apply_method: Method to correct with the binned statistics, either "linear" to interpolate linearly between bins, or "per_bin" to apply the statistic for each bin. + :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ - super().__init__(fit_or_bin, fit_func, fit_optimizer, bin_sizes, bin_statistic, bin_apply_method, ["xx", "yy"]) + super().__init__( + fit_or_bin, + fit_func, + fit_optimizer, + bin_sizes, + bin_statistic, + bin_apply_method, + ["xx", "yy"], + subsample, + ) self._meta["poly_order"] = poly_order self._needs_vars = False @@ -772,6 +843,7 @@ def _fit_func( # type: ignore self, ref_dem: NDArrayf, tba_dem: NDArrayf, + inlier_mask: NDArrayb, transform: rio.transform.Affine, crs: rio.crs.CRS, bias_vars: dict[str, NDArrayf] | None = None, @@ -789,6 +861,7 @@ def _fit_func( # type: ignore super()._fit_func( ref_dem=ref_dem, tba_dem=tba_dem, + inlier_mask=inlier_mask, bias_vars={"xx": xx, "yy": yy}, transform=transform, crs=crs, diff --git a/xdem/examples.py b/xdem/examples.py index f0137a76..01769e19 100644 --- a/xdem/examples.py +++ b/xdem/examples.py @@ -102,7 +102,7 @@ def process_coregistered_examples(name: str, overwrite: bool = False) -> None: inlier_mask = ~glacier_mask.create_mask(reference_raster) nuth_kaab = xdem.coreg.NuthKaab() - nuth_kaab.fit(reference_raster, to_be_aligned_raster, inlier_mask=inlier_mask) + nuth_kaab.fit(reference_raster, to_be_aligned_raster, inlier_mask=inlier_mask, random_state=42) aligned_raster = nuth_kaab.apply(to_be_aligned_raster, resample=True) diff = reference_raster - aligned_raster