Commit

Improve subsample across Coreg subclasses and pipelines (GlacioHack#436)

* Add subsample to subclass init

* Progress on subsampling in Coreg classes and pipelines

* Finalize subsampling functionality

* Add tests and minor fixes

* Add test for get_subsample

* Linting

* Fix other tests

* Get geoutils from git to check everything passes

* Linting

* Add exception in generate pip from conda script when pulling from repo directly

* Change geoutils version required in advance

* First commit for Erik's comments

* Finalize Erik's comments

* Linting
rhugonnet authored Sep 26, 2023
1 parent a5635a5 commit ead2f1b
Showing 14 changed files with 542 additions and 206 deletions.
9 changes: 8 additions & 1 deletion .github/scripts/generate_pip_deps_from_conda.py
@@ -98,7 +98,14 @@ def generate_pip_from_conda(conda_path: pathlib.Path, pip_path: pathlib.Path, co
if conda_dep:
pip_deps.append(conda_dep)
elif isinstance(dep, dict) and len(dep) == 1 and "pip" in dep:
pip_deps.extend(dep["pip"])
# If pulled directly from GitHub (temporary CI passing),
# such as git+https://github.com/GlacioHack/geoutils.git,
# rename to the package repo name
dep_pips = dep["pip"]
for dep_pip in dep_pips:
if "+" in dep_pip and dep_pip.split("+")[0] == "git":
dep_pip = dep_pip.split("/")[-1].split(".git")[0]
pip_deps.append(dep_pip)
else:
raise ValueError(f"Unexpected dependency {dep}")

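For reference, a minimal standalone sketch of the renaming behaviour this hunk adds (the helper name normalize_pip_dep is illustrative, not part of the script):

def normalize_pip_dep(dep_pip: str) -> str:
    # Direct git references such as "git+https://github.com/GlacioHack/geoutils.git"
    # are reduced to the repository name, here "geoutils"; other entries pass through.
    if "+" in dep_pip and dep_pip.split("+")[0] == "git":
        return dep_pip.split("/")[-1].split(".git")[0]
    return dep_pip

assert normalize_pip_dep("git+https://github.com/GlacioHack/geoutils.git") == "geoutils"
assert normalize_pip_dep("scikit-gstat>=1.0") == "scikit-gstat>=1.0"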
4 changes: 2 additions & 2 deletions dev-environment.yml
@@ -15,7 +15,7 @@ dependencies:
- tqdm
- scikit-image
- scikit-gstat>=1.0
- geoutils=0.0.13
- geoutils=0.0.15

# Development-specific, to mirror manually in setup.cfg [options.extras_require].
- pip
@@ -52,4 +52,4 @@ dependencies:
- noisyopt

# To run CI against latest GeoUtils
# - git+https://github.com/GlacioHack/GeoUtils.git
# - git+https://github.com/GlacioHack/geoutils.git
4 changes: 2 additions & 2 deletions environment.yml
@@ -15,8 +15,8 @@ dependencies:
- tqdm
- scikit-image
- scikit-gstat>=1.0
- geoutils=0.0.13
- geoutils=0.0.15
- pip

# - pip:
# - git+https://github.com/GlacioHack/GeoUtils.git
# - git+https://github.com/GlacioHack/geoutils.git
2 changes: 1 addition & 1 deletion requirements.txt
@@ -13,7 +13,7 @@ scipy
tqdm
scikit-image
scikit-gstat>=1.0
geoutils==0.0.13
geoutils==0.0.15
pip
setuptools>=42
setuptools_scm[toml]>=6.2
39 changes: 21 additions & 18 deletions tests/test_coreg/test_affine.py
@@ -139,16 +139,14 @@ def test_coreg_example(self, verbose: bool = False) -> None:

# Run co-registration
nuth_kaab = xdem.coreg.NuthKaab()
nuth_kaab.fit(self.ref, self.tba, inlier_mask=self.inlier_mask, verbose=verbose)
nuth_kaab.fit(self.ref, self.tba, inlier_mask=self.inlier_mask, verbose=verbose, random_state=42)

# Check the output metadata is always the same
assert nuth_kaab._meta["offset_east_px"] == pytest.approx(-0.46255704521968716)
assert nuth_kaab._meta["offset_north_px"] == pytest.approx(-0.13618536563846081)
assert nuth_kaab._meta["vshift"] == pytest.approx(-1.9815309753424906)
assert nuth_kaab._meta["offset_east_px"] == pytest.approx(-0.45061858808956284)
assert nuth_kaab._meta["offset_north_px"] == pytest.approx(-0.14225262689582596)
assert nuth_kaab._meta["vshift"] == pytest.approx(-1.987523471566405)

def test_gradientdescending(
self, downsampling: int = 10000, samples: int = 5000, inlier_mask: bool = True, verbose: bool = False
) -> None:
def test_gradientdescending(self, subsample: int = 10000, inlier_mask: bool = True, verbose: bool = False) -> None:
"""
Test the co-registration outputs performed on the example are always the same. This overlaps with the test in
test_examples.py, but helps identify from where differences arise.
@@ -159,9 +157,14 @@ def test_gradientdescending(
inlier_mask = self.inlier_mask

# Run co-registration
gds = xdem.coreg.GradientDescending(downsampling=downsampling)
gds = xdem.coreg.GradientDescending(subsample=subsample)
gds.fit_pts(
self.ref.to_points().ds, self.tba, inlier_mask=inlier_mask, verbose=verbose, samples=samples, z_name="b1"
self.ref.to_points().ds,
self.tba,
inlier_mask=inlier_mask,
verbose=verbose,
subsample=subsample,
z_name="b1",
)
assert gds._meta["offset_east_px"] == pytest.approx(-0.496000, rel=1e-1, abs=0.1)
assert gds._meta["offset_north_px"] == pytest.approx(-0.1875, rel=1e-1, abs=0.1)
@@ -170,7 +173,7 @@
@pytest.mark.parametrize("shift_px", [(1, 1), (2, 2)]) # type: ignore
@pytest.mark.parametrize("coreg_class", [coreg.NuthKaab, coreg.GradientDescending, coreg.ICP]) # type: ignore
@pytest.mark.parametrize("points_or_raster", ["raster", "points"])
def test_coreg_example_shift(self, shift_px, coreg_class, points_or_raster, verbose=False, downsampling=5000):
def test_coreg_example_shift(self, shift_px, coreg_class, points_or_raster, verbose=False, subsample=5000):
"""
For comparison of coreg algorithms:
Shift a ref_dem on purpose, e.g. shift_px = (1,1), and then applying coreg to shift it back.
@@ -182,12 +185,12 @@ def test_coreg_example_shift(self, shift_px, coreg_class, points_or_raster, verb
shifted_ref = self.ref.copy()
shifted_ref.shift(shift_px[0] * res, shift_px[1] * res)

shifted_ref_points = shifted_ref.to_points(as_array=False, subset=downsampling, pixel_offset="center").ds
shifted_ref_points = shifted_ref.to_points(as_array=False, subset=subsample, pixel_offset="center").ds
shifted_ref_points["E"] = shifted_ref_points.geometry.x
shifted_ref_points["N"] = shifted_ref_points.geometry.y
shifted_ref_points.rename(columns={"b1": "z"}, inplace=True)

kwargs = {} if coreg_class.__name__ != "GradientDescending" else {"downsampling": downsampling}
kwargs = {} if coreg_class.__name__ != "GradientDescending" else {"subsample": subsample}

coreg_obj = coreg_class(**kwargs)

@@ -265,28 +268,28 @@ def test_nuth_kaab(self) -> None:
# Check that the z shift is close to the original vertical shift.
assert abs((transformed_points[0, 2] - self.points[0, 2]) + vshift) < 0.1

def test_deramping(self) -> None:
def test_tilt(self) -> None:
warnings.simplefilter("error")

# Try a 1st degree deramping.
deramp = coreg.Tilt()
tilt = coreg.Tilt()

# Fit the data
deramp.fit(**self.fit_params)
tilt.fit(**self.fit_params)

# Apply the deramping to a DEM
deramped_dem = deramp.apply(self.tba)
tilted_dem = tilt.apply(self.tba)

# Get the periglacial offset after deramping
periglacial_offset = (self.ref - deramped_dem)[self.inlier_mask]
periglacial_offset = (self.ref - tilted_dem)[self.inlier_mask]
# Get the periglacial offset before deramping
pre_offset = (self.ref - self.tba)[self.inlier_mask]

# Check that the error improved
assert np.abs(np.mean(periglacial_offset)) < np.abs(np.mean(pre_offset))

# Check that the mean periglacial offset is low
assert np.abs(np.mean(periglacial_offset)) < 1
assert np.abs(np.mean(periglacial_offset)) < 0.01

def test_icp_opencv(self) -> None:
warnings.simplefilter("error")
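The changes in this file boil down to a renamed keyword: downsampling becomes subsample, and random_state can now be passed to fit() so the random subsampling (and hence the asserted offsets) is reproducible. A hedged usage sketch, assuming the Longyearbyen example data bundled with xdem:

import xdem

ref = xdem.DEM(xdem.examples.get_path("longyearbyen_ref_dem"))
tba = xdem.DEM(xdem.examples.get_path("longyearbyen_tba_dem"))

nuth_kaab = xdem.coreg.NuthKaab()
nuth_kaab.fit(ref, tba, random_state=42)  # fixed seed makes the subsample, and thus the fit, deterministic

gds = xdem.coreg.GradientDescending(subsample=10000)  # formerly downsampling=10000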
178 changes: 128 additions & 50 deletions tests/test_coreg/test_base.py
@@ -2,6 +2,7 @@

from __future__ import annotations

import inspect
import re
import warnings
from typing import Any, Callable
@@ -106,66 +107,143 @@ def test_ij_xy(self, i: int = 10, j: int = 20) -> None:
assert i == pytest.approx(10)
assert j == pytest.approx(20)

def test_subsample(self) -> None:
warnings.simplefilter("error")

# Test subsampled vertical shift correction
vshift_sub = coreg.VerticalShift()

# Fit the vertical shift using 50% of the unmasked data using a fraction
vshift_sub.fit(**self.fit_params, subsample=0.5)
# Do the same but specify the pixel count instead.
# They are not perfectly equal (np.count_nonzero(self.mask) // 2 would be exact)
# But this would just repeat the subsample code, so that makes little sense to test.
vshift_sub.fit(**self.fit_params, subsample=self.tba.data.size // 2)

# Do full vertical shift corr to compare
vshift_full = coreg.VerticalShift()
vshift_full.fit(**self.fit_params)

# Check that the estimated vertical shifts are similar
assert abs(vshift_sub._meta["vshift"] - vshift_full._meta["vshift"]) < 0.1

# Test NuthKaab with subsampling
nuthkaab_full = coreg.NuthKaab()
nuthkaab_sub = coreg.NuthKaab()
@pytest.mark.parametrize("subsample", [10, 10000, 0.5, 1]) # type: ignore
def test_get_subsample_on_valid_mask(self, subsample: float | int) -> None:
"""Test the subsampling function called by all subclasses"""

# Define a valid mask
width = height = 50
np.random.seed(42)
valid_mask = np.random.randint(low=0, high=2, size=(width, height), dtype=bool)

# Define a class with a subsample and random_state in the metadata
coreg = Coreg(meta={"subsample": subsample, "random_state": 42})
subsample_mask = coreg._get_subsample_on_valid_mask(valid_mask=valid_mask)

# Check that it returns a same-shaped array that is boolean
assert np.shape(valid_mask) == np.shape(subsample_mask)
assert subsample_mask.dtype == bool
# Check that the subsampled values are all within valid values
assert all(valid_mask[subsample_mask])
# Check that the number of subsampled value is coherent, or the maximum possible
if subsample <= 1:
# If value lower than 1, fraction of valid pixels
subsample_val: float | int = int(subsample * np.count_nonzero(valid_mask))
else:
# Otherwise the number of pixels
subsample_val = subsample
assert np.count_nonzero(subsample_mask) == min(subsample_val, np.count_nonzero(valid_mask))
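
A hedged sketch of the rule this test exercises (not the actual xdem implementation): a subsample value at or below 1 is read as a fraction of the valid pixels, a larger value as an absolute pixel count, capped at the number of valid pixels.

import numpy as np

def get_subsample_on_valid_mask(valid_mask: np.ndarray, subsample: float, random_state: int = 42) -> np.ndarray:
    rng = np.random.default_rng(random_state)
    n_valid = np.count_nonzero(valid_mask)
    # Fraction if <= 1, absolute count otherwise; never more than the number of valid pixels
    n_sub = int(subsample * n_valid) if subsample <= 1 else int(min(subsample, n_valid))
    # Draw n_sub valid indices at random and return a same-shaped boolean mask
    chosen = rng.choice(np.flatnonzero(valid_mask), size=n_sub, replace=False)
    subsample_mask = np.zeros(valid_mask.shape, dtype=bool)
    subsample_mask[np.unravel_index(chosen, valid_mask.shape)] = True
    return subsample_mask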

# Measure the start and stop time to get the duration
# start_time = time.time()
nuthkaab_full.fit(**self.fit_params)
# icp_full_duration = time.time() - start_time
# TODO: Activate NuthKaab once subsampling there is made consistent
all_coregs = [
coreg.VerticalShift,
# coreg.NuthKaab,
coreg.ICP,
coreg.Deramp,
coreg.TerrainBias,
coreg.DirectionalBias,
]

# Do the same with 50% subsampling
# start_time = time.time()
nuthkaab_sub.fit(**self.fit_params, subsample=0.5)
# icp_sub_duration = time.time() - start_time
@pytest.mark.parametrize("coreg", all_coregs) # type: ignore
def test_subsample(self, coreg: Callable) -> None: # type: ignore
warnings.simplefilter("error")

# Make sure that the subsampling increased performance
# Temporarily add a fallback assertion that if it's slower, it shouldn't be much slower (2021-05-17).
# This doesn't work with GitHub's CI, but it works locally. I'm disabling this for now (2021-05-20).
# assert icp_full_duration > icp_sub_duration or (abs(icp_full_duration - icp_sub_duration) < 1)
# Check that default value is set properly
coreg_full = coreg()
argspec = inspect.getfullargspec(coreg)
assert coreg_full._meta["subsample"] == argspec.defaults[argspec.args.index("subsample") - 1] # type: ignore

# Calculate the difference in the full vs. subsampled matrices
matrix_diff = np.abs(nuthkaab_full.to_matrix() - nuthkaab_sub.to_matrix())
# Check that the x/y/z differences do not exceed 30cm
assert np.count_nonzero(matrix_diff > 0.5) == 0
# But can be overridden during fit
coreg_full.fit(**self.fit_params, subsample=10000, random_state=42)
assert coreg_full._meta["subsample"] == 10000

# Test subsampled deramping
deramp_sub = coreg.Tilt()
# Test subsampled vertical shift correction
coreg_sub = coreg(subsample=0.1)
assert coreg_sub._meta["subsample"] == 0.1

# Fit the bias using 50% of the unmasked data using a fraction
deramp_sub.fit(**self.fit_params, subsample=0.5)
# Fit the vertical shift using 10% of the unmasked data using a fraction
coreg_sub.fit(**self.fit_params, random_state=42)
# Do the same but specify the pixel count instead.
# They are not perfectly equal (np.count_nonzero(self.mask) // 2 would be exact)
# But this would just repeat the subsample code, so that makes little sense to test.
deramp_sub.fit(**self.fit_params, subsample=self.tba.data.size // 2)
coreg_sub = coreg(subsample=self.tba.data.size // 10)
assert coreg_sub._meta["subsample"] == self.tba.data.size // 10
coreg_sub.fit(**self.fit_params, random_state=42)

# Add a few performance checks
coreg_name = coreg.__name__
if coreg_name == "VerticalShift":
# Check that the estimated vertical shifts are similar
assert abs(coreg_sub._meta["vshift"] - coreg_full._meta["vshift"]) < 0.1

elif coreg_name == "NuthKaab":
# Calculate the difference in the full vs. subsampled matrices
matrix_diff = np.abs(coreg_full.to_matrix() - coreg_sub.to_matrix())
# Check that the x/y/z differences do not exceed 30cm
assert np.count_nonzero(matrix_diff > 0.5) == 0

elif coreg_name == "Tilt":
# Check that the estimated biases are similar
assert coreg_sub._meta["coefficients"] == pytest.approx(coreg_full._meta["coefficients"], rel=1e-1)

def test_subsample__pipeline(self) -> None:
"""Test that the subsample argument works as intended for pipelines"""

# Check definition during instantiation
pipe = coreg.VerticalShift(subsample=200) + coreg.Deramp(subsample=5000)

# Check the arguments are properly defined
assert pipe.pipeline[0]._meta["subsample"] == 200
assert pipe.pipeline[1]._meta["subsample"] == 5000

# Check definition during fit
pipe = coreg.VerticalShift() + coreg.Deramp()
pipe.fit(**self.fit_params, subsample=1000)
assert pipe.pipeline[0]._meta["subsample"] == 1000
assert pipe.pipeline[1]._meta["subsample"] == 1000

def test_subsample__errors(self) -> None:
"""Check proper errors are raised when using the subsample argument"""

# A warning should be raised when overriding with fit if non-default parameter was passed during instantiation
vshift = coreg.VerticalShift(subsample=100)

with pytest.warns(
UserWarning,
match=re.escape(
"Subsample argument passed to fit() will override non-default "
"subsample value defined at instantiation. To silence this "
"warning: only define 'subsample' in either fit(subsample=...) "
"or instantiation e.g. VerticalShift(subsample=...)."
),
):
vshift.fit(**self.fit_params, subsample=1000)

# Do full bias corr to compare
deramp_full = coreg.Tilt()
deramp_full.fit(**self.fit_params)
# Same for a pipeline
pipe = coreg.VerticalShift(subsample=200) + coreg.Deramp()
with pytest.warns(
UserWarning,
match=re.escape(
"Subsample argument passed to fit() will override non-default "
"subsample values defined for individual steps of the pipeline. "
"To silence this warning: only define 'subsample' in either "
"fit(subsample=...) or instantiation e.g., VerticalShift(subsample=...)."
),
):
pipe.fit(**self.fit_params, subsample=1000)

# Check that the estimated biases are similar
assert deramp_sub._meta["coefficients"] == pytest.approx(deramp_full._meta["coefficients"], rel=1e-1)
# Same for a blockwise co-registration
block = coreg.BlockwiseCoreg(coreg.VerticalShift(subsample=200), subdivision=4)
with pytest.warns(
UserWarning,
match=re.escape(
"Subsample argument passed to fit() will override non-default subsample "
"values defined in the step within the blockwise method. To silence this "
"warning: only define 'subsample' in either fit(subsample=...) or "
"instantiation e.g., VerticalShift(subsample=...)."
),
):
block.fit(**self.fit_params, subsample=1000)

def test_coreg_raster_and_ndarray_args(self) -> None:

@@ -423,7 +501,7 @@ class TestCoregPipeline:
inlier_mask=inlier_mask,
transform=ref.transform,
crs=ref.crs,
verbose=False,
verbose=True,
)
# Create some 3D coordinates with Z coordinates being 0 to try the apply_pts functions.
points = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [0, 0, 0, 0]], dtype="float64").T
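In short, the pipeline, error and blockwise tests above cover one rule: subsample can be set per step at instantiation, or once in fit(), in which case it is applied to every step and a warning is raised if a non-default per-step value gets overridden. A hedged sketch (DEM arguments omitted from the commented fit call):

from xdem import coreg

pipe = coreg.VerticalShift(subsample=200) + coreg.Deramp(subsample=5000)  # per-step values
assert pipe.pipeline[0]._meta["subsample"] == 200

pipe = coreg.VerticalShift() + coreg.Deramp()
# pipe.fit(reference_dem, dem_to_be_aligned, subsample=1000)  # one value, applied to both steps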
4 changes: 2 additions & 2 deletions tests/test_coreg/test_biascorr.py
@@ -39,7 +39,7 @@ class TestBiasCorr:
reference_dem=ref,
dem_to_be_aligned=tba,
inlier_mask=inlier_mask,
verbose=False,
verbose=True,
)
# Create some 3D coordinates with Z coordinates being 0 to try the apply_pts functions.
points = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [0, 0, 0, 0]], dtype="float64").T
@@ -144,7 +144,7 @@ def test_biascorr__errors(self) -> None:
scipy.optimize.curve_fit,
],
) # type: ignore
def test_biascorr__fit_1d(self, fit_func, fit_optimizer) -> None:
def test_biascorr__fit_1d(self, fit_func, fit_optimizer, capsys) -> None:
"""Test the _fit_func and apply_func methods of BiasCorr for the fit case (called by all its subclasses)."""

# Create a bias correction object
10 changes: 5 additions & 5 deletions tests/test_examples.py
@@ -34,11 +34,11 @@ class TestExamples:
ddem,
np.array(
[
-4.669189453125000000e-02,
-7.413940429687500000e-01,
1.499481201171875000e-01,
1.095550537109375000e00,
-5.904846191406250000e00,
-0.22833252,
-0.82458496,
0.18843079,
1.0004578,
-5.755249,
],
dtype=np.float32,
),