From 1626a3d3a891c79c73b403c1a4d4837e682c212f Mon Sep 17 00:00:00 2001 From: Frank Date: Wed, 21 Aug 2024 17:03:29 +0200 Subject: [PATCH 1/4] possiblility to pass kwargs to distance function --- skgstat/Kriging.py | 130 ++++++++++++-------- skgstat/MetricSpace.py | 269 ++++++++++++++++++++++++----------------- skgstat/Variogram.py | 8 +- 3 files changed, 238 insertions(+), 169 deletions(-) diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index b4f4932..fa9ebe0 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -3,6 +3,7 @@ used together with the skgstat.Variogram class. The usage of the class is inspired by the scipy.interpolate classes. """ + import time import numpy as np @@ -33,18 +34,18 @@ def inv_solve(a, b): class OrdinaryKriging: def __init__( - self, - variogram, - min_points=5, - max_points=15, - mode='exact', - precision=100, - solver='inv', - n_jobs=1, - perf=False, - sparse=False, - coordinates=None, - values=None + self, + variogram, + min_points=5, + max_points=15, + mode="exact", + precision=100, + solver="inv", + n_jobs=1, + perf=False, + sparse=False, + coordinates=None, + values=None, ): """Ordinary Kriging routine @@ -121,18 +122,33 @@ def __init__( self.n_jobs = n_jobs self.perf = perf - self.range = variogram['effective_range'] - self.nugget = variogram['nugget'] - self.sill = variogram['sill'] - self.dist_metric = variogram["dist_func"] + self.range = variogram["effective_range"] + self.nugget = variogram["nugget"] + self.sill = variogram["sill"] + if isinstance(coordinates, MetricSpace): + self.dist_metric = coordinates.dist_metric + self.dist_metric_kwargs = coordinates.dist_metric_kwargs + else: + self.dist_metric = variogram["dist_func"] + self.dist_metric_kwargs = {} # coordinates and semivariance function if not isinstance(coordinates, MetricSpace): - coordinates, values = self._remove_duplicated_coordinates(coordinates, values) - coordinates = MetricSpace(coordinates.copy(), self.dist_metric, self.range if self.sparse else None) + coordinates, values = self._remove_duplicated_coordinates( + coordinates, values + ) + coordinates = MetricSpace( + coordinates.copy(), + self.dist_metric, + self.range if self.sparse else None, + ) else: - assert self.dist_metric == coordinates.dist_metric, "Distance metric of variogram differs from distance metric of coordinates" - assert coordinates.max_dist is None or coordinates.max_dist == self.range, "Sparse coordinates must have max_dist == variogram.effective_range" + assert ( + self.dist_metric == coordinates.dist_metric + ), "Distance metric of variogram differs from distance metric of coordinates" + assert ( + coordinates.max_dist is None or coordinates.max_dist == self.range + ), "Sparse coordinates must have max_dist == variogram.effective_range" self.values = values.copy() self.coords = coordinates self.gamma_model = Variogram.fitted_model_function(**variogram) @@ -163,7 +179,9 @@ def __init__( self.perf_solv = list() def dist(self, x): - return Variogram.wrapped_distance_function(self.dist_metric, x) + return Variogram.wrapped_distance_function( + self.dist_metric, x, **self.dist_metric_kwargs + ) @classmethod def _remove_duplicated_coordinates(cls, coords, values): @@ -193,11 +211,11 @@ def min_points(self): def min_points(self, value): # check the value if not isinstance(value, int): - raise ValueError('min_points has to be an integer.') + raise ValueError("min_points has to be an integer.") if value < 0: - raise ValueError('min_points can\'t be negative.') + raise ValueError("min_points can't be negative.") if value > self._maxp: - raise ValueError('min_points can\'t be larger than max_points.') + raise ValueError("min_points can't be larger than max_points.") # set self._minp = value @@ -210,11 +228,11 @@ def max_points(self): def max_points(self, value): # check the value if not isinstance(value, int): - raise ValueError('max_points has to be an integer.') + raise ValueError("max_points has to be an integer.") if value < 0: - raise ValueError('max_points can\'t be negative.') + raise ValueError("max_points can't be negative.") if value < self._minp: - raise ValueError('max_points can\'t be smaller than min_points.') + raise ValueError("max_points can't be smaller than min_points.") # set self._maxp = value @@ -225,10 +243,10 @@ def mode(self): @mode.setter def mode(self, value): - if value == 'exact': + if value == "exact": self._prec_g = None self._prec_dist = None - elif value == 'estimate': + elif value == "estimate": self._precalculate_matrix() else: raise ValueError("mode has to be one of 'exact', 'estimate'.") @@ -241,9 +259,9 @@ def precision(self): @precision.setter def precision(self, value): if not isinstance(value, int): - raise TypeError('precision has to be of type int') + raise TypeError("precision has to be of type int") if value < 1: - raise ValueError('The precision has be be > 1') + raise ValueError("The precision has be be > 1") self._precision = value self._precalculate_matrix() @@ -253,11 +271,11 @@ def solver(self): @solver.setter def solver(self, value): - if value == 'numpy': + if value == "numpy": self._solve = numpy_solve - elif value == 'scipy': + elif value == "scipy": self._solve = scipy_solve - elif value == 'inv': + elif value == "inv": self._solve = inv_solve else: raise AttributeError("solver has to be ['inv', 'numpy', 'scipy']") @@ -294,34 +312,46 @@ def transform(self, *x): self.perf_dist, self.perf_mat, self.perf_solv = [], [], [] if len(x) != 1 or not isinstance(x[0], MetricSpace): - self.transform_coords = MetricSpace(np.column_stack(x).copy(), self.dist_metric, self.range if self.sparse else None) + self.transform_coords = MetricSpace( + np.column_stack(x).copy(), + self.dist_metric, + self.range if self.sparse else None, + ) else: self.transform_coords = x[0] self.transform_coords_pair = MetricSpacePair(self.transform_coords, self.coords) # DEV: this is dirty, not sure how to do it better at the moment - #self.sigma = np.empty(len(x[0])) + # self.sigma = np.empty(len(x[0])) self.sigma = np.ones(len(x[0])) * np.nan self.__sigma_index = 0 # if multi-core, than here if self.n_jobs is None or self.n_jobs == 1: - z = np.fromiter(map(self._estimator, range(len(self.transform_coords))), dtype=float) + z = np.fromiter( + map(self._estimator, range(len(self.transform_coords))), dtype=float + ) else: + def f(idxs): return self._estimator(idxs) + with Pool(self.n_jobs) as p: z = p.starmap(f, range(len(self.transform_coords))) # print warnings if self.singular_error > 0: - print('Warning: %d kriging matrices were singular.' % self.singular_error) + print("Warning: %d kriging matrices were singular." % self.singular_error) if self.no_points_error > 0: - print('Warning: for %d locations, not enough neighbors were ' - 'found within the range.' % self.no_points_error) + print( + "Warning: for %d locations, not enough neighbors were " + "found within the range." % self.no_points_error + ) if self.ill_matrix > 0: - print('Warning: %d kriging matrices were ill-conditioned.' - ' The result may not be accurate.' % self.ill_matrix) + print( + "Warning: %d kriging matrices were ill-conditioned." + " The result may not be accurate." % self.ill_matrix + ) # store the field in the instance itself self.z = np.array(z) @@ -424,11 +454,7 @@ def _krige(self, idx): # get the point and index p = self.transform_coords.coords[idx, :] - idx = self.transform_coords_pair.find_closest( - idx, - self.range, - self._maxp - ) + idx = self.transform_coords_pair.find_closest(idx, self.range, self._maxp) # raise an error if not enough points are found if idx.size < self._minp: @@ -445,7 +471,7 @@ def _krige(self, idx): self.perf_dist.append(t1 - t0) # build the kriging Matrix; needs N + 1 dimensionality - if self.mode == 'exact': + if self.mode == "exact": a = self._build_matrix(dist_mat) else: a = self._estimate_matrix(dist_mat) @@ -464,7 +490,7 @@ def _krige(self, idx): # build the matrix of solutions A _p = np.concatenate(([p], in_range)) - _dists = self.dist(_p)[:len(_p) - 1] + _dists = self.dist(_p)[: len(_p) - 1] _g = self.gamma_model(_dists) b = np.concatenate((_g, [1])) @@ -473,18 +499,18 @@ def _krige(self, idx): _lambda = self._solve(a, b) except LinAlgError as e: print(a) - if str(e) == 'Matrix is singular.': + if str(e) == "Matrix is singular.": raise SingularMatrixError else: raise e except RuntimeWarning as w: - if 'Ill-conditioned matrix' in str(w): + if "Ill-conditioned matrix" in str(w): print(a) raise IllMatrixError else: raise w except ValueError as e: - print('[DEBUG]: print variogram matrix and distance matrix:') + print("[DEBUG]: print variogram matrix and distance matrix:") print(a) print(_dists) raise e diff --git a/skgstat/MetricSpace.py b/skgstat/MetricSpace.py index c77f697..27c54ee 100644 --- a/skgstat/MetricSpace.py +++ b/skgstat/MetricSpace.py @@ -49,9 +49,7 @@ def find_closest(self, idx, max_dist=None, N=None): max_dist = self.max_dist else: if self.max_dist is not None and max_dist != self.max_dist: - raise AttributeError( - "max_dist specified and max_dist != self.max_dist" - ) + raise AttributeError("max_dist specified and max_dist != self.max_dist") if isinstance(self.dists, sparse.spmatrix): dists = self.dists.getrow(idx) @@ -86,7 +84,7 @@ class MetricSpace(DistanceMethods): cloud in space. However, it slows things down for small datasets. """ - def __init__(self, coords, dist_metric="euclidean", max_dist=None): + def __init__(self, coords, dist_metric="euclidean", max_dist=None, dist_metric_kwargs): """ProbabalisticMetricSpace class Parameters @@ -104,13 +102,20 @@ def __init__(self, coords, dist_metric="euclidean", max_dist=None): self.max_dist = max_dist self._tree = None self._dists = None + self.dist_metric_kwargs = dist_metric_kwargs # Check if self.dist_metric is valid try: - if self.dist_metric=='mahalanobis': - _ = pdist(self.coords[:self.coords.shape[1]+1, :], metric=self.dist_metric) + if self.dist_metric == "mahalanobis": + _ = pdist( + self.coords[: self.coords.shape[1] + 1, :], metric=self.dist_metric + ) else: - pdist(self.coords[:1, :], metric=self.dist_metric) + pdist( + self.coords[:1, :], + metric=self.dist_metric, + **self.dist_metric_kwargs + ) except ValueError as e: raise e @@ -120,10 +125,9 @@ def tree(self): instance of `self.coords`. Undefined otherwise.""" # only Euclidean supported if self.dist_metric != "euclidean": - raise ValueError(( - "A coordinate tree can only be constructed " - "for an euclidean space" - )) + raise ValueError( + ("A coordinate tree can only be constructed " "for an euclidean space") + ) # if not cached - calculate if self._tree is None: @@ -143,15 +147,15 @@ def dists(self): # check if max dist is given if self.max_dist is not None and self.dist_metric == "euclidean": self._dists = self.tree.sparse_distance_matrix( - self.tree, - self.max_dist, - output_type="coo_matrix" + self.tree, self.max_dist, output_type="coo_matrix" ).tocsr() # otherwise use pdist else: self._dists = squareform( - pdist(self.coords, metric=self.dist_metric) + pdist( + self.coords, metric=self.dist_metric, **self.dist_metric_kwargs + ) ) # return @@ -200,6 +204,7 @@ class MetricSpacePair(DistanceMethods): than the maximum distance). The two point clouds are required to have the same distance metric as well as maximum distance. """ + def __init__(self, ms1, ms2): """ Parameters @@ -213,15 +218,11 @@ def __init__(self, ms1, ms2): # check input data # same distance metrix if ms1.dist_metric != ms2.dist_metric: - raise ValueError( - "Both MetricSpaces need to have the same distance metric" - ) + raise ValueError("Both MetricSpaces need to have the same distance metric") # same max_dist setting if ms1.max_dist != ms2.max_dist: - raise ValueError( - "Both MetricSpaces need to have the same max_dist" - ) + raise ValueError("Both MetricSpaces need to have the same max_dist") self.ms1 = ms1 self.ms2 = ms2 self._dists = None @@ -245,9 +246,7 @@ def dists(self): # handle euclidean with max_dist with Tree if self.max_dist is not None and self.dist_metric == "euclidean": self._dists = self.ms1.tree.sparse_distance_matrix( - self.ms2.tree, - self.max_dist, - output_type="coo_matrix" + self.ms2.tree, self.max_dist, output_type="coo_matrix" ).tocsr() # otherwise Tree not possible @@ -255,7 +254,8 @@ def dists(self): self._dists = cdist( self.ms1.coords, self.ms2.coords, - metric=self.ms1.dist_metric + metric=self.ms1.dist_metric, + **self.ms1.dist_metric_kwargs ) # return @@ -264,17 +264,13 @@ def dists(self): class ProbabalisticMetricSpace(MetricSpace): """Like MetricSpace but samples the distance pairs only returning a - `samples` sized subset. `samples` can either be a fraction of - the total number of pairs (float < 1), or an integer count. + `samples` sized subset. `samples` can either be a fraction of + the total number of pairs (float < 1), or an integer count. """ + def __init__( - self, - coords, - dist_metric="euclidean", - max_dist=None, - samples=0.5, - rnd=None - ): + self, coords, dist_metric="euclidean", max_dist=None, samples=0.5, rnd=None + ): """ProbabalisticMetricSpace class Parameters @@ -300,7 +296,9 @@ def __init__( elif isinstance(rnd, np.random.RandomState): self.rnd = rnd else: - self.rnd = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(rnd))) + self.rnd = np.random.RandomState( + np.random.MT19937(np.random.SeedSequence(rnd)) + ) self._lidx = None self._ridx = None @@ -321,14 +319,18 @@ def sample_count(self): def lidx(self): """The sampled indices into `self.coords` for the left sample.""" if self._lidx is None: - self._lidx = self.rnd.choice(len(self.coords), size=self.sample_count, replace=False) + self._lidx = self.rnd.choice( + len(self.coords), size=self.sample_count, replace=False + ) return self._lidx @property def ridx(self): """The sampled indices into `self.coords` for the right sample.""" if self._ridx is None: - self._ridx = self.rnd.choice(len(self.coords), size=self.sample_count, replace=False) + self._ridx = self.rnd.choice( + len(self.coords), size=self.sample_count, replace=False + ) return self._ridx @property @@ -338,10 +340,9 @@ def ltree(self): # only Euclidean supported if self.dist_metric != "euclidean": - raise ValueError(( - "A coordinate tree can only be constructed " - "for an euclidean space" - )) + raise ValueError( + ("A coordinate tree can only be constructed " "for an euclidean space") + ) if self._ltree is None: self._ltree = cKDTree(self.coords[self.lidx, :]) @@ -354,10 +355,9 @@ def rtree(self): # only Euclidean supported if self.dist_metric != "euclidean": - raise ValueError(( - "A coordinate tree can only be constructed " - "for an euclidean space" - )) + raise ValueError( + ("A coordinate tree can only be constructed " "for an euclidean space") + ) if self._rtree is None: self._rtree = cKDTree(self.coords[self.ridx, :]) @@ -366,15 +366,13 @@ def rtree(self): @property def dists(self): """A distance matrix of the sampled point pairs as a - `scipy.sparse.csr_matrix` sparse matrix. """ + `scipy.sparse.csr_matrix` sparse matrix.""" if self._dists is None: max_dist = self.max_dist if max_dist is None: max_dist = np.finfo(float).max dists = self.ltree.sparse_distance_matrix( - self.rtree, - max_dist, - output_type="coo_matrix" + self.rtree, max_dist, output_type="coo_matrix" ).tocsr() dists.resize((len(self.coords), len(self.coords))) dists.indices = self.ridx[dists.indices] @@ -392,7 +390,7 @@ def _get_disk_sample( center: Tuple[float, float], center_radius: float, rnd_func: np.random.RandomState, - sample_count: int + sample_count: int, ): """ Subfunction for RasterEquidistantMetricSpace. @@ -400,8 +398,9 @@ def _get_disk_sample( Same parameters as in the class. """ # First index: preselect samples in a disk of certain radius - dist_center = np.sqrt((coords[:, 0] - center[0]) ** 2 + ( - coords[:, 1] - center[1]) ** 2) + dist_center = np.sqrt( + (coords[:, 0] - center[0]) ** 2 + (coords[:, 1] - center[1]) ** 2 + ) idx1 = dist_center < center_radius count = np.count_nonzero(idx1) @@ -422,7 +421,8 @@ def _get_successive_ring_samples( coords: np.ndarray, center: Tuple[float, float], equidistant_radii: List[float], - rnd_func: np.random.RandomState, sample_count: int + rnd_func: np.random.RandomState, + sample_count: int, ): """ Subfunction for RasterEquidistantMetricSpace. @@ -430,11 +430,13 @@ def _get_successive_ring_samples( "equidistant sample". Same parameters as in the class. """ # First index: preselect samples in a ring of certain inside radius and outside radius - dist_center = np.sqrt((coords[:, 0] - center[0]) ** 2 + (coords[:, 1] - center[1]) ** 2) + dist_center = np.sqrt( + (coords[:, 0] - center[0]) ** 2 + (coords[:, 1] - center[1]) ** 2 + ) idx = np.logical_and( dist_center[None, :] >= np.array(equidistant_radii[:-1])[:, None], - dist_center[None, :] < np.array(equidistant_radii[1:])[:, None] + dist_center[None, :] < np.array(equidistant_radii[1:])[:, None], ) # Loop over an iterative sampling in rings @@ -468,7 +470,7 @@ def _get_idx_dists( max_dist: float, i: int, imax: int, - verbose: bool + verbose: bool, ): """ Subfunction for RasterEquidistantMetricSpace. @@ -477,13 +479,14 @@ def _get_idx_dists( """ if verbose: - print('Working on subsample ' + str(i+1) + ' out of ' + str(imax)) + print("Working on subsample " + str(i + 1) + " out of " + str(imax)) cidx = _get_disk_sample( - coords=coords, center=center, + coords=coords, + center=center, center_radius=center_radius, rnd_func=rnd_func, - sample_count=sample_count + sample_count=sample_count, ) eqidx = _get_successive_ring_samples( @@ -491,17 +494,13 @@ def _get_idx_dists( center=center, equidistant_radii=equidistant_radii, rnd_func=rnd_func, - sample_count=sample_count + sample_count=sample_count, ) ctree = cKDTree(coords[cidx, :]) eqtree = cKDTree(coords[eqidx, :]) - dists = ctree.sparse_distance_matrix( - eqtree, - max_dist, - output_type="coo_matrix" - ) + dists = ctree.sparse_distance_matrix(eqtree, max_dist, output_type="coo_matrix") return dists.data, cidx[dists.row], eqidx[dists.col] @@ -542,24 +541,24 @@ class RasterEquidistantMetricSpace(MetricSpace): - A list of radii for the equidistant ring subsets. When providing those spatial parameters, all other sampling parameters will be ignored except for the raw number of samples to draw in each subset. - """ + """ def __init__( - self, - coords, - shape, - extent, - samples=100, - ratio_subsample=0.2, - runs=None, - n_jobs=1, - exp_increase_fac=np.sqrt(2), - center_radius=None, - equidistant_radii=None, - max_dist=None, - dist_metric="euclidean", - rnd=None, - verbose=False + self, + coords, + shape, + extent, + samples=100, + ratio_subsample=0.2, + runs=None, + n_jobs=1, + exp_increase_fac=np.sqrt(2), + center_radius=None, + equidistant_radii=None, + max_dist=None, + dist_metric="euclidean", + rnd=None, + verbose=False, ): """RasterEquidistantMetricSpace class @@ -598,27 +597,36 @@ def __init__( """ if dist_metric != "euclidean": - raise ValueError(( - "A RasterEquidistantMetricSpace class can only be constructed " - "for an euclidean space" - )) + raise ValueError( + ( + "A RasterEquidistantMetricSpace class can only be constructed " + "for an euclidean space" + ) + ) self.coords = coords.copy() self.dist_metric = dist_metric self.shape = shape self.extent = extent - self.res = np.mean([(extent[1] - extent[0])/(shape[0]-1),(extent[3] - extent[2])/(shape[1]-1)]) + self.res = np.mean( + [ + (extent[1] - extent[0]) / (shape[0] - 1), + (extent[3] - extent[2]) / (shape[1] - 1), + ] + ) # if the maximum distance is not specified, find the maximum possible distance from the extent if max_dist is None: - max_dist = np.sqrt((extent[1] - extent[0])**2 + (extent[3] - extent[2])**2) + max_dist = np.sqrt( + (extent[1] - extent[0]) ** 2 + (extent[3] - extent[2]) ** 2 + ) self.max_dist = max_dist self.samples = samples if runs is None: # If None is provided, try to sample center samples for about one percent of the area - runs = int((self.shape[0] * self.shape[1]) / self.samples * 1/100.) + runs = int((self.shape[0] * self.shape[1]) / self.samples * 1 / 100.0) self.runs = runs self.n_jobs = n_jobs @@ -628,16 +636,24 @@ def __init__( elif isinstance(rnd, np.random.RandomState): self.rnd = rnd else: - self.rnd = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(rnd))) + self.rnd = np.random.RandomState( + np.random.MT19937(np.random.SeedSequence(rnd)) + ) # Radius of center subsample, based on sample count # If None is provided, the disk is defined with the exact size to hold the number of percentage of samples # defined by the user if center_radius is None: - center_radius = np.sqrt(1. / ratio_subsample * self.sample_count / np.pi) * self.res + center_radius = ( + np.sqrt(1.0 / ratio_subsample * self.sample_count / np.pi) * self.res + ) if verbose: - print('Radius of center disk sample for sample count of '+str(self.sample_count)+ ' and subsampling ratio' - ' of '+str(ratio_subsample)+': '+str(center_radius)) + print( + "Radius of center disk sample for sample count of " + + str(self.sample_count) + + " and subsampling ratio" + " of " + str(ratio_subsample) + ": " + str(center_radius) + ) self._center_radius = center_radius # Radii of equidistant ring subsamples @@ -646,14 +662,18 @@ def __init__( # for each of the following, due to: # (sqrt(2)R)**2 - R**2 = R**2 if equidistant_radii is None: - equidistant_radii = [0.] + equidistant_radii = [0.0] increasing_rad = self._center_radius while increasing_rad < self.max_dist: equidistant_radii.append(increasing_rad) increasing_rad *= exp_increase_fac equidistant_radii.append(self.max_dist) if verbose: - print('Radii of equidistant ring samples for increasing factor of ' + str(exp_increase_fac) + ': ') + print( + "Radii of equidistant ring samples for increasing factor of " + + str(exp_increase_fac) + + ": " + ) print(equidistant_radii) self._equidistant_radii = equidistant_radii @@ -691,16 +711,16 @@ def ctree(self): # only Euclidean supported if self.dist_metric != "euclidean": - raise ValueError(( - "A coordinate tree can only be constructed " - "for an euclidean space" - )) + raise ValueError( + ("A coordinate tree can only be constructed " "for an euclidean space") + ) if self._ctree is None: - self._ctree = [cKDTree(self.coords[self.cidx[i], :]) for i in range(len(self.cidx))] + self._ctree = [ + cKDTree(self.coords[self.cidx[i], :]) for i in range(len(self.cidx)) + ] return self._ctree - @property def eqidx(self): """The sampled indices into `self.coords` for the equidistant sample.""" @@ -714,18 +734,22 @@ def eqtree(self): # only Euclidean supported if self._eqtree is None: - self._eqtree = [cKDTree(self.coords[self.eqidx[i], :]) for i in range(len(self.eqidx))] + self._eqtree = [ + cKDTree(self.coords[self.eqidx[i], :]) for i in range(len(self.eqidx)) + ] return self._eqtree @property def dists(self): """A distance matrix of the sampled point pairs as a - `scipy.sparse.csr_matrix` sparse matrix. """ + `scipy.sparse.csr_matrix` sparse matrix.""" # Derive distances if self._dists is None: - idx_center = self.rnd.choice(len(self.coords), size=min(self.runs, len(self.coords)), replace=False) + idx_center = self.rnd.choice( + len(self.coords), size=min(self.runs, len(self.coords)), replace=False + ) # Each run has a different center centers = self.coords[idx_center] @@ -738,10 +762,18 @@ def dists(self): for i in range(self.runs): center = centers[i] - dists, cidx, eqidx = _get_idx_dists(self.coords, center=center, center_radius=self._center_radius, - equidistant_radii=self._equidistant_radii, rnd_func=self.rnd, - sample_count=self.sample_count, max_dist=self.max_dist, i=i, - imax=self.runs, verbose=self.verbose) + dists, cidx, eqidx = _get_idx_dists( + self.coords, + center=center, + center_radius=self._center_radius, + equidistant_radii=self._equidistant_radii, + rnd_func=self.rnd, + sample_count=self.sample_count, + max_dist=self.max_dist, + i=i, + imax=self.runs, + verbose=self.verbose, + ) list_dists.append(dists) list_cidx.append(cidx) list_eqidx.append(eqidx) @@ -749,10 +781,21 @@ def dists(self): # Running on several cores: multiprocessing else: # Arguments to pass: only centers and loop index for verbose are changing - argsin = [{'center': centers[i], 'coords': self.coords, 'center_radius': self._center_radius, - 'equidistant_radii': self._equidistant_radii, 'rnd_func': self.rnd, - 'sample_count': self.sample_count, 'max_dist': self.max_dist, 'i': i, 'imax': self.runs, - 'verbose': self.verbose} for i in range(self.runs)] + argsin = [ + { + "center": centers[i], + "coords": self.coords, + "center_radius": self._center_radius, + "equidistant_radii": self._equidistant_radii, + "rnd_func": self.rnd, + "sample_count": self.sample_count, + "max_dist": self.max_dist, + "i": i, + "imax": self.runs, + "verbose": self.verbose, + } + for i in range(self.runs) + ] # Process in parallel pool = mp.Pool(self.n_jobs, maxtasksperchild=1) @@ -778,7 +821,9 @@ def dists(self): # Stable solution but a bit slow c, eq, d = zip(*set(zip(c, eq, d))) - dists = sparse.csr_matrix((d, (c, eq)), shape=(len(self.coords), len(self.coords))) + dists = sparse.csr_matrix( + (d, (c, eq)), shape=(len(self.coords), len(self.coords)) + ) # comment mmaelicke: We need to fall back to the slower solution as dok._update is not supported in scipy 1.0 anymore # diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index d86c4e7..5aa1a46 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -1148,11 +1148,9 @@ def dist_function(self): return self._X.dist_metric @classmethod - def wrapped_distance_function(cls, dist_func, x): - if callable(dist_func): - return dist_func(x) - else: - return pdist(X=x, metric=dist_func) + def wrapped_distance_function(cls, dist_func, x, **kwargs): + return pdist(X=x, metric=dist_func, **kwargs) + @dist_function.setter def dist_function(self, func): From 5bbd7ed2bc1de760a563fbb571199995e80055de Mon Sep 17 00:00:00 2001 From: Frank Date: Thu, 22 Aug 2024 08:45:22 +0200 Subject: [PATCH 2/4] formatter disabled --- skgstat/Kriging.py | 10 ++++------ skgstat/MetricSpace.py | 14 ++++---------- skgstat/Variogram.py | 6 ++++-- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index fa9ebe0..7761202 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -122,9 +122,9 @@ def __init__( self.n_jobs = n_jobs self.perf = perf - self.range = variogram["effective_range"] - self.nugget = variogram["nugget"] - self.sill = variogram["sill"] + self.range = variogram['effective_range'] + self.nugget = variogram['nugget'] + self.sill = variogram['sill'] if isinstance(coordinates, MetricSpace): self.dist_metric = coordinates.dist_metric self.dist_metric_kwargs = coordinates.dist_metric_kwargs @@ -179,9 +179,7 @@ def __init__( self.perf_solv = list() def dist(self, x): - return Variogram.wrapped_distance_function( - self.dist_metric, x, **self.dist_metric_kwargs - ) + return Variogram.wrapped_distance_function(self.dist_metric, x, **self.dist_metric_kwargs) @classmethod def _remove_duplicated_coordinates(cls, coords, values): diff --git a/skgstat/MetricSpace.py b/skgstat/MetricSpace.py index 27c54ee..e2f2077 100644 --- a/skgstat/MetricSpace.py +++ b/skgstat/MetricSpace.py @@ -84,7 +84,7 @@ class MetricSpace(DistanceMethods): cloud in space. However, it slows things down for small datasets. """ - def __init__(self, coords, dist_metric="euclidean", max_dist=None, dist_metric_kwargs): + def __init__(self, coords, dist_metric="euclidean", max_dist=None, dist_metric_kwargs={}): """ProbabalisticMetricSpace class Parameters @@ -111,11 +111,7 @@ def __init__(self, coords, dist_metric="euclidean", max_dist=None, dist_metric_k self.coords[: self.coords.shape[1] + 1, :], metric=self.dist_metric ) else: - pdist( - self.coords[:1, :], - metric=self.dist_metric, - **self.dist_metric_kwargs - ) + pdist(self.coords[:1, :], metric=self.dist_metric, **self.dist_metric_kwargs) except ValueError as e: raise e @@ -153,9 +149,7 @@ def dists(self): # otherwise use pdist else: self._dists = squareform( - pdist( - self.coords, metric=self.dist_metric, **self.dist_metric_kwargs - ) + pdist(self.coords, metric=self.dist_metric, **self.dist_metric_kwargs) ) # return @@ -254,7 +248,7 @@ def dists(self): self._dists = cdist( self.ms1.coords, self.ms2.coords, - metric=self.ms1.dist_metric, + metric=self.ms1.dist_metric **self.ms1.dist_metric_kwargs ) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 5aa1a46..ee8d43a 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -1149,8 +1149,10 @@ def dist_function(self): @classmethod def wrapped_distance_function(cls, dist_func, x, **kwargs): - return pdist(X=x, metric=dist_func, **kwargs) - + if callable(dist_func): + return dist_func(x, **kwargs) + else: + return pdist(X=x, metric=dist_func, **kwargs) @dist_function.setter def dist_function(self, func): From 57491f5f004c503acf0364cc7e84125eeab98ad3 Mon Sep 17 00:00:00 2001 From: Frank Date: Thu, 22 Aug 2024 08:52:41 +0200 Subject: [PATCH 3/4] kriging formatter removed --- skgstat/Kriging.py | 114 +++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 67 deletions(-) diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index 7761202..1551ef4 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -3,7 +3,6 @@ used together with the skgstat.Variogram class. The usage of the class is inspired by the scipy.interpolate classes. """ - import time import numpy as np @@ -34,18 +33,18 @@ def inv_solve(a, b): class OrdinaryKriging: def __init__( - self, - variogram, - min_points=5, - max_points=15, - mode="exact", - precision=100, - solver="inv", - n_jobs=1, - perf=False, - sparse=False, - coordinates=None, - values=None, + self, + variogram, + min_points=5, + max_points=15, + mode='exact', + precision=100, + solver='inv', + n_jobs=1, + perf=False, + sparse=False, + coordinates=None, + values=None ): """Ordinary Kriging routine @@ -130,25 +129,14 @@ def __init__( self.dist_metric_kwargs = coordinates.dist_metric_kwargs else: self.dist_metric = variogram["dist_func"] - self.dist_metric_kwargs = {} # coordinates and semivariance function if not isinstance(coordinates, MetricSpace): - coordinates, values = self._remove_duplicated_coordinates( - coordinates, values - ) - coordinates = MetricSpace( - coordinates.copy(), - self.dist_metric, - self.range if self.sparse else None, - ) + coordinates, values = self._remove_duplicated_coordinates(coordinates, values) + coordinates = MetricSpace(coordinates.copy(), self.dist_metric, self.range if self.sparse else None) else: - assert ( - self.dist_metric == coordinates.dist_metric - ), "Distance metric of variogram differs from distance metric of coordinates" - assert ( - coordinates.max_dist is None or coordinates.max_dist == self.range - ), "Sparse coordinates must have max_dist == variogram.effective_range" + assert self.dist_metric == coordinates.dist_metric, "Distance metric of variogram differs from distance metric of coordinates" + assert coordinates.max_dist is None or coordinates.max_dist == self.range, "Sparse coordinates must have max_dist == variogram.effective_range" self.values = values.copy() self.coords = coordinates self.gamma_model = Variogram.fitted_model_function(**variogram) @@ -209,11 +197,11 @@ def min_points(self): def min_points(self, value): # check the value if not isinstance(value, int): - raise ValueError("min_points has to be an integer.") + raise ValueError('min_points has to be an integer.') if value < 0: - raise ValueError("min_points can't be negative.") + raise ValueError('min_points can\'t be negative.') if value > self._maxp: - raise ValueError("min_points can't be larger than max_points.") + raise ValueError('min_points can\'t be larger than max_points.') # set self._minp = value @@ -226,11 +214,11 @@ def max_points(self): def max_points(self, value): # check the value if not isinstance(value, int): - raise ValueError("max_points has to be an integer.") + raise ValueError('max_points has to be an integer.') if value < 0: - raise ValueError("max_points can't be negative.") + raise ValueError('max_points can\'t be negative.') if value < self._minp: - raise ValueError("max_points can't be smaller than min_points.") + raise ValueError('max_points can\'t be smaller than min_points.') # set self._maxp = value @@ -241,10 +229,10 @@ def mode(self): @mode.setter def mode(self, value): - if value == "exact": + if value == 'exact': self._prec_g = None self._prec_dist = None - elif value == "estimate": + elif value == 'estimate': self._precalculate_matrix() else: raise ValueError("mode has to be one of 'exact', 'estimate'.") @@ -257,9 +245,9 @@ def precision(self): @precision.setter def precision(self, value): if not isinstance(value, int): - raise TypeError("precision has to be of type int") + raise TypeError('precision has to be of type int') if value < 1: - raise ValueError("The precision has be be > 1") + raise ValueError('The precision has be be > 1') self._precision = value self._precalculate_matrix() @@ -269,11 +257,11 @@ def solver(self): @solver.setter def solver(self, value): - if value == "numpy": + if value == 'numpy': self._solve = numpy_solve - elif value == "scipy": + elif value == 'scipy': self._solve = scipy_solve - elif value == "inv": + elif value == 'inv': self._solve = inv_solve else: raise AttributeError("solver has to be ['inv', 'numpy', 'scipy']") @@ -310,46 +298,34 @@ def transform(self, *x): self.perf_dist, self.perf_mat, self.perf_solv = [], [], [] if len(x) != 1 or not isinstance(x[0], MetricSpace): - self.transform_coords = MetricSpace( - np.column_stack(x).copy(), - self.dist_metric, - self.range if self.sparse else None, - ) + self.transform_coords = MetricSpace(np.column_stack(x).copy(), self.dist_metric, self.range if self.sparse else None) else: self.transform_coords = x[0] self.transform_coords_pair = MetricSpacePair(self.transform_coords, self.coords) # DEV: this is dirty, not sure how to do it better at the moment - # self.sigma = np.empty(len(x[0])) + #self.sigma = np.empty(len(x[0])) self.sigma = np.ones(len(x[0])) * np.nan self.__sigma_index = 0 # if multi-core, than here if self.n_jobs is None or self.n_jobs == 1: - z = np.fromiter( - map(self._estimator, range(len(self.transform_coords))), dtype=float - ) + z = np.fromiter(map(self._estimator, range(len(self.transform_coords))), dtype=float) else: - def f(idxs): return self._estimator(idxs) - with Pool(self.n_jobs) as p: z = p.starmap(f, range(len(self.transform_coords))) # print warnings if self.singular_error > 0: - print("Warning: %d kriging matrices were singular." % self.singular_error) + print('Warning: %d kriging matrices were singular.' % self.singular_error) if self.no_points_error > 0: - print( - "Warning: for %d locations, not enough neighbors were " - "found within the range." % self.no_points_error - ) + print('Warning: for %d locations, not enough neighbors were ' + 'found within the range.' % self.no_points_error) if self.ill_matrix > 0: - print( - "Warning: %d kriging matrices were ill-conditioned." - " The result may not be accurate." % self.ill_matrix - ) + print('Warning: %d kriging matrices were ill-conditioned.' + ' The result may not be accurate.' % self.ill_matrix) # store the field in the instance itself self.z = np.array(z) @@ -452,7 +428,11 @@ def _krige(self, idx): # get the point and index p = self.transform_coords.coords[idx, :] - idx = self.transform_coords_pair.find_closest(idx, self.range, self._maxp) + idx = self.transform_coords_pair.find_closest( + idx, + self.range, + self._maxp + ) # raise an error if not enough points are found if idx.size < self._minp: @@ -469,7 +449,7 @@ def _krige(self, idx): self.perf_dist.append(t1 - t0) # build the kriging Matrix; needs N + 1 dimensionality - if self.mode == "exact": + if self.mode == 'exact': a = self._build_matrix(dist_mat) else: a = self._estimate_matrix(dist_mat) @@ -488,7 +468,7 @@ def _krige(self, idx): # build the matrix of solutions A _p = np.concatenate(([p], in_range)) - _dists = self.dist(_p)[: len(_p) - 1] + _dists = self.dist(_p)[:len(_p) - 1] _g = self.gamma_model(_dists) b = np.concatenate((_g, [1])) @@ -497,18 +477,18 @@ def _krige(self, idx): _lambda = self._solve(a, b) except LinAlgError as e: print(a) - if str(e) == "Matrix is singular.": + if str(e) == 'Matrix is singular.': raise SingularMatrixError else: raise e except RuntimeWarning as w: - if "Ill-conditioned matrix" in str(w): + if 'Ill-conditioned matrix' in str(w): print(a) raise IllMatrixError else: raise w except ValueError as e: - print("[DEBUG]: print variogram matrix and distance matrix:") + print('[DEBUG]: print variogram matrix and distance matrix:') print(a) print(_dists) raise e From bb63f8840b59e38a35e6bc84c4aced811af42ba9 Mon Sep 17 00:00:00 2001 From: Frank Date: Fri, 23 Aug 2024 10:43:57 +0200 Subject: [PATCH 4/4] tested locally --- skgstat/Kriging.py | 2 +- skgstat/MetricSpace.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py index 1551ef4..9b2c90a 100644 --- a/skgstat/Kriging.py +++ b/skgstat/Kriging.py @@ -129,7 +129,7 @@ def __init__( self.dist_metric_kwargs = coordinates.dist_metric_kwargs else: self.dist_metric = variogram["dist_func"] - + self.dist_metric_kwargs = {} # coordinates and semivariance function if not isinstance(coordinates, MetricSpace): coordinates, values = self._remove_duplicated_coordinates(coordinates, values) diff --git a/skgstat/MetricSpace.py b/skgstat/MetricSpace.py index e2f2077..61b15c3 100644 --- a/skgstat/MetricSpace.py +++ b/skgstat/MetricSpace.py @@ -248,7 +248,7 @@ def dists(self): self._dists = cdist( self.ms1.coords, self.ms2.coords, - metric=self.ms1.dist_metric + metric=self.ms1.dist_metric, **self.ms1.dist_metric_kwargs )