diff --git a/.travis.yml b/.travis.yml
index 4b5aadf..50f8e82 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,17 +1,35 @@
 sudo: required
 language: python
 python:
+  - "3.4"
   - "3.5"
  - "3.6"
 before_install:
   - "export DISPLAY=:99.0"
   - "sh -e /etc/init.d/xvfb start"
 install:
+install:
+  - sudo apt-get update
+  - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
+  - bash miniconda.sh -b -p $HOME/miniconda
+  - export PATH="$HOME/miniconda/bin:$PATH"
+  - hash -r
+  - conda config --set always_yes yes --set changeps1 no
+  - conda update -q conda
+
+  # Useful for debugging any issues with conda
+  - conda info -a
+
+  # install requirements
+  - conda create -q -n travis python=$TRAVIS_PYTHON_VERSION --file requirements.txt
+  - source activate travis
+  - python setup.py install
+
+  # install codecov
   - pip install --upgrade pip
-  - pip install -r requirements.txt
-  - pip install .
   - pip install codecov
 script:
-  - nosetests --with-coverage --cover-package=skgstat
+  - nosetests --with-coverage --cover-package=skgstat
 after_success:
   - codecov
\ No newline at end of file
diff --git a/README.rst b/README.rst
index 29aba20..9a6b486 100644
--- a/README.rst
+++ b/README.rst
@@ -1,7 +1,7 @@
 Scikit-Gstat
 ============
 
-Info: scikit-gstat needs Python >= 3.5!
+Info: scikit-gstat needs Python >= 3.4!
 
 .. image:: https://travis-ci.org/mmaelicke/scikit-gstat.svg?branch=master
     :target: https://travis-ci.org/mmaelicke/scikit-gstat
@@ -16,26 +16,19 @@ Info: scikit-gstat needs Python >= 3.5!
     :alt: Codecov
 
 
-Deprecation Warning
--------------------
-This version of of scikit-gstat is deprecated. However,
-the current state of this module is, without this warning,
-conserved in the branch version-0.1.8 on GitHub, but will no
-longer be maintained.
-
-On the dev branch, the Variogram class is completely rewritten
-and will also change the used slightly. It will soon be merged
-into the master branch, as soon as it is stable. Sorry for any
-inconvenience.
-
-You can distable this Warning by setting the ignore_deprecation
-attribute to True:
-
->>> V =Variogram(c, v, ignore_deprecation=True)
+New Version 0.2
+~~~~~~~~~~~~~~~
+Scikit-gstat was rewritten in major parts. Most of the changes are internal,
+but the attributes and behaviour of the `Variogram` have also changed
+substantially.
+A detailed description of the new version's usage will follow. The last
+version of the old Variogram class, 0.1.8, is kept in the `version-0.1.8`
+branch on GitHub, but not developed any further. Those two versions are not
+compatible.
 
 Description
------------
+~~~~~~~~~~~
 At current state, this module offers a scipy-styled `Variogram` class for performing geostatistical analysis.
 This class can be used to derive variograms. Key benefits are a number of semivariance
 estimators and theoretical variogram functions. The module is planned to be hold in the manner of scikit modules and be based upon `numpy` and
@@ -46,9 +39,9 @@ The estimators include:
 - matheron
 - cressie
 - dowd
-- genton (still buggy)
+- genton
 - entropy
-- bin quantiles
+- two experimental ones: quantiles, minmax
 
 The models include:
 
@@ -59,19 +52,15 @@ The models include:
 - stable
 - matérn
 
-with all of them in a nugget and no-nugget variation. All the estimator functions are written `numba` compatible,
-therefore you can just download it and include the `@jit` decorator. This can speed up the calculation for bigger
-data sets up to 100x. Nevertheless, this is not included in this sckit-gstat version as these functions might be
-re-implemented using Cython. This is still under evaluation.
-
+with all of them in a nugget and no-nugget variation. All the estimators are
+implemented using numba's jit decorator. The usage of numba might be subject
+to change in future versions.
 At the current stage, the package does not include any kriging. This is planned for a future release.
 
 Installation
 ~~~~~~~~~~~~
-You can either install scikit-gstat using pip or you download the latest version from github.
-
 PyPI:
 
 .. code-block:: bash
diff --git a/VERSION b/VERSION
index 84aa3a7..341cf11 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.1.8
\ No newline at end of file
+0.2.0
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 7753dab..236f972 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ scipy
 numpy
 pandas
 nose
-matplotlib
\ No newline at end of file
+matplotlib
+numba
\ No newline at end of file
diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py
index c0488ef..ae33add 100644
--- a/skgstat/Variogram.py
+++ b/skgstat/Variogram.py
@@ -1,136 +1,306 @@
 """
-Variogram Klasse
+Variogram class
 """
+import copy
 
-from skgstat.distance import nd_dist
-from skgstat.binning import binify_even_width, binify_even_bin, group_to_bin
-from skgstat.estimator import matheron, cressie, dowd, genton, minmax, entropy
-from skgstat.models import spherical, exponential, gaussian, cubic, stable, matern
 import numpy as np
 from pandas import DataFrame
-import copy
 import matplotlib.pyplot as plt
-from matplotlib.axes import SubplotBase
 from scipy.optimize import curve_fit
+from scipy.spatial.distance import pdist, squareform
+from numba import jit
 
+from skgstat import estimators, models, binning
 
-class Variogram(object):
-    """
-    Variogram repesentation.
 
-    This class can generate a Semivariogram from a given sample.
-    By default, the sample point pairs are ordered into even-width bin,
-    separated by the euclidean distance of the point pairs.
-    The Semivariance in the bin is calculated by the Matheron estimator
-    and a spherical Varigram function is fitted by least squares to the experimental Variogram.
+class Variogram(object):
+    """Variogram Class
 
-    The distance matrix, bin matrix, semi-variance estimator and theoretical variogram function can all be changed on
-    instantiation. The Variogram class can be used to return the result data, plot the Variogram, estimate the
-    kriging weights and create a Kriging instance.
+    Calculates a variogram of the separating distances in the given
+    coordinates and relates them to one of the semi-variance measures of
+    the given dependent values.
 
     """
-    def __init__(self, coordinates=None, values=None, dm_func=nd_dist, bm_func=binify_even_width,
-                 estimator=matheron, model=spherical, dm=None, bm=None, normalize=True, fit_method='lm',
-                 pec_punish=1.0, is_directional=False, azimuth=0, tolerance=45.0, use_nugget=False, maxlag=None,
-                 N=None, verbose=False, harmonize=False,
-                 ignore_deprecation=False):
-        """
-
-        :param coordinates: numpy array or list with the coordinates of the sample as tuples
-        :param values: numpy array or list with the Values of the sample.
-            If ndim > 1 an aggregator is used
-        :param dm_func: function which is used to calculate the distance matrix
-        :param bm_func: function which is used to calculate the binning matrix
-        :param estimator: estimator can be a function or a string identifying a standard estimator
-            Supported are 'matheron', 'cressie', 'dowd' 'minmax', 'percentile' or 'genton'
-        :param model: string or callable with the theoretical variogram function
-        :param dm: numpy array with the distance matrix of the given sample
-        :param bm: numpy array with the binning matrix of the given sample
-        :param normalize: boolean, specify if the lag should be given absolute or relative to the maxlag
-        :param fit_method: The method for fitting the theoretical model.
-            Either 'lm' for least squares or 'pec' for point exclusion cost'
-        :param pec_punish: If 'pec' is the fit_method, give the power of punishing the point exclusion.
-            1 is full punish; 0 non-punish.
-        :param is_directional:
-        :param azimuth
-        :param tolerance:
-        :param use_nugget: boolean, set if nugget effect shall be used
-        :param maxlag:
-        :param N: number of bins
-        :param verbose:
-        :param harmonize: bool, if True, the experimental variogram will be harmonized.
-        """
-        if not ignore_deprecation:
-            raise DeprecationWarning("""
-            Deprecation Warning
-            -------------------
-            This version of of scikit-gstat is deprecated. However,
-            the current state of this module is, without this warning,
-            conserved in the branch version-0.1.8 on GitHub, but will no
-            longer be maintained.
-
-            On the dev branch, the Variogram class is completely rewritten
-            and will also change the used slightly. It will soon be merged
-            into the master branch, as soon as it is stable. Sorry for any
-            inconvenience.
-
-            You can distable this Warning by setting the ignore_deprecation
-            attribute to True:
-            >>> V =Variogram(c, v, ignore_deprecation=True)
-            """)
-
-        # Set coordinates and values
-        self._X = list(coordinates)
-        self.values = list(values)
+    def __init__(self,
+                 coordinates=None,
+                 values=None,
+                 estimator=estimators.matheron,
+                 model=models.spherical,
+                 dist_func='euclidean',
+                 bin_func='even',
+                 normalize=True,
+                 fit_method='trf',
+                 fit_sigma=None,
+                 is_directional=False,
+                 azimuth=0,
+                 tolerance=45.0,
+                 use_nugget=False,
+                 maxlag=None,
+                 n_lags=10,
+                 verbose=False,
+                 harmonize=False
+                 ):
+        r"""Variogram Class
+
+        Note: The directional variogram estimation is not re-implemented
+        yet. Therefore the parameters is_directional, azimuth and tolerance
+        will be ignored at the moment and may be subject to change.
+
+        Parameters
+        ----------
+        coordinates : array
+            Array of shape (m, n). Will be used as m observation points of
+            n-dimensions. This variogram can be calculated on 1- to
+            n-dimensional coordinates. In case a 1-dimensional array is
+            passed, a second array of same length containing only zeros
+            will be stacked to the passed one.
+        values : array
+            Array of values observed at the given coordinates. The length
+            of the values array has to match the m dimension of the
+            coordinates array. Will be used to calculate the dependent
+            variable of the variogram.
+        estimator : str, callable
+            String identifying the semi-variance estimator to be used.
+            Defaults to the Matheron estimator. Possible values are:
+              * matheron        [Matheron, default]
+              * cressie         [Cressie-Hawkins]
+              * dowd            [Dowd-Estimator]
+              * genton          [Genton]
+              * minmax          [MinMax Scaler]
+              * entropy         [Shannon Entropy]
+            If a callable is passed, it has to accept an array of absolute
+            differences, aligned to the 1D distance matrix (flattened upper
+            triangle) and return a scalar that converges towards small
+            values for high similarity (high covariance).
+        model : str
+            String identifying the theoretical variogram function to be
+            used to describe the experimental variogram. Can be one of:
+              * spherical       [Spherical, default]
+              * exponential     [Exponential]
+              * gaussian        [Gaussian]
+              * cubic           [Cubic]
+              * stable          [Stable model]
+              * matern          [Matérn model]
+              * nugget          [nugget effect variogram]
+        dist_func : str
+            String identifying the distance function. Defaults to
+            'euclidean'. Can be any metric accepted by
+            scipy.spatial.distance.pdist. Additional parameters are not
+            (yet) passed through to pdist. These are accepted by pdist for
+            some of the metrics. In these cases the default values are
+            used.
+        bin_func : str
+            String identifying the binning function used to find lag class
+            edges. At the moment there are two possible values: 'even'
+            (default) or 'uniform'. 'even' will find n_lags bins of the
+            same width in the interval [0,maxlag[. 'uniform' will identify
+            n_lags bins on the same interval, but with varying edges so
+            that all bins count the same amount of observations.
+        normalize : boolean
+            Defaults to True. If True, the independent and dependent
+            variable will be normalized to the range [0,1].
+        fit_method : str
+            String identifying the method to be used for fitting the
+            theoretical variogram function to the experimental. More info
+            is given in the Variogram.fit docs. Can be one of:
+              * 'lm': Levenberg-Marquardt algorithm for unconstrained
+                problems. This is the faster algorithm, but fitting a
+                variogram is in fact a constrained problem.
+              * 'trf': Trust Region Reflective function for non-linear
+                constrained problems. The class will set the boundaries
+                itself. This is the default function.
+        fit_sigma : array, str
+            Defaults to None. The sigma is used as measure of uncertainty
+            during variogram fit. If fit_sigma is an array, it has to hold
+            n_lags elements, giving the uncertainty for all lag classes.
+            If fit_sigma is None (default), it will give no weight to any
+            lag. Higher values indicate higher uncertainty and will lower
+            the influence of the corresponding lag class for the fit.
+            If fit_sigma is a string, a pre-defined function of separating
+            distance will be used to fill the array. Can be one of:
+              * 'linear': Linear loss with distance. Small bins will have
+                higher impact.
+              * 'exp': The weights decrease by an exponential function of
+                distance.
+              * 'sqrt': The weights decrease by the square root of
+                distance.
+              * 'sq': The weights decrease by the squared distance.
+            More info is given in the Variogram.fit_sigma documentation.
+        is_directional : boolean
+            Not Implemented yet, will be ignored.
+        azimuth : float
+            Not Implemented yet, will be ignored.
+        tolerance : float
+            Not Implemented yet, will be ignored.
+        use_nugget : boolean
+            Defaults to False. If True, a nugget effect will be added to
+            all Variogram.models as a third (or fourth) fitting parameter.
+            A nugget is essentially the y-axis interception of the
+            theoretical variogram function.
+        maxlag : float, str
+            Can specify the maximum lag distance directly by giving a
+            value larger than 1. The binning function will not find any
+            lag class with an edge larger than maxlag. If 0 < maxlag < 1,
+            then maxlag is relative and maxlag * max(Variogram.distance)
+            will be used. In case maxlag is a string it has to be one of
+            'median', 'mean'. Then the median or mean of all
+            Variogram.distance will be used. Note that maxlag=0.5 will use
+            half the maximum separating distance; this is not the same as
+            'median', which is the median of all separating distances.
+        n_lags : int
+            Specify the number of lag classes to be defined by the binning
+            function.
+        verbose : boolean
+            Set the Verbosity of the class. Not Implemented yet.
+        harmonize : boolean
+            This kind of works so far, but will be rewritten (and
+            documented).
+
+        """
+        # Set coordinates
+        self._X = np.asarray(coordinates)
+
+        # pairwise differences
+        self._diff = None
 
         # set verbosity
         self.verbose = verbose
 
-        # if values is given with ndim > 1, set an aggregator
-        self.agg = np.nanmean
+        # set values
+        self._values = None
+        self.set_values(values=values)
 
-        # bm properites
-        self.maxlag = maxlag
-        self._bm_kwargs = {}
-        self._dm_kwargs = {}
+        # distance matrix
+        self._dist = None
 
-        # save the functions, if the matrixes are not given
-        if dm is None:
-            self.dm_func = dm_func
-        else:
-            self._dm = dm
+        # set distance calculation function
+        self._dist_func = None
+        self.set_dist_function(func=dist_func)
 
-        if bm is None:
-            self.bm_func = bm_func
-        else:
-            self._bm = bm
+        # lags and max lag
+        self.n_lags = n_lags
+        self._maxlag = None
+        self.maxlag = maxlag
+
+        # estimator can be a function or a string
+        self._estimator = None
+        self.set_estimator(estimator_name=estimator)
 
-        # estimator can be a function or a string identifying a standard estimator
-        self.set_estimator(estimator=estimator)
+        # model can be a function or a string
+        self._model = None
+        self.set_model(model_name=model)
 
-        # model can be a function or a string identifying a standard variogram function
-        self.set_model(model=model)
+        # the binning settings
+        self._bin_func = None
+        self._groups = None
+        self._bins = None
+        self.set_bin_func(bin_func=bin_func)
 
         # specify if the lag should be given absolute or relative to the maxlag
-        self.normalized = normalize
+        self._normalized = normalize
 
         # specify if the experimental variogram shall be harmonized
         self.harmonize = harmonize
 
-        # set the fitting method and model quality measure
+        # set the fitting method and sigma array
         self.fit_method = fit_method
-        self.pec_punish = pec_punish
+        self._fit_sigma = None
+        self.fit_sigma = fit_sigma
 
         # set directionality
         self.is_directional = is_directional
-        self.azimuth = azimuth  # Set is_directional as True if azimuth is given?
+        self.azimuth = azimuth
         self.tolerance = tolerance
 
         # set if nugget effect shall be used
         self.use_nugget = use_nugget
 
-        # set binning matrix if N was given
-        if N is not None:
-            self.set_bm(N=N)
+        # set attributes to be filled during calculation
+        self.cov = None
+        self.cof = None
+
+        # settings, not reachable by init (not yet)
+        self._cache_experimental = False
+
+        # do the preprocessing and fitting upon initialization
+        self.preprocessing(force=True)
+        self.fit(force=True)
+
+    @property
+    def values(self):
+        return self._values
+
+    @values.setter
+    def values(self, values):
+        self.set_values(values=values)
+
+    @property
+    def value_matrix(self):
+        return squareform(self._diff)
+
+    def set_values(self, values):
+        # check dimensions
+        if not len(values) == len(self._X):
+            raise ValueError('The length of the values array has to match '
+                             'the length of coordinates')
+
+        # reset fitting parameter
+        self.cof, self.cov = None, None
+        self._diff = None
+
+        # use an array
+        _y = np.asarray(values)
+        if not _y.ndim == 1:
+            raise ValueError('The values shall be a 1-D array. '
+                             'Multi-dimensional values not supported yet.')
+
+        # set new values
+        self._values = np.asarray(values)
+
+        # recalculate the pairwise differences
+        self._calc_diff(force=True)
+
+    @property
+    def bin_func(self):
+        return self._bin_func
+
+    @bin_func.setter
+    def bin_func(self, bin_func):
+        self.set_bin_func(bin_func=bin_func)
+
+    def set_bin_func(self, bin_func):
+        # reset groups and bins
+        self._groups = None
+        self._bins = None
+        self.cof, self.cov = None, None
+
+        if bin_func.lower() == 'even':
+            self._bin_func = binning.even_width_lags
+        elif bin_func.lower() == 'uniform':
+            self._bin_func = binning.uniform_count_lags
+        else:
+            raise ValueError('%s binning method is not known' % bin_func)
+
+    @property
+    def normalized(self):
+        return self._normalized
+
+    @normalized.setter
+    def normalized(self, status):
+        # set the new value
+        self._normalized = status
+
+    @property
+    def bins(self):
+        if self._bins is None:
+            self._bins = self.bin_func(self.distance, self.n_lags, self.maxlag)
+
+        return self._bins.copy()
+
+    @bins.setter
+    def bins(self, bins):
+        # set the new bins
+        self._bins = bins
+
+        # clean the groups as they are not valid anymore
+        self._groups = None
+        self.cov = None
+        self.cof = None
 
     @property
     def estimator(self):
@@ -138,31 +308,40 @@ def estimator(self):
 
     @estimator.setter
     def estimator(self, value):
-        self.set_estimator(estimator=value)
-
-    def set_estimator(self, estimator):
-        """
-        Set estimator as the new Variogram estimator. Either a function returning a single or a list of semivariance
-        values is needed, or a string identifiying a default one.
-        Supported are 'matheron', 'cressie', 'dowd' or 'genton'.
-        """
-        if isinstance(estimator, str):
-            if estimator.lower() == 'matheron':
-                self._estimator = matheron
-            elif estimator.lower() == 'cressie':
-                self._estimator = cressie
-            elif estimator.lower() == 'dowd':
-                self._estimator = dowd
-            elif estimator.lower() == 'genton':
-                self._estimator = genton
-            elif estimator.lower() == 'minmax':
-                self._estimator = minmax
-            elif estimator.lower() == 'entropy' or estimator.lower() == 'h':
-                self._estimator = entropy
+        self.set_estimator(estimator_name=value)
+
+    def set_estimator(self, estimator_name):
+        """
+        Set estimator as the new variogram estimator.
+        Can be a string identifying one of the implemented estimators, or
+        a callable (see the estimator parameter of Variogram.__init__).
+
+        """
+        # reset the fitting
+        self.cof, self.cov = None, None
+
+        if isinstance(estimator_name, str):
+            if estimator_name.lower() == 'matheron':
+                self._estimator = estimators.matheron
+            elif estimator_name.lower() == 'cressie':
+                self._estimator = estimators.cressie
+            elif estimator_name.lower() == 'dowd':
+                self._estimator = estimators.dowd
+            elif estimator_name.lower() == 'genton':
+                self._estimator = estimators.genton
+            elif estimator_name.lower() == 'minmax':
+                self._estimator = estimators.minmax
+            elif estimator_name.lower() == 'percentile':
+                self._estimator = estimators.percentile
+            elif estimator_name.lower() == 'entropy':
+                self._estimator = estimators.entropy
             else:
-                raise ValueError('Variogram estimator %s is not understood, please provide the function.' % estimator)
+                raise ValueError(
+                    'Variogram estimator %s is not understood, please '
+                    'provide the function.' % estimator_name
+                )
+        elif callable(estimator_name):
+            self._estimator = estimator_name
         else:
-            self._estimator = estimator
+            raise ValueError('The estimator has to be a string or callable.')
 
     @property
     def model(self):
@@ -170,186 +349,536 @@ def model(self):
 
     @model.setter
     def model(self, value):
-        self.set_model(model=value)
+        self.set_model(model_name=value)
 
-    def set_model(self, model):
+    def set_model(self, model_name):
         """
-        Set model as the new theoretical variogram function. Either a function returning the semivariance at a given lag
-        for given parameters is needed, or a string identifying a default one. Supported are 'spherical', 'exponential'
-        or 'gaussian'.
+        Set model as the new theoretical variogram function. Can be a
+        string identifying one of the implemented models, or a callable.
 
-        :param model:
-        :return:
         """
-        if isinstance(model, str):
-            if model.lower() == 'spherical':
-                self._model = spherical
-            elif model.lower() == 'exponential':
-                self._model = exponential
-            elif model.lower() == 'gaussian':
-                self._model = gaussian
-            elif model.lower() == 'cubic':
-                self._model = cubic
-            elif model.lower() == 'stable':
-                self._model = stable
-            elif model.lower() == 'matern':
-                self._model = matern
+        # reset the fitting
+        self.cof, self.cov = None, None
+
+        if isinstance(model_name, str):
+            if model_name.lower() == 'spherical':
+                self._model = models.spherical
+            elif model_name.lower() == 'exponential':
+                self._model = models.exponential
+            elif model_name.lower() == 'gaussian':
+                self._model = models.gaussian
+            elif model_name.lower() == 'cubic':
+                self._model = models.cubic
+            elif model_name.lower() == 'stable':
+                self._model = models.stable
+            elif model_name.lower() == 'matern':
+                self._model = models.matern
             else:
-                raise ValueError('The theoretical Variogram function %s is not understood, '
-                                 'please provide the function' % model)
+                raise ValueError(
+                    'The theoretical Variogram function %s is not '
+                    'understood, please provide the function' % model_name)
         else:
-            self._model = model
+            self._model = model_name
 
-    def binify_like(self, how='even width'):
-        """
-        Specify, how the bins for the Variogram shall be drawn. how can identify one of the prepared binning algorithms.
-        Use 'even_width' or 'even width' for the binify_even_width function and 'even bin' or 'even_bin' for the
-        binity_even_bins function. If how is callable, it will be used as bm_func.
-        It will be called by the bm property with N, the number of bins and the bm_kwargs as arguments and has to return
-        the bm matrix and an array of bin widths.
+    @property
+    def dist_function(self):
+        return self._dist_func
+
+    @dist_function.setter
+    def dist_function(self, func):
+        self.set_dist_function(func=func)
+
+    def set_dist_function(self, func):
+        """Set distance function
+
+        Set the function used for distance calculation. func can either
+        be a callable or a string. The ranked distance function is not
+        implemented yet. Strings will be forwarded to the
+        scipy.spatial.distance.pdist function as the metric argument.
+        If func is a callable, it has to return the upper triangle of the
+        distance matrix as a flat array (like the pdist function).
+
+        Parameters
+        ----------
+        func : string, callable
+
+        Returns
+        -------
+        void
-        :param how:
-        :return:
         """
-        # remove the _bm if necessary
-        if hasattr(self, '_bm'):
-            delattr(self, '_bm')
-        if hasattr(self, 'bin_widths'):
-            delattr(self, 'bin_widths')
-
-        if callable(how):
-            self.bm_func = how
-        elif how.lower().replace('_', ' ') == 'even width':
-            self.bm_func = binify_even_width
-        elif how.lower().replace('_', ' ') == 'even bin':
-            self.bm_func = binify_even_bin
+        # reset the distances and fitting
+        self._dist = None
+        self.cof, self.cov = None, None
+
+        if isinstance(func, str):
+            if func.lower() == 'rank':
+                raise NotImplementedError
+            else:
+                # if not ranks, it has to be a scipy metric
+                self._dist_func = lambda x: pdist(X=x, metric=func)
+
+        elif callable(func):
+            self._dist_func = func
         else:
-            raise ValueError("how has to be a callable or one of ['even width', 'even bin']")
+            raise ValueError('Input not supported. Pass a string or callable.')
 
-    def clone(self):
-        """
-        Wrapper for copy.deepcopy function of self.
-        """
-        return copy.deepcopy(self)
+    @property
+    def distance(self):
+        if self._dist is None:
+            self._calc_distances()
+        return self._dist
 
-    def set_dm(self, dm=None, **kwargs):
-        """
-        calculates the distance matrix if needed and sets it as a static attribute self._dm.
+    @distance.setter
+    def distance(self, dist_array):
+        self._dist = dist_array
 
-        """
-        # update kwargs
-        self._dm_kwargs.update(kwargs)
+    @property
+    def distance_matrix(self):
+        return squareform(self.distance)
+
+    @property
+    def maxlag(self):
+        return self._maxlag
+
+    @maxlag.setter
+    def maxlag(self, value):
+        # reset fitting
+        self.cof, self.cov = None, None
+
+        # remove bins
+        self._bins = None
+        self._groups = None
 
-        if dm is None:
-            self._dm = self.dm_func(self._X, **self._dm_kwargs)
-            if hasattr(self, '_bm'):
-                self.set_bm()
+        # set new maxlag
+        if value is None:
+            self._maxlag = None
+        elif isinstance(value, str):
+            if value == 'median':
+                self._maxlag = np.median(self.distance)
+            elif value == 'mean':
+                self._maxlag = np.mean(self.distance)
+        elif value < 1:
+            self._maxlag = value * np.max(self.distance)
         else:
-            self._dm = dm
+            self._maxlag = value
 
     @property
-    def dm(self):
+    def fit_sigma(self):
+        r"""Fitting Uncertainty
+
+        Set or calculate an array of observation uncertainties aligned to
+        the Variogram.bins. These will be used to weight the observations
+        in the cost function, which divides the residuals by their
+        uncertainty.
+
+        When setting fit_sigma, the array of uncertainties itself can be
+        given, or one of the strings: ['linear', 'exp', 'sqrt', 'sq']. The
+        parameters described below refer to the setter of this property.
+
+        Parameters
+        ----------
+        sigma : string, array
+            Sigma can either be an array of discrete uncertainty values,
+            which have to align to the Variogram.bins, or of type string.
+            Then, the weights for fitting are calculated as a function of
+            (lag) distance.
+
+              * sigma='linear': The residuals get weighted by the lag
+                distance normalized to the maximum lag distance, denoted
+                as w_n
+              * sigma='exp': The residuals get weighted by the function:
+                w = e^{1 / w_n}
+              * sigma='sqrt': The residuals get weighted by the function:
+                w = sqrt(w_n)
+              * sigma='sq': The residuals get weighted by the function:
+                w = w_n ** 2
+
+        Returns
+        -------
+        void
+
+
+        Notes
+        -----
+        The cost function is defined as:
+
+        .. math::
+            \chi^2 = \sum_i \left(\frac{r_i}{\sigma_i}\right)^2
+
+        where r_i are the residuals between the experimental variogram and
+        the modeled values for the same lag. Following this function,
+        small sigma values will increase the influence of that residual,
+        while a very large sigma will cause the observation to be ignored.
+
+        See Also
+        --------
+        scipy.optimize.curve_fit
+
+        """
+        # unweighted
+        if self._fit_sigma is None:
+            return None
+
+        # discrete uncertainties
+        elif isinstance(self._fit_sigma, (list, tuple, np.ndarray)):
+            assert len(self._fit_sigma) == len(self.bins)
+            return self._fit_sigma
+
+        # linear function of distance
+        elif self._fit_sigma == 'linear':
+            return self.bins / np.max(self.bins)
+
+        # e function of distance
+        elif self._fit_sigma == 'exp':
+            return np.exp(1. / (self.bins / np.max(self.bins)))
+
+        # sqrt function of distance
+        elif self._fit_sigma == 'sqrt':
+            return np.sqrt(self.bins / np.max(self.bins))
+
+        # squared function of distance
+        elif self._fit_sigma == 'sq':
+            return (self.bins / np.max(self.bins)) ** 2
+        else:
+            raise ValueError("fit_sigma is not understood. It has to be an "
+                             "array or one of ['linear', 'exp', 'sqrt', 'sq'].")
+
+    @fit_sigma.setter
+    def fit_sigma(self, sigma):
+        self._fit_sigma = sigma
+
+    def lag_groups(self):
+        """Lag class groups
+
+        Returns a mask array with as many elements as self._diff has,
+        identifying the lag class group for each pairwise difference. Can
+        be used to extract all pairwise values within the same lag bin.
+
+        Returns
+        -------
+        numpy.array
+
+        See Also
+        --------
+        Variogram.lag_classes
+
+        """
+        if self._groups is None:
+            self._calc_groups()
+
+        return self._groups
+
+    def lag_classes(self):
+        """Iterate over the lag classes
+
+        Generates an iterator over all lag classes. Can be zipped with
+        Variogram.bins to identify the lag.
+
+        Returns
+        -------
+        iterable
+
+        """
+        # yield all groups
+        for i in np.unique(self._groups):
+            if i < 0:
+                continue
+            else:
+                yield self._diff[np.where(self._groups == i)]
+
+    def preprocessing(self, force=False):
+        """Preprocessing function
+
+        Prepares all input data for the fit and transform functions.
+        Namely, the distances are calculated and the value differences.
+        Then the binning is set up and bin edges are calculated. If any of
+        the listed subsets are already prepared, their processing is
+        skipped. This behaviour can be changed by the force parameter,
+        which will cause a clean preprocessing.
+
+        Parameters
+        ----------
+        force : bool
+            If set to True, all preprocessing data sets will be deleted.
+            Use it in case you need a clean preprocessing.
+
+        Returns
+        -------
+        void
+
+        """
+        # call the _calc functions
+        self._calc_distances(force=force)
+        self._calc_diff(force=force)
+        self._calc_groups(force=force)
+
+    def fit(self, force=False, method=None, sigma=None, **kwargs):
+        """Fit the variogram
+
+        The fit function will fit the theoretical variogram function to
+        the experimental. The preprocessed distance matrix, pairwise
+        differences and binning will not be recalculated, if already done.
+        This can be forced by setting the force parameter to True.
+
+        In case you call the fit function directly, with method or sigma,
+        the parameters set on Variogram object instantiation will get
+        overwritten. All other keyword arguments will be passed to the
+        scipy.optimize.curve_fit function.
+
+        Parameters
+        ----------
+        force : bool
+            If set to True, a clean preprocessing of the distance matrix,
+            pairwise differences and the binning will be forced. Default
+            is False.
+        method : string
+            A string identifying one of the implemented fitting
+            procedures. Can be one of ['lm', 'trf'].
+              * lm: Levenberg-Marquardt algorithm implemented in the
+                scipy.optimize.leastsq function.
+              * trf: Trust Region Reflective algorithm implemented in
+                scipy.optimize.least_squares(method='trf')
+        sigma : string, array
+            Uncertainty array for the bins. Has to have the same dimension
+            as self.bins. Refer to Variogram.fit_sigma for more
+            information.
+
+        Returns
+        -------
+        void
+
+        See Also
+        --------
+        scipy.optimize
+        scipy.optimize.curve_fit
+        scipy.optimize.leastsq
+        scipy.optimize.least_squares
+
+        """
+        # TODO: the kwargs need to be preserved somehow
+        # delete the last cov and cof
+        self.cof = None
+        self.cov = None
+
+        # if force, force a clean preprocessing
+        self.preprocessing(force=force)
+
+        # load the data
+        x = self.bins
+        y = self.experimental
+
+        # overwrite fit setting if new params are given
+        if method is not None:
+            self.fit_method = method
+        if sigma is not None:
+            self.fit_sigma = sigma
+
+        # remove nans
+        _x = x[~np.isnan(y)]
+        _y = y[~np.isnan(y)]
+
+        # the model function is re-defined. otherwise scipy cannot determine
+        # the number of parameters
+        # TODO: def f(n of par)
+
+        # Switch the method
+        # Trust Region Reflective
+        if self.fit_method == 'trf':
+            bounds = (0, self.__get_fit_bounds(x, y))
+            self.cof, self.cov = curve_fit(
+                self._model,
+                _x, _y,
+                method='trf',
+                sigma=self.fit_sigma,
+                p0=bounds[1],
+                bounds=bounds,
+                **kwargs
+            )
+
+        # Levenberg-Marquardt
+        elif self.fit_method == 'lm':
+            self.cof, self.cov = curve_fit(
+                self.model,
+                _x, _y,
+                method='lm',
+                sigma=self.fit_sigma,
+                **kwargs
+            )
+
         else:
-            return self.dm_func(self._X, **self._dm_kwargs)
+            raise ValueError('Only the \'lm\' and \'trf\' algorithms are '
+                             'supported at the moment.')
+
+    def transform(self, x):
+        """Transform
+
+        Transform a given set of lag values to the theoretical variogram
+        function using the actual fitting and preprocessing parameters in
+        this Variogram instance.
 
+        Parameters
+        ----------
+        x : numpy.array
+            Array of lag values to be used as model input for the fitted
+            theoretical variogram model
+
+        Returns
+        -------
+        numpy.array
 
-    def set_bm(self, bm=None, maxlag=None, **kwargs):
         """
-        Calculate a static binning matrix on the basis of the distance matrix self.dm. Will be set as
-        attribute self._bm.
-        Whenever a new binning matrix is set, the self.bin_widths attribute gets updated.
+        self.preprocessing()
+
+        # if instance is not fitted, fit it
+        if self.cof is None:
+            self.fit(force=True)
+
+        return np.fromiter(map(self.compiled_model, x), dtype=float)
+
+    @property
+    def compiled_model(self):
+        """Compiled theoretical variogram model
+
+        Compile the model using the actual fitting parameters to return a
+        function implementing them.
+
+        Returns
+        -------
+        callable
 
-        :return:
         """
-        # overwrite maxlag
-        if maxlag is not None:
-            if maxlag < 1:
-                self.__maxlag = maxlag * np.max(self.dm)
-            else:
-                self._maxlag = maxlag
+        if self.cof is None:
+            self.fit(force=True)
 
-        # store the bm_kwargs
-        self._bm_kwargs.update(kwargs)
+        # get the function
+        func = self._model
 
-        if bm is None:
-            self._bm, self.bin_widths = self.bm_func(X=self._X, dm=self.dm, maxlag=self.maxlag, **self._bm_kwargs)
+        # define the wrapper
+        def model(x):
+            return func(x, *self.cof)
+
+        # return
+        return model
+
+    def _calc_distances(self, force=False):
+        if self._dist is not None and not force:
+            return
+
+        # if self._X is of just one dimension, concat zeros.
+        if self._X.ndim == 1:
+            _x = np.vstack(zip(self._X, np.zeros(len(self._X))))
         else:
-            if hasattr(self, 'bin_widths'):
-                delattr(self, 'bin_widths')
-            self._bm = bm
+            _x = self._X
 
+        # else calculate the distances
+        self._dist = self._dist_func(_x)
+
+    def _calc_diff(self, force=False):
+        """Calculates the pairwise differences
+
+        Returns
+        -------
+        void
 
-    @property
-    def bm(self):
         """
+        # already calculated
+        if self._diff is not None and not force:
+            return
+
+        v = self.values
+        l = len(v)
+        self._diff = np.zeros(int((l**2 - l) / 2))
+
+        # calculate the pairwise differences
+        for t, k in zip(self.__vdiff_indexer(), range(len(self._diff))):
+            self._diff[k] = np.abs(v[t[0]] - v[t[1]])
+
+    # @jit
+    def __vdiff_indexer(self):
+        """Pairwise indexer
+
+        Returns an iterator over all index pairs (i, j) of the upper
+        triangle of a squareform matrix of the values. The iterable will
+        be of type tuple.
+
+        Returns
+        -------
+        iterable
 
-        :return: binning matrix in the style of :func:`scipy.spatial.squareform`
         """
-        if hasattr(self, '_bm'):
-            return self._bm
-        else:
-            _bm, self.bin_widths = self.bm_func(X=self._X, dm=self.dm, maxlag=self.maxlag, **self._bm_kwargs)
-            return _bm
+        l = len(self.values)
 
-    @property
-    def maxlag(self):
-        return self._maxlag
+        for i in range(l):
+            for j in range(l):
+                if i > j:
+                    yield i, j
 
-    @maxlag.setter
-    def maxlag(self, value):
-        # a maxlag was set, therefore the _bm and bin_widths attributes have to be cleared
-        if hasattr(self, '_bm'):
-            delattr(self, '_bm')
-        if hasattr(self, 'bin_widths'):
-            delattr(self, 'bin_widths')
+    def _calc_groups(self, force=False):
+        """Calculate the lag class mask array
 
-        # set new maxlag
-        if value is None:
-            self._maxlag = None
-        elif value < 1:
-            self._maxlag = value * np.max(self.dm)
-        else:
-            self._maxlag = value
+        Returns
+        -------
+        void
+
+        """
+        # already calculated
+        if self._groups is not None and not force:
+            return
+
+        # get the bin edges and distances
+        bin_edges = self.bins
+        d = self.distance
+
+        # -1 is the group for distances outside maxlag
+        self._groups = np.ones(len(d), dtype=int) * -1
+
+        for i, bounds in enumerate(zip([0] + list(bin_edges), bin_edges)):
+            self._groups[np.where((d >= bounds[0]) & (d < bounds[1]))] = i
+
+    def clone(self):
+        """
+        Wrapper for copy.deepcopy function of self.
+        """
+        return copy.deepcopy(self)
 
     @property
     def experimental(self):
         """
-        Return the experimental variogram.
-        If :param harmonize: is True, it will return self.isotonic, instead of self.experimental.
 
-        :return: np.array, the isotonic or non-isotonic experimental varigram
+        Returns
+        -------
+        numpy.ndarray
+            The isotonic or non-isotonic experimental variogram.
+
         """
         if self.harmonize:
             return self.isotonic
         else:
             return self._experimental
 
     @property
+    @jit
     def _experimental(self):
         """
 
-        :return: experimental variogram as a numpy.ndarray.
+        Returns
+        -------
+        numpy.ndarray
+            The experimental variogram values, one per lag class.
+
         """
-        # group the values to bins and apply the estimator
-        _g = self.grouped_pairs
+        # prepare the result array
+        y = np.zeros(len(self.bins))
+
+        # args, can set the bins for entropy
+        # and should set p of percentile, not properly implemented
+        if self._estimator.__name__ == 'entropy':
+            bins = np.linspace(
+                np.min(self.distance),
+                np.max(self.distance),
+                50
+            )
+            # apply
+            for i, lag_values in enumerate(self.lag_classes()):
+                y[i] = self._estimator(lag_values, bins=bins)
+
+        # default
+        else:
+            for i, lag_values in enumerate(self.lag_classes()):
+                y[i] = self._estimator(lag_values)
 
-        # apply
-        return np.array(self._estimator(_g))
+        return y.copy()
 
     @property
     def isotonic(self):
@@ -370,6 +899,8 @@ def isotonic(self):
         :return: np.ndarray, monotonized experimental variogram
         """
         # TODO this is imported in the function as sklearn is not a dependency (and should not be for now)
+        raise NotImplementedError
+
         try:
             from sklearn.isotonic import IsotonicRegression
             y = self._experimental
@@ -378,83 +909,15 @@ def isotonic(self):
         except ImportError:
             raise NotImplementedError('scikit-learn is not installed, but the isotonic function without sklear dependency is not installed yet.')
 
-    @property
-    def grouped_pairs(self):
-        """
-        Result of the group_to_bin function. This property will be used for wrapping the function, in case there are
-        alternative grouping functions implemented one day.
-
-        :return:
-        """
-        if self.is_directional:
-            return group_to_bin(self.values, self.bm, X=self._X, azimuth_deg=self.azimuth, tolerance=self.tolerance,
-                                maxlag=self.maxlag)
-        else:
-            return group_to_bin(self.values, self.bm, maxlag=self.maxlag)
-
-
-    def fit(self, x, y):
-        """
-        Fit the theoretical variogram function to the experimental values. The function will be fitted using the least
-        square method, with a maximum of maxiter iteration, defaults to 1000. For fitting the starting parameters of
-        the theoretical function a, C0, b can be given as kwargs. If the Variogram uses a custom model, the parameters
-        might have other names.
-        The parameter set with best fit will be returned.
-
-        THIS STUFF NEEDS A COMPLETE REWORK
-
-        :return:
-        """
-        # if last bin is set, delete it
-        if hasattr(self, 'last_bin'):
-            del self.last_bin
-
-        # remove nans
-        _x = x[~np.isnan(y)]
-        _y = y[~np.isnan(y)]
-
-        if self.fit_method == 'lm':
-#            bounds = (0, [np.nanmax(x), np.nanmax(y)])
-            bounds = (0, self.__get_fit_bounds(x, y))
-            return curve_fit(self._model, _x, _y, p0=bounds[1], bounds=bounds)
-
-        elif self.fit_method == 'pec':
-            # Run the fitting with each point excluded
-            rmse, cof, cov, punish = list(), list(), list(), list()
-            # get the histogram counts (the cumsum of bin sizes, summed from right to left)
-            _h = self.hist[~np.isnan(y)]
-            h = np.flipud(np.cumsum(np.flipud(_h)))
-
-            for i in range(1, len(_x) - 2):
-#                bnd = (0, [np.max(_x[:-i]), np.max(_y[:-i])])
-                bnd = (0, self.__get_fit_bounds(x[:-i], y[:-i]))
-                cf, cv = curve_fit(self._model, _x[:-i], _y[:-i], p0=bnd[1], bounds=bnd)
-                cof.append(cf)
-                cov.append(cov)
-                p = ((h[0] + 1) / ((h[0] + 1) - h[-i]))**self.pec_punish
-                rmse.append(p * (np.sqrt(np.sum((self._model(_x[:-i], *cf) - _x[:-i]) ** 2) / len(_x[:-i]))))
-                punish.append(p)
-
-            if self.verbose:
-                print('Punish Weights: ', ['%.3f' % _ for _ in punish])
-                print('RMSE: ', ['%.2f' % _ for _ in rmse])
-            # here rmse is the error for the cf set in cof
-            # find the optimum of excluded points to rmse error improvement
-            best_rmse = rmse.index(np.nanmin(rmse))
-            self.last_bin = len(x) - (best_rmse + 1)
-
-            return cof[best_rmse], cov[best_rmse]
-
-        else:
-            raise ValueError('The fit method {} is not understood. Use either \'lm\' (least squares) or \'pec\' (point exclusion cost).'.format(self.fit_method))
-
-
     def __get_fit_bounds(self, x, y):
         """
-        Return the bounds for parameter space in fitting a variogram model. The bounds are depended on the Model
-        that is used
+        Return the bounds for parameter space in fitting a variogram model.
+        The bounds depend on the model that is used.
+
+        Returns
+        -------
+        list
 
-        :return:
         """
         mname = self._model.__name__
@@ -484,20 +947,36 @@ def __get_fit_bounds(self, x, y):
 
         return bounds
 
-    @property
-    def data(self):
-        """
-        Calculates the experimental Variogram. As the bins are only indexed by a integer, the lag array is caculated
-        by cummulative summarizing the bin_widths array. If this bin_widths was not returned by the bm_func, even bin
-        widths are created matching the number of bins at the maximum entry in the distance matrix. This will lead to
-        calculation errors, in case the bin widths are not even.
-        The theoretical variogram function is fitted to the experimental values at given lags
-        and the theoretical function is calculated for all increments and returned.
+    def data(self, n=100, force=False):
+        """Theoretical variogram function
+
+        Calculate the experimental variogram and apply the binning. On
+        success, the variogram model will be fitted and applied to n lag
+        values. Returns the lags and the calculated semi-variance values.
+        If force is True, a clean preprocessing and fitting run will be
+        executed.
+
+        Parameters
+        ----------
+        n : integer
+            length of the lags array to be used for fitting. Defaults to
+            100, which will be fine for most plots
+        force : boolean
+            If True, the preprocessing and fitting will be executed as a
+            clean run. This will force all intermediate results to be
+            recalculated. Defaults to False
 
+        Returns
+        -------
+        variogram : tuple
+            first element is the created lags array
+            second element are the calculated semi-variance values
 
-        :return:
         """
+        # force a clean preprocessing if needed
+        self.preprocessing(force=force)
+
         # calculate the experimental variogram
-        # this might raise an exception if the bm is not present yet
         _exp = self.experimental
         _bin = self.bins
@@ -505,53 +984,146 @@ def data(self):
         if self.normalized:
             _bin /= np.nanmax(_bin)     # normalize X
             _exp /= np.nanmax(_exp)     # normalize Y
-            x = np.linspace(0, 1, 100)  # use 100 increments
+            x = np.linspace(0, 1, n)    # use n increments
         else:
-#            x = np.arange(0, np.float64(np.max(_bin)), 1)
-            x = np.linspace(0, np.float64(np.nanmax(_bin)), 100)
+            x = np.linspace(0, np.float64(np.nanmax(_bin)), n)
 
-        # fit
-        self.cof, self.cov = self.fit(_bin, _exp)
+        # fit if needed
+        if self.cof is None:
+            self.fit(force=force)
 
         return x, self._model(x, *self.cof)
 
     @property
     def residuals(self):
-        """
+        """Model residuals
+
+        Calculate the model residuals defined as the differences between
+        the experimental variogram and the theoretical model values at
+        corresponding lag values.
+
+        Returns
+        -------
+        numpy.ndarray
 
-        :return:
         """
         # get the deviations
-        experimental, model = self.__model_deviations()
+        experimental, model = self.model_deviations()
 
         # calculate the residuals
-        return np.fromiter(map(lambda x, y: x - y, model, experimental), np.float)
+        return np.fromiter(
+            map(lambda x, y: x - y, model, experimental),
+            np.float
+        )
 
     @property
     def mean_residual(self):
-        """
+        """Mean Model residuals
 
-        :return:
+        Calculates the mean absolute deviation between the experimental
+        variogram and the theoretical model values.
+
+        Returns
+        -------
+        float
         """
         return np.nanmean(np.fromiter(map(np.abs, self.residuals), np.float))
 
     @property
-    def RMSE(self):
+    def rmse(self):
+        r"""RMSE
+
+        Calculate the root mean squared error between the experimental
+        variogram and the theoretical model values at corresponding lags.
+        Can be used as a fitting quality measure.
+
+        Returns
+        -------
+        float
+
+        See Also
+        --------
+        Variogram.residuals
+
+        Notes
+        -----
+        The RMSE is implemented like:
+
+        .. math::
+            RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^{N} (x_i - y_i)^2}
+
+        """
         # get the deviations
-        experimental, model = self.__model_deviations()
+        experimental, model = self.model_deviations()
 
         # get the sum of squares
-        rsum = np.nansum(np.fromiter(map(lambda x, y: (x - y)**2, experimental, model), np.float))
+        rsum = np.nansum(np.fromiter(
+            map(lambda x, y: (x - y)**2, experimental, model),
+            np.float
+        ))
 
         return np.sqrt(rsum / len(model))
 
     @property
-    def NRMSE(self):
-        return self.RMSE / np.nanmean(self.experimental)
+    def nrmse(self):
+        r"""NRMSE
+
+        Calculate the normalized root mean squared error between the
+        experimental variogram and the theoretical model values at
+        corresponding lags. Can be used as a fitting quality measure.
+
+        Returns
+        -------
+        float
+
+        See Also
+        --------
+        Variogram.residuals
+        Variogram.rmse
+
+        Notes
+        -----
+        The NRMSE is implemented as:
+
+        .. math::
+
+            NRMSE = \frac{RMSE}{mean(y)}
+
+        where RMSE is Variogram.rmse and y is Variogram.experimental.
+
+        """
+        return self.rmse / np.nanmean(self.experimental)
 
     @property
-    def NRMSE_r(self):
-        return self.RMSE / (np.nanmax(self.experimental) - np.nanmean(self.experimental))
+    def nrmse_r(self):
+        r"""NRMSE_r
+
+        Alternative normalized root mean squared error between the
+        experimental variogram and the theoretical model values at
+        corresponding lags. Can be used as a fitting quality measure.
+
+        Returns
+        -------
+        float
+
+        See Also
+        --------
+        Variogram.rmse
+        Variogram.nrmse
+
+        Notes
+        -----
+        Unlike Variogram.nrmse, nrmse_r is not normalized to the mean of
+        y, but to the difference of the maximum y to its mean:
+
+        .. math::
+            NRMSE_r = \frac{RMSE}{max(y) - mean(y)}
+
+        """
+        _y = self.experimental
+        return self.rmse / (np.nanmax(_y) - np.nanmean(_y))
 
     @property
     def r(self):
@@ -561,7 +1133,7 @@ def r(self):
         :return:
         """
         # get the experimental and theoretical variogram and cacluate means
-        experimental, model = self.__model_deviations()
+        experimental, model = self.model_deviations()
         mx = np.nanmean(experimental)
         my = np.nanmean(model)
 
@@ -580,7 +1152,7 @@ def NS(self):
 
         :return:
         """
-        experimental, model = self.__model_deviations()
+        experimental, model = self.model_deviations()
         mx = np.nanmean(experimental)
 
         # calculate the single nash-sutcliffe terms
@@ -589,10 +1161,21 @@ def NS(self):
 
         return 1 - (term1 / term2)
 
-    def __model_deviations(self):
-        """
-        These model deviations can be used to calculate different model quality parameters like residuals, RMSE.
-        :return:
+    def model_deviations(self):
+        """Model Deviations
+
+        Calculate the deviations between the experimental variogram and
+        the recalculated values for the same bins using the fitted
+        theoretical variogram function. Can be utilized to calculate a
+        quality measure for the variogram fit.
+
+        Returns
+        -------
+        deviations : tuple
+            first element is the experimental variogram
+            second element are the corresponding values of the theoretical
+            model.
+
         """
         # get the experimental values and their bin bounds
         _exp = self.experimental
@@ -601,44 +1184,54 @@ def model_deviations(self):
 
         # get the model parameters
         param = self.describe()
         if 'error' in param:
-            raise RuntimeError('The Variogram cannot be applied properly. First, calculate the variogram.')
+            raise RuntimeError('The variogram cannot be calculated.')
 
         # calculate the model values at bin bounds
-        _model = self._model(_bin, *self.cof)
+        _model = self.transform(_bin)
 
         return _exp, _model
 
     def describe(self):
-        """
-        Return a dictionary of the Variogram parameters
+        """Variogram parameters
+
+        Return a dictionary of the variogram parameters.
+
+        Returns
+        -------
+        dict
 
-        :return:
         """
-        try:
-            if self.normalized:
-                maxlag = np.nanmax(self.bins)
-                maxvar = np.nanmax(self.experimental)
-            else:
-                maxlag = 1
-                maxvar = 1
-            index, data = self.data
-            cof = self.cof
-        except:
-            return dict(name=self._model.__name__, estimator = self._estimator.__name__, error='Variogram not calculated.')
+        # fit, if not already done
+        if self.cof is None:
+            self.fit(force=True)
+
+        # scale sill and range
+        if self.normalized:
+            maxlag = np.nanmax(self.bins)
+            maxvar = np.nanmax(self.experimental)
+        else:
+            maxlag = 1.
+            maxvar = 1.
+
+        # get the fitting coefficients
+        cof = self.cof
+
+        # build the dict
         rdict = dict(
             name=self._model.__name__,
             estimator=self._estimator.__name__,
-            range=cof[0] * maxlag,
+            effective_range=cof[0] * maxlag,
             sill=cof[1] * maxvar,
             nugget=cof[-1] * maxvar if self.use_nugget else 0
         )
 
-        # handle s parameters
+        # handle s parameters for matern and stable model
         if self._model.__name__ == 'matern':
             rdict['smoothness'] = cof[2]
         elif self._model.__name__ == 'stable':
             rdict['shape'] = cof[2]
 
+        # return
         return rdict
 
     @property
@@ -652,189 +1245,208 @@ def parameters(self):
 
         if 'error' in d:
             return [None, None, None]
         elif self._model.__name__ == 'matern':
-            return list([d['range'], d['sill'], d['smoothness'], d['nugget']])
+            return list([
+                d['effective_range'],
+                d['sill'],
+                d['smoothness'],
+                d['nugget']
+            ])
         elif self._model.__name__ == 'stable':
-            return list([d['range'], d['sill'], d['shape'], d['nugget']])
+            return list([
+                d['effective_range'],
+                d['sill'],
+                d['shape'],
+                d['nugget']
+            ])
         elif self._model.__name__ == 'nugget':
             return list([d['nugget']])
         else:
-            return list([d['range'], d['sill'], d['nugget']])
+            return list([
+                d['effective_range'],
+                d['sill'],
+                d['nugget']
+            ])
+
+    def to_DataFrame(self, n=100, force=False):
+        """Variogram DataFrame
+
+        Returns the fitted theoretical variogram as a pandas.DataFrame
+        instance. The n and force parameters control the calculation;
+        refer to the data function for more info.
+
+        Parameters
+        ----------
+        n : integer
+            length of the lags array to be used for fitting. Defaults to
+            100, which will be fine for most plots
+        force : boolean
+            If True, the preprocessing and fitting will be executed as a
+            clean run. This will force all intermediate results to be
+            recalculated. Defaults to False
+
+        Returns
+        -------
+        pandas.DataFrame
+
+        See Also
+        --------
+        Variogram.data
+
+        """
+        lags, data = self.data(n=n, force=force)
+
+        return DataFrame({
+            'lags': lags,
+            self._model.__name__: data}
+        ).copy()
+
+    def plot(self, axes=None, grid=True, show=True, hist=True):
+        """Variogram Plot
+
+        Plot the experimental variogram, the fitted theoretical function
+        and a histogram for the lag classes. The axes attribute can be
+        used to pass a list of AxesSubplots or a single instance to the
+        plot function. Then these Subplots will be used. If only a single
+        instance is passed, the hist attribute will be ignored as only the
+        variogram will be plotted anyway.
+
+        Parameters
+        ----------
+        axes : list, tuple, array, AxesSubplot or None
+            If None, the plot function will create a new matplotlib
+            figure. Otherwise a single instance or a list of AxesSubplots
+            can be passed to be used. If a single instance is passed, the
+            hist attribute will be ignored.
+        grid : bool
+            Defaults to True. If True a custom grid will be drawn through
+            the lag class centers
+        show : bool
+            Defaults to True. If True, the show method of the passed or
+            created matplotlib Figure will be called before returning the
+            Figure. This should be set to False when used in a Notebook,
+            as a returned Figure object will be plotted anyway.
+        hist : bool
+            Defaults to True. If False, the creation of a histogram for
+            the lag classes will be suppressed.
+
+        Returns
+        -------
+        matplotlib.Figure
+
+        """
+        # get the parameters
+        _bins = self.bins
+        _exp = self.experimental
+        x = np.linspace(0, np.nanmax(_bins), 100)  # make the 100 a param?
 
-        if hasattr(self, 'bin_widths'):
-            _bin = np.cumsum(self.bin_widths)
+        # do the plotting
+        if axes is None:
+            if hist:
+                fig = plt.figure(figsize=(8, 5))
+                ax1 = plt.subplot2grid((5, 1), (1, 0), rowspan=4)
+                ax2 = plt.subplot2grid((5, 1), (0, 0), sharex=ax1)
+                fig.subplots_adjust(hspace=0)
+            else:
+                fig, ax1 = plt.subplots(1, 1, figsize=(8, 4))
+                ax2 = None
+        elif isinstance(axes, (list, tuple, np.ndarray)):
+            ax1, ax2 = axes
+            fig = ax1.get_figure()
         else:
-            print('Warning: Bin edges were calcuated on the fly.')
-            n = int(np.max(self.bm) + 1)
-            _bin = np.cumsum(np.ones(n) * np.max(self.dm) / n)
-
-        return _bin
-
-    @property
-    def hist(self):
-        """
-        Return a histogram for the present bins in the Variogram. The bin matrix bm will be unstacked and counted.
-        Variogram.bins, Variogram.hist could be used to produce a properly labeled histogram.
-
-        :return:
-        """
-        # get the upper triangle from the bin matrix
-#        _bm = self.bm
-#        ut = [_bm[i, j] for i in range(len(_bm)) for j in range(len(_bm)) if j > i]
-#        return np.bincount(ut)
-
-        # get the grouped pairs
-        _g = self.grouped_pairs
-        return np.array([len(_) / 2 for _ in _g])
-
-    @property
-    def values1D(self):
-        """
-        If the values were given with ndim > 1, the value1D property will return the aggreagtes
-        using the self.agg function.
-
-        :return:
-        """
-        return [self.agg(_) for _ in self.values]
-
-    def to_DataFrame(self):
-        """
-        Return the result of the theoretical Variogram as a pandas.DataFrame. The lag will index the semivariance
-        values.
-
-        :return:
-        """
-        index, data = self.data
-
-        # translate the normalized lag to real lag
-        maxlag = np.nanmax(self.bins)
-        maxvar = np.nanmax(self.experimental)
-
-        return DataFrame({'lag': index * maxlag, self._model.__name__: data * maxvar}).copy()
-
-
-    def plot(self, axes=None, grid=True, show=True, cof=None):
-        """
-        Plot the variogram, including the experimental and theoretical variogram. By default, the experimental data
-        will be represented by blue points and the theoretical model by a green line.
+            ax1 = axes
+            ax2 = None
+            fig = ax1.get_figure()
 
-        :return:
-        """
-        try:
-            _bin, _exp = self.bins, self.experimental
-            if cof is None:
-                x, data = self.data
-            else:
-                x = np.linspace(0, 1, 100) if self.normalized else np.linspace(0, np.nanmax(_bin), 100)
-                data = self._model(x, *cof)
-            _hist = self.hist
-        except Exception as e:
-            raise RuntimeError('A chart could not be drawn, as the input data is not complete. Please calculate the Variogram first.\nDetailed message: %s.' % str(e))
+        # apply the model
+        y = self.transform(x)
 
         # handle the relative experimental variogram
         if self.normalized:
-            _bin /= np.nanmax(_bin)
+            _bins /= np.nanmax(_bins)
+            y /= np.max(_exp)
             _exp /= np.nanmax(_exp)
+            x /= np.nanmax(x)
 
-        # do the plotting
-        if axes is None:
-            fig = plt.figure()
-            ax1 = plt.subplot2grid((5, 1), (1, 0), rowspan=4)
-            ax2 = plt.subplot2grid((5, 1), (0, 0), sharex=ax1)
-            fig.subplots_adjust(hspace=0)
-        else:
-            ax1, ax2 = axes
-            fig = ax1.get_figure()
-
-        # calc histgram bar width
-        # bar use 70% of the axis, w is the total width divided by amount of bins in the histogram
-        w = (np.max(_bin) * 0.7) / len(_hist)
-
+        # ------------------------
         # plot Variograms
-
-        # if last_bin is set, plot only considered bins in blue, excluded in red
-        ax1.plot(_bin, _exp, '.b')
-        ax1.plot(x, data, '-g')
-
-        if hasattr(self, 'last_bin'):
-            ax1.plot(_bin[self.last_bin:], _exp[self.last_bin:], '.r')
-
-
-        # plot histogram
-        ax2.bar(_bin, _hist, width=w, align='center')
-        # adjust
-        plt.setp(ax2.axes.get_xticklabels(), visible=False)
-        ax2.axes.set_yticks(ax2.axes.get_yticks()[1:])
+        ax1.plot(_bins, _exp, '.b')
+        ax1.plot(x, y, '-g')
 
         # ax limits
         if self.normalized:
             ax1.set_xlim([0, 1.05])
             ax1.set_ylim([0, 1.05])
 
         if grid:
-            ax1.vlines(_bin, *ax1.axes.get_ybound(), colors=(.85, .85, .85), linestyles='dashed')
-            ax2.vlines(_bin, *ax2.axes.get_ybound(), colors=(.85, .85, .85), linestyles='dashed')
-
+            ax1.grid(False)
+            ax1.vlines(_bins, *ax1.axes.get_ybound(), colors=(.85, .85, .85),
+                       linestyles='dashed')
 
         # annotation
         ax1.axes.set_ylabel('semivariance (%s)' % self._estimator.__name__)
         ax1.axes.set_xlabel('Lag (-)')
-        ax2.axes.set_ylabel('N')
 
         # show the figure
-        if show:
-            fig.show()
+        # ------------------------
+        # plot histogram
+        if ax2 is not None and hist:
+            # calc the histogram
+            _count = np.fromiter(
+                (g.size for g in self.lag_classes()), dtype=int
+            )
 
-        return fig
+            # set the sum of hist bar widths to 70% of the x-axis space
+            w = (np.max(_bins) * 0.7) / len(_count)
 
+            # plot
+            ax2.bar(_bins, _count, width=w, align='center', color='red')
 
-    def scattergram(self, ax=None, plot=True):
-        """
-        Plot a scattergram, which is a scatter plot of
+            # adjust
+            plt.setp(ax2.axes.get_xticklabels(), visible=False)
+            ax2.axes.set_yticks(ax2.axes.get_yticks()[1:])
 
-        :return:
-        """
-        # calculate population mean
-        _mean = np.mean(self.values1D)
+            # need a grid?
+        if grid:
+            ax2.grid('off')
+            ax2.vlines(_bins, *ax2.axes.get_ybound(),
+                       colors=(.85, .85, .85), linestyles='dashed')
 
-        # group the values to bins
-        gbm = self.grouped_pairs
+        # annotate
+        ax2.axes.set_ylabel('N')
 
-        # create the tail and head arrays
-        tail, head = list(), list()
+        # show the figure
+        if show:
+            fig.show()
 
-        # order the point pairs to tail and head
-        for grp in gbm:
-            # the even values are z(x) and odd values are z(x+h)
-            for i in range(0, len(grp) - 1, 2):
-                tail.append(_mean - grp[i])
-                head.append(_mean - grp[i + 1])
+        return fig
 
-        # if no plot is questioned, return tail and head arrays
-        if not plot:
-            return tail, head
+    def scattergram(self, ax=None):
 
-        # else plot in either given Ax object or create a new one
+        # create a new plot or use the given
         if ax is None:
             fig, ax = plt.subplots(1, 1)
         else:
             fig = ax.get_figure()
 
+        tail = np.empty(0)
+        head = tail.copy()
+
+        for h in np.unique(self.lag_groups()):
+            # get the head and tail
+            x, y = np.where(squareform(self.lag_groups()) == h)
+
+            # concatenate
+            tail = np.concatenate((tail, self.values[x]))
+            head = np.concatenate((head, self.values[y]))
+
+        # plot the mean on tail and head
+        ax.vlines(np.mean(tail), np.min(tail), np.max(tail), linestyles='--',
+                  color='red', lw=2)
+        ax.hlines(np.mean(head), np.min(head), np.max(head), linestyles='--',
+                  color='red', lw=2)
         # plot
-        ax.plot(tail, head, '.k')
+        ax.scatter(tail, head, 10, marker='o', color='orange')
+
+        # annotate
+        ax.set_ylabel('head')
+        ax.set_xlabel('tail')
 
         # show the figure
         fig.show()
@@ -842,16 +1454,27 @@ def scattergram(self, ax=None, plot=True):
         return fig
 
     def location_trend(self, axes=None):
-        """
-        Plot the values over each dimension in the coordinates as a scatter plot.
-        These plots can be used to identify a correlation between the value of an observation
-        and its location. If this is the case, it would violate the fundamental geostatistical
-        assumption, that a oberservation is independed of its observation
+        """Location Trend plot
 
-        :param axes: list of `matplotlib.AxesSubplots`. The len has to match the dimensionality
-            of the coordiantes.
+        Plots the values over each dimension of the coordinates in a scatter
+        plot. This will visually show correlations between the values and any
+        of the coordinate dimensions. If the values depend on their location,
+        the intrinsic hypothesis, a weaker form of second-order stationarity,
+        would be violated.
+
+        Parameters
+        ----------
+        axes : list
+            Can be None (default) or a list of matplotlib.AxesSubplots. If a
+            list is passed, the location trend plots will be plotted on the
+            given instances. Note that the length of the list has to match
+            the dimensionality of the coordinates array. In case 3D
+            coordinates are used, three subplots have to be given.
+
+        Returns
+        -------
+        matplotlib.Figure
 
-        :return:
         """
         N = len(self._X[0])
         if axes is None:
@@ -861,7 +1484,9 @@ def location_trend(self, axes=None):
             fig, axes = plt.subplots(nrow, ncol, figsize=(ncol * 6 ,nrow * 6))
         else:
             if not len(axes) == N:
-                raise ValueError('The amount of passed axes does not fit the coordinate dimensionality of %d' % N)
+                raise ValueError(
+                    'The amount of passed axes does not fit the coordinate' +
+                    ' dimensionality of %d' % N)
             fig = axes[0].get_figure()
 
         for i in range(N):
@@ -875,10 +1500,6 @@ def location_trend(self, axes=None):
 
         return fig
 
-
-
-
-
     ## ----- implementing some Python functions ---- ##
     def __repr__(self):
         """
         Textual representation of this Variogram instance.
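Editorial note: to illustrate the reworked plotting API in the hunk above, here is a minimal sketch. It assumes only the version 0.2 constructor and the `plot` and `location_trend` signatures shown in this diff; the synthetic sample mirrors the one used in the new test suite further below.

.. code-block:: python

    import numpy as np
    from skgstat import Variogram

    # synthetic sample: 30 random 2D coordinates and values
    np.random.seed(42)
    coords = np.random.gamma(10, 4, (30, 2))
    values = np.random.normal(10, 4, 30)

    V = Variogram(coords, values)

    # experimental and theoretical variogram, with the lag-class
    # histogram drawn on a second axis above the variogram
    fig = V.plot(show=False, hist=True)

    # value vs. coordinate scatter plots to check for location trends
    trend_fig = V.location_trend()

Passing `show=False` only returns the Figure, which is convenient in a notebook, where the returned object is rendered anyway.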
diff --git a/skgstat/__init__.py b/skgstat/__init__.py index d9c37b7..46329c2 100644 --- a/skgstat/__init__.py +++ b/skgstat/__init__.py @@ -1,5 +1 @@ -from .binning import binify_even_width, group_to_bin -from .distance import nd_dist from .Variogram import Variogram -from .optimize import VariogramFitter -from .estimator import matheron, cressie, dowd, genton, minmax, percentile \ No newline at end of file diff --git a/skgstat/binning.py b/skgstat/binning.py index fa1d204..cceac2a 100644 --- a/skgstat/binning.py +++ b/skgstat/binning.py @@ -1,218 +1,63 @@ -import copy import numpy as np -from scipy.stats.mstats import mquantiles -from .distance import nd_dist -def binify_even_width(X, N=10, w=None, dm=None, maxlag=None, **kwargs): - """ - Returns a distance matrix with all entries sorted into bin numbers, along with an array of bin widths. - The matrix has the same form as the distance matrix dm in squareform. The bins will be indexed from 0 to n. - If dm is None, then the point_dist function will be used to calculate a distance matrix. - kwargs will be passed to point_matrix. - For the bins, either N or w has to be given. N specifies the number of bins and w their width. - If both are given, N bins of width w will be specified, which might result in unexpected results. - - :param X: np.array of x, y coordinates. - :param N: int with the number of bins - :param w: integer or float with width of the bins - :param dm: numpy.ndarray with the distance matrix - :param maxlag: maximum lag for the binning - :param kwargs: will be passed to calculate the point_matrix if no dm is given - :return: distance matrix - """ - - _X = list(X) - - # check that all coordinates in the list have the same dimension and are not empty - if not len(set([len(e) for e in _X])) == 1 or len(_X[0]) == 0: - raise ValueError("One or more Coordinates are missing.\nPlease provide the coordinates for all values ") - - # get the distance matrix - if dm is None: - _dm = nd_dist(_X, **kwargs) - else: - _dm = dm - - # get the max distance - maxval = np.max(_dm) - - # check if there was a maximum lag set - if maxlag is not None: - if maxlag < maxval: - maxval = maxlag - - # either N or w has to be given - if N is None and w is None: - raise ValueError("Either N or w have to be given") +def even_width_lags(distances, n, maxlag): + """Even lag edges - # If N is not given, calculate it from maxval and width - if N is None: - N = int(maxval / w) + Calculate the lag edges for a given amount of bins using the same lag + step width for all bins. - # If N is given, calculate w from - else: - if w is not None: - print('Warning! w = %d is ignored because N is already given' % w) - w = maxval / N - - binubound = np.linspace(w, N * w, N) - - # set the last bound to the actual maxval - binubound[-1] = np.max(_dm) - - # create bin matrix as copy of dm - bm = copy.deepcopy(_dm) - - # set all bins except the first and last one - for i in range(1, N - 1): - bm[(_dm > binubound[i - 1]) & (_dm <= binubound[i])] = i + Parameters + ---------- + distances : numpy.array + Flat numpy array representing the upper triangle of the distance matrix. + n: integer + Amount of lag classes to find + maxlag : integer, float + Limit the last lag class to this separating distance. 
- # set the first bin - bm[_dm < binubound[0]] = 0 + Returns + ------- + numpy.array - # set the last bin - bm[_dm > binubound[-2]] = N - 1 - - # iter all bin upper bounds -# for i, ubound in enumerate(binubound): -# bm[ (_dm > (ubound - w)) & (_dm <= ubound) ] = i - - return np.matrix(bm), np.ones(N) * w - - -def binify_even_bin(X, N=10, dm=None, maxlag=None, **kwargs): """ - Returns a distance matrix with all entries sorted into bin numbers, along with an array of bin widths. - The matrix has the same form as the distance matrix dm in squareform. The bins will be indexed from 0 to n. - If dm is None, then the point_dist function will be used to calculate a distance matrix. kwargs will be passed to point_matrix. - For the bins, either N or w has to be given. N specifies the number of bins and w their width. If both are given, - N bins of width w will be specified, which might result in unexpected results. + # maxlags larger than the maximum separating distance will be ignored + if maxlag is None or maxlag > np.max(distances): + maxlag = np.max(distances) - :param X: np.array of x, y coordinates. - :param N: int with the number of bins - :param dm: numpy.ndarray with the distance matrix - :param maxlag: maximum lag for the binning - :param kwargs: will be passed to calculate the point_matrix if no dm is given - :return: - """ - - _X = list(X) - - # check that all coordinates in the list have the same dimension and are not empty - if not len(set([len(e) for e in _X])) == 1 or len(_X[0]) == 0: - raise ValueError("One or more Coordinates are missing.\nPlease provide the coordinates for all values ") - - # get the distance matrix - if dm is None: - _dm = nd_dist(_X, **kwargs) - else: - _dm = dm + return np.linspace(0, maxlag, n + 1)[1:] - # create bin matrix as copy of dm - bm = copy.deepcopy(_dm) - # get the upper bounds by calculating the quantiles of the upper bounds - binubound = mquantiles(np.array(_dm).flatten(), prob=[i/N for i in range(1, N+1)]) +def uniform_count_lags(distances, n, maxlag): + """Uniform lag counts - # set all bins except the first one - for i in range(1, N): - bm[(_dm > binubound[i - 1]) & (_dm <= binubound[i])] = i + Calculate the lag edges for a given amount of bins with the same amount + of observations in each lag class. The lag step width will be variable. - # set the first bin - bm[_dm < binubound[0]] = 0 + Parameters + ---------- + distances : numpy.array + Flat numpy array representing the upper triangle of the distance matrix. + n: integer + Amount of lag classes to find + maxlag : integer, float + Limit the last lag class to this separating distance. - return np.matrix(bm), np.diff([0, *binubound]) + Returns + ------- + numpy.array - -def group_to_bin(values, bm=None, X=None, azimuth_deg=None, tolerance=22.5, maxlag=None, **kwargs): - """ - The given values array represents values at coordinates X. - A distance matrix is calculated and afterwards organized into bins with even width. - This bin matrix can be given as bm; if None, The coordinates have to be given. - For each bin, the corresponding values will be appended to a list - The index is equal to the bin number in the returned nested list. - The array length has to exactly fill the upper or lower triangle of the bin matrix (which is a squred triangular matrix). - In case the azimuth_deg is not None, a irregular (or directional) Variogram will be calculated. - This means, that only the pairs where the vector direction is within azimuth_deg +/- tolerance - will be grouped into the bin. 
Otherwise they will be ignored. - - :param values: np.array with the values at the coordinates X - :param bm: binning matrix of the given values - :param X: 1d np.array with x and y coordinates of the given values - :param azimuth_deg: int or float with the azimuth degree if a directional Variogram is given - :param tolerance: int or float with the tolerance for the directional Variogram - :param maxlag: int or float with the maximum lag for the binning - :param kwargs: kwargs which will be passed to binify_even_bin - :return: list of lists including the values in every bin """ - # check if the input was 2D (that means, for each pair, more than one observation is used) -# if np.array(values).ndim == 2: - - if any([isinstance(_, (tuple, list, np.ndarray)) for _ in values]): - multidim = True - else: - multidim = False + # maxlags larger than the maximum separating distance will be ignored + if maxlag is None or maxlag > np.max(distances): + maxlag = np.max(distances) - # calculate bm if needed - if bm is None and X is None: - raise AttributeError('If no bin matrix bm is given, you have to specify an array X of x,y coordinates.') - elif bm is None: - bm, bw = binify_even_width(X=X, maxlag=maxlag, **kwargs) + # filter for distances < maxlag + d = distances[np.where(distances <= maxlag)] - # check if the azimuth_deg is given - if azimuth_deg is not None: - if X is None: - raise ValueError('A directional Variogram can only be calculated, ' - 'if the coordinate tuples are given in list X.') - if azimuth_deg < 0 or azimuth_deg >= 360: - raise ValueError('The Azimuth has to be given in degrees.') - - # now calculate the band bounds - up = azimuth_deg + tolerance - lo = azimuth_deg - tolerance - while up >= 360: - up -= 360 - while lo < 0: - lo += 360 - - # make bm integers - _bm = bm.astype(int) - - # check the dimensions - if not (len(values) == bm.shape[0] and len(values) == bm.shape[1]): - raise ValueError('The values are not of same length as the bm axes.') - - # result container - bin_grp = list([list() for _ in range(np.max(_bm) + 1)]) - - if azimuth_deg is not None: - dir_m = direction(X=X) - - # i is the column, j is the row, both are indexing the same points - for i in range(len(_bm)): - for j in range(len(_bm)): -# if i >= j: -# continue # use only the upper triangle, omitting the mid-diagonal - # if the azimuth is within the band, append otherwise ignore - if azimuth_deg is not None: - if _in_bounds(dir_m[i, j], lo, up): - if multidim: -# bin_grp[_bm[i, j]].extend([*values[i], *values[j]]) - bin_grp[_bm[i, j]].extend(list(np.array(list(zip(values[i], values[j]))).flatten())) - else: - bin_grp[_bm[i, j]].extend([values[i], values[j]]) - - # non - directional - else: - # append the values at i and j to the correct bin array - if multidim: - bin_grp[_bm[i, j]].extend(list(np.array(list(zip(values[i], values[j]))).flatten())) - else: - bin_grp[_bm[i, j]].extend([values[i], values[j]]) - - # return - return bin_grp + return np.fromiter( + (np.percentile(d, (i / n) * 100) for i in range(1, n + 1)), dtype=float + ) def direction(X): diff --git a/skgstat/distance.py b/skgstat/distance.py deleted file mode 100644 index 014c93d..0000000 --- a/skgstat/distance.py +++ /dev/null @@ -1,138 +0,0 @@ -""" -Common distance functions for calculating the distance between two geometries, or a distance matrix for a set -of geometries are collected here. 
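Editorial note: a short sketch of the two lag-binning helpers added above. It assumes only their signatures from this diff; the flat distance array is built with scipy's `pdist`, matching the documented input (the upper triangle of the distance matrix).

.. code-block:: python

    import numpy as np
    from scipy.spatial.distance import pdist
    from skgstat.binning import even_width_lags, uniform_count_lags

    # flat upper triangle of the distance matrix
    np.random.seed(42)
    coords = np.random.random((20, 2))
    distances = pdist(coords)

    # ten lag classes of identical width, up to the maximum distance
    width_edges = even_width_lags(distances, 10, None)

    # ten lag classes holding roughly the same number of point pairs
    count_edges = uniform_count_lags(distances, 10, None)

Both functions return the upper edges of the lag classes; with `maxlag=None` the maximum separating distance is used as the last edge.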
Most functions wrap either scipy.spatial.distance or shapely funcitonality -these functions accept numpy.matrix/numpy.ndarray objects and can be imported like: - -import skgstat # then skgstat.func-name -from scikit-gstat import skgstat # then skgstat.func-name - -""" -import numpy as np -from scipy.spatial.distance import pdist as scipy_pdist, squareform -# if numba is installed uncomment -# from numba import jit -from scipy.stats import rankdata - - -def point_dist(X, metric='euclidean', **kwargs): - """ - Wrapper for scipy.spatial.distance.pdist function. - Returns a distance matrix in squareform for a 1D array of x, y coordinates - - :param X: 1D array of x, y coordinates - :param metric: will be passed to scipy_pdist - As for now, only euclidean distances are implemented. Others will follow. - :param kwargs: will be passes to scipy_pdist - :return: distance matrix of the given coordinates - """ - # check X - _X = list(X) - - # switch metric - if metric.lower() == 'euclidean': - # check that all elements in the index have exactly a x and y coordinate - if any([not len(e) == 2 for e in _X]): - raise ValueError('The passed point data does not have a x and y coordinate for each point.') - - # data seems to be ok, return the distance matrix in squareform - return np.matrix(squareform(scipy_pdist(_X, metric=metric, **kwargs))) - - elif metric.lower() == 'rank': - return np.matrix(rankdata(point_dist(X, metric='euclidean'))) - - # this metric is not known - else: - raise ValueError("The metric '%s' is not known. Use one of: ['euclidean', 'rank']" % str(metric)) - - -def nd_dist(X, metric='euclidean'): - """ - Wrapper for the different distance functions. - - The wrapper checks the dimensionality and chooses the correct matrix function. - In case the metric is 'rank', the result of metric='euclidean' (either 2 or N dimensional) will be ranked using - scipy.stats.rankdata function. - - :param X: 1D array of x, y coordinates - :return: distance matrix of the given coordinates - """ - - _X = np.array(X) - - # switch metric - if metric.lower() == 'euclidean': - # check dimensionality of elements - if all([len(e) == 2 for e in _X]): - return np.matrix(squareform(_euclidean_dist2D(_X))) - # check if all coordinates have the same dimension and the array is not empty - elif len(set([len(e) for e in _X])) == 1 and len(_X[0]) != 0: - # N-Dimensional - return np.matrix(squareform(_euclidean_distND(_X))) - else: - raise ValueError("One or more Coordinates are missing.\nPlease provide the coordinates for all values ") - - elif metric.lower() == 'rank': - return np.matrix(rankdata(nd_dist(X, metric='euclidean'))) - - # this metric is not known - else: - raise ValueError("The metric '%s' is not known. Use one of: ['euclidean', 'rank']" % str(metric)) - - -# if numba is installed uncommment -#@jit -def _euclidean_dist2D(X): - """ - Returns the upper triangle of the distance matrix for an array of 2D coordinates. 
- - :param X: np.ndarray of the x, y coordinates - :return: upper triangle of the distance matrice - :param X: np.ndarray - :return: upper triangle of the distance matrix - """ - n = len(X) # number of pairs - N = int((n**2 - n) / 2) # dimension (n*n) - diagonal; half of it for upper triangle - - # define the return array - out = np.empty(N, dtype=np.float) - lastindex = 0 # indexing through out - - for i in np.arange(n): - for j in np.arange(i + 1, n): # skip diagonal (j=i), use only upper (j > i) - out[lastindex] = np.sqrt( (X[i][0] - X[j][0])**2 + (X[i][1] - X[j][1])**2 ) - lastindex += 1 - return out - - -#_d = lambda p1, p2: np.sqrt(np.sum([np.diff(tup)**2 for tup in zip(p1, p2)])) -def _d(p1, p2): - s = 0.0 - for i in range(len(p1)): - s += (p1[i] - p2[i])**2 - return np.sqrt(s) - -pyd = lambda p1, p2: np.sqrt(np.sum([np.diff(tup)**2 for tup in zip(p1, p2)])) - - -# if numba is installed uncommment -#@jit -def _euclidean_distND(X): - """ - Returns the upper triangle of the distance matrix for an array of N-dimensional coordinates. - :param X: np.array of n-dimensional coordinates - :return: upper triangle of the distance matrix - """ - - n = len(X) # number of pairs - N = int((n**2 - n) / 2) # dimension (n*n) - diagonal ; half of it for upper triangle - - # define the return array - out = np.empty(N, dtype=np.float) - lastindex = 0 # indexing through out - - for i in np.arange(n): - for j in np.arange(i + 1, n): # skip diagonal (j=i), use only upper (j > i) - out[lastindex] = _d(X[i], X[j]) - lastindex += 1 - return out - diff --git a/skgstat/estimator.py b/skgstat/estimator.py deleted file mode 100644 index 31cd470..0000000 --- a/skgstat/estimator.py +++ /dev/null @@ -1,290 +0,0 @@ -""" -This file collects different functions for estimating the semivariance of a group of values. These functions -can be used to fit a experimental variogram to a list of points. Each of the given functions just calculate -the estimate for one bin. If you want a Variogram, use the variogram functions or Class from the Variogram and vario -submodule, or order the bins yourself -""" -import numpy as np -from scipy.special import binom - - -def matheron(X, power=2): - """ - Return the Matheron Variogram of the given sample X. - X has to be an even-length array of point pairs like: x1, x1+h, x2, x2+h ...., xn, xn + h. - If X.ndim > 1, matheron will be called recursively and a list of Matheron Variances is returned. - - Matheron, G. (1965): Les variables regionalisées et leur estimation. Editions Masson et Cie, 212 S., Paris. - Matheron, G. (1962): Traité de Géostatistique Appliqué, Tonne 1. Memoires de Bureau de Recherches Géologiques et Miniéres, Paris. - - :param X: - :param power: - :return: - """ - _X = np.array(X) - - if any([isinstance(_, list) or isinstance(_, np.ndarray) for _ in _X]): - return np.array([matheron(_, power=power) for _ in _X]) - - # check even - if len(_X) % 2 > 0: - raise ValueError('The sample does not have an even length: {}'.format(_X)) - - # calculate: - if len(_X) == 0: - # would give ZeroDivisionError - return np.nan - return (1 / len(_X)) * np.nansum([np.power(_X[i] - _X[i + 1], power) for i in np.arange(0, len(_X) - 1, 2)]) - - -def cressie(X): - """ - Return the Cressie-Hawkins Variogram of the given sample X. - X has to be an even-length array of point pairs like: x1, x1+h, x2, x2+h ...., xn, xn + h. - If X.ndim > 1, cressie will be called recursively and a list of Cressie-Hawkins Variances is returned. - - Cressie, N., and D. 
Hawkins (1980): Robust estimation of the variogram. Math. Geol., 12, 115-125. - - :param X: - :return: - """ - _X = np.array(X) - - if any([isinstance(_, list) or isinstance(_, np.ndarray) for _ in _X]): - return np.array([cressie(_) for _ in _X]) - - # check even - if len(_X) % 2 > 0: - raise ValueError('The sample does not have an even length: {}'.format(_X)) - - # calculate - N = 0.5 * len(_X) - if N == 0: - # would raise ZeroDivisonError - return np.nan - - term1 = (1 / N) * np.nansum([ np.sqrt(np.abs(_X[i] - _X[i + 1])) for i in np.arange(0, len(_X) - 1, 2)]) - term2 = 0.457 + (0.494 / N) + (0.045 / np.power(N, 2)) - - return 0.5 * np.power(term1, 4) / term2 - - -def dowd(X): - """ - Return the Dowd Variogram of the given sample X. - X has to be an even-length array of point pairs like: x1, x1+h, x2, x2+h ...., xn, xn + h. - If X.ndim > 1, dowd will be called recursively and a list of Cressie-Hawkins Variances is returned. - - Dowd, P. A., (1984): The variogram and kriging: Robust and resistant estimators, in Geostatistics for Natural - Resources Characterization. Edited by G. Verly et al., pp. 91 - 106, D. Reidel, Dordrecht. - - :param X: - :return: - """ - _X = np.array(X) - - if any([isinstance(_, list) or isinstance(_, np.ndarray) for _ in _X]): - return np.array([dowd(_) for _ in _X]) - - # check even - if len(_X) % 2 > 0: - raise ValueError('The sample does not have an even length: {}'.format(_X)) - - # calculate - term1 = np.nanmedian([np.abs(_X[i] - _X[i + 1]) for i in np.arange(0, len(_X) - 1, 2)]) - return 0.5 * (2.198 * np.power(term1, 2)) - - -def genton(X): - r""" Genton robust semi-variance estimator - - Return the Genton Variogram of the given sample X. Genton is a highly - robust varigram estimator, that is designed to be location free and - robust on extreme values in X. - Genton is based on calculating kth order statistics and will for large - data sets be close or equal to the 25% quartile of all ordered point pairs - in X. - - Parameters - ---------- - X : list, numpy.ndarray - X has to be an even-length array of point pairs like: - x1, x1+h, x2, x2+h ...., xn, xn + h. - If X.ndim > 1, genton will be called recursively and a list of Genton - variances is returned. - - Returns - ------- - list - float - - Notes - ----- - - The Genton estimator is described in great detail in the original - publication [1]_ and befined as: - - .. math:: Q_{N_h} = 2.2191\{|V_i(h) - V_j(h)|; i < j\}_{(k)} - - and - - .. math:: k = \binom{[N_h / 2] + 1}{2} - - and - - .. math:: q = \binom{N_h}{2} - - where k is the kth qunatile of all q point pairs. For large N (k/q) will be - close to 0.25. For N >= 500, (k/q) is close to 0.25 by two decimals and - will therefore be set to 0.5 and the two binomial coeffiecents k, - q are not calculated. - - References - ---------- - - .. [1] Genton, M. G., (1998): Highly robust variogram estimation, - Math. Geol., 30, 213 - 221. - - """ - _X = np.array(X) - - if any([isinstance(_, list) or isinstance(_, np.ndarray) for _ in _X]): - return np.array([genton(_) for _ in _X]) - - # check even - if len(_X) % 2 > 0: - raise ValueError('The sample does not have an even length: {}' - .format(_X)) - else: - N = len(_X) / 2 - - # calculate - try: - y = [np.abs((_X[i] - _X[i + 1]) - (_X[j]) - _X[j + 1]) - for i in np.arange(0, len(_X), 2) - for j in np.arange(0, len(_X), 2) if i < j] - - # if N > 500, (k/q) will be ~ 1/4 anyway - if N >= 500: - k, q, = 1, 4 - else: - # get k k is binom(N(x)/2+1, 2) - k = binom(N / 2 + 1, 2) - - # get q. 
Genton needs the kth quantile of q - q = binom(N, 2) - - # return the kth percentile - return 0.5 * np.power(2.219 * np.percentile(y, (k / q)), 2) - except ZeroDivisionError: # pragma: no cover - return np.nan - - -def minmax(X): - """ - Returns the MinMax Semivariance of sample X pairwise differences. - X has to be an even-length array of point pairs like: x1, x1+h, x2, x2+h, ..., xn, xn+h. - - CAUTION: this is actually an changed behaviour to scikit-gstat<=0.1.5 - - :param X: - :return: - """ - _X = np.asarray(X) - - if any([isinstance(_, list) or isinstance(_, np.ndarray) for _ in _X]): - return [minmax(_) for _ in _X] - - # check even - if len(_X) % 2 > 0: - raise ValueError('The sample does not have an even length: {}'.format(_X)) - - # calculate the point pair values - # helper function - ppairs = lambda x: [np.abs(x[i] - x[i+1]) for i in np.arange(0, len(x), 2)] - p = ppairs(_X) - - return (np.nanmax(p) - np.nanmin(p)) / np.nanmean(p) - - -def percentile(X, p=50): - """ - Returns the wanted percentile of sample X pairwise differences. - X has to be an even-length array of point pairs like: x1, x1+h, x2, x2+h, ..., xn, xn+h. - - CAUTION: this is actually an changed behaviour to scikit-gstat<=0.1.5 - - :param X: np.ndarray with the given sample to calculate the Semivariance from - :param p: float with the percentile of sample X - :return: - """ - _X = np.asarray(X) - - if any([isinstance(_, list) or isinstance(_, np.ndarray) for _ in _X]): - return [percentile(_) for _ in _X] - - # check even - if len(_X) % 2 > 0: - raise ValueError('The sample does not have an even length: {}'.format(_X)) - - # calculate the point pair values - # helper function - ppairs = lambda x: [np.abs(x[i] - x[i+1]) for i in np.arange(0, len(x), 2)] - pairs = ppairs(_X) - - return np.percentile(pairs, q=p) - - -def entropy(X, bins=None): - """ - Use the Shannon Entropy H to describe the distribution of the given sample. - For calculating the Shannon Entropy, the bin edges are needed and can be passed as pk. - If pk is None, these edges will be calculated using the numpy.histogram function with bins='fq'. - This uses Freedman Diacons Estimator and is fairly resilient to outliers. - If the input data X is 2D (Entropy for more than one bin needed), it will derive the histogram once and - use the same edges in all bins. - CAUTION: this is actually an changed behaviour to scikit-gstat<=0.1.5 - - # TODO: handle the 0s in output of X - - :param X: np.ndarray with the given sample to calculate the Shannon entropy from - :param bins: The bin edges for entropy calculation, or an amount of even spaced bins - :return: - """ - _X = np.array(X) - - # helper function - ppairs = lambda x: [np.abs(x[i] - x[i+1]) for i in np.arange(0, len(x), 2)] - - if any([isinstance(_, (list, np.ndarray)) for _ in _X]): - # if bins is not set, use the histogram over the full value range - if bins is None: - # could not fiugre out a better way here. 
I need the values before calculating the entropy
-            # in order to use the full value range in all bins
-            allp = [ppairs(_) for _ in _X]
-            minv = np.min(list(map(np.min, allp)))
-            maxv = np.max(list(map(np.max, allp)))
-            bins = np.linspace(minv, maxv, 50).tolist() + [maxv]  # have no better idea to include the end edge as well
-        return np.array([entropy(_, bins=bins) for _ in _X])
-
-    # check even
-    if len(_X) % 2 > 0:
-        raise ValueError('The sample does not have an even length: {}'.format(_X))
-
-    # calculate the values
-    vals = ppairs(_X)
-
-    # claculate the bins
-    if bins is None:
-        bins = 15
-
-    # get the amounts
-    amt = np.histogram(vals, bins=bins)[0]
-
-    # add a very small value to the p, otherwise the log2 will be -inf.
-    p = (amt / np.sum(amt)) + 1e-5
-    info = lambda p: -np.log2(p)
-
-    # map info to p and return the inner product
-    return np.fromiter(map(info, p), dtype=np.float).dot(p)
-
diff --git a/skgstat/estimators.py b/skgstat/estimators.py
new file mode 100644
index 0000000..e1e4d05
--- /dev/null
+++ b/skgstat/estimators.py
@@ -0,0 +1,376 @@
+"""
+This file collects different functions for estimating the semivariance of a group of values. These functions
+can be used to fit an experimental variogram to a list of points. Each of the given functions just calculates
+the estimate for one bin. If you want a full variogram, use the Variogram class, or order the bins yourself
+"""
+import numpy as np
+from scipy.special import binom
+from numba import jit
+
+
+@jit
+def matheron(x):
+    r"""Matheron Semi-Variance
+
+    Calculates the Matheron Semi-Variance from an array of pairwise differences.
+    Returns the semi-variance for the whole array. In case a semi-variance is
+    needed for multiple groups, this function has to be mapped on each group.
+    That is the typical use case in geostatistics.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Array of pairwise differences. These values should be the distances
+        between pairwise observations in value space. If xi and x[i+h] fall
+        into the h separating distance class, x should contain abs(xi - x[i+h])
+        as an element.
+
+    Returns
+    -------
+    numpy.float64
+
+    Notes
+    -----
+
+    This implementation is done after the original publication [1]_ and the
+    notes on their application [2]_. Following [1]_, the semi-variance is
+    calculated as:
+
+    .. math::
+        \gamma (h) = \frac{1}{2N(h)} * \sum_{i=1}^{N(h)} (x)^2
+
+    with:
+
+    .. math::
+        x = Z(x_i) - Z(x_{i+1})
+
+    where x is exactly the input array x.
+
+    References
+    ----------
+
+    .. [1] Matheron, G. (1962): Traité de Géostatistique Appliqué, Tome 1.
+       Memoires de Bureau de Recherches Géologiques et Miniéres, Paris.
+
+    .. [2] Matheron, G. (1965): Les variables regionalisées et leur estimation.
+       Editions Masson et Cie, 212 S., Paris.
+
+    """
+    # convert
+    x = np.asarray(x)
+
+    # prevent ZeroDivisionError
+    if x.size == 0:
+        return np.nan
+
+    return (1. / (2 * x.size)) * np.sum(np.power(x, 2))
+
+
+@jit
+def cressie(x):
+    r""" Cressie-Hawkins Semi-Variance
+
+    Calculates the Cressie-Hawkins Semi-Variance from an array of pairwise
+    differences. Returns the semi-variance for the whole array. In case a
+    semi-variance is needed for multiple groups, this function has to be
+    mapped on each group. That is the typical use case in geostatistics.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Array of pairwise differences. These values should be the distances
+        between pairwise observations in value space.
If xi and x[i+h] fall
+        into the h separating distance class, x should contain abs(xi - x[i+h])
+        as an element.
+
+    Returns
+    -------
+    numpy.float64
+
+    Notes
+    -----
+
+    This implementation is done after the publication by Cressie and Hawkins
+    from 1980 [3]_:
+
+    .. math::
+        2\gamma (h) = \frac{(\frac{1}{N(h)} \sum_{i=1}^{N(h)} |x|^{0.5})^4}
+        {0.457 + \frac{0.494}{N(h)} + \frac{0.045}{N^2(h)}}
+
+    with:
+
+    .. math::
+        x = Z(x_i) - Z(x_{i+1})
+
+    where x is exactly the input array x.
+
+    References
+    ----------
+
+    .. [3] Cressie, N., and D. Hawkins (1980): Robust estimation of the
+       variogram. Math. Geol., 12, 115-125.
+
+
+    """
+    # convert
+    x = np.asarray(x)
+
+    # get the length
+    n = x.size
+
+    # prevent ZeroDivisionError
+    if n == 0:
+        return np.nan
+
+    # Nominator
+    nominator = np.power((1 / n) * np.sum(np.power(x, 0.5)), 4)
+
+    # Denominator
+    denominator = 0.457 + (0.494 / n) + (0.045 / n**2)
+
+    return nominator / (2 * denominator)
+
+
+def dowd(x):
+    r"""Dowd semi-variance
+
+    Calculates the Dowd semi-variance from an array of pairwise
+    differences. Returns the semi-variance for the whole array. In case a
+    semi-variance is needed for multiple groups, this function has to be
+    mapped on each group. That is the typical use case in geostatistics.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Array of pairwise differences. These values should be the distances
+        between pairwise observations in value space. If xi and x[i+h] fall
+        into the h separating distance class, x should contain abs(xi - x[i+h])
+        as an element.
+
+    Returns
+    -------
+    numpy.float64
+
+    Notes
+    -----
+    The Dowd estimator is based on the median of all pairwise differences in
+    each lag class and is therefore robust to extreme values at the cost of
+    variability.
+    This implementation is done after the publication [4]_:
+
+    .. math::
+
+        2\gamma (h) = 2.198 * {median(x)}^2
+
+    with:
+
+    .. math::
+        x = Z(x_i) - Z(x_{i+1})
+
+    where x is exactly the input array x.
+
+    References
+    ----------
+
+    .. [4] Dowd, P. A., (1984): The variogram and kriging: Robust and resistant
+       estimators, in Geostatistics for Natural Resources Characterization.
+       Edited by G. Verly et al., pp. 91 - 106, D. Reidel, Dordrecht.
+
+    """
+    # convert
+    x = np.asarray(x)
+
+    return 2.198 * np.nanmedian(x)**2
+
+
+@jit
+def genton(x):
+    r""" Genton robust semi-variance estimator
+
+    Return the Genton semi-variance of the given sample x. Genton is a highly
+    robust variogram estimator, that is designed to be location free and
+    robust on extreme values in x.
+    Genton is based on calculating kth order statistics and will for large
+    data sets be close or equal to the 25% quartile of all ordered point pairs
+    in X.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Array of pairwise differences. These values should be the distances
+        between pairwise observations in value space. If xi and x[i+h] fall
+        into the h separating distance class, x should contain abs(xi - x[i+h])
+        as an element.
+
+    Returns
+    -------
+    numpy.float64
+
+    Notes
+    -----
+    The Genton estimator is described in great detail in the original
+    publication [1]_ and defined as:
+
+    .. math:: Q_{N_h} = 2.2191\{|V_i(h) - V_j(h)|; i < j\}_{(k)}
+
+    and
+
+    .. math:: k = \binom{[N_h / 2] + 1}{2}
+
+    and
+
+    .. math:: q = \binom{N_h}{2}
+
+    where k is the kth quantile of all q point pairs. For large N (k/q) will be
+    close to 0.25.
For N >= 500, (k/q) is close to 0.25 by two decimals and
+    will therefore be set to 0.25 and the two binomial coefficients k,
+    q are not calculated.
+
+    References
+    ----------
+
+    .. [1] Genton, M. G., (1998): Highly robust variogram estimation,
+       Math. Geol., 30, 213 - 221.
+
+    """
+    x = np.array(x)
+
+    # get length
+    n = x.size
+
+    if n < 2:
+        return np.nan
+
+    # pre-populate y => we need (n*n -n) / 2
+    y = np.zeros(int((n*n - n) / 2))
+
+    # calculate
+    z = 0
+    for i in range(n):
+        for j in range(n):
+            if i < j:
+                y[z] = np.abs(x[i] - x[j])
+                z += 1
+
+    # if N > 500, (k/q) will be ~ 1/4 anyway
+    if n >= 500:
+        k, q, = 1, 4
+    else:
+        # get k; k is binom(N(x)/2+1, 2)
+        k = binom(n / 2 + 1, 2)
+
+        # get q. Genton needs the kth quantile of q
+        q = binom(n, 2)
+
+    # return the kth percentile
+    return 0.5 * np.power(2.219 * np.percentile(y, (k / q)), 2)
+
+
+def minmax(x):
+    """Minimum - Maximum Estimator
+
+    Returns a custom value: the difference of the maximum and the minimum
+    pairwise differences, normalized by the mean. MinMax will be very
+    sensitive to extreme values.
+
+    Only use this estimator if you know what you are doing. It is
+    experimental and might change its behaviour in a future version.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Array of pairwise differences. These values should be the distances
+        between pairwise observations in value space. If xi and x[i+h] fall
+        into the h separating distance class, x should contain abs(xi - x[i+h])
+        as an element.
+
+    Returns
+    -------
+    numpy.float64
+
+    """
+    x = np.asarray(x)
+
+    return (np.nanmax(x) - np.nanmin(x)) / np.nanmean(x)
+
+
+def percentile(x, p=50):
+    """Percentile estimator
+
+    Returns a given percentile as semi-variance.
+
+    Only use this estimator if you know what you are doing. It is
+    experimental and might change its behaviour in a future version.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Array of pairwise differences. These values should be the distances
+        between pairwise observations in value space. If xi and x[i+h] fall
+        into the h separating distance class, x should contain abs(xi - x[i+h])
+        as an element.
+    p : int
+        Desired percentile. Should be given as a whole number 0 < p < 100.
+
+    Returns
+    -------
+    np.float64
+
+    """
+    x = np.asarray(x)
+
+    return np.percentile(x, q=p)
+
+
+def entropy(x, bins=None):
+    """Shannon Entropy estimator
+
+    Calculates the Shannon Entropy H as a variogram estimator. It is highly
+    recommended to calculate the bins and explicitly set them as a list.
+    In case this function is called for more than one lag class in a
+    variogram, setting bins to None would result in different bin edges in
+    each lag class. This would be very difficult to interpret.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Array of pairwise differences. These values should be the distances
+        between pairwise observations in value space. If xi and x[i+h] fall
+        into the h separating distance class, x should contain abs(xi - x[i+h])
+        as an element.
+    bins : int, list, str
+        List of the bin edges used to calculate the empirical distribution of x.
+        If bins is a list, these values are used directly. In case bins is an
+        integer, as many even width bins will be calculated between the
+        minimum and maximum value of x. In case bins is a string, it will be
+        passed as bins argument to the numpy.histogram function.
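Editorial note: the estimators in this new module share one calling convention that differs from the deleted estimator.py: instead of interleaved point pairs, each function now takes a plain array of pairwise absolute differences for a single lag class. A minimal sketch, assuming only the signatures shown above:

.. code-block:: python

    import numpy as np
    from skgstat.estimators import matheron, cressie, dowd

    # pairwise absolute differences of all point pairs in one lag class
    np.random.seed(42)
    x = np.abs(np.random.normal(0, 1, 100))

    print('matheron:', matheron(x))
    print('cressie: ', cressie(x))
    print('dowd:    ', dowd(x))

To estimate a full experimental variogram, the chosen function is mapped over the groups of pairwise differences, one group per lag class, as described in the docstrings above.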
+
+    Returns
+    -------
+    entropy : numpy.float64
+        Shannon entropy of the given pairwise differences.
+
+    Notes
+    -----
+
+    """
+    x = np.asarray(x)
+
+    if bins is None:
+        bins = 15
+
+    # calculate the histogram
+    count = np.histogram(x, bins=bins)[0]
+
+    # get the probability and add 1e-5 to prevent log2 of -inf
+    p = (count / np.sum(count)) + 1e-5
+
+    # map info to p and return the inner product
+    return np.fromiter(map(info, p), dtype=np.float).dot(p)
+
+
+@jit
+def info(p):
+    return - np.log2(p)
diff --git a/skgstat/models.py b/skgstat/models.py
index 04e2952..3a2b844 100644
--- a/skgstat/models.py
+++ b/skgstat/models.py
@@ -1,192 +1,368 @@
-import numpy as np
 import math
-from scipy import special
-
-
-class Variogram_Wrapper:
-    """
-    The Variogram Wrapper should be used to decorate the mathematical expression of the variogram function. The typical
-    signature is
-
-        func(h, *args)
-
-    When decorated by Variogram_Wrapper, func will accept iterables as well and will be called in a list comprehension.
-    The Wrapper preserves the original __name__ attribute of the function.
-
-    """
-
-    def __init__(self, func):
-        """
+from functools import wraps
 
-        :param func:
-        """
-        self.func = func
-        self.__name__ = func.__name__
+import numpy as np
+from scipy import special
+from numba import jit
 
-    def __call__(self, *args, **kwargs):
-        """
 
-        :param args:
-        :param kwargs:
-        :return:
-        """
+def variogram(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
         if hasattr(args[0], '__iter__'):
-            # this is an iterable
             new_args = args[1:]
-            return np.array([self.func(value, *new_args, **kwargs) for value in args[0]])
-#            return np.fromiter(map(self.func, args[0], *new_args, **kwargs), dtype=np.float)
+            mapping = map(lambda h: func(h, *new_args, **kwargs), args[0])
+            return np.fromiter(mapping, dtype=float)
         else:
-            return self.func(*args, **kwargs)
-
-# --- generic theoretical Variogram Models --- #
+            return func(*args, **kwargs)
+    return wrapper
+
+
+@variogram
+@jit
+def spherical(h, r, c0, b=0):
+    r"""Spherical Variogram function
+
+    Implementation of the spherical variogram function. Calculates the
+    dependent variable for a given lag (h). The nugget (b) defaults to be 0.
+
+    Parameters
+    ----------
+    h : float
+        Specifies the lag of separating distances that the dependent variable
+        shall be calculated for. It has to be a positive real number.
+    r : float
+        The effective range. Note this is not the range parameter! However,
+        for the spherical variogram the range and effective range are the same.
+    c0 : float
+        The sill of the variogram, where it will flatten out. The function
+        will not return a value higher than C0 + b.
+    b : float
+        The nugget of the variogram. This is the value of the dependent
+        variable at a distance of zero. This is usually attributed to
+        non-spatial variance.
+
+    Returns
+    -------
+    gamma : numpy.float64
+        Unlike in most variogram function formulas, which define the function
+        for :math:`2*\gamma`, this function will return :math:`\gamma` only.
+
+    Notes
+    -----
+    The implementation follows [10]_:
+
+    .. math::
+        \gamma = b + C_0 * \left(1.5 * \frac{h}{r} - 0.5 * \left(\frac{h}{r}\right)^3\right)
+
+    if :math:`h < r`, and
+
+    .. math::
+        \gamma = b + C_0
+
+    otherwise. Here, r is the effective range, which for the spherical
+    variogram is identical to the range parameter a.
+
+    References
+    ----------
+    .. [10] Burgess, T. M., & Webster, R. (1980). Optimal interpolation
+       and isarithmic mapping of soil properties. I. The semi-variogram and
+       punctual kriging.
Journal of Soil Science, 31(2), 315–331,
+       http://doi.org/10.1111/j.1365-2389.1980.tb02084.x
 
-
-@Variogram_Wrapper
-# if numba is installed uncommment
-# @jit
-def spherical(h, a, C0, b=0):
-    """
-    The Spherical variogram function.
-
-    For the function definition see:
-    Burgess, T. M., & Webster, R. (1980). Optimal interpolation and isarithmic mapping of soil properties. I.
-    The semi-variogram and punctual kriging. Journal of Soil and Science, 31(2), 315–331, 7 figs, 1 table, 27 refs.
-    http://doi.org/10.1111/j.1365-2389.1980.tb02084.x
-
-    :param h: the separation lag
-    :param a: the range parameter (not effective range!)
-    :param C0: the Variogram sill
-    :param b: the Variogram nugget
-
-    :return: float, or list of; the semivariance at separation lag h
     """
     # prepare parameters
-    r = a / 1.
-    C0 -= b
+    a = r / 1.
 
-    if h <= a:
-        return b + C0 * ((1.5 * (h / r)) - (0.5 * ((h / r)**3.0)))
+    if h <= r:
+        return b + c0 * ((1.5 * (h / a)) - (0.5 * ((h / a) ** 3.0)))
     else:
-        return b + C0
-
+        return b + c0
+
+
+@variogram
+@jit
+def exponential(h, r, c0, b=0):
+    r"""Exponential Variogram function
+
+    Implementation of the exponential variogram function. Calculates the
+    dependent variable for a given lag (h). The nugget (b) defaults to be 0.
+
+    Parameters
+    ----------
+    h : float
+        Specifies the lag of separating distances that the dependent variable
+        shall be calculated for. It has to be a positive real number.
+    r : float
+        The effective range. Note this is not the range parameter! For the
+        exponential variogram function the range parameter a is defined to be
+        :math:`a=\frac{r}{3}`. The effective range is the lag where 95% of the
+        sill is reached. This is needed as the sill is only approached
+        asymptotically by an exponential function.
+    c0 : float
+        The sill of the variogram, where it will flatten out. The function
+        will not return a value higher than C0 + b.
+    b : float
+        The nugget of the variogram. This is the value of the dependent
+        variable at a distance of zero. This is usually attributed to
+        non-spatial variance.
+
+    Returns
+    -------
+    gamma : numpy.float64
+        Unlike in most variogram function formulas, which define the function
+        for :math:`2*\gamma`, this function will return :math:`\gamma` only.
+
+    Notes
+    -----
+    The implementation follows [11]_ and [12]_:
+
+    .. math::
+        \gamma = b + C_0 * \left(1 - e^{-\frac{h}{a}}\right)
+
+    a is the range parameter, that can be calculated from the
+    effective range r as: :math:`a = \frac{r}{3}`.
+
+    References
+    ----------
+
+    .. [11] Cressie, N. (1993): Statistics for spatial data.
+       Wiley Interscience.
+
+    .. [12] Chiles, J.P., Delfiner, P. (1999). Geostatistics. Modeling Spatial
+       Uncertainty. Wiley Interscience.
 
-@Variogram_Wrapper
-def exponential(h, a, C0, b=0):
-    """
-    The Exponential variogram function.
-
-    :param h: the separation lag
-    :param a: the range parameter (not effective range!)
-    :param C0: the Variogram sill
-    :param b: the Variogram nugget
-
-    :return: float, or list of; the semivariance at separation lag h
     """
     # prepare parameters
-    r = a / 3.
-    C0 -= 0
-
-    try:
-        return b + C0 * (1. - math.exp(-(h / r)))
-    except:
-        return b + C0
-
-@Variogram_Wrapper
-# if numba is installed uncommment
-# @jit
-def gaussian(h, a, C0, b=0):
-    """
-    The Gaussian variogram function.
-
-    :param h: the separation lag
-    :param a: the range parameter (not effective range!)
-    :param C0: the Variogram sill
-    :param b: the Variogram nugget
+    a = r / 3.
+
+    return b + c0 * (1. - math.exp(-(h / a)))
+
+
+@variogram
+@jit
+def gaussian(h, r, c0, b=0):
+    """ Gaussian Variogram function
+
+    Implementation of the Gaussian variogram function. Calculates the
+    dependent variable for a given lag (h). The nugget (b) defaults to be 0.
+
+    Parameters
+    ----------
+    h : float
+        Specifies the lag of separating distances that the dependent variable
+        shall be calculated for. It has to be a positive real number.
+    r : float
+        The effective range. Note this is not the range parameter! For the
+        Gaussian variogram function the range parameter a is defined to be
+        :math:`a=\frac{r}{2}`. The effective range is the lag where 95% of the
+        sill is reached. This is needed as the sill is only approached
+        asymptotically by the Gaussian function.
+    c0 : float
+        The sill of the variogram, where it will flatten out. The function
+        will not return a value higher than C0 + b.
+    b : float
+        The nugget of the variogram. This is the value of the dependent
+        variable at a distance of zero. This is usually attributed to
+        non-spatial variance.
+
+    Returns
+    -------
+    gamma : numpy.float64
+        Unlike in most variogram function formulas, which define the function
+        for :math:`2*\gamma`, this function will return :math:`\gamma` only.
+
+    Notes
+    -----
+
+    This implementation follows [12]_:
+
+    .. math::
+        \gamma = b + c_0 * \left(1 - e^{-\frac{h^2}{a^2}}\right)
+
+    a is the range parameter, that can be calculated from the
+    effective range r as: :math:`a = \frac{r}{2}`.
+
+    References
+    ----------
+
+    .. [12] Chiles, J.P., Delfiner, P. (1999). Geostatistics. Modeling Spatial
+       Uncertainty. Wiley Interscience.
 
-    :return: float, or list of; the semivariance at separation lag h
     """
     # prepare parameters
-    r = a / np.sqrt(3)
-    C0 -= b
+    a = r / 2.
+
+    return b + c0 * (1. - math.exp(- (h ** 2 / a ** 2)))
+
+
+@variogram
+@jit
+def cubic(h, r, c0, b=0):
+    """Cubic Variogram function
+
+    Implementation of the Cubic variogram function. Calculates the
+    dependent variable for a given lag (h). The nugget (b) defaults to be 0.
+
+    Parameters
+    ----------
+    h : float
+        Specifies the lag of separating distances that the dependent variable
+        shall be calculated for. It has to be a positive real number.
+    r : float
+        The effective range. Note this is not the range parameter! However,
+        for the cubic variogram the range and effective range are the same.
+    c0 : float
+        The sill of the variogram, where it will flatten out. The function
+        will not return a value higher than C0 + b.
+    b : float
+        The nugget of the variogram. This is the value of the dependent
+        variable at a distance of zero. This is usually attributed to
+        non-spatial variance.
+
+    Returns
+    -------
+    gamma : numpy.float64
+        Unlike in most variogram function formulas, which define the function
+        for :math:`2*\gamma`, this function will return :math:`\gamma` only.
+
+    Notes
+    -----
+
+    The implementation is:
+
+    .. math::
+
+        \gamma = b + c_0 * \left[
+            7 * \left(\frac{h^2}{a^2}\right) -
+            \frac{35}{4} * \left(\frac{h^3}{a^3}\right) +
+            \frac{7}{2} * \left(\frac{h^5}{a^5}\right) -
+            \frac{3}{4} * \left(\frac{h^7}{a^7}\right)
+        \right]
+
+    a is the range parameter. For the cubic function, the effective range and
+    range parameter are the same.
 
-    return b + C0 * (1. - math.exp(- (h ** 2 / r ** 2)))
-
-
-@Variogram_Wrapper
-# if numba is installed uncommment
-# @jit
-def cubic(h, a, C0, b=0):
-    """
-    The Cubic Variogram function
-
-    :param h: the separation lag
-    :param a: the range parameter (not effective range!)
-    :param C0: the Variogram sill
-    :param b: the Variogram nugget
     """
     # prepare parameters
-    C0 -= b
+    a = r / 1.
 
-    if h <= a:
-        return b + C0 * ( (7*(h**2 / a**2)) - ((35/4)*(h**3/a**3)) + ((7/2)*(h**5/a**5)) - ((3/4)*(h**7/a**7)) )
+    if h <= r:
+        return b + c0 * ((7 * (h ** 2 / a ** 2)) -
+                         ((35 / 4) * (h ** 3 / a ** 3)) +
+                         ((7 / 2) * (h ** 5 / a ** 5)) -
+                         ((3 / 4) * (h ** 7 / a ** 7)))
     else:
-        return b + C0
-
+        return b + c0
+
+
+@variogram
+@jit
+def stable(h, r, c0, s, b=0):
+    r"""Stable Variogram function
+
+    Implementation of the stable variogram function. Calculates the
+    dependent variable for a given lag (h). The nugget (b) defaults to be 0.
+
+    Parameters
+    ----------
+    h : float
+        Specifies the lag of separating distances that the dependent variable
+        shall be calculated for. It has to be a positive real number.
+    r : float
+        The effective range. Note this is not the range parameter! For the
+        stable variogram function the range parameter a is defined to be
+        :math:`a = \frac{r}{3^{\frac{1}{s}}}`. The effective range is the lag
+        where 95% of the sill is reached. This is needed as the sill is
+        only approached asymptotically by the e-function part of the stable
+        model.
+    c0 : float
+        The sill of the variogram, where it will flatten out. The function
+        will not return a value higher than C0 + b.
+    s : float
+        Shape parameter. For s <= 2 the model will be shaped more like an
+        exponential or spherical model, for s > 2 it will be shaped more like
+        a Gaussian function.
+    b : float
+        The nugget of the variogram. This is the value of the dependent
+        variable at a distance of zero. This is usually attributed to
+        non-spatial variance.
+
+    Returns
+    -------
+    gamma : numpy.float64
+        Unlike in most variogram function formulas, which define the function
+        for :math:`2*\gamma`, this function will return :math:`\gamma` only.
+
+    Notes
+    -----
+    The implementation is:
+
+    .. math::
+        \gamma = b + C_0 * \left(1 - e^{-\left(\frac{h}{a}\right)^s}\right)
+
+    a is the range parameter and is calculated from the effective range r as:
+
+    .. math::
+        a = \frac{r}{3^{\frac{1}{s}}}
 
-@Variogram_Wrapper
-# if numba is installed uncommment
-# @jit
-def stable(h, a, C0, s, b=0):
-    """
-    The Stable Variogram function.
-
-    :param h:
-    :param a:
-    :param C0:
-    :param s:
-    :param b:
-    :return:
     """
     # prepare parameters
-    r = a * math.pow(3, 1 / s)
-    C0 -= b
-
-    if s > 2:
-        s = 2
-    return b + C0 * (1. - math.exp(- math.pow(h / r, s)) )
-
+    a = r / np.power(3, 1 / s)
+
+#    if s > 2:
+#        s = 2
+    return b + c0 * (1. - math.exp(- math.pow(h / a, s)))
+
+
+@variogram
+@jit
+def matern(h, r, c0, s, b=0):
+    r"""Matérn Variogram function
+
+    Implementation of the Matérn variogram function. Calculates the
+    dependent variable for a given lag (h). The nugget (b) defaults to be 0.
+
+    Parameters
+    ----------
+    h : float
+        Specifies the lag of separating distances that the dependent variable
+        shall be calculated for. It has to be a positive real number.
+    r : float
+        The effective range. Note this is not the range parameter! For the
+        Matérn variogram function the range parameter a is defined to be
+        :math:`a = \frac{r}{2}`. The effective range is the lag
+        where 95% of the sill is reached. This is needed as the sill is
+        only approached asymptotically by the Matérn model.
+    c0 : float
+        The sill of the variogram, where it will flatten out. The function
+        will not return a value higher than C0 + b.
+    s : float
+        Smoothness parameter. The smoothness parameter can shape a smooth or
+        rough variogram function.
A value of 0.5 will yield the exponential
+        function, while a smoothness of +inf is exactly the Gaussian model.
+        Typically a value of 10 is close enough to Gaussian shape to simulate
+        its behaviour. Low values are considered to describe a 'rough' random
+        field, while larger values describe a 'smooth' one.
+    b : float
+        The nugget of the variogram. This is the value of the dependent
+        variable at a distance of zero. This is usually attributed to
+        non-spatial variance.
+
+    Returns
+    -------
+    gamma : numpy.float64
+        Unlike in most variogram function formulas, which define the function
+        for :math:`2*\gamma`, this function will return :math:`\gamma` only.
+
+    Notes
+    -----
+    The formula and references will follow.
 
-@Variogram_Wrapper
-# if numba is installed uncommment
-# @jit
-def matern(h, a, C0, s, b=0):
-    """
-    The Matérn model.
-
-    For Matérn function see:
-    Minasny, B., & McBratney, A. B. (2005). The Matérn function as a general model for soil variograms.
-    Geoderma, 128(3–4 SPEC. ISS.), 192–207. http://doi.org/10.1016/j.geoderma.2005.04.003.
-
-    :param h: lag
-    :param a: range
-    :param C0: sill
-    :param s: smoothness parameter
-    :param b: nugget
-    :return:
     """
     # prepare parameters
-    r = a
-    C0 -= b
-
-    return b + C0 * (1 - ( (1 / (np.power(2, s - 1) * special.gamma(s))) * np.power(h / r, s) * special.kv(s, h / r) ))
-
-
-# --- Adaptions using no nugget effect --- #
-def debug_spherical(h, a, C0):
-    if isinstance(h, list) or isinstance(h, np.ndarray):
-        return np.array([debug_spherical(_, a, C0) for _ in h])
-    else:
-        if h <= a:
-            return C0 * ((1.5*h/a) - (0.5*(h/a)))**3
-        else:
-            return C0
+    # TODO: depend a on s, for 0.5 should be 3, above 10 should be 3
+    a = r / 2.
+
+    # calculate
+    return b + c0 * (1. - (2 / special.gamma(s)) *
+                     np.power((h * np.sqrt(s)) / a, s) *
+                     special.kv(s, 2 * ((h * np.sqrt(s)) / a))
+                     )
diff --git a/skgstat/optimize.py b/skgstat/optimize.py
deleted file mode 100644
index d958613..0000000
--- a/skgstat/optimize.py
+++ /dev/null
@@ -1,161 +0,0 @@
-"""
-This module defines optimization classes for variograms. The class can be used to test all variogram combinations
-on a dataset and evaluate by the given model fit parameter.
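Editorial note: a usage sketch for the rewritten model functions above. It assumes only the new signatures from this diff (effective range r, sill c0, optional nugget b) and the variogram decorator, which maps the scalar functions over iterables:

.. code-block:: python

    import numpy as np
    from skgstat.models import spherical, exponential

    # lags as an array; the variogram decorator maps over it element-wise
    h = np.linspace(0, 100, 50)

    # effective range of 40, sill of 1.5, no nugget
    gamma_sph = spherical(h, 40., 1.5)

    # same parameters; internally the exponential model uses a = r / 3
    gamma_exp = exponential(h, 40., 1.5)

Note that all models now take the effective range, not the range parameter; the conversion happens inside each function.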
-""" -from .distance import nd_dist -from skgstat import binify_even_width, Variogram -import numpy as np -import sys, itertools, warnings -from matplotlib.backends.backend_pdf import PdfPages -import matplotlib.pyplot as plt - - -class VariogramFitter: - """ - - """ - - def __init__(self, coordinates=None, values=None, verbose=True, eval='RMSE', autorun=False, - dm=None, bm=None, normalize=True, fit_method='lm', pec_punish=1.0, - dm_func=nd_dist, bm_func=binify_even_width, is_directional=False, azimuth=0, tolerance=45.0, - use_nugget=[False, True], estimator=['matheron', 'cressie', 'dowd'], - model=['spherical', 'exponential', 'gaussian', 'cubic', 'stable', 'matern'], N=[8, 10, 15, 20], - maxlag=[0.1, 0.25, 0.3, 0.5, 0.75]): - - # set verbosity and evaluation parameter - self.verbose=verbose - self.eval=eval - - # set the static parameters - self.static = dict( - coordinates=coordinates, - values=values, - dm=dm, - bm=bm, - normalize=normalize, - fit_method=fit_method, - pec_punish=pec_punish, - dm_func=dm_func, - bm_func=bm_func, - is_directional=is_directional, - azimuth=azimuth, - tolerance=tolerance - ) - - # check the optimization parameters for data tyoe - # if list or tuple, optimize, else add to static - self.params = dict( - use_nugget=use_nugget, - estimator=estimator, - model=model, - N=N, - maxlag=maxlag - ) - - # move all params with only one option to the static dict - keys = list(self.params.keys()) - for k in keys: - if not isinstance(self.params[k], (list, tuple)): - # this has only one option - self.static[k] = self.params[k] - del self.params[k] - - elif len(self.params[k]) == 1: - self.static[k] = self.params[k][0] - del self.params[k] - - # combine the params - self.names = list(self.params.keys()) - self.combinations = list(itertools.product(*self.params.values())) - self.n = len(self.combinations) - - # build the result container - self.V = list() - self.e = list() - self.error = list() - - if autorun: - self.Variogram() - - def run(self): - """ - - :return: - """ - # reset the result container - self.V = list() - self.e = list() - self.error = list() - - for i, combination in enumerate(self.combinations): - self.__build_variogram(combination=combination) - - if self.verbose: - sys.stdout.write('%d/%d (%.1f%%) calculated.\n' % (i, self.n, (i+1) / self.n *100)) - - def Variogram(self): - """ - - :return: - """ - self.run() - - # find the best Variogram - idx = np.nanargmin(self.e) - - return self.V[idx] - - def plot(self, path): - """ - - :param path: - :return: - """ - self.run() - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - with PdfPages(path) as pdf: - for i, tup in enumerate(zip(self.V, self.e)): - V, e = tup - if V is not None: - V.plot(); - plt.subplots_adjust(right=0.7) - plt.figtext(0.73, 0.73, '%s: %.2f' % (self.eval, e), fontsize=14) - plt.figtext(0.73, 0.45, str(V), fontsize=10) - plt.figtext(0.73, 0.29, '\n'.join(['%s: %s' % (n, str(v)) for n,v in zip(self.names, self.combinations[i]) ]), fontsize=10) - pdf.savefig(plt.gcf()) - else: - f,ax = plt.subplot(1,1) - - plt.figtext(1, 0.73, 'No Result for combination: ', fontsize=14) - plt.figtext(0.1, 0.45, '\n'.join(['%s: %s' % (n, str(v)) for n,v in zip(self.names, self.combinations[i]) ]), fontsize=10) - pdf.savefig() - - if self.verbose: - sys.stdout.write('%d/%d plots drawn.\n' % (i+1, self.n)) - - - def __build_variogram(self, combination): - """ - Return the Variogram of the given combination. 
This combination tuple has to align with the self.names property - - :param combination: - :return: - """ - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - try: - # build the variogram - V = Variogram(**self.static, **{n:v for n,v in zip(self.names, combination)}) - - # evaluate the result - e = getattr(V, self.eval) - - except Exception as err: - V = None - e = np.NaN - self.error.append([combination, str(err)]) - finally: - # append - self.V.append(V) - self.e.append(e) \ No newline at end of file diff --git a/skgstat/tests/Variogram.py b/skgstat/tests/Variogram.py new file mode 100644 index 0000000..ab306a5 --- /dev/null +++ b/skgstat/tests/Variogram.py @@ -0,0 +1,81 @@ +import unittest + +import numpy as np +from numpy.testing import assert_array_almost_equal + +from skgstat import Variogram + + +class TestVariogram(unittest.TestCase): + def setUp(self): + # set up default values, whenever c and v are not important + np.random.seed(42) + self.c = np.random.gamma(10, 4, (30, 2)) + np.random.seed(42) + self.v = np.random.normal(10, 4, 30) + + def test_standard_settings(self): + V = Variogram(self.c, self.v) + + for x, y in zip(V.parameters, [439.405, 281.969, 0]): + self.assertAlmostEqual(x, y, places=3) + + def test_pass_median_maxlag_on_instantiation(self): + np.random.seed(1312) + c = np.random.gamma(5, 1, (50, 2)) + + np.random.seed(1312) + v = np.random.weibull(5, 50) + + V = Variogram(c, v, maxlag='median') + + for x, y in zip(V.parameters, [1.914077, 0.002782, 0]): + self.assertAlmostEqual(x, y, places=6) + + def test_residuals(self): + V = Variogram(self.c, self.v) + assert_array_almost_equal( + V.residuals, + np.array([0.96, -1.19, -0.28, 2.61, -0.9, + -0.43, -0.1, -2.32, -8.61, 10.61]), + decimal=2 + ) + + def test_rmse(self): + V = Variogram(self.c, self.v) + + for model, rmse in zip( + ['spherical', 'gaussian', 'matern', 'stable'], + [4.4968, 4.4878, 4.4905, 4.4878] + ): + V.set_model(model) + self.assertAlmostEqual(V.rmse, rmse, places=4) + + def test_mean_residual(self): + V = Variogram(self.c, self.v) + + for model, mr in zip( + ['spherical', 'cubic', 'matern', 'stable'], + [2.8006, 2.711, 2.7433, 2.7315] + ): + V.set_model(model) + self.assertAlmostEqual(V.mean_residual, mr, places=4) + + def test_nrmse(self): + V = Variogram(self.c, self.v, n_lags=15) + + for model, nrmse in zip( + ['spherical', 'gaussian', 'stable', 'exponential'], + [0.4751, 0.4784, 0.4598, 0.4695] + ): + V.set_model(model) + self.assertAlmostEqual(V.nrmse, nrmse, places=4) + + def test_nrmse_r(self): + V = Variogram(self.c, self.v, estimator='cressie') + + self.assertAlmostEqual(V.nrmse_r, 0.40796, places=5) + + +if __name__ == '__main__': + unittest.main() diff --git a/skgstat/tests/__init__.py b/skgstat/tests/__init__.py index 5d12c9e..bd63ca5 100644 --- a/skgstat/tests/__init__.py +++ b/skgstat/tests/__init__.py @@ -1,5 +1,3 @@ -from skgstat.tests.distance import TestPointDist, TestNdDist -from skgstat.tests.binning import TestBinifyEvenWidth, TestBinifyEvenBin, \ - TestGrouToBin, TestInBounds from skgstat.tests.estimator import TestEstimator -from skgstat.tests.models import TestModels +from skgstat.tests.models import TestModels, TestVariogram +from skgstat.tests.binning import TestEvenWidth, TestUniformCount diff --git a/skgstat/tests/binning.py b/skgstat/tests/binning.py index c3d50e3..f1af6e4 100644 --- a/skgstat/tests/binning.py +++ b/skgstat/tests/binning.py @@ -1,384 +1,59 @@ -""" -PyUnit Tests for the geostat.binning functions - -TODO -Save arrays in another file? 
-Test direction and _in_bounds -""" - import unittest -import sys -from contextlib import contextmanager -from io import StringIO import numpy as np from numpy.testing import assert_array_almost_equal -from skgstat.binning import binify_even_width, binify_even_bin, group_to_bin, direction, _in_bounds - -# TODO: test only against the first (or any other significant) line of results -# Result arrays -result_binify_even_width_bm = np.array( - [0., 3., 6., 9., 0., 3., 6., 9., 0., 3., 6., 9., 0., - 3., 6., 9., 0., 3., 6., 9.]) - -result_binify_even_width_bw = np.array( - [0.42426407, 0.42426407, 0.42426407, 0.42426407, 0.42426407, - 0.42426407, 0.42426407, 0.42426407, 0.42426407, 0.42426407]) - -result_binify_even_width_bm_n6 = np.array( - [0., 2., 4., 5., 0., 2., 4., 5., 0., 2., 4., 5., 0., - 2., 4., 5., 0., 2., 4., 5.]) - -result_binify_even_width_bw_n6 = np.array( - [0.70710678, 0.70710678, 0.70710678, 0.70710678, 0.70710678, - 0.70710678]) - -binify_dm = np.array([0., 1.41421356, 2.82842712, 4.24264069, 0., - 1.41421356, 2.82842712, 4.24264069, 0., 1.41421356, - 2.82842712, 4.24264069, 0., 1.41421356, 2.82842712, - 4.24264069, 0., 1.41421356, 2.82842712, 4.24264069]) - -result_binify_even_width_bm_maxlag = np.array( - [0., 3., 7., 9., 0., 3., 7., 9., 0., 3., 7., 9., 0., - 3., 7., 9., 0., 3., 7., 9.]) - -result_binify_even_width_bw_maxlag = np.array( - [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4]) - -result_binify_even_bin_bm = np.array( - [0., 2., 6., 8., 0., 2., 6., 8., 0., 2., 6., 8., 0., - 2., 6., 8., 0., 2., 6., 8.]) - -result_binify_even_bin_bw = np.array([0., 0., 1.41421356, 0., 0., - 0., 1.41421356, 0., 1.41421356, 0.]) - -result_binify_even_bin_bm_n6 = np.array( - [0., 1., 3., 5., 0., 1., 3., 5., 0., 1., 3., 5., 0., - 1., 3., 5., 0., 1., 3., 5.]) - -result_binify_even_bin_bw_n6 = np.array([0., 1.41421356, 0., 1.41421356, 0., - 1.41421356]) - -result_binify_even_bin_bm_maxlag = np.array( - [0., 2., 6., 8., 0., 2., 6., 8., 0., 2., 6., 8., 0., - 2., 6., 8., 0., 2., 6., 8.]) - -result_binify_even_bin_bw_maxlag = np.array([0., 0., 1.41421356, 0., 0., - 0., 1.41421356, 0., 1.41421356, - 0.]) - -result_group_to_bin_azimuth_and_tolerance = [ - [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, - 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3], [], [], [], [], [], [], [], [], []] - - -@contextmanager -def get_stdout(): - outs, errs = StringIO(), StringIO() - stdout, stderr = sys.stdout, sys.stderr - try: - sys.stdout, sys.stderr = outs, errs - yield sys.stdout, sys.stderr - finally: - sys.stdout = stdout - sys.stderr = stderr +from skgstat.binning import even_width_lags, uniform_count_lags -class TestBinifyEvenWidth(unittest.TestCase): - def setUp(self): - self.coordinates = np.array([(0, 0), (1, 1), (2, 2), (3, 3)] * 5) - self.values = np.array([0, 1, 2, 3] * 5) - def test_bm(self): - """ - Testing binning matrix of binify_even_width Function - """ - res = np.asarray(binify_even_width(self.coordinates)[0]) - - assert_array_almost_equal(res[0],result_binify_even_width_bm) - self.assertEqual(res.size, 400) - self.assertEqual(res.shape, (20, 20)) - - def test_bw(self): - """ - Testing binning width array of 
binify_even_width Function - """ - - assert_array_almost_equal(np.asarray(binify_even_width(self.coordinates)[1]), - result_binify_even_width_bw) - - def test_bm_n6(self): - """ - Testing binning matrix of binify_even_width Function with 6 bins - """ - res = np.asarray(binify_even_width(self.coordinates, N=6)[0]) - - assert_array_almost_equal(res[0], result_binify_even_width_bm_n6) - self.assertEqual(res.size, 400) - self.assertEqual(res.shape, (20, 20)) - - def test_bw_n6(self): - """ - Testing binning width array of binify_even_width Function with 6 bins - """ - - assert_array_almost_equal(np.asarray(binify_even_width(self.coordinates, N=6)[1]), - result_binify_even_width_bw_n6) - - def test_bm_with_dm(self): - """ - Testing binning matrix of binify_even_width Function with a distance matrix - """ - res = np.asarray(binify_even_width(self.coordinates, dm=binify_dm)[0]) - - assert_array_almost_equal(res[0], result_binify_even_width_bm) - self.assertEqual(res.size, 20) - self.assertEqual(res.shape, (1, 20)) - - def test_bm_maxlag(self): - """ - Testing binning matrix of binify_even_width Function with a maxlag - """ - res = np.asarray(binify_even_width(self.coordinates, maxlag=4)[0]) - - assert_array_almost_equal(res[0], result_binify_even_width_bm_maxlag) - self.assertEqual(res.size, 400) - self.assertEqual(res.shape, (20, 20)) - - def test_bw_maxlag(self): - """ - Testing binning matrix of binify_even_width Function with a maxlag - """ - res = np.asarray(binify_even_width(self.coordinates, maxlag=4)[1]) +class TestEvenWidth(unittest.TestCase): + def test_normal(self): + np.random.seed(42) assert_array_almost_equal( - res, - result_binify_even_width_bw_maxlag + even_width_lags(np.random.normal(5, 1, 1000), 4, None), + np.array([2.21318287, 4.42636575, 6.63954862, 8.85273149]) ) - def test_misshaped_coordinates(self): - """ - Pass misshaped coordinates - """ - coords = list(self.coordinates) - coords[2] = (2, 2, 2) - with self.assertRaises(ValueError): - binify_even_width(coords) - - def test_missing_arguments(self): - """ - Provide too less arguments - - """ - with self.assertRaises(ValueError): - binify_even_width(self.coordinates, N=None, w=None) - - def test_warning_too_much_arguments(self): - """ - Raise a Warning, becasue too many arguments were passed - """ - with get_stdout() as (out, err): - binify_even_width(self.coordinates, N=5, w=1) - self.assertEqual( - out.getvalue().strip(), - 'Warning! 
w = 1 is ignored because N is already given' - ) - - -class TestBinifyEvenBin(unittest.TestCase): - def setUp(self): - self.coordinates = np.array([(0, 0), (1, 1), (2, 2), (3, 3)] * 5) - self.values = np.array([0, 1, 2, 3] * 5) - - def test_bm(self): - """ - Testing binning matrix of binify_even_bin Function - """ - res = np.asarray(binify_even_bin(self.coordinates)[0]) - - assert_array_almost_equal(res[0], result_binify_even_bin_bm) - self.assertEqual(res.size, 400) - self.assertEqual(res.shape, (20, 20)) - - def test_bw(self): - """ - Testing binning width array of binify_even_bin Function - """ - + def test_more_bins(self): + np.random.seed(42) assert_array_almost_equal( - np.asarray(binify_even_bin(self.coordinates)[1]), - result_binify_even_bin_bw + even_width_lags(np.random.normal(5, 1, 1000), 10, None), + np.array([0.88527315, 1.7705463, 2.65581945, 3.5410926, 4.42636575, + 5.3116388, 6.19691204, 7.08218519, 7.96745834, 8.8527314]) ) - def test_bm_n6(self): - """ - Testing binning matrix of binify_even_bin Function with 6 bins - """ - res = np.asarray(binify_even_bin(self.coordinates, N=6)[0]) - - assert_array_almost_equal(res[0], result_binify_even_bin_bm_n6) - self.assertEqual(res.size, 400) - self.assertEqual(res.shape, (20, 20)) - - def test_bw_n6(self): - """ - Testing binning width array of binify_even_bin Function with 6 bins - """ - + def test_maxlag(self): + np.random.seed(42) assert_array_almost_equal( - np.asarray(binify_even_bin(self.coordinates, N=6)[1]), - result_binify_even_bin_bw_n6 + even_width_lags(np.random.normal(5, 1, 1000), 4, 4.4), + np.array([1.1, 2.2, 3.3, 4.4]) ) - def test_bm_maxlag(self): - """ - Testing binning matrix of binify_even_bin Function with a maxlag - """ - res = np.asarray(binify_even_bin(self.coordinates, maxlag=4)[0]) - - assert_array_almost_equal(res[0], result_binify_even_bin_bm_maxlag) - self.assertEqual(res.size, 400) - self.assertEqual(res.shape, (20, 20)) - - def test_bw_maxlag(self): - """ - Testing binning width array of binify_even_bin Function with a maxlag - """ - + def test_too_large_maxlag(self): + np.random.seed(42) assert_array_almost_equal( - np.asarray(binify_even_bin(self.coordinates, maxlag=4)[1]), - result_binify_even_bin_bw_maxlag + even_width_lags(np.random.normal(5, 1, 1000), 4, 400), + np.array([2.21318287, 4.42636575, 6.63954862, 8.85273149]) ) - def test_misshaped_coordinates(self): - """ - Pass misshaped coordinates - """ - coords = list(self.coordinates) - coords[2] = (2, 2, 2) - with self.assertRaises(ValueError): - binify_even_bin(coords) - - -class TestGrouToBin(unittest.TestCase): - def setUp(self): - self.coordinates = np.array([(0, 0), (1, 1), (2, 2), (3, 3)] * 5) - self.values = np.array([0, 1, 2, 3] * 5) - - - def test_default(self): - """ - Testing group_to_bin function - """ - res = group_to_bin(self.values, X=self.coordinates) - self.assertEqual(len(res), 10) - self.assertEqual( - [len(_) for _ in res], - [200, 0, 0, 300, 0, 0, 200, 0, 0, 100] - ) - - def test_with_bm(self): - """ - Testing group_to_bin function - """ - - self.assertEqual( - group_to_bin([[1, 2], [3, 4]], bm=np.array([[0, 1], [0, 1]])), - [[1, 1, 2, 2, 3, 1, 4, 2], [1, 3, 2, 4, 3, 3, 4, 4]] - ) - self.assertEqual( - group_to_bin([[1, 1], [2, 2]], bm=np.array([[1, 1], [1, 0]])), - [[2, 2, 2, 2], [1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1]] - ) - - def test_with_maxlag(self): - """ - Testing group_to_bin function with maxlag - """ - res = group_to_bin(self.values, X=self.coordinates, maxlag=4) - self.assertEqual(len(res), 10) - self.assertEqual( 
- [len(_) for _ in res], - [200, 0, 0, 300, 0, 0, 0, 200, 0, 100] - ) - - def test_with_azimuth_and_tolerance(self): - """ - Testing group_to_bin function with azimuth and tolerance - """ - res_90 = group_to_bin( - self.values, - X=self.coordinates, - azimuth_deg=90, - tolerance=22.5 - ) - self.assertEqual( - [len(_) for _ in res_90], - [160, 0, 0, 0, 0, 0, 0, 0, 0, 0] - ) - - def test_azimuth_tolerance_negative_overflow(self): - res_270 = group_to_bin( - self.values, - X=self.coordinates, - azimuth_deg=270, - tolerance=90 - ) - self.assertEqual( - [len(_) for _ in res_270], - [0, 0, 0, 150, 0, 0, 100, 0, 0, 50] + def test_median_split(self): + np.random.seed(42) + assert_array_almost_equal( + even_width_lags(np.random.normal(5, 1, 1000), 2, None), + np.array([4.42636575, 8.85273149]) ) - def test_azimuth_tolerance_positive_overflow(self): - res_15 = group_to_bin( - self.values, - X=self.coordinates, - azimuth_deg=15, - tolerance=45 - ) - self.assertEqual( - [len(_) for _ in res_15], - [0, 0, 0, 150, 0, 0, 100, 0, 0, 50] +class TestUniformCount(unittest.TestCase): + def test_normal(self): + np.random.seed(42) + assert_array_almost_equal( + uniform_count_lags(np.random.normal(10, 2, 1000), 4, None), + np.array([8.7048, 10.0506, 11.2959, 17.7055]), + decimal=4 ) - def test_missing_coords_and_X(self): - """ - Raise Attribute error if X and coordinates is None - """ - with self.assertRaises(AttributeError): - group_to_bin(self.values) - - def test_set_invalid_azimuth(self): - with self.assertRaises(ValueError): - group_to_bin(self.values, X=self.coordinates, azimuth_deg=-899) - - def test_set_azimuth_without_coordinate(self): - with self.assertRaises(ValueError): - group_to_bin(self.values, - bm=self.coordinates, - X=None, - azimuth_deg=45 - ) - - def test_dimension_check(self): - with self.assertRaises(ValueError): - group_to_bin([0, 1, 2], np.array([(0, 0), (1, 0)])) - - -class TestInBounds(unittest.TestCase): - def setUp(self): - self.coordinates = np.array([(0, 0), (1, 1), (2, 2), (3, 3)] * 5) - self.values = np.array([0, 1, 2, 3] * 5) - - def test_in_bounds(self): - """ Testing in_bounds function with result = True - """ - - self.assertTrue(_in_bounds(50, 40, 10)) - self.assertFalse(_in_bounds(50, 10, 40)) +# TODO: put the other test for binning here if __name__ == '__main__': diff --git a/skgstat/tests/distance.py b/skgstat/tests/distance.py deleted file mode 100644 index 000687c..0000000 --- a/skgstat/tests/distance.py +++ /dev/null @@ -1,116 +0,0 @@ -""" -PyUnit Tests for the geostat.distance functions - -TODO -Test different Parameter Combinations -""" - -import unittest -import numpy as np -from numpy.testing import assert_allclose -from skgstat.distance import point_dist, nd_dist - -# Get the result arrays -result_2D = np.array([[0., 1., 1., 1.41421356, 4.24264069], - [1., 0., 1.41421356, 1., 3.60555128], - [1., 1.41421356, 0., 1., 3.60555128], - [1.41421356, 1., 1., 0., 2.82842712], - [4.24264069, 3.60555128, 3.60555128, 2.82842712, 0.]]) - -result_ND = np.array([[0., 10., 16., 6.], - [10., 0., 6., 4.], - [16., 6., 0., 10.], - [6., 4., 10., 0.]]) - -rank_2D = np.matrix([[3., 9.5, 9.5, 15.5, 24.5, 9.5, 3., 15.5, 9.5, - 21.5, 9.5, 15.5, 3., 9.5, 21.5, 15.5, 9.5, 9.5, - 3., 18.5, 24.5, 21.5, 21.5, 18.5, 3.]]) - -rank_ND = np.matrix([[2.5, 12.5, 15.5, 8.5, 12.5, 2.5, 8.5, 5.5, 15.5, - 8.5, 2.5, 12.5, 8.5, 5.5, 12.5, 2.5]]) - -class TestPointDist(unittest.TestCase): - def setUp(self): - self.coords = [(0, 0), (0, 1), (1, 0), (1, 1), (3, 3)] - - def test_point_dist(self): - """ - Test 
point_dist function with 'euclidean' metric. - """ - assert_allclose(np.asarray(point_dist(self.coords)), result_2D, atol=1e-5) - - def test_point_rank(self): - """ - Test point_dist function with 'rank' metric. - """ - assert_allclose(np.asarray(point_dist(self.coords, metric='rank')), - rank_2D, atol=0.1) - - def test_coordinate_check(self): - """ - Use a not allowed coordinate. - """ - coordinates = self.coords - coordinates[2] = (1, 2, 3, 4) - with self.assertRaises(ValueError): - point_dist(coordinates, metric='euclidean') - - def test_unknown_metric(self): - """ - Use a not known metric - """ - with self.assertRaises(ValueError): - point_dist(self.coords, metric='I am not a metric') - - -class TestNdDist(unittest.TestCase): - def setUp(self): - self.coords2d = [(0, 0), (0, 1), (1, 0), (1, 1), (3, 3)] - self.coords = [(0, 0, 0, 0), (5, 5, 5, 5), (8, 8, 8, 8), (3, 3, 3, 3)] - - def test_euclidean_dist2D(self): - """ - Test nd_dist function on 2D data with 'euclidean' metric. - """ - assert_allclose(np.asarray(nd_dist(self.coords2d)), result_2D, atol=0.1) - - def test_euclidean_distND(self): - """ - Test nd_dist function on n-dimensional data with 'euclidean' metric. - """ - assert_allclose(np.asarray(nd_dist(self.coords)), result_ND, atol=0.1) - - def test_rank_dist2D(self): - """ - Test nd_dist function on 2D data with 'rank' metric. - """ - assert_allclose(np.asarray(nd_dist(self.coords2d, metric='rank')), rank_2D, - atol=0.1) - - def test_rank_distND(self): - """ - Test nd_dist function on n-dimensional data with 'rank' metric. - """ - assert_allclose(np.asarray(nd_dist(self.coords, metric='rank')), - rank_ND, atol=0.1) - - def test_misshaped_coordinates(self): - """ - Mishape a coordinate, then a ValueError should get fired - """ - coordinates = self.coords - coordinates[1] = (1,2) # not enough dimensions - with self.assertRaises(ValueError): - nd_dist(coordinates) - - def test_unknown_metric(self): - """ - Use a unknown metric - """ - with self.assertRaises(ValueError): - nd_dist(self.coords, metric='I am not a metric') - - - -if __name__ == '__main__': - unittest.main() diff --git a/skgstat/tests/estimator.py b/skgstat/tests/estimator.py index 354bdee..6829378 100644 --- a/skgstat/tests/estimator.py +++ b/skgstat/tests/estimator.py @@ -1,259 +1,128 @@ """ -TODO -Make a new test for the entropy estimator -Test different Parameter Combinations """ import unittest -import numpy as np -from numpy.testing import assert_array_almost_equal -from skgstat.estimator import matheron, cressie, dowd, genton, minmax, percentile, entropy - -# result arrays - -result_matheron = np.array([5.00000000e-01, 0.00000000e+00, 5.00000000e+01, - 5.00000000e+03]) - -result_cressie = np.array([8.96700143e-01, 0.00000000e+00, 8.96700143e+01, - 8.96700143e+03]) - -result_dowd = np.array([1.09900000e+00, 0.00000000e+00, 1.09900000e+02, - 1.09900000e+04]) -result_minmax = np.array([1.9927489681047741, 1.8267020635804991, 1.5392934574169328, 2.0360906123190832]) - -result_percentile_r = np.array([6.3486663708926443, 6.5981915461999279, 4.7812030455283168, 7.2582516292816663]) -result_percentile = np.array([1.0, 0.0, 10.0, 100.0]) +import numpy as np -# entropies -result_uni_entropy = np.array([0.0081243, 0.0081243, 0.0081243, 0.0081243]) -result_uni_entropy_fd = np.array([-1.44270225e-05, -1.44270225e-05, -1.44270225e-05, - -1.44270225e-05]) -result_uni_entropy_5b = np.array([0.00064996, 0.00064996, 0.00064996, 0.00064996]) -result_uni_entropy_ar = np.array([0.00064996, 0.00064996, 0.00064996, 0.00064996]) 
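+# the estimators are numba-jitted; the tests unwrap the plain Python
+# kernels through the jit dispatcher's .py_func attribute where needed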
+from skgstat.estimators import matheron, cressie, dowd, genton +from skgstat.estimators import minmax, percentile, entropy, info -result_entropy = np.array([1.9295937, 2.32944639, 2.32944639, 2.32944639]) -result_entropy_fd = np.array([1.92211936, 1.37096112, 0.97094233, 1.37096112]) -result_entropy_5b = np.array([1.92211936, 1.92211936, 1.92211936, 1.92211936]) -result_entropy_ar = np.array([1.92211936, 1.52226666, 0.97144062, 1.52226666]) class TestEstimator(unittest.TestCase): def setUp(self): - """ - Setting up the values - """ + pass - self.grouped = [list(np.arange(10)), [5] * 10, list(np.arange(0, 100, 10)), - list(np.arange(0, 1000, 100))] + def test_matheron(self): + # extract actual estimator + e = matheron.py_func np.random.seed(42) - self.random_grouped = [list(np.random.gamma(10,2, 10)), list(np.random.gamma(4,4, 10)), - list(np.random.gamma(4, 2, 10)), list(np.random.gamma(10,5, 10))] - def test_matheron(self): - """ - Testing matheron estimator - """ - assert_array_almost_equal(matheron(self.grouped), result_matheron) - - def test_matheron_uneven_input(self): - """ - Raise the ValueError on uneven length input - """ - with self.assertRaises(ValueError): - matheron([0, 1, 2]) - - def test_matheron_nan_on_zerodivision(self): - """ - return nan on empty input - """ - self.assertTrue(np.isnan(matheron([]))) + self.assertAlmostEqual( + e(np.random.normal(0, 1, 10000)), + 0.50342, + places=6 + ) + + def test_matheron_nan(self): + # extract actual estimator + e = matheron.py_func + + self.assertTrue(np.isnan(e(np.array([])))) def test_cressie(self): - """ - Testing cressie estimator - """ - assert_array_almost_equal(cressie(self.grouped), result_cressie, - decimal=5) - - def test_cressie_uneven_input(self): - """ - Raise the ValueError on uneven length input - """ - with self.assertRaises(ValueError): - cressie([0, 1, 2]) - - def test_cressie_nan_on_zerodivision(self): - """ - return nan on empty input - """ - self.assertTrue(np.isnan(cressie([]))) + # extract actual estimator + e = cressie.py_func - def test_dowd(self): - """ - Testing dowd estimator - """ - assert_array_almost_equal(dowd(self.grouped), result_dowd) - - def test_dowd_uneven_input(self): - """ - Raise the ValueError on uneven length input - """ - with self.assertRaises(ValueError): - dowd([0, 1, 2]) - - def test_genton_single_value(self): - """ - Testing genton estimator with a single value calculation - - """ np.random.seed(42) - self.assertAlmostEqual( - genton(np.random.normal(4, 2, size=20)), - 1.97039693, - places=8 - ) - def test_genton_large_dataset(self): - """ - Testing genton on large dataset - """ - np.random.seed(42) self.assertAlmostEqual( - genton(np.random.normal(4,2, size=1000)), - 0.0329977, - places=8 + e(np.random.gamma(10, 4, 10000)), + 1686.7519, + places=4 ) + def test_cressie_nan(self): + # extract actual estimator + e = cressie.py_func - def test_minmax(self): - """ - Testing minmax estimator - """ - assert_array_almost_equal(minmax(self.random_grouped), result_minmax) - - def test_minmax_uneven_input(self): - """ - Raise the ValueError on uneven length input - """ - with self.assertRaises(ValueError): - minmax([0, 1, 2]) - - def test_percentile_random(self): - """ - Testing percentile estimator on randomized data - """ - assert_array_almost_equal(percentile(self.random_grouped), - result_percentile_r) - - def test_percentile_grouped(self): - """ - Testing percentile estimator on grouped data - """ - assert_array_almost_equal(percentile(self.grouped), result_percentile) - - def 
test_percentile_uneven_input(self): - """ - Raise the ValueError on uneven length input - """ - with self.assertRaises(ValueError): - percentile([0, 1, 2]) - - def test_entropy_uniform_default(self): - """ - Testing the entropy estimator on uniform distributions, - with and without gaps - - """ - assert_array_almost_equal(entropy(self.grouped), result_uni_entropy) - - def test_entropy_uniform_string(self): - """ - Testing entropy estimator with string as bin on uniform distributions - """ - assert_array_almost_equal( - np.asarray(entropy(self.grouped, bins='fd')), - result_uni_entropy_fd - ) + self.assertTrue(np.isnan(e([]))) - def test_entropy_uniform_integer(self): - """ - Testing entropy estimator with integer as bin on uniform distributions - """ - assert_array_almost_equal( - np.asarray(entropy(self.grouped, bins=5)), - result_uni_entropy_5b - ) + def test_dowd(self): + np.random.seed(1306) + x1 = np.random.weibull(14, 1000) + np.random.seed(1312) + x2 = np.random.gamma(10, 4, 100) - def test_entropy_uniform_list(self): - """ - Testing entropy estimator with list as bin on uniform distributions - """ - assert_array_almost_equal( - np.asarray(entropy(self.grouped, bins=[0, 0.1, 5, 10, 20, 100])), - result_uni_entropy_ar - ) + # test + self.assertAlmostEqual(dowd(x1), 2.0873, places=4) + self.assertAlmostEqual(dowd(x2), 3170.97, places=2) - def test_entropy_default(self): - """ - Testing entropy estimator with default settings - """ - assert_array_almost_equal( - np.asarray(entropy(self.random_grouped)), - result_entropy - ) + def test_genton(self): + # extract actual estimator + e = genton.py_func - def test_entropy_string(self): - """ - Testing entropy estimator with string as bin - """ - assert_array_almost_equal( - np.asarray(entropy(self.random_grouped, bins='fd')), - result_entropy_fd - ) + np.random.seed(42) + x1 = np.random.gamma(40, 2, 100) + np.random.seed(42) + x2 = np.random.gamma(30, 5, 1000) - def test_entropy_integer(self): - """ - Testing entropy estimator with integer as bin - """ - assert_array_almost_equal( - np.asarray(entropy(self.random_grouped, bins=5)), - result_entropy_5b - ) + self.assertAlmostEqual(e(x1), 0.0089969, places=7) + self.assertAlmostEqual(e(x2), 0.0364393, places=7) - def test_entropy_list(self): - """ - Testing entropy estimator with list as bin - """ - assert_array_almost_equal( - np.asarray( - entropy(self.random_grouped, bins=[0, 0.1, 5, 10, 20, 100]) - ), - result_entropy_ar - ) + def test_genton_nan(self): + # extract actual estimator + e = genton.py_func + + # genton cannot be solved for only one element + self.assertTrue(np.isnan(e([0.1]))) + + def test_minmax_skew(self): + # heavily skewed gamma + np.random.seed(1306) + x = np.random.gamma(15, 20, 100) + self.assertAlmostEqual(minmax(x), 1.5932, places=4) + + def test_minmax_pow(self): + # L-stable pareto + np.random.seed(2409) + x = np.random.pareto(2, 10) + self.assertAlmostEqual(minmax(x), 2.5, places=2) - def test_entropy_uneven_input(self): - """ - Raise the ValueError on uneven length input - """ - with self.assertRaises(ValueError): - entropy([0, 1, 2]) - - def test_entropy_single_value(self): - """ - Calculate a single value with bins=None. 
- """ + def test_percentile(self): np.random.seed(42) + x = np.abs(np.random.normal(0, 1, 100000)) + + self.assertAlmostEqual(percentile(x), 0.67588, places=5) + self.assertAlmostEqual(percentile(x, 20), 0.25277, places=5) + + def test_entropy_default_bins(self): + np.random.seed(42) + x = np.random.normal(5, 1, 10000) + + self.assertAlmostEqual(entropy(x, bins=None), 3.01, places=2) + + def test_entropy_custom_bins(self): + np.random.seed(123456789) + x = np.random.gamma(10, 5, 10000) + + # custom bins self.assertAlmostEqual( - entropy(np.random.normal(4, 2, size=150)), - 3.412321, places=6 + entropy(x, [5, 15, 50, 51, 52, 53, 54, 55, 56, 100, 120, 150]), + 1.823, places=3 ) - np.random.seed(1337) - self.assertNotAlmostEqual( - entropy(np.random.normal(4, 2, size=150)), - 3.412321, places=6 - ) + # default bins + self.assertAlmostEqual(entropy(x), 2.91, places=2) + + def test_internal_info(self): + # extract actual function + i = info.py_func + + # test + self.assertAlmostEqual(i(0.3), 1.74, places=2) + self.assertAlmostEqual(i(0.8), 0.32, places=2) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/skgstat/tests/models.py b/skgstat/tests/models.py index 5586d7f..415fa75 100644 --- a/skgstat/tests/models.py +++ b/skgstat/tests/models.py @@ -1,68 +1,174 @@ """ -TODO -test Variogram_Wrapper and debug_spherical """ import unittest -from skgstat.models import spherical, exponential, gaussian, cubic, stable, matern + +import numpy as np + +from skgstat.models import spherical, exponential +from skgstat.models import gaussian, cubic, stable, matern +from skgstat.models import variogram class TestModels(unittest.TestCase): def setUp(self): - """ - Setting up the values + self.h = np.array([5, 10, 30, 50, 100]) + + def test_spherical_default(self): + # extract the actual function + f = spherical.py_func + + result = [13.75, 20.0, 20.0, 20.0, 20.0] + + model = list(map(f, self.h, [10]*5, [20]*5)) + + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) + + def test_spherical_nugget(self): + # extract the actual function + f = spherical.py_func + + result = [15.44, 27.56, 33.0, 34.0, 35.0] + + # calculate + nuggets = [1, 2, 3, 4, 5] + model = list(map(f, self.h, [15] * 5, [30] * 5, nuggets)) + + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) + + def test_exponential_default(self): + # extract the actual function + f = exponential.py_func + + result = [5.18, 9.02, 16.69, 19., 19.95] + model = list(map(f, self.h, [50]*5, [20]*5)) + + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) + + def test_exponential_nugget(self): + # extract the actual function + f = exponential.py_func + + result = [7.64, 13.8, 26.31, 31.54, 34.8] + + # calculate + nuggets = [1, 2, 3, 4, 5] + model = list(map(f, self.h, [60] * 5, [30] * 5, nuggets)) + + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) + + def test_gaussian_default(self): + # extract the actual function + f = gaussian.py_func + + result = [0.96, 3.58, 16.62, 19.86, 20.] + model = list(map(f, self.h, [45]*5, [20]*5)) + + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) + + def test_gaussian_nugget(self): + # extract the actual function + f = gaussian.py_func + + result = [1.82, 5.15, 21.96, 32.13, 35.] 
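+        # expected semi-variances at lags [5, 10, 30, 50, 100] for a
+        # gaussian model with range 60 and sill 30, plus nuggets 1..5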
+ + # calculate + nuggets = [1, 2, 3, 4, 5] + model = list(map(f, self.h, [60] * 5, [30] * 5, nuggets)) + + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) + + def test_cubic_default(self): + # extract the actual function + f = cubic.py_func + + result = [6.13, 21.11, 88.12, 100., 100.] + model = list(map(f, self.h, [50]*5, [100]*5)) + + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) + + def test_cubic_nugget(self): + # extract the actual function + f = cubic.py_func + + result = [11.81, 34.74, 73., 74., 75.] + + # calculate + nuggets = [1, 2, 3, 4, 5] + model = list(map(f, self.h, [30] * 5, [70] * 5, nuggets)) + + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) + + def test_stable_default(self): + # extract the actual function + f = stable.py_func + + result = [9.05, 23.53, 75.2, 95.02, 99.98] + model = list(map(f, self.h, [50]*5, [100]*5, [1.5]*5)) + + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) + + def test_stable_nugget(self): + # extract the actual function + f = stable.py_func + + result = [8.77, 10.8, 12.75, 13.91, 14.99] - :param h: the separation lag - :param a: the range parameter (not effective range!) - :param C0: the Variogram sill - :param b: the Variogram nugget - """ + # calculate + nuggets = [1, 2, 3, 4, 5] + model = list(map(f, self.h, [20] * 5, [10] * 5, [0.5] * 5, nuggets)) - self.h = 1 - self.a = 2 - self.c0 = 3 - self.b = 4 - self.s = 5 + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) - def test_spherical(self): - """ - Testing spherical model - """ + def test_matern_default(self): + # extract the actual function + f = matern.py_func - self.assertAlmostEqual(spherical(self.h, self.a, self.c0, self.b), 3.3125) + result = [24.64, 43.2, 81.68, 94.09, 99.65] + model = list(map(f, self.h, [50]*5, [100]*5, [0.5]*5)) - def test_exponential(self): - """ - Testing exponential model - """ + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) - self.assertAlmostEqual(exponential(self.h, self.a, self.c0, self.b), 6.330609519554711) + def test_matern_nugget(self): + # extract the actual function + f = matern.py_func - def test_gaussian(self): - """ - Testing gaussian model - """ + result = [3.44, 8.52, 12.99, 14., 15.] - self.assertAlmostEqual(gaussian(self.h, self.a, self.c0, self.b), 3.472366552741015) + # calculate + nuggets = [1, 2, 3, 4, 5] + model = list(map(f, self.h, [20] * 5, [10] * 5, [8] * 5, nuggets)) - def test_cubic(self): - """ - Testing cubic model - """ + for r, m in zip(result, model): + self.assertAlmostEqual(r, m, places=2) - self.assertAlmostEqual(cubic(self.h, self.a, self.c0, self.b), 3.240234375) - def test_stable(self): - """ - Testing stable model - """ +class TestVariogram(unittest.TestCase): + def test_scalar(self): + @variogram + def scalar_function(a, b): + return a, b - self.assertAlmostEqual(stable(self.h, self.a, self.c0, self.s, self.b), 3.8512082197805455) + a, b = 1, 4 + self.assertEqual(scalar_function(1, 4), (a, b)) - def test_matern(self): - """ - Testing matern model - """ + def test_list(self): + @variogram + def adder(l, a): + return l + a - self.assertAlmostEqual(matern(self.h, self.a, self.c0, self.s, self.b), 3.984536090177115) + res = [5, 8, 12] + for r, c in zip(res, adder([1, 4, 8], 4)): + self.assertEqual(r, c) \ No newline at end of file
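
Usage sketch
~~~~~~~~~~~~
The patch ships no usage notes beyond the README changes, so the following
sketch is reconstructed from the new test suite alone. The keywords
(``n_lags``, ``maxlag='median'``, ``estimator='cressie'``) and the attributes
(``parameters``, ``rmse``, ``nrmse``, ``set_model``) all appear verbatim in
``skgstat/tests/Variogram.py``; everything else, including the exact meaning
of the fitted parameters, is an assumption.

.. code-block:: python

    import numpy as np
    from skgstat import Variogram

    # sample data, mirroring the fixtures in skgstat/tests/Variogram.py
    np.random.seed(42)
    coords = np.random.gamma(10, 4, (30, 2))
    np.random.seed(42)
    values = np.random.normal(10, 4, 30)

    # the rewritten class is configured through keywords exercised in the tests
    V = Variogram(coords, values, estimator='cressie', n_lags=15)

    print(V.parameters)   # fitted parameters, apparently [range, sill, nugget]
    print(V.rmse)         # one of the new goodness-of-fit measures

    # the theoretical model can be swapped on an existing instance
    V.set_model('matern')
    print(V.rmse)

The model functions are plain functions of the lag ``h``.
``test_spherical_default`` fixes the signature as
``spherical(h, effective_range, sill)`` with an optional nugget; the
``@variogram`` decorator tested at the end of ``skgstat/tests/models.py``
maps these scalar kernels over list input, while the raw, un-jitted kernels
stay reachable through ``.py_func``:

.. code-block:: python

    from skgstat.models import spherical

    # parameters taken from test_spherical_default: range 10, sill 20;
    # beyond the range the model levels out at the sill
    print([spherical.py_func(h, 10, 20) for h in [5, 10, 30, 50, 100]])
    # [13.75, 20.0, 20.0, 20.0, 20.0]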