-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Test showing that Danilo's implementation of Gaussian KDE leads to th…
…e same result as NumPy's, but with better numerical precision.
- Loading branch information
Danilo Enoque Ferreira De Lima
committed
Oct 24, 2018
1 parent
49a04a0
commit 8c4199e
Showing
1 changed file
with
87 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import numpy as np | ||
from scipy import stats | ||
import matplotlib.pyplot as plt | ||
import copy | ||
|
||
x1 = np.array([-7, -5, 1, 4, 5.]) | ||
|
||
|
||
kde = stats.gaussian_kde(x1) | ||
|
||
class GKDE: | ||
def __init__(self, value): | ||
self.value = copy.deepcopy(np.atleast_2d(value.astype(np.float64))) | ||
self.d, self.n = self.value.shape | ||
self.cov = np.atleast_2d(np.cov(self.value, rowvar = 1, bias = False)) | ||
self.invcov = np.linalg.inv(self.cov) | ||
# multiply cov by scotts factor^2 and invcov by scotts factor^-2 | ||
# this is the default in NumPy | ||
self.cov *= np.power(self.n, -2./(self.d+4.0)) | ||
self.invcov *= np.power(self.n, 2./(self.d+4.0)) | ||
# this is the Silverman one | ||
#self.cov *= np.power(self.n*(self.d+2)/4.0, -2./(self.d+4.0)) | ||
#self.invcov *= np.power(self.n*(self.d+2)/4.0, 2./(self.d+4.0)) | ||
# to be conservative, do not scale the covariance ... assume a unit covariance in all dimensions | ||
self.norm = np.sqrt(np.linalg.det(2*np.pi*self.cov)) * self.n | ||
|
||
def logpdf(self, points): | ||
points = np.atleast_2d(points) | ||
d, m = points.shape | ||
if d != self.d: | ||
if d == 1 and m == self.d: | ||
points = np.reshape(points, (self.d, 1)) | ||
d, m = points.shape | ||
else: | ||
print("Wrong dimensions") | ||
result = np.zeros((m,), dtype = np.float64) | ||
for i in range(m): | ||
diff = self.value - points[:, i, np.newaxis] | ||
tdiff = np.matmul(self.invcov, diff) | ||
energy = np.sum(diff * tdiff, axis = 0)*0.5 | ||
maxE = np.amax(-energy) | ||
result[i] = np.sum(np.exp(-energy - maxE), axis = 0) | ||
result[i] = maxE + np.log(result[i]) | ||
return result - np.log(self.norm) | ||
|
||
def evaluate(self, points): | ||
return np.exp(self.logpdf(points)) | ||
|
||
# return array with derivative of the function w.r.t. x_i, where i = 1..d, at points | ||
def jac(self, points): | ||
points = np.atleast_2d(points) | ||
d, m = points.shape | ||
if d != self.d: | ||
if d == 1 and m == self.d: | ||
points = np.reshape(points, (self.d, 1)) | ||
d, m = points.shape | ||
else: | ||
print("Wrong dimensions") | ||
result = np.zeros((1,m), dtype = np.float64) | ||
result_diff = np.zeros((d,m), dtype = np.float64) | ||
for i in range(m): | ||
diff = self.value - points[:, i, np.newaxis] | ||
#print("diff shape: ", diff.shape) | ||
|
||
kde_danilo = GKDE(x1) | ||
|
||
xs = np.linspace(-10, 10, num=50) | ||
y1 = kde(xs) | ||
kde.set_bandwidth(bw_method='silverman') | ||
y2 = kde(xs) | ||
kde.set_bandwidth(bw_method=kde.factor / 3.) | ||
y3 = kde(xs) | ||
kde.set_bandwidth(bw_method=1.0) | ||
y4 = kde(xs) | ||
|
||
y1_danilo = kde_danilo.evaluate(xs) | ||
|
||
fig = plt.figure() | ||
ax = fig.add_subplot(111) | ||
ax.plot(x1, np.ones(x1.shape) / (4. * x1.size), 'bo',label='Data points (rescaled)') | ||
ax.plot(xs, y1, label='Scott (default)') | ||
ax.plot(xs, y2, label='Silverman') | ||
ax.plot(xs, y3, label='Const (1/3 * Silverman)') | ||
#ax.plot(xs, y4, label='Const (1.0)') | ||
ax.plot(xs, y1_danilo, ':', label='Scott (by Danilo)') | ||
ax.legend() | ||
plt.show() |