Skip to content

Commit

Permalink
Test showing that Danilo's implementation of Gaussian KDE leads to th…
Browse files Browse the repository at this point in the history
…e same result as NumPy's, but with better numerical precision.
  • Loading branch information
Danilo Enoque Ferreira De Lima committed Oct 24, 2018
1 parent 49a04a0 commit 8c4199e
Showing 1 changed file with 87 additions and 0 deletions.
87 changes: 87 additions & 0 deletions test_kde.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import copy

x1 = np.array([-7, -5, 1, 4, 5.])


kde = stats.gaussian_kde(x1)

class GKDE:
def __init__(self, value):
self.value = copy.deepcopy(np.atleast_2d(value.astype(np.float64)))
self.d, self.n = self.value.shape
self.cov = np.atleast_2d(np.cov(self.value, rowvar = 1, bias = False))
self.invcov = np.linalg.inv(self.cov)
# multiply cov by scotts factor^2 and invcov by scotts factor^-2
# this is the default in NumPy
self.cov *= np.power(self.n, -2./(self.d+4.0))
self.invcov *= np.power(self.n, 2./(self.d+4.0))
# this is the Silverman one
#self.cov *= np.power(self.n*(self.d+2)/4.0, -2./(self.d+4.0))
#self.invcov *= np.power(self.n*(self.d+2)/4.0, 2./(self.d+4.0))
# to be conservative, do not scale the covariance ... assume a unit covariance in all dimensions
self.norm = np.sqrt(np.linalg.det(2*np.pi*self.cov)) * self.n

def logpdf(self, points):
points = np.atleast_2d(points)
d, m = points.shape
if d != self.d:
if d == 1 and m == self.d:
points = np.reshape(points, (self.d, 1))
d, m = points.shape
else:
print("Wrong dimensions")
result = np.zeros((m,), dtype = np.float64)
for i in range(m):
diff = self.value - points[:, i, np.newaxis]
tdiff = np.matmul(self.invcov, diff)
energy = np.sum(diff * tdiff, axis = 0)*0.5
maxE = np.amax(-energy)
result[i] = np.sum(np.exp(-energy - maxE), axis = 0)
result[i] = maxE + np.log(result[i])
return result - np.log(self.norm)

def evaluate(self, points):
return np.exp(self.logpdf(points))

# return array with derivative of the function w.r.t. x_i, where i = 1..d, at points
def jac(self, points):
points = np.atleast_2d(points)
d, m = points.shape
if d != self.d:
if d == 1 and m == self.d:
points = np.reshape(points, (self.d, 1))
d, m = points.shape
else:
print("Wrong dimensions")
result = np.zeros((1,m), dtype = np.float64)
result_diff = np.zeros((d,m), dtype = np.float64)
for i in range(m):
diff = self.value - points[:, i, np.newaxis]
#print("diff shape: ", diff.shape)

kde_danilo = GKDE(x1)

xs = np.linspace(-10, 10, num=50)
y1 = kde(xs)
kde.set_bandwidth(bw_method='silverman')
y2 = kde(xs)
kde.set_bandwidth(bw_method=kde.factor / 3.)
y3 = kde(xs)
kde.set_bandwidth(bw_method=1.0)
y4 = kde(xs)

y1_danilo = kde_danilo.evaluate(xs)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x1, np.ones(x1.shape) / (4. * x1.size), 'bo',label='Data points (rescaled)')
ax.plot(xs, y1, label='Scott (default)')
ax.plot(xs, y2, label='Silverman')
ax.plot(xs, y3, label='Const (1/3 * Silverman)')
#ax.plot(xs, y4, label='Const (1.0)')
ax.plot(xs, y1_danilo, ':', label='Scott (by Danilo)')
ax.legend()
plt.show()

0 comments on commit 8c4199e

Please sign in to comment.