Add negative binomial distribution #356

Open
wants to merge 10 commits into master
2 changes: 2 additions & 0 deletions ngboost/distns/__init__.py
@@ -7,6 +7,7 @@
from .laplace import Laplace
from .lognormal import LogNormal
from .multivariate_normal import MultivariateNormal
from .negative_binomial import NegativeBinomial
from .normal import Normal, NormalFixedMean, NormalFixedVar
from .poisson import Poisson
from .t import T, TFixedDf, TFixedDfFixedVar
@@ -23,6 +24,7 @@
"Laplace",
"LogNormal",
"MultivariateNormal",
"NegativeBinomial",
"Normal",
"NormalFixedMean",
"NormalFixedVar",
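With the export in place, the new distribution is importable from the package namespace. A quick check (illustrative, not part of the diff):

```python
from ngboost.distns import NegativeBinomial

print(NegativeBinomial.n_params)  # 2: (log n, z)
```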
74 changes: 74 additions & 0 deletions ngboost/distns/negative_binomial.py
@@ -0,0 +1,74 @@
"""The NGBoost NegativeBinomial distribution and scores"""
import numpy as np
from scipy.optimize import Bounds, minimize
from scipy.special import digamma
from scipy.stats import nbinom as dist

from ngboost.distns.distn import RegressionDistn
from ngboost.scores import LogScore

# Helper for fit(): scipy does not provide a fit() method for nbinom natively.
def negative_log_likelihood(params, k):
    return -dist.logpmf(k=k, n=params[0], p=params[1]).sum()

class NegativeBinomialLogScore(LogScore):

    def score(self, Y):
        # negative log-likelihood of the observed counts
        return -self.dist.logpmf(Y)

    def d_score(self, Y):
        # Gradients of the NLL w.r.t. the internal parameters (log n, z).
        D = np.zeros((len(Y), 2))
        # d(-logpmf)/d(log n) = -n * (digamma(Y + n) + log(p) - digamma(n))
        D[:, 0] = -self.n * (digamma(Y + self.n) + np.log(self.p) - digamma(self.n))
        # d(-logpmf)/dz = Y*p - n*(1 - p), with p = sigmoid(z)
        D[:, 1] = (Y * np.exp(self.z) - self.n) / (np.exp(self.z) + 1)
        return D

    def metric(self):
        FI = np.zeros((self.n.shape[0], 2, 2))
        # Approximation for the log(n) entry: the exact Fisher information
        # involves E[trigamma(Y + n)], which has no closed form.
        FI[:, 0, 0] = (self.n * self.p) / (self.p + 1)
        # Exact entry for z = logit(p): Var(dNLL/dz) = n * (1 - p).
        FI[:, 1, 1] = self.n * (1 - self.p)
        return FI

class NegativeBinomial(RegressionDistn):

    n_params = 2
    scores = [NegativeBinomialLogScore]

    def __init__(self, params):
        # save the internal parameters (log n, z)
        self._params = params

        self.logn = params[0]
        self.n = np.exp(self.logn)
        # z = log(p / (1 - p))  =>  p = 1 / (1 + e^(-z))
        self.z = params[1]
        self.p = 1 / (1 + np.exp(-self.z))
        self.dist = dist(n=self.n, p=self.p)

    def fit(Y):
        assert np.equal(
            np.mod(Y, 1), 0
        ).all(), "All Negative Binomial target data must be discrete integers"
        assert np.all(Y >= 0), "Count data must be >= 0"

        m = minimize(
            negative_log_likelihood,
            x0=np.array([np.max(Y), 0.5]),  # initial guess for (n, p)
            args=(Y,),
            bounds=Bounds((1e-8, 1e-8), (np.inf, 1 - 1e-8)),
        )
        # Map the MLE (n, p) to the internal parameters (log n, logit p).
        return np.array([np.log(m.x[0]), np.log(m.x[1] / (1 - m.x[1]))])

    def sample(self, m):
        return np.array([self.dist.rvs() for i in range(m)])

    def __getattr__(
        self, name
    ):  # gives us access to NegativeBinomial.mean() required for RegressionDistn.predict()
        if name in dir(self.dist):
            return getattr(self.dist, name)
        return None

    @property
    def params(self):
        return {"n": self.n, "p": self.p}
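For reviewers, a minimal end-to-end sketch of the new distribution with NGBRegressor. The synthetic data, seed, and hyperparameters are illustrative only and are not part of this PR:

```python
import numpy as np
from ngboost import NGBRegressor
from ngboost.distns import NegativeBinomial
from ngboost.scores import LogScore

# Illustrative overdispersed count data (assumed, not from the PR).
rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))
Y = rng.negative_binomial(n=5, p=0.4, size=500).astype(float)

ngb = NGBRegressor(Dist=NegativeBinomial, Score=LogScore, n_estimators=100)
ngb.fit(X, Y)

dist = ngb.pred_dist(X[:5])
print(dist.params)         # {'n': array([...]), 'p': array([...])}
print(ngb.predict(X[:5]))  # point predictions via the distribution's mean()
```

Point predictions work because `__getattr__` forwards `mean()` from the underlying scipy object.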
2 changes: 2 additions & 0 deletions tests/test_score.py
@@ -11,6 +11,7 @@
    Gamma,
    Laplace,
    MultivariateNormal,
    NegativeBinomial,
    Normal,
    Poisson,
    T,
@@ -100,6 +101,7 @@ def idfn(dist_score: DistScore):
    (Laplace, LogScore),
    (Poisson, LogScore),
    (Gamma, LogScore),
    (NegativeBinomial, LogScore),
] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)]
# Fill in the dist, score pair to test the gradient
# Tests all in TEST_METRIC by default
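As a quick standalone sanity check of the gradient formulas (complementing the parametrized grad test above), the analytic derivatives can be compared against central finite differences of the scipy log-pmf. Everything here is illustrative and uses only the formulas from negative_binomial.py:

```python
import numpy as np
from scipy.special import digamma
from scipy.stats import nbinom

def nll(logn, z, y):
    n, p = np.exp(logn), 1 / (1 + np.exp(-z))
    return -nbinom.logpmf(y, n=n, p=p)

logn, z, y, eps = np.log(5.0), 0.2, 3, 1e-6
n, p = np.exp(logn), 1 / (1 + np.exp(-z))

# Central finite differences of the negative log-likelihood
fd_logn = (nll(logn + eps, z, y) - nll(logn - eps, z, y)) / (2 * eps)
fd_z = (nll(logn, z + eps, y) - nll(logn, z - eps, y)) / (2 * eps)

# Analytic gradients, as implemented in d_score
an_logn = -n * (digamma(y + n) + np.log(p) - digamma(n))
an_z = (y * np.exp(z) - n) / (np.exp(z) + 1)

print(fd_logn, an_logn)  # should agree to ~1e-6
print(fd_z, an_z)
```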