Minor updates
charlie-becker committed Feb 27, 2024
1 parent 2ea5032 commit 0f0a02f
Showing 2 changed files with 77 additions and 14 deletions.
8 changes: 4 additions & 4 deletions mlguess/keras/layers.py
@@ -10,7 +10,7 @@ class DenseNormalGamma(tf.keras.layers.Layer):
Args:
units (int): Output size of regression tasks
name (str): Name of the layer
- spectral_normalization (Bool): Whether to use Spectral Normalization
+ spectral_normalization (bool): Whether to use Spectral Normalization
eps (float): Minimum value for evidence (prevents potential division by zero)
Returns:
@@ -19,7 +19,7 @@ class DenseNormalGamma(tf.keras.layers.Layer):
Make Spectral Normalization an option
"""

- def __init__(self, units, name, spectral_normalization=False, eps=1e-12, **kwargs):
+ def __init__(self, units: int, name: str, spectral_normalization=False, eps=1e-12, **kwargs):
""" """
super(DenseNormalGamma, self).__init__(name=name, **kwargs)
self.units = int(units)
@@ -55,15 +55,15 @@ class DenseNormal(tf.keras.layers.Layer):
Args:
units (int): Output size of regression tasks
name (str): Name of the layer
- spectral_normalization (Bool): Whether to use Spectral Normalization
+ spectral_normalization (bool): Whether to use Spectral Normalization
eps (float): Minimum value for evidence (prevents potential division by zero)
Returns:
[mu, sigma] (mean, standard deviation)
todo:
Make Spectral Normalization an option
"""
- def __init__(self, units, name, spectral_normalization=False, eps=1e-12):
+ def __init__(self, units: int, name: str, spectral_normalization=False, eps=1e-12):
super(DenseNormal, self).__init__(name=name)
self.units = int(units)
if spectral_normalization:
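For context, a minimal sketch of using the layers updated in this file (the input tensor and shapes below are illustrative assumptions, not part of the commit, and it assumes the layer behaves like a standard Dense-style head):

import tensorflow as tf
from mlguess.keras.layers import DenseNormalGamma

x = tf.random.normal((32, 16))                 # placeholder batch: 32 samples, 16 features
layer = DenseNormalGamma(units=1, name="dng")  # units: int and name: str per the new type hints
params = layer(x)
# If the layer concatenates the four NIG parameters (mu, v, alpha, beta),
# params should have shape (32, 4): one parameter set per output unit.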
83 changes: 73 additions & 10 deletions mlguess/keras/losses.py
@@ -22,7 +22,7 @@ def __init__(self, callback=None, name="dirichlet", this_epoch_num=None, class_w
if self.class_weights:
logging.warning("The application of class weights to this loss is experimental.")

- def KL(self, alpha):
+ def kl(self, alpha):
beta = tf.constant(np.ones((1, alpha.shape[1])), dtype=tf.float32)
S_alpha = tf.reduce_sum(alpha, axis=1, keepdims=True)
S_beta = tf.reduce_sum(beta, axis=1, keepdims=True)
@@ -65,6 +65,69 @@ def __call__(self, y, output, sample_weight=None):
return tf.reduce_mean(A + B + C)


class DirichletEvidentialLoss_keras3(tf.keras.losses.Loss):
"""
Loss function for an evidential categorical model.
Args:
callback (list): List of callbacks.
name (str): reference name
this_epoch_num (int): Current epoch number, used to anneal the KL term
class_weights (list): List of class weights (experimental)
"""
def __init__(self, callback=None, name="dirichlet", this_epoch_num=None, class_weights=None):

super().__init__()
self.callback = callback
self.__name__ = name
self.class_weights = class_weights
self.this_epoch_num = this_epoch_num
if self.class_weights:
logging.warning("The application of class weights to this loss is experimental.")

def kl(self, alpha):
beta = tf.constant(np.ones((1, alpha.shape[1])), dtype=tf.float32)
S_alpha = tf.reduce_sum(alpha, axis=1, keepdims=True)
S_beta = tf.reduce_sum(beta, axis=1, keepdims=True)
lnB = tf.math.lgamma(S_alpha) - tf.reduce_sum(
tf.math.lgamma(alpha), axis=1, keepdims=True
)
lnB_uni = tf.reduce_sum(
tf.math.lgamma(beta), axis=1, keepdims=True
) - tf.math.lgamma(S_beta)

dg0 = tf.math.digamma(S_alpha)
dg1 = tf.math.digamma(alpha)

if self.class_weights:
kl = (tf.reduce_sum(self.class_weights * (alpha - beta) * (dg1 - dg0), axis=1, keepdims=True) + lnB +
lnB_uni)
else:
kl = (tf.reduce_sum((alpha - beta) * (dg1 - dg0), axis=1, keepdims=True) + lnB + lnB_uni)
return kl

def __call__(self, y, output, sample_weight=None):
evidence = tf.nn.relu(output)
alpha = evidence + 1

S = tf.reduce_sum(alpha, axis=1, keepdims=True)
m = alpha / S

if self.class_weights:
A = tf.reduce_sum(self.class_weights * (y - m) ** 2, axis=1, keepdims=True)
B = tf.reduce_sum(self.class_weights * alpha * (S - alpha) / (S * S * (S + 1)), axis=1, keepdims=True)
else:
A = tf.reduce_sum((y - m) ** 2, axis=1, keepdims=True)
B = tf.reduce_sum(alpha * (S - alpha) / (S * S * (S + 1)), axis=1, keepdims=True)

annealing_coef = tf.minimum(1.0, self.this_epoch_num / self.callback.annealing_coef)
alpha_hat = y + (1 - y) * alpha
C = annealing_coef * self.kl(alpha_hat)
C = tf.reduce_mean(C, axis=1)

return tf.reduce_mean(A + B + C)
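Written out, the quantities computed in __call__ above (restated here for readability; the class-weighted branch simply reweights the sums over classes) are:

S = \sum_k \alpha_k, \qquad m = \alpha / S, \qquad \hat{\alpha} = y + (1 - y) \odot \alpha

A = \sum_k (y_k - m_k)^2, \qquad B = \sum_k \frac{\alpha_k (S - \alpha_k)}{S^2 (S + 1)}, \qquad C = \lambda_t \, \mathrm{KL}\big(\mathrm{Dir}(\hat{\alpha}) \,\|\, \mathrm{Dir}(\mathbf{1})\big)

with \lambda_t = \min(1, \text{this\_epoch\_num} / \text{annealing\_coef}); the returned loss is the batch mean of A + B + C.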



class EvidentialRegressionLoss(tf.keras.losses.Loss):
"""
Loss function for an evidential regression model. The total loss is the Negative Log Likelihood of the
@@ -79,7 +142,7 @@ def __init__(self, coeff=1.0):
super(EvidentialRegressionLoss, self).__init__()
self.coeff = coeff

- def NIG_NLL(self, y, gamma, v, alpha, beta, reduce=True):
+ def nig_nll(self, y, gamma, v, alpha, beta, reduce=True):
v = tf.math.maximum(v, tf.keras.backend.epsilon())
twoBlambda = 2 * beta * (1 + v)
nll = (0.5 * tf.math.log(np.pi / v)
@@ -90,7 +153,7 @@ def NIG_NLL(y, gamma, v, alpha, beta, reduce=True):

return tf.reduce_mean(nll) if reduce else nll
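Part of this method is collapsed in the diff view. The visible pieces (the 0.5 * tf.math.log(np.pi / v) term and twoBlambda = 2 * beta * (1 + v)) match the usual Normal-Inverse-Gamma negative log-likelihood; assuming the hidden lines follow that standard form, it reads:

\mathrm{NLL}(y) = \tfrac{1}{2}\log\frac{\pi}{\nu} - \alpha\log\Omega + \Big(\alpha + \tfrac{1}{2}\Big)\log\big(\nu (y - \gamma)^2 + \Omega\big) + \log\Gamma(\alpha) - \log\Gamma\Big(\alpha + \tfrac{1}{2}\Big), \qquad \Omega = 2\beta(1 + \nu)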

- def NIG_Reg(self, y, gamma, v, alpha, reduce=True):
+ def nig_reg(self, y, gamma, v, alpha, reduce=True):
error = tf.abs(y - gamma)
evi = 2 * v + alpha
reg = error * evi
@@ -99,8 +162,8 @@ def NIG_Reg(self, y, gamma, v, alpha, reduce=True):

def call(self, y_true, evidential_output):
gamma, v, alpha, beta = tf.split(evidential_output, 4, axis=-1)
- loss_nll = self.NIG_NLL(y_true, gamma, v, alpha, beta)
- loss_reg = self.NIG_Reg(y_true, gamma, v, alpha)
+ loss_nll = self.nig_nll(y_true, gamma, v, alpha, beta)
+ loss_reg = self.nig_reg(y_true, gamma, v, alpha)

return loss_nll + self.coeff * loss_reg

@@ -110,7 +173,7 @@ def get_config(self):
return config


- def GaussianNLL(y, y_pred, reduce=True):
+ def gaussian_nll(y, y_pred, reduce=True):
"""
Loss function for a parametric Gaussian Loss.
Args:
@@ -139,7 +202,7 @@ def __init__(self, r=1.0, coeff=1.0):
self.coeff = coeff
self.r = r

- def NIG_NLL(self, y, gamma, v, alpha, beta, reduce=True):
+ def nig_nll(self, y, gamma, v, alpha, beta, reduce=True):
# couple the parameters as per meinert and lavin

twoBlambda = 2 * beta * (1 + v)
@@ -151,7 +214,7 @@ def NIG_NLL(self, y, gamma, v, alpha, beta, reduce=True):

return tf.reduce_mean(nll) if reduce else nll

- def NIG_Reg(self, y, gamma, v, alpha, reduce=True):
+ def nig_reg(self, y, gamma, v, alpha, reduce=True):
error = tf.abs(y - gamma) # can try squared loss here to target the right minimizer
evi = (v + 2 * alpha) # new paper: = v + 2 * alpha, can try to change this to just 2alpha
reg = error * evi
@@ -162,8 +225,8 @@ def call(self, y_true, evidential_output):
gamma, v, alpha, beta = tf.split(evidential_output, 4, axis=-1)
v = (2 * (alpha - 1) / self.r) # need to couple this way otherwise alpha could be negative

- loss_nll = self.NIG_NLL(y_true, gamma, v, alpha, beta)
- loss_reg = self.NIG_Reg(y_true, gamma, v, alpha)
+ loss_nll = self.nig_nll(y_true, gamma, v, alpha, beta)
+ loss_reg = self.nig_reg(y_true, gamma, v, alpha)

return loss_nll + self.coeff * loss_reg

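As an illustration of how the renamed nig_nll / nig_reg methods get exercised end to end, a minimal hypothetical training setup pairing DenseNormalGamma with EvidentialRegressionLoss (the data, architecture, and optimizer are placeholders, not part of this commit, and any extra preprocessing the package expects is not shown):

import numpy as np
import tensorflow as tf
from mlguess.keras.layers import DenseNormalGamma
from mlguess.keras.losses import EvidentialRegressionLoss

# Placeholder data: 256 samples, 8 features, 1 regression target.
x_train = np.random.rand(256, 8).astype("float32")
y_train = np.random.rand(256, 1).astype("float32")

inputs = tf.keras.Input(shape=(8,))
hidden = tf.keras.layers.Dense(64, activation="relu")(inputs)
outputs = DenseNormalGamma(units=1, name="evidential_head")(hidden)
model = tf.keras.Model(inputs, outputs)

# coeff balances the evidence regularizer (nig_reg) against the NLL term (nig_nll).
model.compile(optimizer="adam", loss=EvidentialRegressionLoss(coeff=1.0))
model.fit(x_train, y_train, epochs=2, batch_size=32)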
