From 113b3656bdc6b7f5e6912e5b80bfb07b00629e6b Mon Sep 17 00:00:00 2001
From: oviazlo
Date: Fri, 14 Jan 2022 02:12:55 +0100
Subject: [PATCH 1/3] First prototype of the function to calculate the local
 energy conservation loss

---
 modules/local_energy_conservation_loss.py | 116 ++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 modules/local_energy_conservation_loss.py

diff --git a/modules/local_energy_conservation_loss.py b/modules/local_energy_conservation_loss.py
new file mode 100644
index 00000000..8abbc390
--- /dev/null
+++ b/modules/local_energy_conservation_loss.py
@@ -0,0 +1,116 @@
+import tensorflow as tf
+from oc_helper_ops import SelectWithDefault
+from oc_helper_ops import CreateMidx
+
+
+def get_local_energy_conservation_loss_per_batch_element(
+        beta,
+        x,
+        q_min,
+        object_weights,  # V x 1 !!
+        truth_idx,
+        is_spectator,
+        payload_loss,
+        hit_energy,
+        t_energy,
+        pred_energy,
+        n_shower_neighbours=3,
+        noise_q_min=None,
+        distance_scale=None,
+        use_mean_x=0.,
+        kalpha_damping_strength=0.,
+        beta_gradient_damping=0.,
+        soft_q_scaling=True,
+):
+    tf.assert_equal(True, is_spectator >= 0.)
+    tf.assert_equal(True, beta >= 0.)
+
+    # set all spectators invalid here, everything scales with beta, so:
+    if beta_gradient_damping > 0.:
+        beta = beta_gradient_damping * tf.stop_gradient(beta) + (1. - beta_gradient_damping) * beta
+    beta_in = beta
+    beta = tf.clip_by_value(beta, 0., 1. - 1e-4)
+    beta *= (1. - is_spectator)
+    qraw = tf.math.atanh(beta) ** 2
+
+    is_noise = tf.where(truth_idx < 0, tf.zeros_like(truth_idx, dtype='float32') + 1., 0.)  # V x 1
+    if noise_q_min is not None:
+        q_min = (1. - is_noise) * q_min + is_noise * noise_q_min
+
+    if soft_q_scaling:
+        qraw = tf.math.atanh(beta_in / 1.002) ** 2  # beta_in**4 *20.
+        beta = beta_in * (1. - is_spectator)  # no need for clipping
+
+    q = (qraw + q_min) * (1. - is_spectator)  # V x 1
+    N = tf.cast(beta.shape[0], dtype='float32')
+
+    Msel, M_not, N_per_obj = CreateMidx(truth_idx, calc_m_not=True)
+    # use eager here
+    if Msel is None:
+        # V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
+        print('>>> WARNING: Event has no objects, only noise! Will return zero loss. <<<')
+        zero_tensor = tf.reduce_mean(q, axis=0) * 0.
+        zero_payload = tf.reduce_mean(payload_loss, axis=0) * 0.
+        return zero_tensor, zero_tensor, zero_tensor, zero_tensor, zero_payload, zero_tensor
+
+    N_per_obj = tf.cast(N_per_obj, dtype='float32')  # K x 1
+    K = tf.cast(Msel.shape[0], dtype='float32')
+
+    padmask_m = SelectWithDefault(Msel, tf.zeros_like(beta_in) + 1., 0.)  # K x V-obj x 1
+    x_m = SelectWithDefault(Msel, x, 0.)  # K x V-obj x C
+    beta_m = SelectWithDefault(Msel, beta, 0.)  # K x V-obj x 1
+    is_spectator_m = SelectWithDefault(Msel, is_spectator, 0.)  # K x V-obj x 1
+    q_m = SelectWithDefault(Msel, q, 0.)  # K x V-obj x 1
+    object_weights_m = SelectWithDefault(Msel, object_weights, 0.)
+    distance_scale_m = SelectWithDefault(Msel, distance_scale, 1.)
+
+    tf.assert_greater(distance_scale_m, 0., message="predicted distances must be greater zero")
+
+    kalpha_m = tf.argmax((1. - is_spectator_m) * beta_m, axis=1)  # K x 1
+
+    x_kalpha_m = tf.gather_nd(x_m, kalpha_m, batch_dims=1)  # K x C
+    if use_mean_x > 0:
+        x_kalpha_m_m = tf.reduce_sum(q_m * x_m * padmask_m, axis=1)  # K x C
+        x_kalpha_m_m = tf.math.divide_no_nan(x_kalpha_m_m, tf.reduce_sum(q_m * padmask_m, axis=1) + 1e-9)
+        x_kalpha_m = use_mean_x * x_kalpha_m_m + (1. - use_mean_x) * x_kalpha_m
+
+    if kalpha_damping_strength > 0:
+        x_kalpha_m = kalpha_damping_strength * tf.stop_gradient(x_kalpha_m) + (
+                1. - kalpha_damping_strength) * x_kalpha_m
+
+    # perform kNN using x_kalpha_m to get closest neighbour matrix
+    shower_neighbour_matrix = do_knn(x_kalpha_m, n_shower_neighbours, max_shower_dist)  # K x (1 + n_shower_neighbours)
+
+    # calculate TRUTH energy sum of all neighbour TRUTH showers + the shower itself
+    truth_obj_hit_e = SelectWithDefault(Msel, t_energy, 0.)  # K x V-obj x 1
+    truth_obj_dep_e = tf.reduce_sum(truth_obj_hit_e, axis=1)  # K x 1
+    neighbour_shower_energy_matrix_truth = SelectWithDefault(shower_neighbour_matrix, truth_obj_dep_e, 0.)  # K x (1 + n_shower_neighbours)
+    local_shower_energy_sum_truth = tf.reduce_sum(neighbour_shower_energy_matrix_truth, axis=1)  # K x 1
+
+    # calculate PREDICTED energy sum of all neighbour RECO showers + the shower itself
+    pred_obj_hit_e = SelectWithDefault(Msel, pred_energy, 0.)  # K x V-obj x 1
+    pred_obj_dep_e = tf.reduce_sum(pred_obj_hit_e, axis=1)  # K x 1
+    neighbour_shower_energy_matrix_pred = SelectWithDefault(shower_neighbour_matrix, pred_obj_dep_e, 0.)  # K x (1 + n_shower_neighbours)
+    local_shower_energy_sum_pred = tf.reduce_sum(neighbour_shower_energy_matrix_pred, axis=1)  # K x 1
+
+    # TODO calculate loss (as the sum of energy differences for each shower?)
+    shower_energy_sq_diff = tf.math.squared_difference(local_shower_energy_sum_truth, local_shower_energy_sum_pred)
+    local_energy_conservation_loss = tf.reduce_sum(shower_energy_sq_diff, axis=0)
+
+    return local_energy_conservation_loss
+
+
+    # FIXME OPEN QUESTIONS BELOW
+    # truth RECO deposited energy
+    # FIXME is the following needed?: nt_idx = t_idx + 1  # make noise object
+    # obj_hit_e = SelectWithDefault(Msel, hit_energy, 0.)  # K x V-obj x 1
+    # obj_dep_e = tf.reduce_sum(obj_hit_e, axis=1)  # K x 1
+
+    # FIXME I think I don't need to do this... since I need energy per shower only
+    # _, idxs, _ = tf.unique_with_counts(truth_idx[:, 0])  # for backgather, same indices as in objsel
+    # idxs = tf.expand_dims(idxs, axis=1)
+    # scat_dep_e = tf.gather_nd(obj_dep_e, idxs)
+
+    # FIXME from calc_energy_correction_factor_loss function
+    # FIXME # calo-like
+    # FIXME ediff = (t_energy - pred_energy * dep_energies) / tf.sqrt(t_energy + 1e-3)

From bf097a9c5f5601de2250f5b88468391fe1a72f63 Mon Sep 17 00:00:00 2001
From: Oleksandr Viazlo
Date: Fri, 14 Jan 2022 14:13:53 +0100
Subject: [PATCH 2/3] Small fix: normalise the loss by the number of showers

---
 modules/local_energy_conservation_loss.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local_energy_conservation_loss.py b/modules/local_energy_conservation_loss.py
index 8abbc390..d041e9f4 100644
--- a/modules/local_energy_conservation_loss.py
+++ b/modules/local_energy_conservation_loss.py
@@ -95,7 +95,7 @@ def get_local_energy_conservation_loss_per_batch_element(
 
     # TODO calculate loss (as the sum of energy differences for each shower?)
     shower_energy_sq_diff = tf.math.squared_difference(local_shower_energy_sum_truth, local_shower_energy_sum_pred)
-    local_energy_conservation_loss = tf.reduce_sum(shower_energy_sq_diff, axis=0)
+    local_energy_conservation_loss = tf.reduce_sum(shower_energy_sq_diff, axis=0) / shower_energy_sq_diff.shape[0]
 
     return local_energy_conservation_loss
 

From 9404bdd7086e004f01fbccaca4dc9379ed33d14a Mon Sep 17 00:00:00 2001
From: Oleksandr Viazlo
Date: Sat, 15 Jan 2022 00:15:12 +0100
Subject: [PATCH 3/3] Rewrite get_local_energy_conservation_loss_per_batch_element
 after discussion with Jan

---
 modules/local_energy_conservation_loss.py | 133 +++++----------------
 1 file changed, 38 insertions(+), 95 deletions(-)

diff --git a/modules/local_energy_conservation_loss.py b/modules/local_energy_conservation_loss.py
index d041e9f4..e46228ac 100644
--- a/modules/local_energy_conservation_loss.py
+++ b/modules/local_energy_conservation_loss.py
@@ -1,116 +1,59 @@
 import tensorflow as tf
 from oc_helper_ops import SelectWithDefault
 from oc_helper_ops import CreateMidx
+from select_knn_op import SelectKnn
+
 
 def get_local_energy_conservation_loss_per_batch_element(
-        beta,
         x,
-        q_min,
-        object_weights,  # V x 1 !!
         truth_idx,
-        is_spectator,
-        payload_loss,
         hit_energy,
         t_energy,
-        pred_energy,
-        n_shower_neighbours=3,
-        noise_q_min=None,
-        distance_scale=None,
-        use_mean_x=0.,
-        kalpha_damping_strength=0.,
-        beta_gradient_damping=0.,
-        soft_q_scaling=True,
+        pred_energy_corrections,
+        max_shower_dist=30,  # TODO think what value is the best
+        n_shower_neighbours=3
 ):
-    tf.assert_equal(True, is_spectator >= 0.)
-    tf.assert_equal(True, beta >= 0.)
-
-    # set all spectators invalid here, everything scales with beta, so:
-    if beta_gradient_damping > 0.:
-        beta = beta_gradient_damping * tf.stop_gradient(beta) + (1. - beta_gradient_damping) * beta
-    beta_in = beta
-    beta = tf.clip_by_value(beta, 0., 1. - 1e-4)
-    beta *= (1. - is_spectator)
-    qraw = tf.math.atanh(beta) ** 2
-
-    is_noise = tf.where(truth_idx < 0, tf.zeros_like(truth_idx, dtype='float32') + 1., 0.)  # V x 1
-    if noise_q_min is not None:
-        q_min = (1. - is_noise) * q_min + is_noise * noise_q_min
-
-    if soft_q_scaling:
-        qraw = tf.math.atanh(beta_in / 1.002) ** 2  # beta_in**4 *20.
-        beta = beta_in * (1. - is_spectator)  # no need for clipping
-
-    q = (qraw + q_min) * (1. - is_spectator)  # V x 1
-    N = tf.cast(beta.shape[0], dtype='float32')
-
-    Msel, M_not, N_per_obj = CreateMidx(truth_idx, calc_m_not=True)
-    # use eager here
+    # create hit-shower association matrix (each row contains all hits associated to one shower)
+    Msel, _, _ = CreateMidx(truth_idx, calc_m_not=False)
     if Msel is None:
-        # V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
         print('>>> WARNING: Event has no objects, only noise! Will return zero loss. <<<')
-        zero_tensor = tf.reduce_mean(q, axis=0) * 0.
-        zero_payload = tf.reduce_mean(payload_loss, axis=0) * 0.
-        return zero_tensor, zero_tensor, zero_tensor, zero_tensor, zero_payload, zero_tensor
-
-    N_per_obj = tf.cast(N_per_obj, dtype='float32')  # K x 1
-    K = tf.cast(Msel.shape[0], dtype='float32')
+        return None
 
-    padmask_m = SelectWithDefault(Msel, tf.zeros_like(beta_in) + 1., 0.)  # K x V-obj x 1
     x_m = SelectWithDefault(Msel, x, 0.)  # K x V-obj x C
-    beta_m = SelectWithDefault(Msel, beta, 0.)  # K x V-obj x 1
-    is_spectator_m = SelectWithDefault(Msel, is_spectator, 0.)  # K x V-obj x 1
-    q_m = SelectWithDefault(Msel, q, 0.)  # K x V-obj x 1
-    object_weights_m = SelectWithDefault(Msel, object_weights, 0.)
-    distance_scale_m = SelectWithDefault(Msel, distance_scale, 1.)
-
-    tf.assert_greater(distance_scale_m, 0., message="predicted distances must be greater zero")
-
-    kalpha_m = tf.argmax((1. - is_spectator_m) * beta_m, axis=1)  # K x 1
+    hit_counter = tf.where(x_m[:, :, 0] != 0., 1, 0)  # K x V-obj
+    x_m_sum = tf.reduce_sum(x_m, axis=1)  # K x C
+    n_hits_per_shower = tf.reduce_sum(hit_counter, axis=1)  # K
+    n_hits_per_shower = tf.expand_dims(n_hits_per_shower, axis=1)  # K x 1
+    n_hits_per_shower = tf.cast(n_hits_per_shower, dtype='float32')  # K x 1
+    # get shower centers as geometrical mean of hit coordinates
+    x_showers = tf.math.divide_no_nan(x_m_sum, n_hits_per_shower)  # K x C
+
+    # perform kNN using x_m to get closest neighbour matrix
+    shower_neighbour_matrix, _ = SelectKnn(n_shower_neighbours,  # K x (1 + n_shower_neighbours)
+                                           x_showers, tf.constant([0, Msel.shape[0]], dtype="int32"),
+                                           max_radius=max_shower_dist, tf_compatible=True)
+
+    # for backgather, the same indices as in Msel
+    _, idxs, _ = tf.unique_with_counts(truth_idx[:, 0])
+    idxs = tf.expand_dims(idxs, axis=1)  # V x 1
+
+    # calculate deposited energy per shower
+    hit_energies_per_shower = SelectWithDefault(Msel, hit_energy, 0.)  # K x V-obj x 1
+    hit_energy_sum_per_shower = tf.reduce_sum(hit_energies_per_shower, axis=1)  # K x 1
+    scat_energy_deposited = tf.gather_nd(hit_energy_sum_per_shower, idxs)  # V x 1
 
-    x_kalpha_m = tf.gather_nd(x_m, kalpha_m, batch_dims=1)  # K x C
-    if use_mean_x > 0:
-        x_kalpha_m_m = tf.reduce_sum(q_m * x_m * padmask_m, axis=1)  # K x C
-        x_kalpha_m_m = tf.math.divide_no_nan(x_kalpha_m_m, tf.reduce_sum(q_m * padmask_m, axis=1) + 1e-9)
-        x_kalpha_m = use_mean_x * x_kalpha_m_m + (1. - use_mean_x) * x_kalpha_m
-
-    if kalpha_damping_strength > 0:
-        x_kalpha_m = kalpha_damping_strength * tf.stop_gradient(x_kalpha_m) + (
-                1. - kalpha_damping_strength) * x_kalpha_m
-
-    # perform kNN using x_kalpha_m to get closest neighbour matrix
-    shower_neighbour_matrix = do_knn(x_kalpha_m, n_shower_neighbours, max_shower_dist)  # K x (1 + n_shower_neighbours)
+    # calculate PREDICTED energy sum of all neighbour RECO showers + the shower itself
+    predicted_energy = scat_energy_deposited * pred_energy_corrections  # V x 1
+    neighbour_shower_energy_matrix_predicted = SelectWithDefault(shower_neighbour_matrix, predicted_energy, 0.)  # K x (1 + n_shower_neighbours)
+    local_shower_energy_sum_predicted = tf.reduce_sum(neighbour_shower_energy_matrix_predicted, axis=1)  # K x 1
+    scat_local_energy_predicted = tf.gather_nd(local_shower_energy_sum_predicted, idxs)  # V x 1
 
     # calculate TRUTH energy sum of all neighbour TRUTH showers + the shower itself
-    truth_obj_hit_e = SelectWithDefault(Msel, t_energy, 0.)  # K x V-obj x 1
-    truth_obj_dep_e = tf.reduce_sum(truth_obj_hit_e, axis=1)  # K x 1
-    neighbour_shower_energy_matrix_truth = SelectWithDefault(shower_neighbour_matrix, truth_obj_dep_e, 0.)  # K x (1 + n_shower_neighbours)
+    neighbour_shower_energy_matrix_truth = SelectWithDefault(shower_neighbour_matrix, t_energy, 0.)  # K x (1 + n_shower_neighbours)
     local_shower_energy_sum_truth = tf.reduce_sum(neighbour_shower_energy_matrix_truth, axis=1)  # K x 1
+    scat_local_energy_truth = tf.gather_nd(local_shower_energy_sum_truth, idxs)  # V x 1
 
-    # calculate PREDICTED energy sum of all neighbour RECO showers + the shower itself
-    pred_obj_hit_e = SelectWithDefault(Msel, pred_energy, 0.)  # K x V-obj x 1
-    pred_obj_dep_e = tf.reduce_sum(pred_obj_hit_e, axis=1)  # K x 1
-    neighbour_shower_energy_matrix_pred = SelectWithDefault(shower_neighbour_matrix, pred_obj_dep_e, 0.)  # K x (1 + n_shower_neighbours)
-    local_shower_energy_sum_pred = tf.reduce_sum(neighbour_shower_energy_matrix_pred, axis=1)  # K x 1
-
-    # TODO calculate loss (as the sum of energy differences for each shower?)
-    shower_energy_sq_diff = tf.math.squared_difference(local_shower_energy_sum_truth, local_shower_energy_sum_pred)
-    local_energy_conservation_loss = tf.reduce_sum(shower_energy_sq_diff, axis=0) / shower_energy_sq_diff.shape[0]
-
-    return local_energy_conservation_loss
-
-
-    # FIXME OPEN QUESTIONS BELOW
-    # truth RECO deposited energy
-    # FIXME is the following needed?: nt_idx = t_idx + 1  # make noise object
-    # obj_hit_e = SelectWithDefault(Msel, hit_energy, 0.)  # K x V-obj x 1
-    # obj_dep_e = tf.reduce_sum(obj_hit_e, axis=1)  # K x 1
-
-    # FIXME I think I don't need to do this... since I need energy per shower only
-    # _, idxs, _ = tf.unique_with_counts(truth_idx[:, 0])  # for backgather, same indices as in objsel
-    # idxs = tf.expand_dims(idxs, axis=1)
-    # scat_dep_e = tf.gather_nd(obj_dep_e, idxs)
+    ediff = (scat_local_energy_truth - scat_local_energy_predicted) / tf.sqrt(scat_local_energy_truth + 1e-3)  # V x 1
 
-    # FIXME from calc_energy_correction_factor_loss function
-    # FIXME # calo-like
-    # FIXME ediff = (t_energy - pred_energy * dep_energies) / tf.sqrt(t_energy + 1e-3)
+    return ediff
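
For illustration, the idea behind the final version of get_local_energy_conservation_loss_per_batch_element can be sketched with standard TensorFlow ops on a toy event: build shower centres from the hit coordinates, find each shower's nearest neighbours, and compare the truth and corrected-predicted energy sums of every local group with a calo-like normalisation. The sketch below is an assumption-laden stand-in, not the module itself: it does not use the repository's CreateMidx / SelectWithDefault / SelectKnn custom ops, the helper name toy_local_energy_conservation and the shapes (per-shower truth energies, per-hit correction factors averaged per shower, no noise hits) are invented for the example, and a brute-force kNN replaces SelectKnn's radius-limited search.

# Illustrative sketch only (not part of the patches above); all names and shapes are assumptions.
import tensorflow as tf


def toy_local_energy_conservation(x, truth_idx, hit_energy, t_energy_per_shower,
                                  pred_energy_corrections, n_shower_neighbours=3):
    # x: V x C hit coordinates, truth_idx: V shower index per hit (no noise in this toy),
    # hit_energy: V, t_energy_per_shower: K, pred_energy_corrections: V
    n_showers = tf.reduce_max(truth_idx) + 1

    # shower centres as the geometrical mean of the associated hit coordinates
    x_showers = tf.math.unsorted_segment_mean(x, truth_idx, n_showers)  # K x C

    # deposited energy per shower and one correction factor per shower (mean over its hits)
    dep_e = tf.math.unsorted_segment_sum(hit_energy, truth_idx, n_showers)  # K
    corr = tf.math.unsorted_segment_mean(pred_energy_corrections, truth_idx, n_showers)  # K
    pred_e = dep_e * corr  # K

    # brute-force kNN over shower centres (the patch uses the SelectKnn op with a max radius)
    d2 = tf.reduce_sum((x_showers[:, None, :] - x_showers[None, :, :]) ** 2, axis=-1)  # K x K
    k = tf.minimum(n_shower_neighbours + 1, n_showers)  # the shower itself is included
    _, nn_idx = tf.math.top_k(-d2, k=k)  # K x k

    # local (shower + neighbours) energy sums, truth vs corrected prediction
    local_truth = tf.reduce_sum(tf.gather(t_energy_per_shower, nn_idx), axis=1)  # K
    local_pred = tf.reduce_sum(tf.gather(pred_e, nn_idx), axis=1)  # K

    # calo-like normalised difference, analogous to the final ediff expression
    return (local_truth - local_pred) / tf.sqrt(local_truth + 1e-3)  # K


# toy event: six hits forming two showers
x = tf.constant([[0., 0.], [0.1, 0.], [0., 0.1], [5., 5.], [5.1, 5.], [5., 5.1]])
truth_idx = tf.constant([0, 0, 0, 1, 1, 1])
hit_energy = tf.constant([1., 2., 1., 3., 3., 2.])
t_energy_per_shower = tf.constant([4.5, 8.5])
pred_energy_corrections = tf.constant([1.1, 1.1, 1.1, 0.9, 0.9, 0.9])
print(toy_local_energy_conservation(x, truth_idx, hit_energy,
                                    t_energy_per_shower, pred_energy_corrections))

As in the patched function, each shower is counted among its own neighbours, so the returned quantity penalises a mismatch of the locally summed energy rather than of isolated showers; the real module additionally scatters the per-shower result back to hits via tf.gather_nd.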