From e7a5ed98e23ffa656aeb2cdfc03f29f71b956a5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Fri, 29 Mar 2024 16:29:17 +0100
Subject: [PATCH 01/27] Simplify code

---
 egenerator/ic3/reconstruction.py | 6 +-----
 egenerator/ic3/visualization.py  | 6 +-----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/egenerator/ic3/reconstruction.py b/egenerator/ic3/reconstruction.py
index e0ac8d10..ff93da30 100644
--- a/egenerator/ic3/reconstruction.py
+++ b/egenerator/ic3/reconstruction.py
@@ -335,11 +335,7 @@ def Configure(self):
         self.manager = self.manager_configurator.manager
         self.loss_module = self.manager_configurator.loss_module
 
-        if 'I3ParticleMapping' in self.manager.configuration.config['config']:
-            self.i3_mapping = self.manager.configuration.config['config'][
-                'I3ParticleMapping']
-        else:
-            self.i3_mapping = None
+        self.i3_mapping = self.manager.configuration.config['config'].get('I3ParticleMapping', None)
 
         for model in self.manager.models:
             num_vars, num_total_vars = model.num_variables
diff --git a/egenerator/ic3/visualization.py b/egenerator/ic3/visualization.py
index b064aa1d..14ec9fe1 100644
--- a/egenerator/ic3/visualization.py
+++ b/egenerator/ic3/visualization.py
@@ -128,11 +128,7 @@ def Configure(self):
         self.manager = self.manager_configurator.manager
         self.loss_module = self.manager_configurator.loss_module
 
-        if 'I3ParticleMapping' in self.manager.configuration.config['config']:
-            self.i3_mapping = self.manager.configuration.config['config'][
-                'I3ParticleMapping']
-        else:
-            self.i3_mapping = None
+        self.i3_mapping = self.manager.configuration.config['config'].get('I3ParticleMapping', None)
 
         for model in self.manager.models:
             num_vars, num_total_vars = model.num_variables

From cb05b283427461aceaac62b65bd93fec8c7044df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Fri, 29 Mar 2024 16:41:10 +0100
Subject: [PATCH 02/27] Remove hard-coded label_key LabelsDeepLearning. Allow
 to train on MCPEs.

---
 egenerator/data/handler/modular.py         |  5 ++-
 egenerator/data/modules/data/pulse_data.py | 39 ++++++++++++++--------
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/egenerator/data/handler/modular.py b/egenerator/data/handler/modular.py
index e28ddba8..20a19937 100644
--- a/egenerator/data/handler/modular.py
+++ b/egenerator/data/handler/modular.py
@@ -322,7 +322,10 @@ def _get_data(self, file_or_frame, method, *args, **kwargs):
             assert isinstance(file_or_frame, str), 'Expected file path string'
 
             num_data, data = self.data_module.get_data_from_hdf(
-                                                file_or_frame, *args, **kwargs)
+                                                file_or_frame, *args,
+                                                label_key=self.label_module.configuration.config['label_key'],
+                                                **kwargs)
+
             num_labels, labels = self.label_module.get_data_from_hdf(
                                                 file_or_frame, *args, **kwargs)
             num_misc, misc = self.misc_module.get_data_from_hdf(
diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py
index 603fab4d..f56af5ab 100644
--- a/egenerator/data/modules/data/pulse_data.py
+++ b/egenerator/data/modules/data/pulse_data.py
@@ -40,7 +40,7 @@ def __init__(self, logger=None):
 
     def _configure(self, config_data, pulse_key, dom_exclusions_key,
                    time_exclusions_key, float_precision, add_charge_quantiles,
-                   discard_pulses_from_excluded_doms):
+                   discard_pulses_from_excluded_doms, pulse_is_mcpe=False):
         """Configure Module Class
         This is an abstract method and must be implemented by derived class.
 
@@ -67,6 +67,9 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key,
         discard_pulses_from_excluded_doms : bool, optional
             If True, pulses on excluded DOMs are discarded. The pulses are
             discarded after the charge at the DOM is collected.
+        pulse_is_mcpe : bool, optional
+            If True, train on MCPE pulses instead of RecoPulses. This setting
+            changes how the charge is read out of the hdf5 files. (Default: False)
 
         Returns
         -------
@@ -115,6 +118,14 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key,
         time_exclusions_exist = time_exclusions_key is not None
         dom_exclusions_exist = dom_exclusions_key is not None
 
+
+        self._hdf_time_index = 10
+        # 11: npe, 12: charge
+        if pulse_is_mcpe:
+            self._hdf_charge_index = 11
+        else:
+            self._hdf_charge_index = 12
+
         if add_charge_quantiles:
             pulse_dim = 3
         else:
@@ -187,6 +198,7 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key,
                           add_charge_quantiles=add_charge_quantiles),
             mutable_settings=dict(
                 pulse_key=pulse_key,
+                pulse_is_mcpe=pulse_is_mcpe,
                 dom_exclusions_key=dom_exclusions_key,
                 time_exclusions_key=time_exclusions_key,
                 discard_pulses_from_excluded_doms=(
@@ -221,10 +233,9 @@ def get_data_from_hdf(self, file, *args, **kwargs):
 
         # open file
         f = pd.HDFStore(file, 'r')
-
         try:
             pulses = f[self.configuration.config['pulse_key']]
-            _labels = f['LabelsDeepLearning']
+            _labels = f[kwargs['label_key']]
             if self.data['dom_exclusions_exist']:
                 try:
                     dom_exclusions = \
@@ -311,29 +322,29 @@ def get_data_from_hdf(self, file, *args, **kwargs):
                 continue
             index = event_dict[(row[1:5])]
 
-            # pulse charge: row[12], time: row[10]
+            # pulse charge: row[self._hdf_charge_index], time: row[self._hdf_time_index]
             # accumulate charge in DOMs
-            x_dom_charge[index, string-1, dom-1, 0] += row[12]
+            x_dom_charge[index, string-1, dom-1, 0] += row[self._hdf_charge_index]
 
             # gather pulses
             if add_charge_quantiles:
 
                 # (charge, time, quantile)
                 cum_charge = float(x_dom_charge[index, string-1, dom-1, 0])
-                x_pulses[pulse_index] = [row[12], row[10], cum_charge]
+                x_pulses[pulse_index] = [row[self._hdf_charge_index], row[self._hdf_time_index], cum_charge]
 
             else:
                 # (charge, time)
-                x_pulses[pulse_index] = [row[12], row[10]]
+                x_pulses[pulse_index] = [row[self._hdf_charge_index], row[self._hdf_time_index]]
 
             # gather pulse ids (batch index, string, dom)
             x_pulses_ids[pulse_index] = [index, string-1, dom-1]
 
             # update time window
-            if row[10] > x_time_window[index, 1]:
-                x_time_window[index, 1] = row[10]
-            if row[10] < x_time_window[index, 0]:
-                x_time_window[index, 0] = row[10]
+            if row[self._hdf_time_index] > x_time_window[index, 1]:
+                x_time_window[index, 1] = row[self._hdf_time_index]
+            if row[self._hdf_time_index] < x_time_window[index, 0]:
+                x_time_window[index, 0] = row[self._hdf_time_index]
 
         # convert cumulative charge to fraction of total charge, e.g. quantile
         if add_charge_quantiles:
@@ -364,10 +375,10 @@ def get_data_from_hdf(self, file, *args, **kwargs):
                     continue
                 index = event_dict[(row[1:5])]
 
-                # t_start (pulse time): row[10], t_end (pulse width): row[11]
+                # t_start (pulse time): row[self._hdf_time_index], t_end (pulse width): row[11]
 
                 # (t_start, t_end)
-                x_time_exclusions[tw_index] = [row[10], row[11]]
+                x_time_exclusions[tw_index] = [row[self._hdf_time_index], row[11]]
 
                 # gather pulse ids (batch index, string, dom)
                 x_time_exclusions_ids[tw_index] = [index, string-1, dom-1]
@@ -524,7 +535,7 @@ def get_data_from_frame(self, frame, *args, **kwargs):
             for pulse in pulse_list:
                 index = 0
 
-                # pulse charge: row[12], time: row[10]
+                # pulse charge: row[self._hdf_charge_index], time: row[self._hdf_time_index]
                 # accumulate charge in DOMs
                 x_dom_charge[index, string-1, dom-1, 0] += pulse.charge
 

From 009d3519e97fb13a30442a80bf2f5c97bf941388 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Mon, 1 Apr 2024 18:11:43 +0200
Subject: [PATCH 03/27] Fixed setting charge index. Slightly refactor code to
 avoid future warning

---
 egenerator/data/modules/data/pulse_data.py | 46 ++++++++++++----------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py
index f56af5ab..4592e93f 100644
--- a/egenerator/data/modules/data/pulse_data.py
+++ b/egenerator/data/modules/data/pulse_data.py
@@ -118,14 +118,6 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key,
         time_exclusions_exist = time_exclusions_key is not None
         dom_exclusions_exist = dom_exclusions_key is not None
 
-
-        self._hdf_time_index = 10
-        # 11: npe, 12: charge
-        if pulse_is_mcpe:
-            self._hdf_charge_index = 11
-        else:
-            self._hdf_charge_index = 12
-
         if add_charge_quantiles:
             pulse_dim = 3
         else:
@@ -231,6 +223,13 @@ def get_data_from_hdf(self, file, *args, **kwargs):
         if not self.is_configured:
             raise ValueError('Module not configured yet!')
 
+        hdf_time_index = 10
+        # 11: npe, 12: charge
+        if self.configuration.config["pulse_is_mcpe"]:
+            hdf_charge_index = 11
+        else:
+            hdf_charge_index = 12
+
         # open file
         f = pd.HDFStore(file, 'r')
         try:
@@ -268,9 +267,13 @@ def get_data_from_hdf(self, file, *args, **kwargs):
 
         # create Dictionary with event IDs
         size = len(_labels['Event'])
+
+        if not size:
+            raise ValueError("Label length is 0.")
+
         event_dict = {}
-        for row in _labels.iterrows():
-            event_dict[(row[1][0], row[1][1], row[1][2], row[1][3])] = row[0]
+        for row in _labels.itertuples():
+            event_dict[(row[1:5])] = row[0]
 
         # create empty array for DOM charges
         x_dom_charge = np.zeros([size, 86, 60, 1],
@@ -320,31 +323,32 @@ def get_data_from_hdf(self, file, *args, **kwargs):
                 self._logger.warning(
                     'skipping pulse: {} {}'.format(string, dom))
                 continue
+
             index = event_dict[(row[1:5])]
 
-            # pulse charge: row[self._hdf_charge_index], time: row[self._hdf_time_index]
+            # pulse charge: row[hdf_charge_index], time: row[hdf_time_index]
             # accumulate charge in DOMs
-            x_dom_charge[index, string-1, dom-1, 0] += row[self._hdf_charge_index]
+            x_dom_charge[index, string-1, dom-1, 0] += row[hdf_charge_index]
 
             # gather pulses
             if add_charge_quantiles:
 
                 # (charge, time, quantile)
                 cum_charge = float(x_dom_charge[index, string-1, dom-1, 0])
-                x_pulses[pulse_index] = [row[self._hdf_charge_index], row[self._hdf_time_index], cum_charge]
+                x_pulses[pulse_index] = [row[hdf_charge_index], row[hdf_time_index], cum_charge]
 
             else:
                 # (charge, time)
-                x_pulses[pulse_index] = [row[self._hdf_charge_index], row[self._hdf_time_index]]
+                x_pulses[pulse_index] = [row[hdf_charge_index], row[hdf_time_index]]
 
             # gather pulse ids (batch index, string, dom)
             x_pulses_ids[pulse_index] = [index, string-1, dom-1]
 
             # update time window
-            if row[self._hdf_time_index] > x_time_window[index, 1]:
-                x_time_window[index, 1] = row[self._hdf_time_index]
-            if row[self._hdf_time_index] < x_time_window[index, 0]:
-                x_time_window[index, 0] = row[self._hdf_time_index]
+            if row[hdf_time_index] > x_time_window[index, 1]:
+                x_time_window[index, 1] = row[hdf_time_index]
+            if row[hdf_time_index] < x_time_window[index, 0]:
+                x_time_window[index, 0] = row[hdf_time_index]
 
         # convert cumulative charge to fraction of total charge, e.g. quantile
         if add_charge_quantiles:
@@ -375,10 +379,10 @@ def get_data_from_hdf(self, file, *args, **kwargs):
                     continue
                 index = event_dict[(row[1:5])]
 
-                # t_start (pulse time): row[self._hdf_time_index], t_end (pulse width): row[11]
+                # t_start (pulse time): row[hdf_time_index], t_end (pulse width): row[11]
 
                 # (t_start, t_end)
-                x_time_exclusions[tw_index] = [row[self._hdf_time_index], row[11]]
+                x_time_exclusions[tw_index] = [row[hdf_time_index], row[11]]
 
                 # gather pulse ids (batch index, string, dom)
                 x_time_exclusions_ids[tw_index] = [index, string-1, dom-1]
@@ -535,7 +539,7 @@ def get_data_from_frame(self, frame, *args, **kwargs):
             for pulse in pulse_list:
                 index = 0
 
-                # pulse charge: row[self._hdf_charge_index], time: row[self._hdf_time_index]
+                # pulse charge: row[hdf_charge_index], time: row[hdf_time_index]
                 # accumulate charge in DOMs
                 x_dom_charge[index, string-1, dom-1, 0] += pulse.charge
 

From bd3a13957ca032e32c262be6ead665705c79e8fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Thu, 4 Apr 2024 14:21:36 +0200
Subject: [PATCH 04/27] Add new config file for training on MCPEs

---
 configs/cascade_7param_MCPE.yaml | 427 +++++++++++++++++++++++++++++++
 1 file changed, 427 insertions(+)
 create mode 100644 configs/cascade_7param_MCPE.yaml

diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml
new file mode 100644
index 00000000..ef8aa787
--- /dev/null
+++ b/configs/cascade_7param_MCPE.yaml
@@ -0,0 +1,427 @@
+---
+##############
+# Config for single cascade events
+#
+# Detailed information on the parameters are given in the SetupManager
+# class located in egenerator.settings.setup_manager.py.
+##############
+
+# Provide a unique name for the model
+unique_name: 'cascade_7param_MCPE'
+
+
+#------------------
+# Training settings
+#------------------
+training_settings: {
+    'optimizer_name': 'Adam',
+    'optimizer_settings': {
+        # 'amsgrad': True,
+        'learning_rate': {
+            'full_class_string': 'egenerator.utils.learning_rate.MultiLearningRateScheduler',
+            'settings':{
+                'boundaries': [10000, 500000],
+                'scheduler_settings': [
+                    {
+                    'full_class_string': 'tensorflow.keras.optimizers.schedules.PolynomialDecay',
+                    'settings': {
+                        'initial_learning_rate': 0.01,
+                        'end_learning_rate': 0.001,
+                        'decay_steps': 10000,
+                        },
+                    },
+                    {
+                    'full_class_string': 'tensorflow.keras.optimizers.schedules.PolynomialDecay',
+                    'settings': {
+                        'initial_learning_rate': 0.001,
+                        'end_learning_rate': 0.001,
+                        'decay_steps': 490000,
+                        },
+                    },
+                    {
+                    'full_class_string': 'tensorflow.keras.optimizers.schedules.PolynomialDecay',
+                    'settings': {
+                        'initial_learning_rate': 0.001,
+                        'end_learning_rate': 0.000001,
+                        'decay_steps': 500000,
+                        'power': 2,
+                        },
+                    },
+                ]
+            },
+        },
+    },
+    'l1_regularization': 0.,
+    'l2_regularization': 0.,
+    'clip_gradients_value': ,
+    'remove_nan_gradients': False,
+
+    'validation_frequency': 100,
+    'evaluation_frequency': 100,
+    'save_frequency': 500,
+
+    # Number of training iterations to train the model for
+    'num_training_iterations': 1,
+
+    # Additional keywords to the loss module used for training
+    'additional_loss_module_kwargs': {
+        'normalize_by_total_charge': False,
+    },
+}
+
+#----------------.-------
+# Reconstruction settings
+#------------------------
+reconstruction_settings: {
+    'reco_output_file': '../data/reconstruction/{unique_name}/reconstruction_{unique_name}.hdf5',
+
+    # Define which modules to run
+    'calculate_covariance_matrix': False,
+    'calculate_goodness_of_fit': False,
+    'estimate_angular_uncertainty': False,
+    'run_mcmc': False,
+    'make_1d_llh_scans': False,
+
+    # define which tensor to use as parameter_tensor (default: x_parameters)
+    'parameter_tensor_name': 'x_parameters',
+    # define seed (to seed from MC truth choose: 'x_parameters')
+    'seed': 'MonopodFit4_PartialExclusion',
+    # these are only relevant for the SeedLoaderMiscModule. This specifies
+    # from which column names in the data table the parameters will be loaded
+    'seed_parameter_names': ['x', 'y', 'z', 'zenith', 'azimuth',
+                             'energy', 'time',
+                         ],
+    'seed_missing_value': 1.,
+    'seed_missing_value_dict': {
+    },
+    'seed_float_precision': 'float32',
+
+    # choose the otpimizer iterface:
+    #   'scipy' or 'tfp' (tensorflow_probability)
+    'reco_optimizer_interface': 'scipy',
+
+    'scipy_optimizer_settings': {
+        # 'method': 'L-BFGS-B',
+        'method': 'BFGS',
+        'options': {
+            # 'ftol': !!float 1e-7,
+        }
+    },
+
+    'tf_optimizer_settings': {
+        'method': 'bfgs_minimize',
+        'x_tolerance': 0.001,
+    },
+
+    # Perform minimization in transformed and normalized parameter space
+    # if True. This is usually desired as it may facilitate minimization.
+    'minimize_in_trafo_space': True,
+
+    # Specify which parameters to fit.
+    # If True, the parameter is minimized doing reconstruction, otherwise it
+    # is held constant.
+    # Set default value which will apply to all parameters, except if stated
+    # otherwise in the 'minimize_parameter_dict'
+    'minimize_parameter_default_value': True,
+    # Settings defined here overwrite the default value
+    # Entries must have the form: {parameter_name: value}
+    'minimize_parameter_dict': {
+
+    },
+}
+
+#-----------------------
+# Model Manager settings
+#-----------------------
+
+# Settings for model manager class
+model_manager_settings: {
+
+    # The loss module class to use
+    'model_manager_class': 'egenerator.manager.source.SourceManager',
+
+    # restore model if True, otherwise start from scratch
+    'restore_model' : True,
+
+    # These settings are used to configure the model manager and may not change
+    config: {
+
+        # Path to where the manager will be saved to and loaded from
+        'manager_dir': '../data/training/{unique_name}/manager',
+
+        # Define which model parameters to use for the I3Particle
+        # [x, y, z, zenith, azimuth, energy, time]
+        'I3ParticleMapping': {
+            'x': 'cascade_x',
+            'y': 'cascade_y',
+            'z': 'cascade_z',
+            'zenith': 'cascade_zenith',
+            'azimuth': 'cascade_azimuth',
+            'energy': 'cascade_energy',
+            'time': 'cascade_time',
+        },
+    },
+}
+
+#-----------------------
+# Data Iterator settings
+#-----------------------
+
+# These settings describe the data iterators
+data_iterator_settings: {
+
+    # The training data iterator
+    'training': {
+        'batch_size': 32,
+        'num_splits': ,
+        'file_capacity': 10,
+        'batch_capacity': 500,
+        'dataset_capacity': 500,
+        'num_jobs': 10,
+        'num_add_files': 20,
+        'num_repetitions': 3,
+        'input_data': [
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000000*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5',
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000004*.hdf5',       
+        ],
+    },
+
+    # The trafo data iterator
+    'trafo': {
+        'batch_size': 32,
+        'num_splits': ,
+        'file_capacity': 1,
+        'batch_capacity': 2,
+        'num_jobs': 10,
+        'num_add_files': 1,
+        'num_repetitions': 1,
+        'pick_random_files_forever': False,
+        'input_data': [
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000000*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5',
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000004*.hdf5',       
+        ],
+    },
+
+    # The validation data iterator
+    'validation': {
+        'batch_size': 32,
+        'num_splits': ,
+        'file_capacity': 5,
+        'batch_capacity': 10,
+        'dataset_capacity': 10,
+        'num_jobs': 2,
+        'num_add_files': 5,
+        'num_repetitions': 1,
+        'pick_random_files_forever': True,
+        'input_data': [
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000040*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000041*.hdf5',
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000042*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000043*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000044*.hdf5',       
+        ],
+    },
+
+    # The test data iterator
+    'test': {
+        'batch_size': 1,
+        'num_splits': ,
+        'file_capacity': 1,
+        'batch_capacity': 3,
+        'num_jobs': 1,
+        'num_add_files': 0,
+        'num_repetitions': 1,
+        'sample_randomly': False,
+        'pick_random_files_forever': False,
+        'input_data': [
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000045*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000046*.hdf5',
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000046*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000047*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000048*.hdf5',       
+                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000049*.hdf5',       
+        ],
+    },
+}
+
+#---------------------
+# Loss module settings
+#---------------------
+
+# Settings for the loss module class
+loss_module_settings: [
+    {
+        # The loss module class to use
+        'loss_module': 'egenerator.loss.default.DefaultLossModule',
+
+        config: {
+            # the float precision to use
+            'float_precision': 'float32',
+            # Add normalization terms to llh if True
+            'add_normalization_term': True,
+            # choose the loss function to use
+            'loss_function_name': 'unbinned_pulse_time_llh',
+        },
+    },
+    {
+        # The loss module class to use
+        'loss_module': 'egenerator.loss.default.DefaultLossModule',
+
+        config: {
+            # the float precision to use
+            'float_precision': 'float32',
+            # Add normalization terms to llh if True
+            'add_normalization_term': True,
+            # choose the loss function to use
+            'loss_function_name': 'negative_binomial_charge_pdf',
+        },
+    },
+]
+
+#---------------------------
+# Evaluation module settings
+#---------------------------
+
+# Settings for the evaluation module class
+evaluation_module_settings: {
+
+    # The loss module class to use
+    'evaluation_module':,
+    config: {
+    },
+}
+
+#-----------------------------
+# Data Transformation settings
+#-----------------------------
+data_trafo_settings: {
+    'float_precision': 'float64',
+    'norm_constant': !!float 1e-6,
+    'num_batches': 500,
+    'model_dir': '/lustre/fs22/group/radio/felix/data/{unique_name}',
+}
+
+#----------------------
+# Data Handler settings
+#----------------------
+
+# Settings for the data handler class
+data_handler_settings: {
+
+    # The data handler class to use
+    'data_handler': 'modular.ModuleDataHandler',
+
+    # -------------------
+    # DataModule Settings
+    # -------------------
+    # which data module to use
+    'data_module': 'pulse_data.PulseDataModule',
+
+    # settings for the data module
+    'data_settings':{
+        'pulse_key': 'I3MCPESeriesMap',
+        'pulse_is_mcpe': True,
+        'dom_exclusions_key':,
+        'time_exclusions_key':,
+        'float_precision': 'float32',
+        'add_charge_quantiles': False,
+        'discard_pulses_from_excluded_doms': False,
+    },
+
+    # --------------------
+    # LabelModule Settings
+    # --------------------
+    # which label module to use
+    'label_module': 'cascades.CascadeGeneratorLabelModule',
+
+    # settings for the label module
+    'label_settings':{
+        'shift_cascade_vertex': True,
+        # logarithm on labels:
+        # (x, y, z, zenith, azimuth, energy, time)?
+        'trafo_log': [False, False, False, False, False, True, False],
+        'label_key': 'LabelsDeepLearning',
+        'float_precision': 'float32',
+    },
+
+    # ---------------------
+    # WeightModule Settings
+    # ---------------------
+    # which weight module to use
+    'weight_module': 'dummy.DummyWeightModule',
+
+    # settings for the weight module
+    'weight_settings':{},
+
+    # -------------------
+    # MiscModule Settings
+    # -------------------
+    # which misc module to use
+    'misc_module': 'dummy.DummyMiscModule',
+
+    # settings for the misc module
+    'misc_settings':{},
+
+    # -------------------
+    # FilterModule Settings
+    # -------------------
+    # which filter module to use
+    'filter_module': 'general_filter.GeneralFilterModule',
+
+    # settings for the filter module
+    'filter_settings':{
+        'constraints': [
+            # ['LabelsDeepLearning', 'cascade_z', '<', -250.],
+        ],
+    },
+}
+
+#---------------
+# Model settings
+#---------------
+
+# Settings for the neural network model class
+model_settings: {
+
+    # The source class to use
+    'model_class': 'egenerator.model.source.cascade.default.DefaultCascadeModel',
+
+    config: {
+        'keep_prob':,
+        'add_opening_angle': True,
+        'add_dom_coordinates': False,
+        'num_local_vars': 0,
+        'scale_charge': True,
+        'scale_charge_by_relative_dom_efficiency': True,
+        'scale_charge_by_global_dom_efficiency': False,
+        'prevent_mixture_component_swapping': False,
+        'estimate_charge_distribution': 'negative_binomial',
+        'num_latent_models': 10,
+
+        # This is a list of labels in addition to
+        # (x, y, z, zenith, azimuth, energy, time) and snowstorm parameters
+        'additional_label_names' : [
+        ],
+
+        # First convolutions
+        'filter_size_list' : [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]],
+        'num_filters_list' : [25, 500, 500, 500, 42],
+        'method_list' : ['locally_connected',
+                            'convolution', 'convolution', 'convolution',
+                            'convolution',
+                            ],
+        'use_dropout_list' : False,
+        'activation_list' : ['elu', 'elu', 'elu', 'elu', ''],
+        'use_batch_norm_list' : False,
+        'use_residual_list' : True,
+    },
+}
+
+#----------------------
+...

From 0dc96343a56a13a4e5705806b004eca02c9a35f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Thu, 4 Apr 2024 14:28:16 +0200
Subject: [PATCH 05/27] Keep training and testing data independent

---
 configs/cascade_7param_MCPE.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml
index ef8aa787..f30e20fe 100644
--- a/configs/cascade_7param_MCPE.yaml
+++ b/configs/cascade_7param_MCPE.yaml
@@ -185,7 +185,6 @@ data_iterator_settings: {
                 '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5',
                 '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5',       
                 '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000004*.hdf5',       
         ],
     },
 
@@ -204,7 +203,6 @@ data_iterator_settings: {
                 '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5',
                 '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5',       
                 '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000004*.hdf5',       
         ],
     },
 

From 871c5c8a636fd679c2b18ad2a44cefaa0734757e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Thu, 4 Apr 2024 17:26:05 +0200
Subject: [PATCH 06/27] Allow to specify parameter names in config

---
 egenerator/data/modules/labels/cascades.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/egenerator/data/modules/labels/cascades.py b/egenerator/data/modules/labels/cascades.py
index 77316dc0..b30abe41 100644
--- a/egenerator/data/modules/labels/cascades.py
+++ b/egenerator/data/modules/labels/cascades.py
@@ -29,7 +29,9 @@ def __init__(self, logger=None):
         super(CascadeGeneratorLabelModule, self).__init__(logger=logger)
 
     def _configure(self, config_data, shift_cascade_vertex, trafo_log,
-                   float_precision, label_key='LabelsDeepLearning'):
+                   float_precision, label_key='LabelsDeepLearning',
+                   parameter_names=['cascade_x', 'cascade_y', 'cascade_z', 'cascade_zenith',
+                   'cascade_azimuth', 'cascade_energy', 'cascade_t']):
         """Configure Module Class
         This is an abstract method and must be implemented by derived class.
 
@@ -46,10 +48,12 @@ def _configure(self, config_data, shift_cascade_vertex, trafo_log,
             If a single bool is given, this applies to all labels. Otherwise
             a list of bools corresponds to the labels in the order:
                 x, y, z, zenith, azimuth, energy, time
-        label_key : str, optional
-            The name of the key under which the labels are saved.
         float_precision : str
             The float precision as a str.
+        label_key : str, optional
+            The name of the key under which the labels are saved.
+        parameter_names : list of str, optional
+            Name of the parameters (e.g, the columns in the hdf5 Dataset `label_key`)
 
         Returns
         -------
@@ -148,9 +152,8 @@ def get_data_from_hdf(self, file, *args, **kwargs):
         cascade_parameters = []
         try:
             _labels = f[self.configuration.config['label_key']]
-            for l in ['cascade_x', 'cascade_y', 'cascade_z', 'cascade_zenith',
-                      'cascade_azimuth', 'cascade_energy', 'cascade_t']:
-                cascade_parameters.append(_labels[l])
+            for par in self.configuration.config['parameter_names']:
+                cascade_parameters.append(_labels[par])
 
         except Exception as e:
             self._logger.warning(e)
@@ -202,9 +205,8 @@ def get_data_from_frame(self, frame, *args, **kwargs):
         cascade_parameters = []
         try:
             _labels = frame[self.configuration.config['label_key']]
-            for l in ['cascade_x', 'cascade_y', 'cascade_z', 'cascade_zenith',
-                      'cascade_azimuth', 'cascade_energy', 'cascade_t']:
-                cascade_parameters.append(np.atleast_1d(_labels[l]))
+            for par in self.configuration.config['parameter_names']:
+                cascade_parameters.append(np.atleast_1d(_labels[par]))
 
         except Exception as e:
             self._logger.warning(e)

From cc2a8da181a2bca52bafb72e3e0b77faa5ad8d95 Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Thu, 4 Apr 2024 11:13:09 -0500
Subject: [PATCH 07/27] Propagate new argument correctly

---
 egenerator/data/modules/labels/cascades.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/egenerator/data/modules/labels/cascades.py b/egenerator/data/modules/labels/cascades.py
index b30abe41..82c9235b 100644
--- a/egenerator/data/modules/labels/cascades.py
+++ b/egenerator/data/modules/labels/cascades.py
@@ -119,7 +119,9 @@ def _configure(self, config_data, shift_cascade_vertex, trafo_log,
                           shift_cascade_vertex=shift_cascade_vertex,
                           trafo_log=trafo_log,
                           float_precision=float_precision,
-                          label_key=label_key))
+                          label_key=label_key,
+                          parameter_names=parameter_names))
+
         return configuration, data, {}
 
     def get_data_from_hdf(self, file, *args, **kwargs):

From 37abda55163e11dec0ccd4a269031dc8725b3dd7 Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Tue, 9 Apr 2024 09:53:36 -0500
Subject: [PATCH 08/27] Update config for MCPE training

---
 configs/cascade_7param_MCPE.yaml | 82 ++++++++++++--------------------
 1 file changed, 31 insertions(+), 51 deletions(-)

diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml
index f30e20fe..bc507baa 100644
--- a/configs/cascade_7param_MCPE.yaml
+++ b/configs/cascade_7param_MCPE.yaml
@@ -181,10 +181,10 @@ data_iterator_settings: {
         'num_add_files': 20,
         'num_repetitions': 3,
         'input_data': [
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000000*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5',
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5',       
+          '/data/user/fschluter/eg-data/electrons/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_6-9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_10-15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
         ],
     },
 
@@ -194,15 +194,15 @@ data_iterator_settings: {
         'num_splits': ,
         'file_capacity': 1,
         'batch_capacity': 2,
-        'num_jobs': 10,
+        'num_jobs': 12,
         'num_add_files': 1,
         'num_repetitions': 1,
         'pick_random_files_forever': False,
         'input_data': [
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000000*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5',
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5',       
+          '/data/user/fschluter/eg-data/electrons/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_6-9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_10-15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
         ],
     },
 
@@ -218,11 +218,9 @@ data_iterator_settings: {
         'num_repetitions': 1,
         'pick_random_files_forever': True,
         'input_data': [
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000040*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000041*.hdf5',
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000042*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000043*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000044*.hdf5',       
+          '/data/user/fschluter/eg-data/electrons/set_0/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_1/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_2/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
         ],
     },
 
@@ -238,12 +236,9 @@ data_iterator_settings: {
         'sample_randomly': False,
         'pick_random_files_forever': False,
         'input_data': [
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000045*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000046*.hdf5',
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000046*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000047*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000048*.hdf5',       
-                '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000049*.hdf5',       
+          '/data/user/fschluter/eg-data/electrons/set_3/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_16/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/set_17/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
         ],
     },
 }
@@ -253,34 +248,18 @@ data_iterator_settings: {
 #---------------------
 
 # Settings for the loss module class
-loss_module_settings: [
-    {
-        # The loss module class to use
-        'loss_module': 'egenerator.loss.default.DefaultLossModule',
-
-        config: {
-            # the float precision to use
-            'float_precision': 'float32',
-            # Add normalization terms to llh if True
-            'add_normalization_term': True,
-            # choose the loss function to use
-            'loss_function_name': 'unbinned_pulse_time_llh',
-        },
-    },
-    {
-        # The loss module class to use
-        'loss_module': 'egenerator.loss.default.DefaultLossModule',
-
-        config: {
-            # the float precision to use
-            'float_precision': 'float32',
-            # Add normalization terms to llh if True
-            'add_normalization_term': True,
-            # choose the loss function to use
-            'loss_function_name': 'negative_binomial_charge_pdf',
-        },
+loss_module_settings: {
+    # The loss module class to use
+    'loss_module': 'egenerator.loss.default.DefaultLossModule',
+
+    config: {
+        # the float precision to use
+        'float_precision': 'float32',
+
+        # choose the loss function to use
+        'loss_function_name': 'unbinned_extended_pulse_llh',  # this implies a Poisson likelihood for the charge
     },
-]
+}
 
 #---------------------------
 # Evaluation module settings
@@ -301,8 +280,8 @@ evaluation_module_settings: {
 data_trafo_settings: {
     'float_precision': 'float64',
     'norm_constant': !!float 1e-6,
-    'num_batches': 500,
-    'model_dir': '/lustre/fs22/group/radio/felix/data/{unique_name}',
+    'num_batches': 2000,
+    'model_dir': '../data/trafo/{unique_name}',
 }
 
 #----------------------
@@ -343,6 +322,7 @@ data_handler_settings: {
         'shift_cascade_vertex': True,
         # logarithm on labels:
         # (x, y, z, zenith, azimuth, energy, time)?
+        # 'parameter_names': ['x', 'y', 'z', 'zenith', 'azimuth', 'energy', 'time'],
         'trafo_log': [False, False, False, False, False, True, False],
         'label_key': 'LabelsDeepLearning',
         'float_precision': 'float32',
@@ -399,7 +379,7 @@ model_settings: {
         'scale_charge_by_relative_dom_efficiency': True,
         'scale_charge_by_global_dom_efficiency': False,
         'prevent_mixture_component_swapping': False,
-        'estimate_charge_distribution': 'negative_binomial',
+        'estimate_charge_distribution': False,
         'num_latent_models': 10,
 
         # This is a list of labels in addition to
@@ -409,7 +389,7 @@ model_settings: {
 
         # First convolutions
         'filter_size_list' : [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]],
-        'num_filters_list' : [25, 500, 500, 500, 42],
+        'num_filters_list' : [25, 500, 500, 500, 41],  # The last dimension should be 4 * num_asym_gauss + num_charge_pdf; num_asym_gauss = 10,  num_charge_pdf = 1 (poisson) or 2 (binominal)
         'method_list' : ['locally_connected',
                             'convolution', 'convolution', 'convolution',
                             'convolution',

From 10fadf1da7f50eb25a8005aaea11f420a4ef4e58 Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Tue, 9 Apr 2024 10:20:58 -0500
Subject: [PATCH 09/27] Add missing config

---
 configs/cascade_7param_MCPE.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml
index bc507baa..84ea158f 100644
--- a/configs/cascade_7param_MCPE.yaml
+++ b/configs/cascade_7param_MCPE.yaml
@@ -256,6 +256,8 @@ loss_module_settings: {
         # the float precision to use
         'float_precision': 'float32',
 
+        'add_normalization_term': True,
+
         # choose the loss function to use
         'loss_function_name': 'unbinned_extended_pulse_llh',  # this implies a Poisson likelihood for the charge
     },

From 4f4bc0cd842d5b51b83248b70d619ec9794b46be Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Tue, 9 Apr 2024 14:45:34 -0500
Subject: [PATCH 10/27] Set number of iteration to 1M

---
 configs/cascade_7param_MCPE.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml
index 84ea158f..6926f61f 100644
--- a/configs/cascade_7param_MCPE.yaml
+++ b/configs/cascade_7param_MCPE.yaml
@@ -61,7 +61,7 @@ training_settings: {
     'save_frequency': 500,
 
     # Number of training iterations to train the model for
-    'num_training_iterations': 1,
+    'num_training_iterations': 1000000,
 
     # Additional keywords to the loss module used for training
     'additional_loss_module_kwargs': {

From 1d2f4403026f7e309d4e0db47c44a1355b18d573 Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Thu, 11 Apr 2024 09:00:54 -0500
Subject: [PATCH 11/27] Update MCPE config

---
 configs/cascade_7param_MCPE.yaml | 57 +++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 19 deletions(-)

diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml
index 6926f61f..f3b5b76b 100644
--- a/configs/cascade_7param_MCPE.yaml
+++ b/configs/cascade_7param_MCPE.yaml
@@ -178,13 +178,23 @@ data_iterator_settings: {
         'batch_capacity': 500,
         'dataset_capacity': 500,
         'num_jobs': 10,
-        'num_add_files': 20,
+        'num_add_files': 5,
         'num_repetitions': 3,
+        'pick_random_files_forever': False,
         'input_data': [
-          '/data/user/fschluter/eg-data/electrons/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_6-9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_10-15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_3/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_6/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_7/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_8/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_10/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_11/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_12/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_13/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_14/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
         ],
     },
 
@@ -199,10 +209,19 @@ data_iterator_settings: {
         'num_repetitions': 1,
         'pick_random_files_forever': False,
         'input_data': [
-          '/data/user/fschluter/eg-data/electrons/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_6-9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_10-15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_3/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_6/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_7/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_8/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_10/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_11/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_12/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_13/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_14/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
         ],
     },
 
@@ -210,17 +229,17 @@ data_iterator_settings: {
     'validation': {
         'batch_size': 32,
         'num_splits': ,
-        'file_capacity': 5,
+        'file_capacity': 10,
         'batch_capacity': 10,
         'dataset_capacity': 10,
-        'num_jobs': 2,
+        'num_jobs': 10,
         'num_add_files': 5,
         'num_repetitions': 1,
-        'pick_random_files_forever': True,
+        'pick_random_files_forever': False,
         'input_data': [
-          '/data/user/fschluter/eg-data/electrons/set_0/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_1/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_2/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_0/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_1/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_2/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
         ],
     },
 
@@ -229,16 +248,16 @@ data_iterator_settings: {
         'batch_size': 1,
         'num_splits': ,
         'file_capacity': 1,
-        'batch_capacity': 3,
+        'batch_capacity': 10,
         'num_jobs': 1,
         'num_add_files': 0,
         'num_repetitions': 1,
         'sample_randomly': False,
         'pick_random_files_forever': False,
         'input_data': [
-          '/data/user/fschluter/eg-data/electrons/set_3/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_16/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
-          '/data/user/fschluter/eg-data/electrons/set_17/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_16/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_17/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
+          '/data/user/fschluter/eg-data/electrons/unbinned/set_18/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5',
         ],
     },
 }

From 382fd558b1166136bdf6c6e6a8d3627f46941a0a Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Thu, 11 Apr 2024 09:03:48 -0500
Subject: [PATCH 12/27] Do not run validation in the first iteration

---
 egenerator/manager/base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/egenerator/manager/base.py b/egenerator/manager/base.py
index 6857a7b5..aca36b05 100644
--- a/egenerator/manager/base.py
+++ b/egenerator/manager/base.py
@@ -776,7 +776,6 @@ def train(self, config, loss_module, num_training_iterations,
             # increment step counter
             for model in self.models:
                 model.step.assign_add(1)
-
             # get new batch of training data
             training_data_batch = next(train_dataset)
 
@@ -786,7 +785,7 @@ def train(self, config, loss_module, num_training_iterations,
             # --------------------------
             # evaluate on validation set
             # --------------------------
-            if step % opt_config['validation_frequency'] == 0:
+            if step % opt_config['validation_frequency'] == 0 and step:
                 new_validation_time = timeit.default_timer()
                 time_diff = new_validation_time - validation_time
                 validation_time = new_validation_time

From 8d5103546b87ccb08de64267fa2d638859a0d6cc Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Mon, 15 Apr 2024 08:45:51 -0500
Subject: [PATCH 13/27] Add function which evaluates pdf only for one DOM

---
 egenerator/model/source/base.py | 89 +++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/egenerator/model/source/base.py b/egenerator/model/source/base.py
index 2fdf43bb..688de28e 100644
--- a/egenerator/model/source/base.py
+++ b/egenerator/model/source/base.py
@@ -634,6 +634,95 @@ def pdf(self, x, result_tensors,
 
         return pdf_values
 
+    def pdf_per_dom(self, x, result_tensors, string, dom, **kwargs):
+        """Compute PDF values at x for given result_tensors for a specific dom
+
+        This is a numpy, i.e. not tensorflow, method to compute the PDF based
+        on a provided `result_tensors`. This can be used to investigate
+        the generated PDFs.
+
+        Note: this function only works for sources that use asymmetric
+        Gaussians to parameterize the PDF. The latent values of the AG
+        must be included in the `result_tensors`.
+
+        Note: the PDF does not set values inside excluded time windows to zero,
+        but it does adjust the normalization. It is assumed that pulses will
+        already be masked before evaluated by Event-Generator. Therefore, an
+        extra check for exclusions is not performed due to performance issues.
+
+        Parameters
+        ----------
+        x : array_like
+            The times in ns at which to evaluate the result tensors.
+            Shape: () or [n_points]
+        result_tensors : dict of tf.tensor
+            The dictionary of output tensors as obtained from `get_tensors`.
+        string : int
+            String id of the DOM
+        dom : int
+            Module id of the DOM
+        **kwargs
+            Keyword arguments.
+
+        Returns
+        -------
+        array_like
+            The PDF values at times x for the given event hypothesis and
+            exclusions that were used to compute `result_tensors`.
+            Shape: [n_events, 86, 60, n_points]
+
+        Raises
+        ------
+        NotImplementedError
+            If assymetric Gaussian latent variables are not present in
+            `result_tensors` dictionary.
+        """
+
+        x_orig = np.atleast_1d(x)
+        assert len(x_orig.shape) == 1, x_orig.shape
+
+        # shape: [1, 1, n_points]
+        x = np.reshape(x_orig, (1, 1, -1))
+
+        if result_tensors['time_offsets'] is not None:
+            # shape: [n_events]
+            t_offsets = result_tensors['time_offsets'].numpy()
+
+            # shape: [n_events, 1, 1]
+            t_offsets = np.reshape(t_offsets, [-1, 1, 1])
+            # shape: [n_events, 1, n_points]
+            x = x - t_offsets
+
+        # internally we are working with different time units
+        x = x / self.time_unit_in_ns
+
+        # Check if the asymmetric Gaussian latent variables exist
+        for latent_name in ['mu', 'scale', 'sigma', 'r']:
+            if 'latent_var_' + latent_name not in result_tensors:
+                msg = 'PDF evaluation is not supported for this model: {}'
+                raise NotImplementedError(msg.format(
+                    self._untracked_data['name']))
+
+        # extract values
+        # shape: [n_events, n_components, 1]
+        mu = result_tensors['latent_var_mu'].numpy()[:, string, dom, :, np.newaxis]
+        scale = result_tensors['latent_var_scale'].numpy()[:, string, dom, :, np.newaxis]
+        sigma = result_tensors['latent_var_sigma'].numpy()[:, string, dom, :, np.newaxis]
+        r = result_tensors['latent_var_r'].numpy()[:, string, dom, :, np.newaxis]
+
+        # shape: [n_events, n_components, n_points]
+        mixture_pdf = basis_functions.asymmetric_gauss(
+            x=x, mu=mu, sigma=sigma, r=r
+        )
+
+        # shape: [n_events, n_points]
+        pdf_values = np.sum(mixture_pdf * scale, axis=1)
+
+        # invert back to PDF in ns
+        pdf_values = pdf_values / self.time_unit_in_ns
+
+        return np.squeeze(pdf_values)
+
     def _apply_pdf_time_window_exclusions(
             self, times, pdf_values, tw_exclusions, tw_exclusions_ids):
         """Apply time window exclusions

From e0863aced64d7f3d668a7948ea0ca54e5d4d7e82 Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Mon, 15 Apr 2024 08:47:14 -0500
Subject: [PATCH 14/27] Add module to evaluate mcpe eg models

---
 egenerator/ic3/__init__.py      |   2 +
 egenerator/ic3/evaluate_mcpe.py | 803 ++++++++++++++++++++++++++++++++
 2 files changed, 805 insertions(+)
 create mode 100644 egenerator/ic3/evaluate_mcpe.py

diff --git a/egenerator/ic3/__init__.py b/egenerator/ic3/__init__.py
index 9991aff1..a0076019 100644
--- a/egenerator/ic3/__init__.py
+++ b/egenerator/ic3/__init__.py
@@ -1,7 +1,9 @@
 from egenerator.ic3.reconstruction import EventGeneratorReconstruction
 from egenerator.ic3.simulation import EventGeneratorSimulation
+from egenerator.ic3.evaluate_mcpe import CalculateLikelihood
 
 __all__ = [
     'EventGeneratorReconstruction',
     'EventGeneratorSimulation',
+    'CalculateLikelihood',
 ]
diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py
new file mode 100644
index 00000000..1630cc61
--- /dev/null
+++ b/egenerator/ic3/evaluate_mcpe.py
@@ -0,0 +1,803 @@
+import os
+import numpy as np
+import tensorflow as tf
+import timeit
+import math
+
+
+import matplotlib.pyplot as plt
+
+from icecube import dataclasses, icetray, photonics_service
+from icecube.icetray.i3logging import log_info, log_warn
+
+from egenerator.utils.configurator import ManagerConfigurator
+from egenerator.utils import basis_functions
+
+
+def calculate_loss(pred, true):
+    assert true % 1 == 0, "Not a int value"
+    return np.exp(-1 * pred) * pred ** true / math.factorial(true)
+
+
+class CalculateLikelihood(icetray.I3ConditionalModule):
+
+    """Class to evaluate predictions of MCPE with Event-Generator model.
+
+    """
+
+    def __init__(self, context):
+        """Class to evaluate predictions of MCPE with Event-Generator model.
+
+        Parameters
+        ----------
+        context : TYPE
+            Description
+        """
+        icetray.I3ConditionalModule.__init__(self, context)
+        self.AddOutBox('OutBox')
+
+        # Required settings
+        self.AddParameter('model_name',
+                          'A model name that defines which model to '
+                          'apply. The full path is given by `model_base_dir` +'
+                          ' model_name if `model_base_dir` is provided. '
+                          'Otherwise `model_name` must define the full '
+                          'model path.')
+
+        # Optional settings
+        self.AddParameter('default_values',
+                          'Not all parameters of the source hypothesis can '
+                          'be extracted from the I3MCTree. For these '
+                          'parameters, default values may be defined. The '
+                          '`default_values` must be a dictionary of the '
+                          'format `parameter_name`: `value`. `value` may '
+                          'either be a double or a string. If a string is '
+                          'provided, it is assumed that an I3Double exists in '
+                          'the frame under the key as provided by `value`',
+                          {
+                            'Absorption': 1.,
+                            'AnisotropyScale': 1.,
+                            'DOMEfficiency': 1.,
+                            'HoleIceForward_Unified_00': 0.,
+                            'HoleIceForward_Unified_01': 0.,
+                            'Scattering': 1.,
+                          })
+        self.AddParameter('model_base_dir',
+                          'The base directory in which the model is located.'
+                          ' The full path is given by `model_base_dir` + '
+                          ' model_name if `model_base_dir` is provided. '
+                          'Otherwise `model_name` must define the full '
+                          'model path.',
+                          '/data/user/mhuennefeld/exported_models/egenerator')
+        self.AddParameter('output_key',
+                          'The output base key to which pulses will be saved',
+                          'EventGeneratorPrediction')
+        self.AddParameter('mc_tree_name',
+                          'The name of the propagated I3MCTree for which the '
+                          'light yield and measured pulses will be simulated',
+                          'I3MCTree')
+        self.AddParameter('max_simulation_distance',
+                          'Particles further away from the center of the '
+                          'detector than this distance are not predicted',
+                          2250)
+        self.AddParameter('min_simulation_energy',
+                          'Particles below this energy threshold are not '
+                          'predicted.',
+                          100)
+        self.AddParameter('num_threads',
+                          'Number of threads to use for tensorflow. This will '
+                          'be passed on to tensorflow where it is used to set '
+                          'the "intra_op_parallelism_threads" and '
+                          '"inter_op_parallelism_threads" settings. If a '
+                          'value of zero (default) is provided, the system '
+                          'uses an appropriate number. Note: when running '
+                          'this as a job on a cluster you might want to limit '
+                          '"num_threads" to the amount of allocated CPUs.',
+                          0)
+        self.AddParameter('random_service',
+                          'The random service or seed to use. If this is an '
+                          'integer, a numpy random state will be created with '
+                          'the seed set to `random_service`',
+                          42)
+        self.AddParameter('SimulateElectronDaughterParticles',
+                          'If true, look for daughter particles of an electron and '
+                          'simulate their light yield rather than from the '
+                          'mother electron. This is necessary to correctly '
+                          'simulate the cascade longitudinal extension of '
+                          'high energy cascades.',
+                          False)
+        self.AddParameter('AddDarkDOMs', '', True)
+
+        self.AddParameter('CascadeService',
+                          '',
+                          None)
+
+
+    def Configure(self):
+        """Configures Module and loads model from file.
+        """
+        self.model_name = self.GetParameter('model_name')
+        self.default_values = self.GetParameter('default_values')
+        self.model_base_dir = self.GetParameter('model_base_dir')
+        self.output_key = self.GetParameter('output_key')
+        self.mc_tree_name = self.GetParameter('mc_tree_name')
+        self.max_simulation_distance = \
+            self.GetParameter('max_simulation_distance')
+        self.min_simulation_energy = self.GetParameter('min_simulation_energy')
+        self.num_threads = self.GetParameter('num_threads')
+        self.random_service = self.GetParameter('random_service')
+        self.simulate_electron_daughters = self.GetParameter('SimulateElectronDaughterParticles')
+        self.add_dark_doms = self.GetParameter("AddDarkDOMs")
+
+        if isinstance(self.random_service, int):
+            self.random_service = np.random.RandomState(self.random_service)
+
+        if self.model_base_dir is not None:
+            self.model_dir = os.path.join(self.model_base_dir, self.model_name)
+        else:
+            self.model_dir = self.model_name
+
+        # --------------------------------------------------
+        # Build and configure SourceManager and extrac Model
+        # --------------------------------------------------
+        self.manager_configurator = ManagerConfigurator(
+            manager_dirs=[self.model_dir],
+            num_threads=self.num_threads,
+        )
+        self.manager = self.manager_configurator.manager
+
+        for model in self.manager.models:
+            num_vars, num_total_vars = model.num_variables
+            msg = '\nNumber of Model Variables:\n'
+            msg += '\tFree: {}\n'
+            msg += '\tTotal: {}'
+            log_info(msg.format(num_vars, num_total_vars))
+
+        if len(self.manager.models) > 1:
+            raise NotImplementedError(
+                'Currently does not support model ensemble.')
+
+        self.model = self.manager.models[0]
+
+        # get parameter names that have to be set
+        self.param_names = sorted([n for n in self.model.parameter_names
+                                   if n not in self.default_values])
+
+        # make sure that the only parameters that need to be set are provided
+        included_parameters = [
+                'azimuth', 'energy', 'time', 'x', 'y', 'z', 'zenith']
+
+        # search if there is a common prefix that we can use
+        prefix_list = []
+        for name in included_parameters:
+            if name in self.param_names:
+                prefix_list.append('')
+            else:
+                found_match = False
+                for param in self.param_names:
+                    if param[-len(name):] == name:
+                        # only allow prefixes ending on '_'
+                        if param[-len(name)-1] == '_':
+                            prefix_list.append(param[:-len(name)])
+                            found_match = True
+                if not found_match:
+                    msg = (
+                        'Did not find a parameter name match for "{}". Model '
+                        'Parameter names are: {}'
+                    ).format(name, self.param_names)
+                    raise ValueError(msg)
+
+        prefix_list = np.unique(prefix_list)
+        if len(prefix_list) != 1:
+            msg = 'Could not find common parameter prefix. Found: {}'.format(
+                prefix_list)
+            raise ValueError(msg)
+
+        self._prefix = prefix_list[0]
+
+        # double check that the prefix now works
+        for name in self.param_names:
+            if name[len(self._prefix):] not in included_parameters:
+                raise KeyError('Unknown parameter name:', name)
+
+        # Create concrete tensorflow function to obtain DOM expectations
+        self.param_dtype = getattr(
+            tf, self.manager.data_trafo.data['tensors']['x_parameters'].dtype)
+        self.get_model_tensors = self.manager.get_model_tensors_function()
+
+        # ---------------------------------------------------
+        # Define which particles are simulated by which Model
+        # ---------------------------------------------------
+
+        # define allowed parent particles
+        # These are particles which we can safely simulate by only looking at
+        # their daughters
+        self.allowed_parent_particles = [
+            dataclasses.I3Particle.NuE,
+            dataclasses.I3Particle.NuMu,
+            dataclasses.I3Particle.NuTau,
+            dataclasses.I3Particle.NuEBar,
+            dataclasses.I3Particle.NuMuBar,
+            dataclasses.I3Particle.NuTauBar,
+            dataclasses.I3Particle.Hadrons,
+        ]
+        if self.simulate_electron_daughters:
+            self.allowed_parent_particles += [
+                dataclasses.I3Particle.EMinus,
+                dataclasses.I3Particle.EPlus,
+            ]
+
+        # Define type of particles that can be simulated as EM cascades
+        self.em_cascades = [
+            dataclasses.I3Particle.Pi0,
+            dataclasses.I3Particle.Gamma,
+            dataclasses.I3Particle.PairProd,
+            dataclasses.I3Particle.Brems,
+            dataclasses.I3Particle.DeltaE,
+            dataclasses.I3Particle.EMinus,
+            dataclasses.I3Particle.EPlus,
+        ]
+
+        # Define type of particles that can be simulated as tracks
+        self.tracks = [
+            dataclasses.I3Particle.MuMinus,
+            dataclasses.I3Particle.MuPlus,
+        ]
+
+        # define particles that do not deposit light, in other words
+        # if we end up with a final state particle of this type it is ok
+        # not to simulate the light yield for these particles.
+        self.dark_particles = [
+            dataclasses.I3Particle.NuE,
+            dataclasses.I3Particle.NuMu,
+            dataclasses.I3Particle.NuTau,
+            dataclasses.I3Particle.NuEBar,
+            dataclasses.I3Particle.NuMuBar,
+            dataclasses.I3Particle.NuTauBar,
+        ]
+
+        self.cascade_service = self.GetParameter('CascadeService')
+
+        if self.cascade_service is not None:
+            self.photonics = Photonics(self.cascade_service)
+
+        self._counter = 0
+
+    def DAQ(self, frame):
+        """Apply Event-Generator model to physics frames.
+
+        Parameters
+        ----------
+        frame : I3Frame
+            The current Q-Frame.
+        """
+
+        self._counter += 1
+
+        # if self._counter >= 5:
+        #     return
+
+        # start timer
+        t_0 = timeit.default_timer()
+
+        # get sources
+        cascades, tracks = self.get_light_sources(frame[self.mc_tree_name])
+
+        if len(tracks) > 0:
+            raise NotImplementedError('Tracks not yet supported')
+
+        # convert cascade to source hypotheses
+        cascade_sources = self.convert_cascades_to_tensor(cascades, frame)
+
+        # timer after source collection
+        t_1 = timeit.default_timer()
+
+        # get result tensors from model
+        result_tensors = self.get_model_tensors(cascade_sources)
+
+        # timer after NN evaluation
+        t_2 = timeit.default_timer()
+
+        # sample pdf
+        self.sample_from_pdf(frame, result_tensors, validate_photonics=self.cascade_service is not None)
+
+        if self._counter % 10 == 0:
+            self.plot_pdfs(frame, result_tensors, validate_photonics=self.cascade_service is not None)
+
+
+        # timer after Sampling
+        t_3 = timeit.default_timer()
+
+        log_info('Simulation took: {:3.3f}ms'.format((t_3 - t_0) * 1000))
+        log_info('\t Gathering Sources: {:3.3f}ms'.format((t_1 - t_0) * 1000))
+        log_info('\t Evaluating NN model: {:3.3f}ms'.format((t_2 - t_1) * 1000))
+        log_info('\t Sampling Pulses: {:3.3f}ms'.format((t_3 - t_2) * 1000))
+
+        # push frame to next modules
+        self.PushFrame(frame)
+
+
+    def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
+        total_charge = result_tensors['dom_charges'].numpy()
+        eps = 1e-7
+        mcpe_series_map = frame["I3MCPESeriesMap"]
+
+        loss_event_generator = dataclasses.I3MapKeyDouble()
+        loss_event_generator_total = 0
+
+        if validate_photonics:
+            loss_photonics = dataclasses.I3MapKeyDouble()
+            loss_photonics_total = 0
+            geo = frame["I3Geometry"]
+            particle = frame["I3MCTree"].get_head()
+
+        for string in range(86):
+            for om in range(60):
+                omkey = icetray.OMKey(string + 1, om + 1)
+
+
+                if omkey in mcpe_series_map:
+                    pulse_series = mcpe_series_map[omkey]
+
+                    dom_charges_true = np.array([pulse.npe for pulse in pulse_series])
+                    pulse_log_pdf_values = np.log([self.model.pdf_per_dom(pulse.time, result_tensors=result_tensors,
+                                                    string=string, dom=om) + eps for pulse in pulse_series])
+
+                    dom_charges_pred = total_charge[0, string, om, 0]
+
+                    time_log_likelihood = -dom_charges_true * pulse_log_pdf_values
+                    llh_poisson = dom_charges_pred - np.sum(dom_charges_true) * np.log(dom_charges_pred + eps)
+
+                    loss = np.sum(time_log_likelihood) + np.sum(llh_poisson)
+
+                    loss_event_generator_total += loss
+                    loss_event_generator[omkey] = loss
+
+                    if validate_photonics:
+                        position = geo.omgeo[omkey].position
+                        # for pulse in pulse_series:
+                        table_pdf, dom_charges_pred = self.photonics.get_pdf(particle, position, [pulse.time for pulse in pulse_series])
+
+                        time_log_likelihood = -dom_charges_true * np.log(np.array(table_pdf) + eps)
+                        llh_poisson = dom_charges_pred - np.sum(dom_charges_true) * np.log(dom_charges_pred + eps)
+
+                        loss = np.sum(time_log_likelihood) + np.sum(llh_poisson)
+
+                        loss_photonics_total += loss
+                        loss_photonics[omkey] = loss
+
+                elif self.add_dark_doms:
+                    expected_charge = total_charge[0, string, om, 0]
+
+                    loss = expected_charge - 0 * np.log(expected_charge + 1e-7)
+                    loss_event_generator_total += loss
+                    loss_event_generator[omkey] = loss
+
+                    if validate_photonics:
+                        position = geo.omgeo[omkey].position
+                        expected_charge, _ = self.photonics.get_PE(particle, position)
+
+                        loss = expected_charge - 0 * np.log(expected_charge + 1e-7)
+                        loss_photonics_total += loss
+                        loss_photonics[omkey] = loss
+
+                else:
+                    pass
+
+        frame.Put("EventGeneratorLoss", loss_event_generator)
+        frame.Put("EventGeneratorLossTotal", dataclasses.I3Double(loss_event_generator_total))
+
+        if validate_photonics:
+            frame.Put("PhotonicsLoss", loss_photonics)
+            frame.Put("PhotonicsLossTotal", dataclasses.I3Double(loss_photonics_total))
+
+        # t0 = timeit.default_timer()
+        # pdf_values = self.model.pdf(x_times, result_tensors=result_tensors)
+        # t1 = timeit.default_timer()
+        # print(f"Predict PDF EG: {t1 - t0:.2f}s")
+
+        # assert pdf_values.shape[0] == 1, "PDF result has unexpected shape"
+        # pdf_values = pdf_values[0]
+        # dt = x_times[1] - x_times[0]
+        # pdf_values = pdf_values * dt  # For normalisation
+
+        # pulse_series_map = dataclasses.I3RecoPulseSeriesMap()
+
+        # if self.cascade_service is not None:
+        #     pulse_series_map_photonics = dataclasses.I3RecoPulseSeriesMap()
+
+        # total_charge = result_tensors['dom_charges'].numpy()
+        # assert total_charge.shape[0] == 1, "Total charge has unexpected shape"
+        # total_charge = np.squeeze(total_charge[0])
+
+        # geo = frame["I3Geometry"]
+
+        # # walk through DOMs
+        # for string in range(86):
+        #     for om in range(60):
+        #         om_key = icetray.OMKey(string+1, om+1)
+        #         position = geo.omgeo[om_key].position
+
+        #         if self.cascade_service is not None:
+
+        #             table_pdf, table_charge = self.photonics.get_pdf(frame["I3MCTree"].get_head(), position, x_times)
+        #             if table_charge > 0.1:
+        #                 pulse_series_pht = dataclasses.I3RecoPulseSeries()
+        #                 for t, charge in zip(x_times, table_pdf * table_charge):
+        #                     pulse = dataclasses.I3RecoPulse()
+        #                     if charge <= 0:
+        #                         continue
+        #                     pulse.time = t
+        #                     pulse.charge = charge
+        #                     pulse_series_pht.append(pulse)
+
+        #                 pulse_series_map_photonics[om_key] = pulse_series_pht
+
+        #         if total_charge[string, om] > 0.1:
+        #             # create pulse series
+        #             pulse_series = dataclasses.I3RecoPulseSeries()
+        #             charge_distribution = pdf_values[string, om] * total_charge[string, om]
+        #             for t, charge in zip(x_times, charge_distribution):
+        #                 if charge <= 0:
+        #                     continue
+        #                 pulse = dataclasses.I3RecoPulse()
+        #                 pulse.time = t
+        #                 pulse.charge = charge
+        #                 pulse_series.append(pulse)
+
+        #             # add to pulse series map
+        #             pulse_series_map[om_key] = pulse_series
+
+        # if self.cascade_service is not None:
+        #     frame["Photonics_Prediction"] = pulse_series_map_photonics
+
+
+    def plot_pdfs(self, frame, result_tensors, validate_photonics=False):
+
+        total_charge = result_tensors['dom_charges'].numpy()
+        mcpe_series_map = frame["I3MCPESeriesMap"]
+
+        if validate_photonics:
+            geo = frame["I3Geometry"]
+            particle = frame["I3MCTree"].get_head()
+
+        hom = None
+        max_charge = 0
+
+        for omkey, pulses in mcpe_series_map:
+            tot_charge = np.sum(p.npe for p in pulses)
+
+            if tot_charge > max_charge:
+                max_charge = tot_charge
+                hom = omkey
+
+        if hom is None:
+            return
+
+        string = hom.string - 1
+        om = hom.om - 1
+
+        times = np.arange(0, 1000, 5)
+        dt = np.diff(times)[0]
+        ctime = times[:-1] + dt / 2
+
+        pulse_log_pdf_values = np.array([self.model.pdf_per_dom(time, result_tensors=result_tensors,
+                                         string=string, dom=om) for time in ctime]) * dt
+        tot_charge = total_charge[0, string, om, 0]
+        fig, ax = plt.subplots()
+        ax.hist([p.time for p in mcpe_series_map[hom]], times, weights=[p.npe for p in mcpe_series_map[hom]], alpha=0.5, color="k", label="MC")
+
+        ax.plot(ctime, pulse_log_pdf_values * tot_charge, label="EventGenerator", lw=1)
+
+        if validate_photonics:
+            geo = frame["I3Geometry"]
+            particle = frame["I3MCTree"].get_head()
+            position = geo.omgeo[hom].position
+            table_pdf, dom_charges_pred = self.photonics._get_pdf2(particle, position, times)
+
+            ax.plot(ctime, np.array(table_pdf) * dom_charges_pred, label="Photonics", lw=1, ls=":")
+
+
+        ax.set_xlabel("time / ns")
+        ax.set_ylabel("npe")
+        ax.legend()
+        ax.grid()
+
+        plt.tight_layout()
+        plt.savefig(f"pdf_{self._counter}.png")
+
+
+
+    def convert_cascades_to_tensor(self, cascades, frame):
+        """Convert a list of cascades to a source parameter tensor.
+
+        Parameters
+        ----------
+        cascades : list of I3Particles
+            The list of cascade I3Particles.
+
+        frame : I3Frame
+            Necessary to get additional parameter from frame.
+
+        Returns
+        -------
+        tf.Tensor
+            The source hypothesis paramter tensor.
+        """
+        # create parameter array
+        parameters = np.empty([len(cascades), self.model.num_parameters])
+
+        # Fill default values
+        for name, value in self.default_values.items():
+
+            # assume that a I3Double exists in frame
+            if isinstance(value, str):
+                value = frame[value].value
+
+            # assign values
+            parameters[:, self.model.get_index(name)] = value
+
+        # now fill cascade parameters (x, y, z, zenith, azimuth, E, t)
+        for i, cascade in enumerate(cascades):
+            for name in self.param_names:
+
+                # remove prefix
+                name = name[len(self._prefix):]
+
+                if name in ['x', 'y', 'z']:
+                    value = getattr(cascade.pos, name)
+                elif name in ['azimuth', 'zenith']:
+                    value = getattr(cascade.dir, name)
+                else:
+                    value = getattr(cascade, name)
+
+                index = self.model.get_index(self._prefix + name)
+                parameters[i, index] = value
+
+        return tf.convert_to_tensor(parameters, dtype=self.param_dtype)
+
+    def _get_light_sources(self, mc_tree, parent):
+        """Get a list of cascade and track light sources from an I3MCTree
+
+        Recursive helper function for the method `get_light_sources`.
+
+        Parameters
+        ----------
+        mc_tree : I3MCTree
+            The I3MCTree from which to get the light sources
+
+        Returns
+        -------
+        array_like
+            The list of cascades.
+        array_like
+            The list of tracks.
+        """
+
+        # Stopping condition for recursion: either found a track or cascade
+        if (parent.energy < self.min_simulation_energy or
+                parent.pos.magnitude - parent.length
+                > self.max_simulation_distance):
+            return [], []
+
+        if parent.type in self.em_cascades:
+            if self.simulate_electron_daughters and len(mc_tree.get_daughters(parent)):
+                pass # get light sourced from daughters
+            else:
+                return [parent], []
+
+        elif parent.type in self.tracks:
+            return [], [parent]
+
+        cascades = []
+        tracks = []
+
+        # need further recursion
+        daughters = mc_tree.get_daughters(parent)
+
+        # check if we have a branch that we can't simulate
+        if len(daughters) == 0 and parent.type not in self.dark_particles:
+            log_warn(f"Particle: {parent}")
+            log_warn(f"Tree: {mc_tree}")
+            raise NotImplementedError(
+                'Particle can not be simulated: ', parent.type)
+
+        if len(daughters) > 0:
+            if parent.type not in self.allowed_parent_particles:
+                raise ValueError('Unknown parent type:', parent.type)
+
+        for daughter in daughters:
+
+            # get list of cascades and tracks
+            cascades_i, tracks_i = self._get_light_sources(mc_tree, daughter)
+
+            # extend lists
+            cascades.extend(cascades_i)
+            tracks.extend(tracks_i)
+
+        return cascades, tracks
+
+    def get_light_sources(self, mc_tree):
+        """Get a list of cascade and track light sources from an I3MCTree
+
+        This method recursively walks through the I3MCTree by looking at
+        daughter particles. All daughter particles are collected, that need
+        to be simulated.
+
+        Parameters
+        ----------
+        mc_tree : I3MCTree
+            The I3MCTree from which to get the light sources
+
+        Returns
+        -------
+        array_like
+            The list of cascades.
+        array_like
+            The list of tracks.
+        """
+
+        cascades = []
+        tracks = []
+        for primary in mc_tree.get_primaries():
+
+            # get list of cascades and tracks
+            cascades_i, tracks_i = self._get_light_sources(mc_tree, primary)
+
+            # extend lists
+            cascades.extend(cascades_i)
+            tracks.extend(tracks_i)
+
+        return cascades, tracks
+
+
+
+def convert_params_to_particle(params):
+    """
+    Converts a list of parameters defining a source into a I3Particle
+
+    Parameters
+    ----------
+
+    params: list
+        List of parameters defining a source: [x, y, z, zenith, azimuth, energy, time]
+
+    Returns
+    -------
+
+    cascade: icecube.dataclasses.I3Particle
+        Particle describing a cascade
+    """
+    cascade = dataclasses.I3Particle()
+
+    cascade.pos = dataclasses.I3Position(params[0], params[1], params[2])
+    cascade.dir = dataclasses.I3Direction(params[3], params[4])
+
+    cascade.fit_status == dataclasses.I3Particle.NotSet
+    cascade.location_type = dataclasses.I3Particle.InIce
+    cascade.time = 0
+
+    # set particle type
+    cascade.type = dataclasses.I3Particle.ParticleType.EMinus
+
+    # set energy for each particle
+    cascade.energy = params[5] * I3Units.GeV
+
+    return cascade
+
+
+class Photonics(object):
+    """
+    Little wrapper around photonics_service
+    """
+
+    def __init__(self, cascadePhotonicsService=None):
+
+        if cascadePhotonicsService is None:
+
+            if os.environ.get('I3_DATA') is None or os.environ.get('I3_DATA') == "":
+                raise ValueError("The environment variable \"I3_DATA\" "
+                                 "is not set. Are you in an IceTray environment?")
+
+            table_base = os.path.expandvars('$I3_DATA/photon-tables/splines/ems_spice1_z20_a10.%s.fits')
+            cascadePhotonicsService = photonics_service.I3PhotoSplineService(
+                table_base % 'abs', table_base % 'prob', timingSigma=0)
+
+        self.cascade_pxs = cascadePhotonicsService
+
+
+    def _get_PE(self, source, position):
+        """ Returns the number of photons and geometrical distance for a source & reciever pair """
+
+        if not isinstance(source, dataclasses.I3Particle):
+            source = convert_params_to_particle(source)
+
+        if source.energy == 0:
+            return 0, 0
+
+        self.cascade_pxs.SelectModuleCoordinates(*position)
+        pes, dist, _ = self.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(source))
+        if pes < 0:
+            pes = 0
+
+        return pes, dist
+
+
+    def get_PE(self, sources, position):
+        """ Wrapper around self._get_PE to allow providing a list of sources """
+
+        if isinstance(sources, dataclasses.I3Particle):
+            return self._get_PE(sources, position)
+
+        elif isinstance(sources, list) and isinstance(sources[0], float):
+            return self._get_PE(sources, position)
+
+        elif isinstance(sources, list):
+            output = np.array([self._get_PE(source, position) for source in sources])
+            pe_sum = np.sum(output[:, 0])
+            mask = output[:, 0] > 0
+            if not np.any(mask):
+                return 0, 0
+            return pe_sum, np.amin(output[mask, 1])
+        else:
+            raise TypeError(f"The type of sources \"{type(sources)}\" is not supported.")
+
+
+    def _get_pdf(self, source, position, times):
+
+        if not isinstance(source, dataclasses.I3Particle):
+            source = convert_params_to_particle(source)
+
+        if source.energy == 0:
+            return np.zeros_like(times), 0
+
+        self.cascade_pxs.SelectModuleCoordinates(*position)
+        pes, _, t0 = self.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(source))
+        if pes <= 0:
+            return np.zeros_like(times), 0
+
+        pdf = [self.cascade_pxs.GetProbabilityDensity(t0 + t) for t in times]
+
+        # quantiles = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0)  # Reading charge distribution (normalized to 1)
+        # print(pdf, quantiles)
+
+        return pdf, pes
+
+
+    def _get_pdf2(self, source, position, times):
+
+        if not isinstance(source, dataclasses.I3Particle):
+            source = convert_params_to_particle(source)
+
+        if source.energy == 0:
+            return np.zeros_like(times), 0
+
+        self.cascade_pxs.SelectModuleCoordinates(*position)
+        pes, _, t0 = self.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(source))
+        if pes <= 0:
+            return np.zeros_like(times), 0
+
+        quantiles = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0)  # Reading charge distribution (normalized to 1)
+
+        return quantiles, pes
+
+
+    def get_pdf(self, sources, position, t):
+        if isinstance(sources, dataclasses.I3Particle):
+            return self._get_pdf(sources, position, t)
+
+        elif isinstance(sources, list) and isinstance(sources[0], float):
+            return self._get_pdf(sources, position, t)
+
+        else:
+            pdf_tot = np.zeros_like(t)
+            pdfs = []
+            nps_tot = 0
+            for source in sources:
+                pdf, nps = self._self._get_pdf(source, position, t)
+                pdfs.append(pdf)
+                pdf_tot += pdf * nps
+                nps_tot += nps
+
+            pdfs.append(pdf_tot / nps_tot)
+            return pdfs
\ No newline at end of file

From 3229861d5903728b3b68ef35874a200b4338d4e6 Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Tue, 16 Apr 2024 10:38:53 -0500
Subject: [PATCH 15/27] Clean up. Add correct time call to photonics function

---
 egenerator/ic3/evaluate_mcpe.py | 324 +++++---------------------------
 1 file changed, 45 insertions(+), 279 deletions(-)

diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py
index 1630cc61..835c4fc7 100644
--- a/egenerator/ic3/evaluate_mcpe.py
+++ b/egenerator/ic3/evaluate_mcpe.py
@@ -12,14 +12,14 @@
 
 from egenerator.utils.configurator import ManagerConfigurator
 from egenerator.utils import basis_functions
-
+from egenerator.ic3.simulation import EventGeneratorSimulation
 
 def calculate_loss(pred, true):
     assert true % 1 == 0, "Not a int value"
     return np.exp(-1 * pred) * pred ** true / math.factorial(true)
 
 
-class CalculateLikelihood(icetray.I3ConditionalModule):
+class CalculateLikelihood(EventGeneratorSimulation):
 
     """Class to evaluate predictions of MCPE with Event-Generator model.
 
@@ -356,7 +356,9 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
                     if validate_photonics:
                         position = geo.omgeo[omkey].position
                         # for pulse in pulse_series:
-                        table_pdf, dom_charges_pred = self.photonics.get_pdf(particle, position, [pulse.time for pulse in pulse_series])
+                        table_pdf, dom_charges_pred = self.photonics.get_pdf(
+                            particle, position, [pulse.time for pulse in pulse_series],
+                            quantiles=False)
 
                         time_log_likelihood = -dom_charges_true * np.log(np.array(table_pdf) + eps)
                         llh_poisson = dom_charges_pred - np.sum(dom_charges_true) * np.log(dom_charges_pred + eps)
@@ -391,66 +393,6 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
             frame.Put("PhotonicsLoss", loss_photonics)
             frame.Put("PhotonicsLossTotal", dataclasses.I3Double(loss_photonics_total))
 
-        # t0 = timeit.default_timer()
-        # pdf_values = self.model.pdf(x_times, result_tensors=result_tensors)
-        # t1 = timeit.default_timer()
-        # print(f"Predict PDF EG: {t1 - t0:.2f}s")
-
-        # assert pdf_values.shape[0] == 1, "PDF result has unexpected shape"
-        # pdf_values = pdf_values[0]
-        # dt = x_times[1] - x_times[0]
-        # pdf_values = pdf_values * dt  # For normalisation
-
-        # pulse_series_map = dataclasses.I3RecoPulseSeriesMap()
-
-        # if self.cascade_service is not None:
-        #     pulse_series_map_photonics = dataclasses.I3RecoPulseSeriesMap()
-
-        # total_charge = result_tensors['dom_charges'].numpy()
-        # assert total_charge.shape[0] == 1, "Total charge has unexpected shape"
-        # total_charge = np.squeeze(total_charge[0])
-
-        # geo = frame["I3Geometry"]
-
-        # # walk through DOMs
-        # for string in range(86):
-        #     for om in range(60):
-        #         om_key = icetray.OMKey(string+1, om+1)
-        #         position = geo.omgeo[om_key].position
-
-        #         if self.cascade_service is not None:
-
-        #             table_pdf, table_charge = self.photonics.get_pdf(frame["I3MCTree"].get_head(), position, x_times)
-        #             if table_charge > 0.1:
-        #                 pulse_series_pht = dataclasses.I3RecoPulseSeries()
-        #                 for t, charge in zip(x_times, table_pdf * table_charge):
-        #                     pulse = dataclasses.I3RecoPulse()
-        #                     if charge <= 0:
-        #                         continue
-        #                     pulse.time = t
-        #                     pulse.charge = charge
-        #                     pulse_series_pht.append(pulse)
-
-        #                 pulse_series_map_photonics[om_key] = pulse_series_pht
-
-        #         if total_charge[string, om] > 0.1:
-        #             # create pulse series
-        #             pulse_series = dataclasses.I3RecoPulseSeries()
-        #             charge_distribution = pdf_values[string, om] * total_charge[string, om]
-        #             for t, charge in zip(x_times, charge_distribution):
-        #                 if charge <= 0:
-        #                     continue
-        #                 pulse = dataclasses.I3RecoPulse()
-        #                 pulse.time = t
-        #                 pulse.charge = charge
-        #                 pulse_series.append(pulse)
-
-        #             # add to pulse series map
-        #             pulse_series_map[om_key] = pulse_series
-
-        # if self.cascade_service is not None:
-        #     frame["Photonics_Prediction"] = pulse_series_map_photonics
-
 
     def plot_pdfs(self, frame, result_tensors, validate_photonics=False):
 
@@ -477,25 +419,33 @@ def plot_pdfs(self, frame, result_tensors, validate_photonics=False):
         string = hom.string - 1
         om = hom.om - 1
 
-        times = np.arange(0, 1000, 5)
+        if validate_photonics:
+            geo = frame["I3Geometry"]
+            particle = frame["I3MCTree"].get_head()
+            position = geo.omgeo[hom].position
+            self.photonics.cascade_pxs.SelectModuleCoordinates(*position)
+            _, _, t0 = self.photonics.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(particle))
+        else:
+            t0 = 0
+
+        times = np.arange(t0 - 50, t0 + 300, 5)
         dt = np.diff(times)[0]
         ctime = times[:-1] + dt / 2
 
-        pulse_log_pdf_values = np.array([self.model.pdf_per_dom(time, result_tensors=result_tensors,
-                                         string=string, dom=om) for time in ctime]) * dt
-        tot_charge = total_charge[0, string, om, 0]
         fig, ax = plt.subplots()
-        ax.hist([p.time for p in mcpe_series_map[hom]], times, weights=[p.npe for p in mcpe_series_map[hom]], alpha=0.5, color="k", label="MC")
+        charges_mc = [p.npe for p in mcpe_series_map[hom]]
+        ax.hist([p.time for p in mcpe_series_map[hom]], times,
+                weights=charges_mc, alpha=0.5, color="k", label=f"MC {np.sum(charges_mc)}")
 
-        ax.plot(ctime, pulse_log_pdf_values * tot_charge, label="EventGenerator", lw=1)
+        eg_pdf_values = np.squeeze(self.model.get_probability_quantiles_per_dom(times, result_tensors=result_tensors,
+                                         string=string, dom=om))
+        tot_charge = total_charge[0, string, om, 0]
+        ax.plot(ctime, eg_pdf_values * tot_charge, label=f"EventGenerator {tot_charge:.2f}", lw=1)
 
         if validate_photonics:
-            geo = frame["I3Geometry"]
-            particle = frame["I3MCTree"].get_head()
-            position = geo.omgeo[hom].position
-            table_pdf, dom_charges_pred = self.photonics._get_pdf2(particle, position, times)
 
-            ax.plot(ctime, np.array(table_pdf) * dom_charges_pred, label="Photonics", lw=1, ls=":")
+            table_pdf, dom_charges_pred = self.photonics.get_pdf(particle, position, times)
+            ax.plot(ctime, np.array(table_pdf) * dom_charges_pred, label=f"Photonics {dom_charges_pred:.2f}", lw=1, ls=":")
 
 
         ax.set_xlabel("time / ns")
@@ -507,185 +457,6 @@ def plot_pdfs(self, frame, result_tensors, validate_photonics=False):
         plt.savefig(f"pdf_{self._counter}.png")
 
 
-
-    def convert_cascades_to_tensor(self, cascades, frame):
-        """Convert a list of cascades to a source parameter tensor.
-
-        Parameters
-        ----------
-        cascades : list of I3Particles
-            The list of cascade I3Particles.
-
-        frame : I3Frame
-            Necessary to get additional parameter from frame.
-
-        Returns
-        -------
-        tf.Tensor
-            The source hypothesis paramter tensor.
-        """
-        # create parameter array
-        parameters = np.empty([len(cascades), self.model.num_parameters])
-
-        # Fill default values
-        for name, value in self.default_values.items():
-
-            # assume that a I3Double exists in frame
-            if isinstance(value, str):
-                value = frame[value].value
-
-            # assign values
-            parameters[:, self.model.get_index(name)] = value
-
-        # now fill cascade parameters (x, y, z, zenith, azimuth, E, t)
-        for i, cascade in enumerate(cascades):
-            for name in self.param_names:
-
-                # remove prefix
-                name = name[len(self._prefix):]
-
-                if name in ['x', 'y', 'z']:
-                    value = getattr(cascade.pos, name)
-                elif name in ['azimuth', 'zenith']:
-                    value = getattr(cascade.dir, name)
-                else:
-                    value = getattr(cascade, name)
-
-                index = self.model.get_index(self._prefix + name)
-                parameters[i, index] = value
-
-        return tf.convert_to_tensor(parameters, dtype=self.param_dtype)
-
-    def _get_light_sources(self, mc_tree, parent):
-        """Get a list of cascade and track light sources from an I3MCTree
-
-        Recursive helper function for the method `get_light_sources`.
-
-        Parameters
-        ----------
-        mc_tree : I3MCTree
-            The I3MCTree from which to get the light sources
-
-        Returns
-        -------
-        array_like
-            The list of cascades.
-        array_like
-            The list of tracks.
-        """
-
-        # Stopping condition for recursion: either found a track or cascade
-        if (parent.energy < self.min_simulation_energy or
-                parent.pos.magnitude - parent.length
-                > self.max_simulation_distance):
-            return [], []
-
-        if parent.type in self.em_cascades:
-            if self.simulate_electron_daughters and len(mc_tree.get_daughters(parent)):
-                pass # get light sourced from daughters
-            else:
-                return [parent], []
-
-        elif parent.type in self.tracks:
-            return [], [parent]
-
-        cascades = []
-        tracks = []
-
-        # need further recursion
-        daughters = mc_tree.get_daughters(parent)
-
-        # check if we have a branch that we can't simulate
-        if len(daughters) == 0 and parent.type not in self.dark_particles:
-            log_warn(f"Particle: {parent}")
-            log_warn(f"Tree: {mc_tree}")
-            raise NotImplementedError(
-                'Particle can not be simulated: ', parent.type)
-
-        if len(daughters) > 0:
-            if parent.type not in self.allowed_parent_particles:
-                raise ValueError('Unknown parent type:', parent.type)
-
-        for daughter in daughters:
-
-            # get list of cascades and tracks
-            cascades_i, tracks_i = self._get_light_sources(mc_tree, daughter)
-
-            # extend lists
-            cascades.extend(cascades_i)
-            tracks.extend(tracks_i)
-
-        return cascades, tracks
-
-    def get_light_sources(self, mc_tree):
-        """Get a list of cascade and track light sources from an I3MCTree
-
-        This method recursively walks through the I3MCTree by looking at
-        daughter particles. All daughter particles are collected, that need
-        to be simulated.
-
-        Parameters
-        ----------
-        mc_tree : I3MCTree
-            The I3MCTree from which to get the light sources
-
-        Returns
-        -------
-        array_like
-            The list of cascades.
-        array_like
-            The list of tracks.
-        """
-
-        cascades = []
-        tracks = []
-        for primary in mc_tree.get_primaries():
-
-            # get list of cascades and tracks
-            cascades_i, tracks_i = self._get_light_sources(mc_tree, primary)
-
-            # extend lists
-            cascades.extend(cascades_i)
-            tracks.extend(tracks_i)
-
-        return cascades, tracks
-
-
-
-def convert_params_to_particle(params):
-    """
-    Converts a list of parameters defining a source into a I3Particle
-
-    Parameters
-    ----------
-
-    params: list
-        List of parameters defining a source: [x, y, z, zenith, azimuth, energy, time]
-
-    Returns
-    -------
-
-    cascade: icecube.dataclasses.I3Particle
-        Particle describing a cascade
-    """
-    cascade = dataclasses.I3Particle()
-
-    cascade.pos = dataclasses.I3Position(params[0], params[1], params[2])
-    cascade.dir = dataclasses.I3Direction(params[3], params[4])
-
-    cascade.fit_status == dataclasses.I3Particle.NotSet
-    cascade.location_type = dataclasses.I3Particle.InIce
-    cascade.time = 0
-
-    # set particle type
-    cascade.type = dataclasses.I3Particle.ParticleType.EMinus
-
-    # set energy for each particle
-    cascade.energy = params[5] * I3Units.GeV
-
-    return cascade
-
-
 class Photonics(object):
     """
     Little wrapper around photonics_service
@@ -743,7 +514,7 @@ def get_PE(self, sources, position):
             raise TypeError(f"The type of sources \"{type(sources)}\" is not supported.")
 
 
-    def _get_pdf(self, source, position, times):
+    def _get_pdf(self, source, position, times, quantiles=True):
 
         if not isinstance(source, dataclasses.I3Particle):
             source = convert_params_to_particle(source)
@@ -756,45 +527,40 @@ def _get_pdf(self, source, position, times):
         if pes <= 0:
             return np.zeros_like(times), 0
 
-        pdf = [self.cascade_pxs.GetProbabilityDensity(t0 + t) for t in times]
-
-        # quantiles = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0)  # Reading charge distribution (normalized to 1)
-        # print(pdf, quantiles)
+        if quantiles:
+            pdf = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0)  # Reading charge distribution (normalized to 1)
 
-        return pdf, pes
+            # dt = np.diff(times)
+            # ctime = times[:-1] + dt / 2
+            # pdf2 = [self.cascade_pxs.GetProbabilityDensity(float(t) - t0) for t in ctime]
+            # pdf2 = pdf2 * dt
 
+            # for t, p1, p2 in zip(ctime, pdf, pdf2):
+            #     print(f"{t:.1f}, {p1 / p2:.2f}")
+            # print()
 
-    def _get_pdf2(self, source, position, times):
-
-        if not isinstance(source, dataclasses.I3Particle):
-            source = convert_params_to_particle(source)
-
-        if source.energy == 0:
-            return np.zeros_like(times), 0
-
-        self.cascade_pxs.SelectModuleCoordinates(*position)
-        pes, _, t0 = self.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(source))
-        if pes <= 0:
-            return np.zeros_like(times), 0
-
-        quantiles = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0)  # Reading charge distribution (normalized to 1)
-
-        return quantiles, pes
+            return pdf, pes
+        else:
+            if isinstance(times, list):
+                pdf = [self.cascade_pxs.GetProbabilityDensity(t - t0) for t in times]
+            else:
+                pdf = self.cascade_pxs.GetProbabilityDensity(times - t0)
+            return pdf, pes
 
 
-    def get_pdf(self, sources, position, t):
+    def get_pdf(self, sources, position, t, quantiles=True):
         if isinstance(sources, dataclasses.I3Particle):
-            return self._get_pdf(sources, position, t)
+            return self._get_pdf(sources, position, t, quantiles)
 
         elif isinstance(sources, list) and isinstance(sources[0], float):
-            return self._get_pdf(sources, position, t)
+            return self._get_pdf(sources, position, t, quantiles)
 
         else:
             pdf_tot = np.zeros_like(t)
             pdfs = []
             nps_tot = 0
             for source in sources:
-                pdf, nps = self._self._get_pdf(source, position, t)
+                pdf, nps = self._self._get_pdf(source, position, t, quantiles)
                 pdfs.append(pdf)
                 pdf_tot += pdf * nps
                 nps_tot += nps

From 252a91f4eafcf7fb5f272a94af543bde25caac78 Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Tue, 16 Apr 2024 10:40:11 -0500
Subject: [PATCH 16/27] Add cdf_per_dom function and
 get_probability_quantiles_per_dom

---
 egenerator/model/source/base.py | 128 +++++++++++++++++++++++++++++++-
 1 file changed, 127 insertions(+), 1 deletion(-)

diff --git a/egenerator/model/source/base.py b/egenerator/model/source/base.py
index 688de28e..ea7051f2 100644
--- a/egenerator/model/source/base.py
+++ b/egenerator/model/source/base.py
@@ -511,6 +511,132 @@ def cdf(self, x, result_tensors,
 
         return cdf_values
 
+    def cdf_per_dom(self, x, result_tensors, string, dom, **kwargs):
+        """Compute CDF values at x for given result_tensors
+
+        This is a numpy, i.e. not tensorflow, method to compute the CDF based
+        on a provided `result_tensors`. This can be used to investigate
+        the generated PDFs.
+
+        Note: this function only works for sources that use asymmetric
+        Gaussians to parameterize the PDF. The latent values of the AG
+        must be included in the `result_tensors`.
+
+        Parameters
+        ----------
+        x : array_like
+            The times in ns at which to evaluate the result tensors.
+            Shape: () or [n_points]
+        result_tensors : dict of tf.tensor
+            The dictionary of output tensors as obtained from `get_tensors`.
+        string : int
+            String id of the DOM
+        dom : int
+            Module id of the DOM
+        **kwargs
+            Keyword arguments.
+
+        Returns
+        -------
+        array_like
+            The CDF values at times x for the given event hypothesis and
+            exclusions that were used to compute `result_tensors`.
+            Shape: [n_events, n_points]
+
+        Raises
+        ------
+        NotImplementedError
+            If assymetric Gaussian latent variables are not present in
+            `result_tensors` dictionary.
+        """
+
+        x_orig = np.atleast_1d(x)
+        assert len(x_orig.shape) == 1, x_orig.shape
+        n_points = len(x_orig)
+
+        # shape: [1, 1, n_points]
+        x = np.reshape(x_orig, (1, 1, -1))
+
+        if result_tensors['time_offsets'] is not None:
+            # shape: [n_events]
+            t_offsets = result_tensors['time_offsets'].numpy()
+            # shape: [n_events, 1, 1]
+            t_offsets = np.reshape(t_offsets, [-1, 1, 1])
+            # shape: [n_events, 1, n_points]
+            x = x - t_offsets
+        else:
+            t_offsets = 0.
+
+        # internally we are working with different time units
+        x = x / self.time_unit_in_ns
+
+        # Check if the asymmetric Gaussian latent variables exist
+        for latent_name in ['mu', 'scale', 'sigma', 'r']:
+            if 'latent_var_' + latent_name not in result_tensors:
+                msg = 'PDF evaluation is not supported for this model: {}'
+                raise NotImplementedError(msg.format(
+                    self._untracked_data['name']))
+
+        # extract values
+        # shape: [n_events, n_components, 1]
+        mu = result_tensors['latent_var_mu'].numpy()[:, string, dom, :, np.newaxis]
+        scale = result_tensors['latent_var_scale'].numpy()[:, string, dom, :, np.newaxis]
+        sigma = result_tensors['latent_var_sigma'].numpy()[:, string, dom, :, np.newaxis]
+        r = result_tensors['latent_var_r'].numpy()[:, string, dom, :, np.newaxis]
+
+        # shape: [n_events, n_components, n_points]
+        mixture_cdf = basis_functions.asymmetric_gauss_cdf(
+            x=x, mu=mu, sigma=sigma, r=r
+        )
+
+        # shape: [n_events, n_points]
+        cdf_values = np.sum(mixture_cdf * scale, axis=1)
+
+        return cdf_values
+
+
+    def get_probability_quantiles_per_dom(self, x, result_tensors, string, dom, **kwargs):
+        """
+        This is a numpy, i.e. not tensorflow, method to compute the CDF based
+        on a provided `result_tensors`. This can be used to investigate
+        the generated PDFs.
+
+        Note: this function only works for sources that use asymmetric
+        Gaussians to parameterize the PDF. The latent values of the AG
+        must be included in the `result_tensors`.
+
+        Parameters
+        ----------
+        x : array_like
+            The times in ns at which to evaluate the result tensors.
+            Shape: () or [n_points]
+        result_tensors : dict of tf.tensor
+            The dictionary of output tensors as obtained from `get_tensors`.
+        string : int
+            String id of the DOM
+        dom : int
+            Module id of the DOM
+        **kwargs
+            Keyword arguments.
+
+        Returns
+        -------
+        array_like
+            Shape: [n_events, n_points - 1]
+
+        Raises
+        ------
+        NotImplementedError
+            If assymetric Gaussian latent variables are not present in
+            `result_tensors` dictionary.
+        """
+
+        cdf = self.cdf_per_dom(x, result_tensors, string, dom, **kwargs)
+        quantiles = cdf[..., 1:] - cdf[..., :-1]
+
+        return quantiles
+
+
     def pdf(self, x, result_tensors,
             tw_exclusions=None, tw_exclusions_ids=None, **kwargs):
         """Compute PDF values at x for given result_tensors
@@ -669,7 +795,7 @@ def pdf_per_dom(self, x, result_tensors, string, dom, **kwargs):
         array_like
             The PDF values at times x for the given event hypothesis and
             exclusions that were used to compute `result_tensors`.
-            Shape: [n_events, 86, 60, n_points]
+            Shape: [n_events, n_points]
 
         Raises
         ------

From b85d0eed1883d9950b6797d5ee6bb960d0087c22 Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Tue, 16 Apr 2024 10:40:31 -0500
Subject: [PATCH 17/27] Little clean up

---
 egenerator/model/source/cascade/default.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/egenerator/model/source/cascade/default.py b/egenerator/model/source/cascade/default.py
index b02a52ae..accc618a 100644
--- a/egenerator/model/source/cascade/default.py
+++ b/egenerator/model/source/cascade/default.py
@@ -355,9 +355,11 @@ def get_tensors(self, data_batch_dict, is_training,
 
         # check if we have the right amount of filters in the latent dimension
         n_models = config['num_latent_models']
-        if n_models*4 + n_charge != config['num_filters_list'][-1]:
-            raise ValueError('{!r} != {!r}'.format(
-                n_models*4 + n_charge, config['num_filters_list'][-1]))
+        exp_num_filts = n_models * 4 + n_charge
+        if exp_num_filts != config['num_filters_list'][-1]:
+            raise ValueError('{!r} (expected) != {!r} (configured)'.format(
+                exp_num_filts, config['num_filters_list'][-1]))
+
         if n_models <= 1:
             raise ValueError('{!r} !> 1'.format(n_models))
 

From 8d627589d779bb9a8721fee5d899f8e72aaad46f Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Wed, 17 Apr 2024 11:44:14 -0500
Subject: [PATCH 18/27] Store total charge per dom as well. Improve filename
 for figures

---
 egenerator/ic3/evaluate_mcpe.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py
index 835c4fc7..10642f65 100644
--- a/egenerator/ic3/evaluate_mcpe.py
+++ b/egenerator/ic3/evaluate_mcpe.py
@@ -325,21 +325,28 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
         loss_event_generator = dataclasses.I3MapKeyDouble()
         loss_event_generator_total = 0
 
+        event_generator_total_dom_charge = dataclasses.I3MapKeyDouble()
+        mc_total_dom_charge = dataclasses.I3MapKeyDouble()
+
         if validate_photonics:
             loss_photonics = dataclasses.I3MapKeyDouble()
             loss_photonics_total = 0
             geo = frame["I3Geometry"]
             particle = frame["I3MCTree"].get_head()
+            photonics_total_dom_charge = dataclasses.I3MapKeyDouble()
+
 
         for string in range(86):
             for om in range(60):
                 omkey = icetray.OMKey(string + 1, om + 1)
 
+                event_generator_total_dom_charge[omkey] = float(total_charge[0, string, om, 0])
 
                 if omkey in mcpe_series_map:
                     pulse_series = mcpe_series_map[omkey]
 
                     dom_charges_true = np.array([pulse.npe for pulse in pulse_series])
+                    mc_total_dom_charge[omkey] = float(np.sum(dom_charges_true))
                     pulse_log_pdf_values = np.log([self.model.pdf_per_dom(pulse.time, result_tensors=result_tensors,
                                                     string=string, dom=om) + eps for pulse in pulse_series])
 
@@ -359,6 +366,7 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
                         table_pdf, dom_charges_pred = self.photonics.get_pdf(
                             particle, position, [pulse.time for pulse in pulse_series],
                             quantiles=False)
+                        photonics_total_dom_charge[omkey] = float(dom_charges_pred)
 
                         time_log_likelihood = -dom_charges_true * np.log(np.array(table_pdf) + eps)
                         llh_poisson = dom_charges_pred - np.sum(dom_charges_true) * np.log(dom_charges_pred + eps)
@@ -370,7 +378,7 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
 
                 elif self.add_dark_doms:
                     expected_charge = total_charge[0, string, om, 0]
-
+                    mc_total_dom_charge[omkey] = 0
                     loss = expected_charge - 0 * np.log(expected_charge + 1e-7)
                     loss_event_generator_total += loss
                     loss_event_generator[omkey] = loss
@@ -378,6 +386,7 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
                     if validate_photonics:
                         position = geo.omgeo[omkey].position
                         expected_charge, _ = self.photonics.get_PE(particle, position)
+                        photonics_total_dom_charge[omkey] = float(expected_charge)
 
                         loss = expected_charge - 0 * np.log(expected_charge + 1e-7)
                         loss_photonics_total += loss
@@ -386,10 +395,13 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
                 else:
                     pass
 
+        frame.Put("MCTotalChargePerDOM", mc_total_dom_charge)
+        frame.Put("EventGeneratorTotalChargePerDOM", event_generator_total_dom_charge)
         frame.Put("EventGeneratorLoss", loss_event_generator)
         frame.Put("EventGeneratorLossTotal", dataclasses.I3Double(loss_event_generator_total))
 
         if validate_photonics:
+            frame.Put("PhotonicsTotalChargePerDOM", photonics_total_dom_charge)
             frame.Put("PhotonicsLoss", loss_photonics)
             frame.Put("PhotonicsLossTotal", dataclasses.I3Double(loss_photonics_total))
 
@@ -453,8 +465,10 @@ def plot_pdfs(self, frame, result_tensors, validate_photonics=False):
         ax.legend()
         ax.grid()
 
+        header = frame["I3EventHeader"]
+
         plt.tight_layout()
-        plt.savefig(f"pdf_{self._counter}.png")
+        plt.savefig(f"{self.model_name}_{header.run_id}_{header.event_id}_pdf.png")
 
 
 class Photonics(object):

From 16f6cbc19b8d43352ff2639950b6589f6f592be2 Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluter@icecube.wisc.edu>
Date: Thu, 18 Apr 2024 04:44:06 -0500
Subject: [PATCH 19/27] Add function which only calculates total charge per dom

---
 egenerator/ic3/evaluate_mcpe.py | 55 ++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py
index 10642f65..acaa559a 100644
--- a/egenerator/ic3/evaluate_mcpe.py
+++ b/egenerator/ic3/evaluate_mcpe.py
@@ -107,6 +107,7 @@ def __init__(self, context):
                           'high energy cascades.',
                           False)
         self.AddParameter('AddDarkDOMs', '', True)
+        self.AddParameter('ValidateTimePDF', '', True)
 
         self.AddParameter('CascadeService',
                           '',
@@ -128,7 +129,7 @@ def Configure(self):
         self.random_service = self.GetParameter('random_service')
         self.simulate_electron_daughters = self.GetParameter('SimulateElectronDaughterParticles')
         self.add_dark_doms = self.GetParameter("AddDarkDOMs")
-
+        self.validate_time_pdf = self.GetParameter("ValidateTimePDF")
         if isinstance(self.random_service, int):
             self.random_service = np.random.RandomState(self.random_service)
 
@@ -274,8 +275,8 @@ def DAQ(self, frame):
 
         self._counter += 1
 
-        # if self._counter >= 5:
-        #     return
+        if not self._counter % 100:
+            print(self._counter)
 
         # start timer
         t_0 = timeit.default_timer()
@@ -299,11 +300,12 @@ def DAQ(self, frame):
         t_2 = timeit.default_timer()
 
         # sample pdf
-        self.sample_from_pdf(frame, result_tensors, validate_photonics=self.cascade_service is not None)
-
-        if self._counter % 10 == 0:
-            self.plot_pdfs(frame, result_tensors, validate_photonics=self.cascade_service is not None)
-
+        if self.validate_time_pdf:
+            self.sample_from_pdf(frame, result_tensors, validate_photonics=self.cascade_service is not None)
+            if self._counter % 10 == 0:
+                self.plot_pdfs(frame, result_tensors, validate_photonics=self.cascade_service is not None)
+        else:
+            self.store_total_charge(frame, result_tensors, validate_photonics=self.cascade_service is not None)
 
         # timer after Sampling
         t_3 = timeit.default_timer()
@@ -316,6 +318,43 @@ def DAQ(self, frame):
         # push frame to next modules
         self.PushFrame(frame)
 
+    def store_total_charge(self, frame, result_tensors, validate_photonics=False):
+        """ This function stores the predicted charge by EG, MC, Photonics """
+        total_charge = result_tensors['dom_charges'].numpy()
+        mcpe_series_map = frame["I3MCPESeriesMap"]
+
+        event_generator_total_dom_charge = dataclasses.I3MapKeyDouble()
+        mc_total_dom_charge = dataclasses.I3MapKeyDouble()
+
+        if validate_photonics:
+            geo = frame["I3Geometry"]
+            particle = frame["I3MCTree"].get_head()
+            photonics_total_dom_charge = dataclasses.I3MapKeyDouble()
+
+
+        for string in range(86):
+            for om in range(60):
+                omkey = icetray.OMKey(string + 1, om + 1)
+
+                event_generator_total_dom_charge[omkey] = float(total_charge[0, string, om, 0])
+
+                if validate_photonics:
+                    position = geo.omgeo[omkey].position
+                    self.photonics.cascade_pxs.SelectModuleCoordinates(*position)
+                    charge, _, t0 = self.photonics.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(particle))
+                    photonics_total_dom_charge[omkey] = float(charge)
+
+                if omkey in mcpe_series_map:
+                    mc_total_dom_charge[omkey] = np.sum([pulse.npe for pulse in mcpe_series_map[omkey]], dtype="float")
+                else:
+                    mc_total_dom_charge[omkey] = 0
+
+        frame.Put("MCTotalChargePerDOM", mc_total_dom_charge)
+        frame.Put("EventGeneratorTotalChargePerDOM", event_generator_total_dom_charge)
+
+        if validate_photonics:
+            frame.Put("PhotonicsTotalChargePerDOM", photonics_total_dom_charge)
+
 
     def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
         total_charge = result_tensors['dom_charges'].numpy()

From ea00a1d5ea60ac506cac50c403fb479ac5150190 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Wed, 29 May 2024 11:49:01 +0200
Subject: [PATCH 20/27] fix ws

---
 egenerator/model/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egenerator/model/base.py b/egenerator/model/base.py
index e6dcde5a..42a667d0 100644
--- a/egenerator/model/base.py
+++ b/egenerator/model/base.py
@@ -235,7 +235,7 @@ def save_weights(self, dir_path, max_keep=3, protected=False,
         in the output directory. If it does not exist yet, a new one will be
         created. Otherwise, its values will be updated
         The file contains meta data on the checkpoints and keeps track
-        of the most recents files. The structure  and content of meta data:
+        of the most recents files. The structure and content of meta data:
 
             latest_checkpoint: int
                 The number of the latest checkpoint.

From f1fb9e85d8fadc242123c91a28564c8272cb749e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Fri, 29 Mar 2024 16:41:10 +0100
Subject: [PATCH 21/27] Remove hard-coded label_key LabelsDeepLearning. Allow
 to train on MCPEs.

---
 egenerator/data/modules/data/pulse_data.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py
index 4592e93f..35564db2 100644
--- a/egenerator/data/modules/data/pulse_data.py
+++ b/egenerator/data/modules/data/pulse_data.py
@@ -118,6 +118,14 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key,
         time_exclusions_exist = time_exclusions_key is not None
         dom_exclusions_exist = dom_exclusions_key is not None
 
+
+        self._hdf_time_index = 10
+        # 11: npe, 12: charge
+        if pulse_is_mcpe:
+            self._hdf_charge_index = 11
+        else:
+            self._hdf_charge_index = 12
+
         if add_charge_quantiles:
             pulse_dim = 3
         else:
@@ -383,7 +391,6 @@ def get_data_from_hdf(self, file, *args, **kwargs):
 
                 # (t_start, t_end)
                 x_time_exclusions[tw_index] = [row[hdf_time_index], row[11]]
-
                 # gather pulse ids (batch index, string, dom)
                 x_time_exclusions_ids[tw_index] = [index, string-1, dom-1]
 

From 0c719581dd1518bafbe9205273fe51616cf3e531 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Mon, 1 Apr 2024 18:11:43 +0200
Subject: [PATCH 22/27] Fixed setting charge index. Slightly refactor code to
 avoid future warning

---
 egenerator/data/modules/data/pulse_data.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py
index 35564db2..94ee4c59 100644
--- a/egenerator/data/modules/data/pulse_data.py
+++ b/egenerator/data/modules/data/pulse_data.py
@@ -118,14 +118,6 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key,
         time_exclusions_exist = time_exclusions_key is not None
         dom_exclusions_exist = dom_exclusions_key is not None
 
-
-        self._hdf_time_index = 10
-        # 11: npe, 12: charge
-        if pulse_is_mcpe:
-            self._hdf_charge_index = 11
-        else:
-            self._hdf_charge_index = 12
-
         if add_charge_quantiles:
             pulse_dim = 3
         else:

From bc591ae70ebbc7185cd6070e5d25fb98fefe1528 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Thu, 4 Apr 2024 14:21:36 +0200
Subject: [PATCH 23/27] Add new config file for training on MCPEs

---
 configs/cascade_7param_MCPE.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml
index f3b5b76b..4d2b5e8f 100644
--- a/configs/cascade_7param_MCPE.yaml
+++ b/configs/cascade_7param_MCPE.yaml
@@ -62,7 +62,7 @@ training_settings: {
 
     # Number of training iterations to train the model for
     'num_training_iterations': 1000000,
-
+    
     # Additional keywords to the loss module used for training
     'additional_loss_module_kwargs': {
         'normalize_by_total_charge': False,
@@ -400,7 +400,7 @@ model_settings: {
         'scale_charge_by_relative_dom_efficiency': True,
         'scale_charge_by_global_dom_efficiency': False,
         'prevent_mixture_component_swapping': False,
-        'estimate_charge_distribution': False,
+        'estimate_charge_distribution': 'negative_binomial',
         'num_latent_models': 10,
 
         # This is a list of labels in addition to
@@ -410,7 +410,7 @@ model_settings: {
 
         # First convolutions
         'filter_size_list' : [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]],
-        'num_filters_list' : [25, 500, 500, 500, 41],  # The last dimension should be 4 * num_asym_gauss + num_charge_pdf; num_asym_gauss = 10,  num_charge_pdf = 1 (poisson) or 2 (binominal)
+        'num_filters_list' : [25, 500, 500, 500, 42],  # The last dimension should be 4 * num_asym_gauss + num_charge_pdf; num_asym_gauss = 10,  num_charge_pdf = 1 (poisson) or 2 (binominal)
         'method_list' : ['locally_connected',
                             'convolution', 'convolution', 'convolution',
                             'convolution',

From 84a74bd853be19b19f766476c1193eb17d346b17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Mon, 3 Jun 2024 10:02:08 +0200
Subject: [PATCH 24/27] Export to hdf5 rather than i3

---
 egenerator/ic3/evaluate_mcpe.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py
index acaa559a..99dbae24 100644
--- a/egenerator/ic3/evaluate_mcpe.py
+++ b/egenerator/ic3/evaluate_mcpe.py
@@ -7,7 +7,7 @@
 
 import matplotlib.pyplot as plt
 
-from icecube import dataclasses, icetray, photonics_service
+from icecube import dataclasses, icetray, photonics_service, simclasses
 from icecube.icetray.i3logging import log_info, log_warn
 
 from egenerator.utils.configurator import ManagerConfigurator
@@ -323,31 +323,31 @@ def store_total_charge(self, frame, result_tensors, validate_photonics=False):
         total_charge = result_tensors['dom_charges'].numpy()
         mcpe_series_map = frame["I3MCPESeriesMap"]
 
-        event_generator_total_dom_charge = dataclasses.I3MapKeyDouble()
-        mc_total_dom_charge = dataclasses.I3MapKeyDouble()
+        # We have to use here I3MCPulseSeriesMap here because for a simple I3MapKeyDouble is no hdf5 converter implemented ...
+
+        event_generator_total_dom_charge = dataclasses.I3MapKeyVectorDouble()
+        mc_total_dom_charge = dataclasses.I3MapKeyVectorDouble()
 
         if validate_photonics:
             geo = frame["I3Geometry"]
             particle = frame["I3MCTree"].get_head()
-            photonics_total_dom_charge = dataclasses.I3MapKeyDouble()
-
+            photonics_total_dom_charge = dataclasses.I3MapKeyVectorDouble()
 
         for string in range(86):
             for om in range(60):
                 omkey = icetray.OMKey(string + 1, om + 1)
-
-                event_generator_total_dom_charge[omkey] = float(total_charge[0, string, om, 0])
+                event_generator_total_dom_charge[omkey] = [float(total_charge[0, string, om, 0])]
 
                 if validate_photonics:
                     position = geo.omgeo[omkey].position
                     self.photonics.cascade_pxs.SelectModuleCoordinates(*position)
                     charge, _, t0 = self.photonics.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(particle))
-                    photonics_total_dom_charge[omkey] = float(charge)
+                    photonics_total_dom_charge[omkey] = [float(charge)]
 
                 if omkey in mcpe_series_map:
-                    mc_total_dom_charge[omkey] = np.sum([pulse.npe for pulse in mcpe_series_map[omkey]], dtype="float")
+                    mc_total_dom_charge[omkey] = [np.sum([pulse.npe for pulse in mcpe_series_map[omkey]], dtype="float")]
                 else:
-                    mc_total_dom_charge[omkey] = 0
+                    mc_total_dom_charge[omkey] = [0]
 
         frame.Put("MCTotalChargePerDOM", mc_total_dom_charge)
         frame.Put("EventGeneratorTotalChargePerDOM", event_generator_total_dom_charge)
@@ -374,7 +374,6 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False):
             particle = frame["I3MCTree"].get_head()
             photonics_total_dom_charge = dataclasses.I3MapKeyDouble()
 
-
         for string in range(86):
             for om in range(60):
                 omkey = icetray.OMKey(string + 1, om + 1)

From d837934ae7eb704083c3bf8219abfd9e80b4523c Mon Sep 17 00:00:00 2001
From: mhuen <mhuen@users.noreply.github.com>
Date: Mon, 3 Jun 2024 14:02:28 +0200
Subject: [PATCH 25/27] Allow evaluation in eager mode

---
 egenerator/utils/learning_rate.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/egenerator/utils/learning_rate.py b/egenerator/utils/learning_rate.py
index 5e2062bc..9ab84ced 100644
--- a/egenerator/utils/learning_rate.py
+++ b/egenerator/utils/learning_rate.py
@@ -58,7 +58,6 @@ def __init__(self, boundaries, scheduler_settings, name=None):
         self.schedulers = schedulers
         self.name = name
 
-    @tf.function
     def __call__(self, step):
 
         step = tf.convert_to_tensor(step)
@@ -86,7 +85,10 @@ def __call__(self, step):
 
             pred_fn_pairs.append(
                 (
-                    step > low and step <= high,
+                    tf.math.logical_and(
+                        tf.math.greater(step, low),
+                        tf.math.less_equal(step, high),
+                    ),
                     lambda: scheduler(step - low),
                 )
             )

From de144f89b51f2e69587cc72b69b42ee256e3e3f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Schl=C3=BCter?= <felix.schluter@icecube.wisc.edu>
Date: Mon, 3 Jun 2024 14:14:26 +0200
Subject: [PATCH 26/27] Code refactoring

---
 egenerator/manager/base.py | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/egenerator/manager/base.py b/egenerator/manager/base.py
index a66d8e73..e7577a66 100644
--- a/egenerator/manager/base.py
+++ b/egenerator/manager/base.py
@@ -641,10 +641,7 @@ def perform_training_step(
         gradients = tape.gradient(combined_loss, variables)
 
         # remove nans in gradients and replace these with zeros
-        if "remove_nan_gradients" in opt_config:
-            remove_nan_gradients = opt_config["remove_nan_gradients"]
-        else:
-            remove_nan_gradients = False
+        remove_nan_gradients = opt_config.get("remove_nan_gradients", False)
         if remove_nan_gradients:
             gradients = [
                 tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad)
@@ -652,10 +649,7 @@ def perform_training_step(
                 if grad is not None
             ]
 
-        if "clip_gradients_value" in opt_config:
-            clip_gradients_value = opt_config["clip_gradients_value"]
-        else:
-            clip_gradients_value = None
+        clip_gradients_value = opt_config.get("clip_gradients_value", None)
         if clip_gradients_value is not None:
             capped_gradients, _ = tf.clip_by_global_norm(
                 gradients, clip_gradients_value
@@ -675,6 +669,7 @@ def perform_training_step(
                 ],
             )
             asserts.append(assert_finite)
+
         with tf.control_dependencies(asserts):
             self.optimizer.apply_gradients(zip(capped_gradients, variables))
 
@@ -778,10 +773,8 @@ def train(
                 scheduler_class = misc.load_class(lr_cfg["full_class_string"])
                 scheduler = scheduler_class(**lr_cfg["settings"])
                 optimizer_settings["learning_rate"] = scheduler
-
-        optimizer = getattr(tf.optimizers, opt_config["optimizer_name"])(
-            **optimizer_settings
-        )
+        
+        optimizer = getattr(tf.optimizers, opt_config["optimizer_name"])(**optimizer_settings)
         self._untracked_data["optimizer"] = optimizer
 
         # save new training step to model

From dec3eb033f623735da5cd92111cee2000aa5eb9d Mon Sep 17 00:00:00 2001
From: Felix Schlueter <fschluet@trex01.zeuthen.desy.de>
Date: Tue, 4 Jun 2024 11:44:40 +0200
Subject: [PATCH 27/27] Improve code. access charge from hdf files by name and
 not index

---
 egenerator/data/modules/data/pulse_data.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py
index dd1e05ad..739c9f98 100644
--- a/egenerator/data/modules/data/pulse_data.py
+++ b/egenerator/data/modules/data/pulse_data.py
@@ -252,11 +252,7 @@ def get_data_from_hdf(self, file, *args, **kwargs):
         if not self.is_configured:
             raise ValueError("Module not configured yet!")
 
-        # 11: npe, 12: charge
-        if self.configuration.config["pulse_is_mcpe"]:
-            hdf_charge_index = 11
-        else:
-            hdf_charge_index = 12
+        charge_str = "npe" if self.configuration.config["pulse_is_mcpe"] else "charge"
 
         # open file
         f = pd.HDFStore(file, 'r')
@@ -366,20 +362,19 @@ def get_data_from_hdf(self, file, *args, **kwargs):
 
             index = event_dict[(row[1:5])]
 
-            # pulse charge: row[hdf_charge_index]
             # accumulate charge in DOMs
-            x_dom_charge[index, string - 1, dom - 1, 0] += row[hdf_charge_index]
+            x_dom_charge[index, string - 1, dom - 1, 0] += getattr(row, charge_str)
 
             # gather pulses
             if add_charge_quantiles:
 
                 # (charge, time, quantile)
                 cum_charge = float(x_dom_charge[index, string - 1, dom - 1, 0])
-                x_pulses[pulse_index] = [row[hdf_charge_index], row.time, cum_charge]
+                x_pulses[pulse_index] = [getattr(row, charge_str), row.time, cum_charge]
 
             else:
                 # (charge, time)
-                x_pulses[pulse_index] = [row[hdf_charge_index], row.time]
+                x_pulses[pulse_index] = [getattr(row, charge_str), row.time]
 
             # gather pulse ids (batch index, string, dom)
             x_pulses_ids[pulse_index] = [index, string - 1, dom - 1]
@@ -591,7 +586,6 @@ def get_data_from_frame(self, frame, *args, **kwargs):
             for pulse in pulse_list:
                 index = 0
 
-                # pulse charge: row[hdf_charge_index], time: row.time
                 # accumulate charge in DOMs
                 x_dom_charge[index, string - 1, dom - 1, 0] += pulse.charge