icecube · fschlueter · Mar 29, 2024 · Mar 29, 2024 · Apr 1, 2024 · Apr 4, 2024
diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml
diff --git a/egenerator/data/handler/modular.py b/egenerator/data/handler/modular.py
@@ -343,8 +343,10 @@ def _get_data(self, file_or_frame, method, *args, **kwargs):
             assert isinstance(file_or_frame, str), "Expected file path string"
 
             num_data, data = self.data_module.get_data_from_hdf(
-                file_or_frame, *args, **kwargs
-            )
+                file_or_frame, *args,
+                label_key=self.label_module.configuration.config["label_key"],
+                **kwargs)
+
             num_labels, labels = self.label_module.get_data_from_hdf(
                 file_or_frame, *args, **kwargs
             )

diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py
@@ -45,6 +45,7 @@ def _configure(
         float_precision,
         add_charge_quantiles,
         discard_pulses_from_excluded_doms,
+        pulse_is_mcpe=False,
     ):
         """Configure Module Class
         This is an abstract method and must be implemented by derived class.
@@ -72,6 +73,9 @@ def _configure(
         discard_pulses_from_excluded_doms : bool, optional
             If True, pulses on excluded DOMs are discarded. The pulses are
             discarded after the charge at the DOM is collected.
+        pulse_is_mcpe : bool, optional
+            If True, train on MCPE pulses instead of RecoPulses. With this setting,
+            the pulse charge is read from the `npe` field of the hdf5 files instead of `charge`. (Default: False)
 
         Returns
         -------
@@ -214,6 +218,7 @@ def _configure(
             ),
             mutable_settings=dict(
                 pulse_key=pulse_key,
+                pulse_is_mcpe=pulse_is_mcpe,
                 dom_exclusions_key=dom_exclusions_key,
                 time_exclusions_key=time_exclusions_key,
                 discard_pulses_from_excluded_doms=(
@@ -247,12 +252,13 @@ def get_data_from_hdf(self, file, *args, **kwargs):
         if not self.is_configured:
             raise ValueError("Module not configured yet!")
 
-        # open file
-        f = pd.HDFStore(file, "r")
+        charge_str = "npe" if self.configuration.config["pulse_is_mcpe"] else "charge"
 
+        # open file
+        f = pd.HDFStore(file, 'r')
         try:
             pulses = f[self.configuration.config["pulse_key"]]
-            _labels = f["LabelsDeepLearning"]
+            _labels = f[kwargs["label_key"]]
             if self.data["dom_exclusions_exist"]:
                 try:
                     dom_exclusions = f[
@@ -293,11 +299,13 @@ def get_data_from_hdf(self, file, *args, **kwargs):
 
         # create Dictionary with event IDs
         size = len(_labels["Event"])
+
+        if not size:
+            raise ValueError("Label length is 0.")
+
         event_dict = {}
-        for idx, row in _labels.iterrows():
-            event_dict[
-                (row.iloc[0], row.iloc[1], row.iloc[2], row.iloc[3])
-            ] = idx
+        for row in _labels.itertuples():
+            event_dict[(row[1:5])] = row[0]
 
         # create empty array for DOM charges
         x_dom_charge = np.zeros(
@@ -351,21 +359,22 @@ def get_data_from_hdf(self, file, *args, **kwargs):
                     "skipping pulse: {} {}".format(string, dom)
                 )
                 continue
+
             index = event_dict[(row[1:5])]
 
             # accumulate charge in DOMs
-            x_dom_charge[index, string - 1, dom - 1, 0] += row.charge
+            x_dom_charge[index, string - 1, dom - 1, 0] += getattr(row, charge_str)
 
             # gather pulses
             if add_charge_quantiles:
 
                 # (charge, time, quantile)
                 cum_charge = float(x_dom_charge[index, string - 1, dom - 1, 0])
-                x_pulses[pulse_index] = [row.charge, row.time, cum_charge]
+                x_pulses[pulse_index] = [getattr(row, charge_str), row.time, cum_charge]
 
             else:
                 # (charge, time)
-                x_pulses[pulse_index] = [row.charge, row.time]
+                x_pulses[pulse_index] = [getattr(row, charge_str), row.time]
 
             # gather pulse ids (batch index, string, dom)
             x_pulses_ids[pulse_index] = [index, string - 1, dom - 1]
@@ -408,7 +417,7 @@ def get_data_from_hdf(self, file, *args, **kwargs):
                     continue
                 index = event_dict[(row[1:5])]
 
-                # t_start (pulse time): row[10], t_end (pulse width): row[11]
+                # t_start (pulse time): row.time, t_end (pulse width): row.width
 
                 # (t_start, t_end)
                 x_time_exclusions[tw_index] = [row.time, row.width]
@@ -577,7 +586,6 @@ def get_data_from_frame(self, frame, *args, **kwargs):
             for pulse in pulse_list:
                 index = 0
 
-                # pulse charge: row[12], time: row[10]
                 # accumulate charge in DOMs
                 x_dom_charge[index, string - 1, dom - 1, 0] += pulse.charge
 

diff --git a/egenerator/data/modules/labels/cascades.py b/egenerator/data/modules/labels/cascades.py
@@ -34,6 +34,9 @@ def _configure(
         trafo_log,
         float_precision,
         label_key="LabelsDeepLearning",
+        parameter_names=["cascade_x", "cascade_y", "cascade_z",
+                         "cascade_zenith", "cascade_azimuth",
+                         "cascade_energy", "cascade_t"],
     ):
         """Configure Module Class
         This is an abstract method and must be implemented by derived class.
@@ -51,10 +54,12 @@ def _configure(
             If a single bool is given, this applies to all labels. Otherwise
             a list of bools corresponds to the labels in the order:
                 x, y, z, zenith, azimuth, energy, time
-        label_key : str, optional
-            The name of the key under which the labels are saved.
         float_precision : str
             The float precision as a str.
+        label_key : str, optional
+            The name of the key under which the labels are saved.
+        parameter_names : list of str, optional
+            Names of the parameters (e.g., the columns in the hdf5 dataset `label_key`).
 
         Returns
         -------
@@ -129,6 +134,7 @@ def _configure(
                 trafo_log=trafo_log,
                 float_precision=float_precision,
                 label_key=label_key,
+                parameter_names=parameter_names,
             ),
         )
         return configuration, data, {}
@@ -163,16 +169,8 @@ def get_data_from_hdf(self, file, *args, **kwargs):
         cascade_parameters = []
         try:
             _labels = f[self.configuration.config["label_key"]]
-            for label in [
-                "cascade_x",
-                "cascade_y",
-                "cascade_z",
-                "cascade_zenith",
-                "cascade_azimuth",
-                "cascade_energy",
-                "cascade_t",
-            ]:
-                cascade_parameters.append(_labels[label])
+            for par in self.configuration.config["parameter_names"]:
+                cascade_parameters.append(_labels[par])
 
         except Exception as e:
             self._logger.warning(e)
@@ -223,16 +221,8 @@ def get_data_from_frame(self, frame, *args, **kwargs):
         cascade_parameters = []
         try:
             _labels = frame[self.configuration.config["label_key"]]
-            for label in [
-                "cascade_x",
-                "cascade_y",
-                "cascade_z",
-                "cascade_zenith",
-                "cascade_azimuth",
-                "cascade_energy",
-                "cascade_t",
-            ]:
-                cascade_parameters.append(np.atleast_1d(_labels[label]))
+            for par in self.configuration.config["parameter_names"]:
+                cascade_parameters.append(np.atleast_1d(_labels[par]))
 
         except Exception as e:
             self._logger.warning(e)

diff --git a/egenerator/ic3/__init__.py b/egenerator/ic3/__init__.py
@@ -1,7 +1,9 @@
 from egenerator.ic3.reconstruction import EventGeneratorReconstruction
 from egenerator.ic3.simulation import EventGeneratorSimulation
+from egenerator.ic3.evaluate_mcpe import CalculateLikelihood
 
 __all__ = [
     "EventGeneratorReconstruction",
     "EventGeneratorSimulation",
+    "CalculateLikelihood",
 ]