From e7a5ed98e23ffa656aeb2cdfc03f29f71b956a5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Fri, 29 Mar 2024 16:29:17 +0100 Subject: [PATCH 01/27] Simplify code --- egenerator/ic3/reconstruction.py | 6 +----- egenerator/ic3/visualization.py | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/egenerator/ic3/reconstruction.py b/egenerator/ic3/reconstruction.py index e0ac8d10..ff93da30 100644 --- a/egenerator/ic3/reconstruction.py +++ b/egenerator/ic3/reconstruction.py @@ -335,11 +335,7 @@ def Configure(self): self.manager = self.manager_configurator.manager self.loss_module = self.manager_configurator.loss_module - if 'I3ParticleMapping' in self.manager.configuration.config['config']: - self.i3_mapping = self.manager.configuration.config['config'][ - 'I3ParticleMapping'] - else: - self.i3_mapping = None + self.i3_mapping = self.manager.configuration.config['config'].get('I3ParticleMapping', None) for model in self.manager.models: num_vars, num_total_vars = model.num_variables diff --git a/egenerator/ic3/visualization.py b/egenerator/ic3/visualization.py index b064aa1d..14ec9fe1 100644 --- a/egenerator/ic3/visualization.py +++ b/egenerator/ic3/visualization.py @@ -128,11 +128,7 @@ def Configure(self): self.manager = self.manager_configurator.manager self.loss_module = self.manager_configurator.loss_module - if 'I3ParticleMapping' in self.manager.configuration.config['config']: - self.i3_mapping = self.manager.configuration.config['config'][ - 'I3ParticleMapping'] - else: - self.i3_mapping = None + self.i3_mapping = self.manager.configuration.config['config'].get('I3ParticleMapping', None) for model in self.manager.models: num_vars, num_total_vars = model.num_variables From cb05b283427461aceaac62b65bd93fec8c7044df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Fri, 29 Mar 2024 16:41:10 +0100 Subject: [PATCH 02/27] Remove hard-coded label_key LabelsDeepLearning. Allow to train on MCPEs. --- egenerator/data/handler/modular.py | 5 ++- egenerator/data/modules/data/pulse_data.py | 39 ++++++++++++++-------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/egenerator/data/handler/modular.py b/egenerator/data/handler/modular.py index e28ddba8..20a19937 100644 --- a/egenerator/data/handler/modular.py +++ b/egenerator/data/handler/modular.py @@ -322,7 +322,10 @@ def _get_data(self, file_or_frame, method, *args, **kwargs): assert isinstance(file_or_frame, str), 'Expected file path string' num_data, data = self.data_module.get_data_from_hdf( - file_or_frame, *args, **kwargs) + file_or_frame, *args, + label_key=self.label_module.configuration.config['label_key'], + **kwargs) + num_labels, labels = self.label_module.get_data_from_hdf( file_or_frame, *args, **kwargs) num_misc, misc = self.misc_module.get_data_from_hdf( diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py index 603fab4d..f56af5ab 100644 --- a/egenerator/data/modules/data/pulse_data.py +++ b/egenerator/data/modules/data/pulse_data.py @@ -40,7 +40,7 @@ def __init__(self, logger=None): def _configure(self, config_data, pulse_key, dom_exclusions_key, time_exclusions_key, float_precision, add_charge_quantiles, - discard_pulses_from_excluded_doms): + discard_pulses_from_excluded_doms, pulse_is_mcpe=False): """Configure Module Class This is an abstract method and must be implemented by derived class. @@ -67,6 +67,9 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key, discard_pulses_from_excluded_doms : bool, optional If True, pulses on excluded DOMs are discarded. The pulses are discarded after the charge at the DOM is collected. + pulse_is_mcpe : bool, optional + If True, train on MCPE pulses instead of RecoPulses. This setting + changes how the charge is read out of the hdf5 files. (Default: False) Returns ------- @@ -115,6 +118,14 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key, time_exclusions_exist = time_exclusions_key is not None dom_exclusions_exist = dom_exclusions_key is not None + + self._hdf_time_index = 10 + # 11: npe, 12: charge + if pulse_is_mcpe: + self._hdf_charge_index = 11 + else: + self._hdf_charge_index = 12 + if add_charge_quantiles: pulse_dim = 3 else: @@ -187,6 +198,7 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key, add_charge_quantiles=add_charge_quantiles), mutable_settings=dict( pulse_key=pulse_key, + pulse_is_mcpe=pulse_is_mcpe, dom_exclusions_key=dom_exclusions_key, time_exclusions_key=time_exclusions_key, discard_pulses_from_excluded_doms=( @@ -221,10 +233,9 @@ def get_data_from_hdf(self, file, *args, **kwargs): # open file f = pd.HDFStore(file, 'r') - try: pulses = f[self.configuration.config['pulse_key']] - _labels = f['LabelsDeepLearning'] + _labels = f[kwargs['label_key']] if self.data['dom_exclusions_exist']: try: dom_exclusions = \ @@ -311,29 +322,29 @@ def get_data_from_hdf(self, file, *args, **kwargs): continue index = event_dict[(row[1:5])] - # pulse charge: row[12], time: row[10] + # pulse charge: row[self._hdf_charge_index], time: row[self._hdf_time_index] # accumulate charge in DOMs - x_dom_charge[index, string-1, dom-1, 0] += row[12] + x_dom_charge[index, string-1, dom-1, 0] += row[self._hdf_charge_index] # gather pulses if add_charge_quantiles: # (charge, time, quantile) cum_charge = float(x_dom_charge[index, string-1, dom-1, 0]) - x_pulses[pulse_index] = [row[12], row[10], cum_charge] + x_pulses[pulse_index] = [row[self._hdf_charge_index], row[self._hdf_time_index], cum_charge] else: # (charge, time) - x_pulses[pulse_index] = [row[12], row[10]] + x_pulses[pulse_index] = [row[self._hdf_charge_index], row[self._hdf_time_index]] # gather pulse ids (batch index, string, dom) x_pulses_ids[pulse_index] = [index, string-1, dom-1] # update time window - if row[10] > x_time_window[index, 1]: - x_time_window[index, 1] = row[10] - if row[10] < x_time_window[index, 0]: - x_time_window[index, 0] = row[10] + if row[self._hdf_time_index] > x_time_window[index, 1]: + x_time_window[index, 1] = row[self._hdf_time_index] + if row[self._hdf_time_index] < x_time_window[index, 0]: + x_time_window[index, 0] = row[self._hdf_time_index] # convert cumulative charge to fraction of total charge, e.g. quantile if add_charge_quantiles: @@ -364,10 +375,10 @@ def get_data_from_hdf(self, file, *args, **kwargs): continue index = event_dict[(row[1:5])] - # t_start (pulse time): row[10], t_end (pulse width): row[11] + # t_start (pulse time): row[self._hdf_time_index], t_end (pulse width): row[11] # (t_start, t_end) - x_time_exclusions[tw_index] = [row[10], row[11]] + x_time_exclusions[tw_index] = [row[self._hdf_time_index], row[11]] # gather pulse ids (batch index, string, dom) x_time_exclusions_ids[tw_index] = [index, string-1, dom-1] @@ -524,7 +535,7 @@ def get_data_from_frame(self, frame, *args, **kwargs): for pulse in pulse_list: index = 0 - # pulse charge: row[12], time: row[10] + # pulse charge: row[self._hdf_charge_index], time: row[self._hdf_time_index] # accumulate charge in DOMs x_dom_charge[index, string-1, dom-1, 0] += pulse.charge From 009d3519e97fb13a30442a80bf2f5c97bf941388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Mon, 1 Apr 2024 18:11:43 +0200 Subject: [PATCH 03/27] Fixed setting charge index. Slightly refactor code to avoid future warning --- egenerator/data/modules/data/pulse_data.py | 46 ++++++++++++---------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py index f56af5ab..4592e93f 100644 --- a/egenerator/data/modules/data/pulse_data.py +++ b/egenerator/data/modules/data/pulse_data.py @@ -118,14 +118,6 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key, time_exclusions_exist = time_exclusions_key is not None dom_exclusions_exist = dom_exclusions_key is not None - - self._hdf_time_index = 10 - # 11: npe, 12: charge - if pulse_is_mcpe: - self._hdf_charge_index = 11 - else: - self._hdf_charge_index = 12 - if add_charge_quantiles: pulse_dim = 3 else: @@ -231,6 +223,13 @@ def get_data_from_hdf(self, file, *args, **kwargs): if not self.is_configured: raise ValueError('Module not configured yet!') + hdf_time_index = 10 + # 11: npe, 12: charge + if self.configuration.config["pulse_is_mcpe"]: + hdf_charge_index = 11 + else: + hdf_charge_index = 12 + # open file f = pd.HDFStore(file, 'r') try: @@ -268,9 +267,13 @@ def get_data_from_hdf(self, file, *args, **kwargs): # create Dictionary with event IDs size = len(_labels['Event']) + + if not size: + raise ValueError("Label length is 0.") + event_dict = {} - for row in _labels.iterrows(): - event_dict[(row[1][0], row[1][1], row[1][2], row[1][3])] = row[0] + for row in _labels.itertuples(): + event_dict[(row[1:5])] = row[0] # create empty array for DOM charges x_dom_charge = np.zeros([size, 86, 60, 1], @@ -320,31 +323,32 @@ def get_data_from_hdf(self, file, *args, **kwargs): self._logger.warning( 'skipping pulse: {} {}'.format(string, dom)) continue + index = event_dict[(row[1:5])] - # pulse charge: row[self._hdf_charge_index], time: row[self._hdf_time_index] + # pulse charge: row[hdf_charge_index], time: row[hdf_time_index] # accumulate charge in DOMs - x_dom_charge[index, string-1, dom-1, 0] += row[self._hdf_charge_index] + x_dom_charge[index, string-1, dom-1, 0] += row[hdf_charge_index] # gather pulses if add_charge_quantiles: # (charge, time, quantile) cum_charge = float(x_dom_charge[index, string-1, dom-1, 0]) - x_pulses[pulse_index] = [row[self._hdf_charge_index], row[self._hdf_time_index], cum_charge] + x_pulses[pulse_index] = [row[hdf_charge_index], row[hdf_time_index], cum_charge] else: # (charge, time) - x_pulses[pulse_index] = [row[self._hdf_charge_index], row[self._hdf_time_index]] + x_pulses[pulse_index] = [row[hdf_charge_index], row[hdf_time_index]] # gather pulse ids (batch index, string, dom) x_pulses_ids[pulse_index] = [index, string-1, dom-1] # update time window - if row[self._hdf_time_index] > x_time_window[index, 1]: - x_time_window[index, 1] = row[self._hdf_time_index] - if row[self._hdf_time_index] < x_time_window[index, 0]: - x_time_window[index, 0] = row[self._hdf_time_index] + if row[hdf_time_index] > x_time_window[index, 1]: + x_time_window[index, 1] = row[hdf_time_index] + if row[hdf_time_index] < x_time_window[index, 0]: + x_time_window[index, 0] = row[hdf_time_index] # convert cumulative charge to fraction of total charge, e.g. quantile if add_charge_quantiles: @@ -375,10 +379,10 @@ def get_data_from_hdf(self, file, *args, **kwargs): continue index = event_dict[(row[1:5])] - # t_start (pulse time): row[self._hdf_time_index], t_end (pulse width): row[11] + # t_start (pulse time): row[hdf_time_index], t_end (pulse width): row[11] # (t_start, t_end) - x_time_exclusions[tw_index] = [row[self._hdf_time_index], row[11]] + x_time_exclusions[tw_index] = [row[hdf_time_index], row[11]] # gather pulse ids (batch index, string, dom) x_time_exclusions_ids[tw_index] = [index, string-1, dom-1] @@ -535,7 +539,7 @@ def get_data_from_frame(self, frame, *args, **kwargs): for pulse in pulse_list: index = 0 - # pulse charge: row[self._hdf_charge_index], time: row[self._hdf_time_index] + # pulse charge: row[hdf_charge_index], time: row[hdf_time_index] # accumulate charge in DOMs x_dom_charge[index, string-1, dom-1, 0] += pulse.charge From bd3a13957ca032e32c262be6ead665705c79e8fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Thu, 4 Apr 2024 14:21:36 +0200 Subject: [PATCH 04/27] Add new config file for training on MCPEs --- configs/cascade_7param_MCPE.yaml | 427 +++++++++++++++++++++++++++++++ 1 file changed, 427 insertions(+) create mode 100644 configs/cascade_7param_MCPE.yaml diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml new file mode 100644 index 00000000..ef8aa787 --- /dev/null +++ b/configs/cascade_7param_MCPE.yaml @@ -0,0 +1,427 @@ +--- +############## +# Config for single cascade events +# +# Detailed information on the parameters are given in the SetupManager +# class located in egenerator.settings.setup_manager.py. +############## + +# Provide a unique name for the model +unique_name: 'cascade_7param_MCPE' + + +#------------------ +# Training settings +#------------------ +training_settings: { + 'optimizer_name': 'Adam', + 'optimizer_settings': { + # 'amsgrad': True, + 'learning_rate': { + 'full_class_string': 'egenerator.utils.learning_rate.MultiLearningRateScheduler', + 'settings':{ + 'boundaries': [10000, 500000], + 'scheduler_settings': [ + { + 'full_class_string': 'tensorflow.keras.optimizers.schedules.PolynomialDecay', + 'settings': { + 'initial_learning_rate': 0.01, + 'end_learning_rate': 0.001, + 'decay_steps': 10000, + }, + }, + { + 'full_class_string': 'tensorflow.keras.optimizers.schedules.PolynomialDecay', + 'settings': { + 'initial_learning_rate': 0.001, + 'end_learning_rate': 0.001, + 'decay_steps': 490000, + }, + }, + { + 'full_class_string': 'tensorflow.keras.optimizers.schedules.PolynomialDecay', + 'settings': { + 'initial_learning_rate': 0.001, + 'end_learning_rate': 0.000001, + 'decay_steps': 500000, + 'power': 2, + }, + }, + ] + }, + }, + }, + 'l1_regularization': 0., + 'l2_regularization': 0., + 'clip_gradients_value': , + 'remove_nan_gradients': False, + + 'validation_frequency': 100, + 'evaluation_frequency': 100, + 'save_frequency': 500, + + # Number of training iterations to train the model for + 'num_training_iterations': 1, + + # Additional keywords to the loss module used for training + 'additional_loss_module_kwargs': { + 'normalize_by_total_charge': False, + }, +} + +#----------------.------- +# Reconstruction settings +#------------------------ +reconstruction_settings: { + 'reco_output_file': '../data/reconstruction/{unique_name}/reconstruction_{unique_name}.hdf5', + + # Define which modules to run + 'calculate_covariance_matrix': False, + 'calculate_goodness_of_fit': False, + 'estimate_angular_uncertainty': False, + 'run_mcmc': False, + 'make_1d_llh_scans': False, + + # define which tensor to use as parameter_tensor (default: x_parameters) + 'parameter_tensor_name': 'x_parameters', + # define seed (to seed from MC truth choose: 'x_parameters') + 'seed': 'MonopodFit4_PartialExclusion', + # these are only relevant for the SeedLoaderMiscModule. This specifies + # from which column names in the data table the parameters will be loaded + 'seed_parameter_names': ['x', 'y', 'z', 'zenith', 'azimuth', + 'energy', 'time', + ], + 'seed_missing_value': 1., + 'seed_missing_value_dict': { + }, + 'seed_float_precision': 'float32', + + # choose the otpimizer iterface: + # 'scipy' or 'tfp' (tensorflow_probability) + 'reco_optimizer_interface': 'scipy', + + 'scipy_optimizer_settings': { + # 'method': 'L-BFGS-B', + 'method': 'BFGS', + 'options': { + # 'ftol': !!float 1e-7, + } + }, + + 'tf_optimizer_settings': { + 'method': 'bfgs_minimize', + 'x_tolerance': 0.001, + }, + + # Perform minimization in transformed and normalized parameter space + # if True. This is usually desired as it may facilitate minimization. + 'minimize_in_trafo_space': True, + + # Specify which parameters to fit. + # If True, the parameter is minimized doing reconstruction, otherwise it + # is held constant. + # Set default value which will apply to all parameters, except if stated + # otherwise in the 'minimize_parameter_dict' + 'minimize_parameter_default_value': True, + # Settings defined here overwrite the default value + # Entries must have the form: {parameter_name: value} + 'minimize_parameter_dict': { + + }, +} + +#----------------------- +# Model Manager settings +#----------------------- + +# Settings for model manager class +model_manager_settings: { + + # The loss module class to use + 'model_manager_class': 'egenerator.manager.source.SourceManager', + + # restore model if True, otherwise start from scratch + 'restore_model' : True, + + # These settings are used to configure the model manager and may not change + config: { + + # Path to where the manager will be saved to and loaded from + 'manager_dir': '../data/training/{unique_name}/manager', + + # Define which model parameters to use for the I3Particle + # [x, y, z, zenith, azimuth, energy, time] + 'I3ParticleMapping': { + 'x': 'cascade_x', + 'y': 'cascade_y', + 'z': 'cascade_z', + 'zenith': 'cascade_zenith', + 'azimuth': 'cascade_azimuth', + 'energy': 'cascade_energy', + 'time': 'cascade_time', + }, + }, +} + +#----------------------- +# Data Iterator settings +#----------------------- + +# These settings describe the data iterators +data_iterator_settings: { + + # The training data iterator + 'training': { + 'batch_size': 32, + 'num_splits': , + 'file_capacity': 10, + 'batch_capacity': 500, + 'dataset_capacity': 500, + 'num_jobs': 10, + 'num_add_files': 20, + 'num_repetitions': 3, + 'input_data': [ + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000000*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000004*.hdf5', + ], + }, + + # The trafo data iterator + 'trafo': { + 'batch_size': 32, + 'num_splits': , + 'file_capacity': 1, + 'batch_capacity': 2, + 'num_jobs': 10, + 'num_add_files': 1, + 'num_repetitions': 1, + 'pick_random_files_forever': False, + 'input_data': [ + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000000*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000004*.hdf5', + ], + }, + + # The validation data iterator + 'validation': { + 'batch_size': 32, + 'num_splits': , + 'file_capacity': 5, + 'batch_capacity': 10, + 'dataset_capacity': 10, + 'num_jobs': 2, + 'num_add_files': 5, + 'num_repetitions': 1, + 'pick_random_files_forever': True, + 'input_data': [ + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000040*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000041*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000042*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000043*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000044*.hdf5', + ], + }, + + # The test data iterator + 'test': { + 'batch_size': 1, + 'num_splits': , + 'file_capacity': 1, + 'batch_capacity': 3, + 'num_jobs': 1, + 'num_add_files': 0, + 'num_repetitions': 1, + 'sample_randomly': False, + 'pick_random_files_forever': False, + 'input_data': [ + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000045*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000046*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000046*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000047*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000048*.hdf5', + '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000049*.hdf5', + ], + }, +} + +#--------------------- +# Loss module settings +#--------------------- + +# Settings for the loss module class +loss_module_settings: [ + { + # The loss module class to use + 'loss_module': 'egenerator.loss.default.DefaultLossModule', + + config: { + # the float precision to use + 'float_precision': 'float32', + # Add normalization terms to llh if True + 'add_normalization_term': True, + # choose the loss function to use + 'loss_function_name': 'unbinned_pulse_time_llh', + }, + }, + { + # The loss module class to use + 'loss_module': 'egenerator.loss.default.DefaultLossModule', + + config: { + # the float precision to use + 'float_precision': 'float32', + # Add normalization terms to llh if True + 'add_normalization_term': True, + # choose the loss function to use + 'loss_function_name': 'negative_binomial_charge_pdf', + }, + }, +] + +#--------------------------- +# Evaluation module settings +#--------------------------- + +# Settings for the evaluation module class +evaluation_module_settings: { + + # The loss module class to use + 'evaluation_module':, + config: { + }, +} + +#----------------------------- +# Data Transformation settings +#----------------------------- +data_trafo_settings: { + 'float_precision': 'float64', + 'norm_constant': !!float 1e-6, + 'num_batches': 500, + 'model_dir': '/lustre/fs22/group/radio/felix/data/{unique_name}', +} + +#---------------------- +# Data Handler settings +#---------------------- + +# Settings for the data handler class +data_handler_settings: { + + # The data handler class to use + 'data_handler': 'modular.ModuleDataHandler', + + # ------------------- + # DataModule Settings + # ------------------- + # which data module to use + 'data_module': 'pulse_data.PulseDataModule', + + # settings for the data module + 'data_settings':{ + 'pulse_key': 'I3MCPESeriesMap', + 'pulse_is_mcpe': True, + 'dom_exclusions_key':, + 'time_exclusions_key':, + 'float_precision': 'float32', + 'add_charge_quantiles': False, + 'discard_pulses_from_excluded_doms': False, + }, + + # -------------------- + # LabelModule Settings + # -------------------- + # which label module to use + 'label_module': 'cascades.CascadeGeneratorLabelModule', + + # settings for the label module + 'label_settings':{ + 'shift_cascade_vertex': True, + # logarithm on labels: + # (x, y, z, zenith, azimuth, energy, time)? + 'trafo_log': [False, False, False, False, False, True, False], + 'label_key': 'LabelsDeepLearning', + 'float_precision': 'float32', + }, + + # --------------------- + # WeightModule Settings + # --------------------- + # which weight module to use + 'weight_module': 'dummy.DummyWeightModule', + + # settings for the weight module + 'weight_settings':{}, + + # ------------------- + # MiscModule Settings + # ------------------- + # which misc module to use + 'misc_module': 'dummy.DummyMiscModule', + + # settings for the misc module + 'misc_settings':{}, + + # ------------------- + # FilterModule Settings + # ------------------- + # which filter module to use + 'filter_module': 'general_filter.GeneralFilterModule', + + # settings for the filter module + 'filter_settings':{ + 'constraints': [ + # ['LabelsDeepLearning', 'cascade_z', '<', -250.], + ], + }, +} + +#--------------- +# Model settings +#--------------- + +# Settings for the neural network model class +model_settings: { + + # The source class to use + 'model_class': 'egenerator.model.source.cascade.default.DefaultCascadeModel', + + config: { + 'keep_prob':, + 'add_opening_angle': True, + 'add_dom_coordinates': False, + 'num_local_vars': 0, + 'scale_charge': True, + 'scale_charge_by_relative_dom_efficiency': True, + 'scale_charge_by_global_dom_efficiency': False, + 'prevent_mixture_component_swapping': False, + 'estimate_charge_distribution': 'negative_binomial', + 'num_latent_models': 10, + + # This is a list of labels in addition to + # (x, y, z, zenith, azimuth, energy, time) and snowstorm parameters + 'additional_label_names' : [ + ], + + # First convolutions + 'filter_size_list' : [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]], + 'num_filters_list' : [25, 500, 500, 500, 42], + 'method_list' : ['locally_connected', + 'convolution', 'convolution', 'convolution', + 'convolution', + ], + 'use_dropout_list' : False, + 'activation_list' : ['elu', 'elu', 'elu', 'elu', ''], + 'use_batch_norm_list' : False, + 'use_residual_list' : True, + }, +} + +#---------------------- +... From 0dc96343a56a13a4e5705806b004eca02c9a35f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Thu, 4 Apr 2024 14:28:16 +0200 Subject: [PATCH 05/27] Keep training and testing data independent --- configs/cascade_7param_MCPE.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml index ef8aa787..f30e20fe 100644 --- a/configs/cascade_7param_MCPE.yaml +++ b/configs/cascade_7param_MCPE.yaml @@ -185,7 +185,6 @@ data_iterator_settings: { '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5', '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5', '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000004*.hdf5', ], }, @@ -204,7 +203,6 @@ data_iterator_settings: { '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5', '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5', '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000004*.hdf5', ], }, From 871c5c8a636fd679c2b18ad2a44cefaa0734757e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Thu, 4 Apr 2024 17:26:05 +0200 Subject: [PATCH 06/27] Allow to specify parameter names in config --- egenerator/data/modules/labels/cascades.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/egenerator/data/modules/labels/cascades.py b/egenerator/data/modules/labels/cascades.py index 77316dc0..b30abe41 100644 --- a/egenerator/data/modules/labels/cascades.py +++ b/egenerator/data/modules/labels/cascades.py @@ -29,7 +29,9 @@ def __init__(self, logger=None): super(CascadeGeneratorLabelModule, self).__init__(logger=logger) def _configure(self, config_data, shift_cascade_vertex, trafo_log, - float_precision, label_key='LabelsDeepLearning'): + float_precision, label_key='LabelsDeepLearning', + parameter_names=['cascade_x', 'cascade_y', 'cascade_z', 'cascade_zenith', + 'cascade_azimuth', 'cascade_energy', 'cascade_t']): """Configure Module Class This is an abstract method and must be implemented by derived class. @@ -46,10 +48,12 @@ def _configure(self, config_data, shift_cascade_vertex, trafo_log, If a single bool is given, this applies to all labels. Otherwise a list of bools corresponds to the labels in the order: x, y, z, zenith, azimuth, energy, time - label_key : str, optional - The name of the key under which the labels are saved. float_precision : str The float precision as a str. + label_key : str, optional + The name of the key under which the labels are saved. + parameter_names : list of str, optional + Name of the parameters (e.g, the columns in the hdf5 Dataset `label_key`) Returns ------- @@ -148,9 +152,8 @@ def get_data_from_hdf(self, file, *args, **kwargs): cascade_parameters = [] try: _labels = f[self.configuration.config['label_key']] - for l in ['cascade_x', 'cascade_y', 'cascade_z', 'cascade_zenith', - 'cascade_azimuth', 'cascade_energy', 'cascade_t']: - cascade_parameters.append(_labels[l]) + for par in self.configuration.config['parameter_names']: + cascade_parameters.append(_labels[par]) except Exception as e: self._logger.warning(e) @@ -202,9 +205,8 @@ def get_data_from_frame(self, frame, *args, **kwargs): cascade_parameters = [] try: _labels = frame[self.configuration.config['label_key']] - for l in ['cascade_x', 'cascade_y', 'cascade_z', 'cascade_zenith', - 'cascade_azimuth', 'cascade_energy', 'cascade_t']: - cascade_parameters.append(np.atleast_1d(_labels[l])) + for par in self.configuration.config['parameter_names']: + cascade_parameters.append(np.atleast_1d(_labels[par])) except Exception as e: self._logger.warning(e) From cc2a8da181a2bca52bafb72e3e0b77faa5ad8d95 Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Thu, 4 Apr 2024 11:13:09 -0500 Subject: [PATCH 07/27] Propagate new argument correctly --- egenerator/data/modules/labels/cascades.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/egenerator/data/modules/labels/cascades.py b/egenerator/data/modules/labels/cascades.py index b30abe41..82c9235b 100644 --- a/egenerator/data/modules/labels/cascades.py +++ b/egenerator/data/modules/labels/cascades.py @@ -119,7 +119,9 @@ def _configure(self, config_data, shift_cascade_vertex, trafo_log, shift_cascade_vertex=shift_cascade_vertex, trafo_log=trafo_log, float_precision=float_precision, - label_key=label_key)) + label_key=label_key, + parameter_names=parameter_names)) + return configuration, data, {} def get_data_from_hdf(self, file, *args, **kwargs): From 37abda55163e11dec0ccd4a269031dc8725b3dd7 Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Tue, 9 Apr 2024 09:53:36 -0500 Subject: [PATCH 08/27] Update config for MCPE training --- configs/cascade_7param_MCPE.yaml | 82 ++++++++++++-------------------- 1 file changed, 31 insertions(+), 51 deletions(-) diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml index f30e20fe..bc507baa 100644 --- a/configs/cascade_7param_MCPE.yaml +++ b/configs/cascade_7param_MCPE.yaml @@ -181,10 +181,10 @@ data_iterator_settings: { 'num_add_files': 20, 'num_repetitions': 3, 'input_data': [ - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000000*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_6-9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_10-15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', ], }, @@ -194,15 +194,15 @@ data_iterator_settings: { 'num_splits': , 'file_capacity': 1, 'batch_capacity': 2, - 'num_jobs': 10, + 'num_jobs': 12, 'num_add_files': 1, 'num_repetitions': 1, 'pick_random_files_forever': False, 'input_data': [ - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000000*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000001*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000002*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000003*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_6-9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_10-15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', ], }, @@ -218,11 +218,9 @@ data_iterator_settings: { 'num_repetitions': 1, 'pick_random_files_forever': True, 'input_data': [ - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000040*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000041*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000042*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000043*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000044*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_0/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_1/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_2/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', ], }, @@ -238,12 +236,9 @@ data_iterator_settings: { 'sample_randomly': False, 'pick_random_files_forever': False, 'input_data': [ - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000045*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000046*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000046*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000047*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000048*.hdf5', - '/lustre/fs22/group/radio/felix/data/eg/hdf5/photons_hese_000049*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_3/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_16/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/set_17/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', ], }, } @@ -253,34 +248,18 @@ data_iterator_settings: { #--------------------- # Settings for the loss module class -loss_module_settings: [ - { - # The loss module class to use - 'loss_module': 'egenerator.loss.default.DefaultLossModule', - - config: { - # the float precision to use - 'float_precision': 'float32', - # Add normalization terms to llh if True - 'add_normalization_term': True, - # choose the loss function to use - 'loss_function_name': 'unbinned_pulse_time_llh', - }, - }, - { - # The loss module class to use - 'loss_module': 'egenerator.loss.default.DefaultLossModule', - - config: { - # the float precision to use - 'float_precision': 'float32', - # Add normalization terms to llh if True - 'add_normalization_term': True, - # choose the loss function to use - 'loss_function_name': 'negative_binomial_charge_pdf', - }, +loss_module_settings: { + # The loss module class to use + 'loss_module': 'egenerator.loss.default.DefaultLossModule', + + config: { + # the float precision to use + 'float_precision': 'float32', + + # choose the loss function to use + 'loss_function_name': 'unbinned_extended_pulse_llh', # this implies a Poisson likelihood for the charge }, -] +} #--------------------------- # Evaluation module settings @@ -301,8 +280,8 @@ evaluation_module_settings: { data_trafo_settings: { 'float_precision': 'float64', 'norm_constant': !!float 1e-6, - 'num_batches': 500, - 'model_dir': '/lustre/fs22/group/radio/felix/data/{unique_name}', + 'num_batches': 2000, + 'model_dir': '../data/trafo/{unique_name}', } #---------------------- @@ -343,6 +322,7 @@ data_handler_settings: { 'shift_cascade_vertex': True, # logarithm on labels: # (x, y, z, zenith, azimuth, energy, time)? + # 'parameter_names': ['x', 'y', 'z', 'zenith', 'azimuth', 'energy', 'time'], 'trafo_log': [False, False, False, False, False, True, False], 'label_key': 'LabelsDeepLearning', 'float_precision': 'float32', @@ -399,7 +379,7 @@ model_settings: { 'scale_charge_by_relative_dom_efficiency': True, 'scale_charge_by_global_dom_efficiency': False, 'prevent_mixture_component_swapping': False, - 'estimate_charge_distribution': 'negative_binomial', + 'estimate_charge_distribution': False, 'num_latent_models': 10, # This is a list of labels in addition to @@ -409,7 +389,7 @@ model_settings: { # First convolutions 'filter_size_list' : [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]], - 'num_filters_list' : [25, 500, 500, 500, 42], + 'num_filters_list' : [25, 500, 500, 500, 41], # The last dimension should be 4 * num_asym_gauss + num_charge_pdf; num_asym_gauss = 10, num_charge_pdf = 1 (poisson) or 2 (binominal) 'method_list' : ['locally_connected', 'convolution', 'convolution', 'convolution', 'convolution', From 10fadf1da7f50eb25a8005aaea11f420a4ef4e58 Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Tue, 9 Apr 2024 10:20:58 -0500 Subject: [PATCH 09/27] Add missing config --- configs/cascade_7param_MCPE.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml index bc507baa..84ea158f 100644 --- a/configs/cascade_7param_MCPE.yaml +++ b/configs/cascade_7param_MCPE.yaml @@ -256,6 +256,8 @@ loss_module_settings: { # the float precision to use 'float_precision': 'float32', + 'add_normalization_term': True, + # choose the loss function to use 'loss_function_name': 'unbinned_extended_pulse_llh', # this implies a Poisson likelihood for the charge }, From 4f4bc0cd842d5b51b83248b70d619ec9794b46be Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Tue, 9 Apr 2024 14:45:34 -0500 Subject: [PATCH 10/27] Set number of iteration to 1M --- configs/cascade_7param_MCPE.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml index 84ea158f..6926f61f 100644 --- a/configs/cascade_7param_MCPE.yaml +++ b/configs/cascade_7param_MCPE.yaml @@ -61,7 +61,7 @@ training_settings: { 'save_frequency': 500, # Number of training iterations to train the model for - 'num_training_iterations': 1, + 'num_training_iterations': 1000000, # Additional keywords to the loss module used for training 'additional_loss_module_kwargs': { From 1d2f4403026f7e309d4e0db47c44a1355b18d573 Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Thu, 11 Apr 2024 09:00:54 -0500 Subject: [PATCH 11/27] Update MCPE config --- configs/cascade_7param_MCPE.yaml | 57 +++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml index 6926f61f..f3b5b76b 100644 --- a/configs/cascade_7param_MCPE.yaml +++ b/configs/cascade_7param_MCPE.yaml @@ -178,13 +178,23 @@ data_iterator_settings: { 'batch_capacity': 500, 'dataset_capacity': 500, 'num_jobs': 10, - 'num_add_files': 20, + 'num_add_files': 5, 'num_repetitions': 3, + 'pick_random_files_forever': False, 'input_data': [ - '/data/user/fschluter/eg-data/electrons/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_6-9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_10-15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_3/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_6/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_7/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_8/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_10/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_11/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_12/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_13/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_14/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', ], }, @@ -199,10 +209,19 @@ data_iterator_settings: { 'num_repetitions': 1, 'pick_random_files_forever': False, 'input_data': [ - '/data/user/fschluter/eg-data/electrons/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_6-9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_10-15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_3/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_4/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_5/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_6/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_7/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_8/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_9/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_10/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_11/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_12/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_13/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_14/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_15/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', ], }, @@ -210,17 +229,17 @@ data_iterator_settings: { 'validation': { 'batch_size': 32, 'num_splits': , - 'file_capacity': 5, + 'file_capacity': 10, 'batch_capacity': 10, 'dataset_capacity': 10, - 'num_jobs': 2, + 'num_jobs': 10, 'num_add_files': 5, 'num_repetitions': 1, - 'pick_random_files_forever': True, + 'pick_random_files_forever': False, 'input_data': [ - '/data/user/fschluter/eg-data/electrons/set_0/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_1/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_2/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_0/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_1/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_2/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', ], }, @@ -229,16 +248,16 @@ data_iterator_settings: { 'batch_size': 1, 'num_splits': , 'file_capacity': 1, - 'batch_capacity': 3, + 'batch_capacity': 10, 'num_jobs': 1, 'num_add_files': 0, 'num_repetitions': 1, 'sample_randomly': False, 'pick_random_files_forever': False, 'input_data': [ - '/data/user/fschluter/eg-data/electrons/set_3/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_16/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', - '/data/user/fschluter/eg-data/electrons/set_17/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_16/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_17/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', + '/data/user/fschluter/eg-data/electrons/unbinned/set_18/Gen1k_10TeV_EMinus_FTP_V3_*.hdf5', ], }, } From 382fd558b1166136bdf6c6e6a8d3627f46941a0a Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Thu, 11 Apr 2024 09:03:48 -0500 Subject: [PATCH 12/27] Do not run validation in the first iteration --- egenerator/manager/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/egenerator/manager/base.py b/egenerator/manager/base.py index 6857a7b5..aca36b05 100644 --- a/egenerator/manager/base.py +++ b/egenerator/manager/base.py @@ -776,7 +776,6 @@ def train(self, config, loss_module, num_training_iterations, # increment step counter for model in self.models: model.step.assign_add(1) - # get new batch of training data training_data_batch = next(train_dataset) @@ -786,7 +785,7 @@ def train(self, config, loss_module, num_training_iterations, # -------------------------- # evaluate on validation set # -------------------------- - if step % opt_config['validation_frequency'] == 0: + if step % opt_config['validation_frequency'] == 0 and step: new_validation_time = timeit.default_timer() time_diff = new_validation_time - validation_time validation_time = new_validation_time From 8d5103546b87ccb08de64267fa2d638859a0d6cc Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Mon, 15 Apr 2024 08:45:51 -0500 Subject: [PATCH 13/27] Add function which evaluates pdf only for one DOM --- egenerator/model/source/base.py | 89 +++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/egenerator/model/source/base.py b/egenerator/model/source/base.py index 2fdf43bb..688de28e 100644 --- a/egenerator/model/source/base.py +++ b/egenerator/model/source/base.py @@ -634,6 +634,95 @@ def pdf(self, x, result_tensors, return pdf_values + def pdf_per_dom(self, x, result_tensors, string, dom, **kwargs): + """Compute PDF values at x for given result_tensors for a specific dom + + This is a numpy, i.e. not tensorflow, method to compute the PDF based + on a provided `result_tensors`. This can be used to investigate + the generated PDFs. + + Note: this function only works for sources that use asymmetric + Gaussians to parameterize the PDF. The latent values of the AG + must be included in the `result_tensors`. + + Note: the PDF does not set values inside excluded time windows to zero, + but it does adjust the normalization. It is assumed that pulses will + already be masked before evaluated by Event-Generator. Therefore, an + extra check for exclusions is not performed due to performance issues. + + Parameters + ---------- + x : array_like + The times in ns at which to evaluate the result tensors. + Shape: () or [n_points] + result_tensors : dict of tf.tensor + The dictionary of output tensors as obtained from `get_tensors`. + string : int + String id of the DOM + dom : int + Module id of the DOM + **kwargs + Keyword arguments. + + Returns + ------- + array_like + The PDF values at times x for the given event hypothesis and + exclusions that were used to compute `result_tensors`. + Shape: [n_events, 86, 60, n_points] + + Raises + ------ + NotImplementedError + If assymetric Gaussian latent variables are not present in + `result_tensors` dictionary. + """ + + x_orig = np.atleast_1d(x) + assert len(x_orig.shape) == 1, x_orig.shape + + # shape: [1, 1, n_points] + x = np.reshape(x_orig, (1, 1, -1)) + + if result_tensors['time_offsets'] is not None: + # shape: [n_events] + t_offsets = result_tensors['time_offsets'].numpy() + + # shape: [n_events, 1, 1] + t_offsets = np.reshape(t_offsets, [-1, 1, 1]) + # shape: [n_events, 1, n_points] + x = x - t_offsets + + # internally we are working with different time units + x = x / self.time_unit_in_ns + + # Check if the asymmetric Gaussian latent variables exist + for latent_name in ['mu', 'scale', 'sigma', 'r']: + if 'latent_var_' + latent_name not in result_tensors: + msg = 'PDF evaluation is not supported for this model: {}' + raise NotImplementedError(msg.format( + self._untracked_data['name'])) + + # extract values + # shape: [n_events, n_components, 1] + mu = result_tensors['latent_var_mu'].numpy()[:, string, dom, :, np.newaxis] + scale = result_tensors['latent_var_scale'].numpy()[:, string, dom, :, np.newaxis] + sigma = result_tensors['latent_var_sigma'].numpy()[:, string, dom, :, np.newaxis] + r = result_tensors['latent_var_r'].numpy()[:, string, dom, :, np.newaxis] + + # shape: [n_events, n_components, n_points] + mixture_pdf = basis_functions.asymmetric_gauss( + x=x, mu=mu, sigma=sigma, r=r + ) + + # shape: [n_events, n_points] + pdf_values = np.sum(mixture_pdf * scale, axis=1) + + # invert back to PDF in ns + pdf_values = pdf_values / self.time_unit_in_ns + + return np.squeeze(pdf_values) + def _apply_pdf_time_window_exclusions( self, times, pdf_values, tw_exclusions, tw_exclusions_ids): """Apply time window exclusions From e0863aced64d7f3d668a7948ea0ca54e5d4d7e82 Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Mon, 15 Apr 2024 08:47:14 -0500 Subject: [PATCH 14/27] Add module to evaluate mcpe eg models --- egenerator/ic3/__init__.py | 2 + egenerator/ic3/evaluate_mcpe.py | 803 ++++++++++++++++++++++++++++++++ 2 files changed, 805 insertions(+) create mode 100644 egenerator/ic3/evaluate_mcpe.py diff --git a/egenerator/ic3/__init__.py b/egenerator/ic3/__init__.py index 9991aff1..a0076019 100644 --- a/egenerator/ic3/__init__.py +++ b/egenerator/ic3/__init__.py @@ -1,7 +1,9 @@ from egenerator.ic3.reconstruction import EventGeneratorReconstruction from egenerator.ic3.simulation import EventGeneratorSimulation +from egenerator.ic3.evaluate_mcpe import CalculateLikelihood __all__ = [ 'EventGeneratorReconstruction', 'EventGeneratorSimulation', + 'CalculateLikelihood', ] diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py new file mode 100644 index 00000000..1630cc61 --- /dev/null +++ b/egenerator/ic3/evaluate_mcpe.py @@ -0,0 +1,803 @@ +import os +import numpy as np +import tensorflow as tf +import timeit +import math + + +import matplotlib.pyplot as plt + +from icecube import dataclasses, icetray, photonics_service +from icecube.icetray.i3logging import log_info, log_warn + +from egenerator.utils.configurator import ManagerConfigurator +from egenerator.utils import basis_functions + + +def calculate_loss(pred, true): + assert true % 1 == 0, "Not a int value" + return np.exp(-1 * pred) * pred ** true / math.factorial(true) + + +class CalculateLikelihood(icetray.I3ConditionalModule): + + """Class to evaluate predictions of MCPE with Event-Generator model. + + """ + + def __init__(self, context): + """Class to evaluate predictions of MCPE with Event-Generator model. + + Parameters + ---------- + context : TYPE + Description + """ + icetray.I3ConditionalModule.__init__(self, context) + self.AddOutBox('OutBox') + + # Required settings + self.AddParameter('model_name', + 'A model name that defines which model to ' + 'apply. The full path is given by `model_base_dir` +' + ' model_name if `model_base_dir` is provided. ' + 'Otherwise `model_name` must define the full ' + 'model path.') + + # Optional settings + self.AddParameter('default_values', + 'Not all parameters of the source hypothesis can ' + 'be extracted from the I3MCTree. For these ' + 'parameters, default values may be defined. The ' + '`default_values` must be a dictionary of the ' + 'format `parameter_name`: `value`. `value` may ' + 'either be a double or a string. If a string is ' + 'provided, it is assumed that an I3Double exists in ' + 'the frame under the key as provided by `value`', + { + 'Absorption': 1., + 'AnisotropyScale': 1., + 'DOMEfficiency': 1., + 'HoleIceForward_Unified_00': 0., + 'HoleIceForward_Unified_01': 0., + 'Scattering': 1., + }) + self.AddParameter('model_base_dir', + 'The base directory in which the model is located.' + ' The full path is given by `model_base_dir` + ' + ' model_name if `model_base_dir` is provided. ' + 'Otherwise `model_name` must define the full ' + 'model path.', + '/data/user/mhuennefeld/exported_models/egenerator') + self.AddParameter('output_key', + 'The output base key to which pulses will be saved', + 'EventGeneratorPrediction') + self.AddParameter('mc_tree_name', + 'The name of the propagated I3MCTree for which the ' + 'light yield and measured pulses will be simulated', + 'I3MCTree') + self.AddParameter('max_simulation_distance', + 'Particles further away from the center of the ' + 'detector than this distance are not predicted', + 2250) + self.AddParameter('min_simulation_energy', + 'Particles below this energy threshold are not ' + 'predicted.', + 100) + self.AddParameter('num_threads', + 'Number of threads to use for tensorflow. This will ' + 'be passed on to tensorflow where it is used to set ' + 'the "intra_op_parallelism_threads" and ' + '"inter_op_parallelism_threads" settings. If a ' + 'value of zero (default) is provided, the system ' + 'uses an appropriate number. Note: when running ' + 'this as a job on a cluster you might want to limit ' + '"num_threads" to the amount of allocated CPUs.', + 0) + self.AddParameter('random_service', + 'The random service or seed to use. If this is an ' + 'integer, a numpy random state will be created with ' + 'the seed set to `random_service`', + 42) + self.AddParameter('SimulateElectronDaughterParticles', + 'If true, look for daughter particles of an electron and ' + 'simulate their light yield rather than from the ' + 'mother electron. This is necessary to correctly ' + 'simulate the cascade longitudinal extension of ' + 'high energy cascades.', + False) + self.AddParameter('AddDarkDOMs', '', True) + + self.AddParameter('CascadeService', + '', + None) + + + def Configure(self): + """Configures Module and loads model from file. + """ + self.model_name = self.GetParameter('model_name') + self.default_values = self.GetParameter('default_values') + self.model_base_dir = self.GetParameter('model_base_dir') + self.output_key = self.GetParameter('output_key') + self.mc_tree_name = self.GetParameter('mc_tree_name') + self.max_simulation_distance = \ + self.GetParameter('max_simulation_distance') + self.min_simulation_energy = self.GetParameter('min_simulation_energy') + self.num_threads = self.GetParameter('num_threads') + self.random_service = self.GetParameter('random_service') + self.simulate_electron_daughters = self.GetParameter('SimulateElectronDaughterParticles') + self.add_dark_doms = self.GetParameter("AddDarkDOMs") + + if isinstance(self.random_service, int): + self.random_service = np.random.RandomState(self.random_service) + + if self.model_base_dir is not None: + self.model_dir = os.path.join(self.model_base_dir, self.model_name) + else: + self.model_dir = self.model_name + + # -------------------------------------------------- + # Build and configure SourceManager and extrac Model + # -------------------------------------------------- + self.manager_configurator = ManagerConfigurator( + manager_dirs=[self.model_dir], + num_threads=self.num_threads, + ) + self.manager = self.manager_configurator.manager + + for model in self.manager.models: + num_vars, num_total_vars = model.num_variables + msg = '\nNumber of Model Variables:\n' + msg += '\tFree: {}\n' + msg += '\tTotal: {}' + log_info(msg.format(num_vars, num_total_vars)) + + if len(self.manager.models) > 1: + raise NotImplementedError( + 'Currently does not support model ensemble.') + + self.model = self.manager.models[0] + + # get parameter names that have to be set + self.param_names = sorted([n for n in self.model.parameter_names + if n not in self.default_values]) + + # make sure that the only parameters that need to be set are provided + included_parameters = [ + 'azimuth', 'energy', 'time', 'x', 'y', 'z', 'zenith'] + + # search if there is a common prefix that we can use + prefix_list = [] + for name in included_parameters: + if name in self.param_names: + prefix_list.append('') + else: + found_match = False + for param in self.param_names: + if param[-len(name):] == name: + # only allow prefixes ending on '_' + if param[-len(name)-1] == '_': + prefix_list.append(param[:-len(name)]) + found_match = True + if not found_match: + msg = ( + 'Did not find a parameter name match for "{}". Model ' + 'Parameter names are: {}' + ).format(name, self.param_names) + raise ValueError(msg) + + prefix_list = np.unique(prefix_list) + if len(prefix_list) != 1: + msg = 'Could not find common parameter prefix. Found: {}'.format( + prefix_list) + raise ValueError(msg) + + self._prefix = prefix_list[0] + + # double check that the prefix now works + for name in self.param_names: + if name[len(self._prefix):] not in included_parameters: + raise KeyError('Unknown parameter name:', name) + + # Create concrete tensorflow function to obtain DOM expectations + self.param_dtype = getattr( + tf, self.manager.data_trafo.data['tensors']['x_parameters'].dtype) + self.get_model_tensors = self.manager.get_model_tensors_function() + + # --------------------------------------------------- + # Define which particles are simulated by which Model + # --------------------------------------------------- + + # define allowed parent particles + # These are particles which we can safely simulate by only looking at + # their daughters + self.allowed_parent_particles = [ + dataclasses.I3Particle.NuE, + dataclasses.I3Particle.NuMu, + dataclasses.I3Particle.NuTau, + dataclasses.I3Particle.NuEBar, + dataclasses.I3Particle.NuMuBar, + dataclasses.I3Particle.NuTauBar, + dataclasses.I3Particle.Hadrons, + ] + if self.simulate_electron_daughters: + self.allowed_parent_particles += [ + dataclasses.I3Particle.EMinus, + dataclasses.I3Particle.EPlus, + ] + + # Define type of particles that can be simulated as EM cascades + self.em_cascades = [ + dataclasses.I3Particle.Pi0, + dataclasses.I3Particle.Gamma, + dataclasses.I3Particle.PairProd, + dataclasses.I3Particle.Brems, + dataclasses.I3Particle.DeltaE, + dataclasses.I3Particle.EMinus, + dataclasses.I3Particle.EPlus, + ] + + # Define type of particles that can be simulated as tracks + self.tracks = [ + dataclasses.I3Particle.MuMinus, + dataclasses.I3Particle.MuPlus, + ] + + # define particles that do not deposit light, in other words + # if we end up with a final state particle of this type it is ok + # not to simulate the light yield for these particles. + self.dark_particles = [ + dataclasses.I3Particle.NuE, + dataclasses.I3Particle.NuMu, + dataclasses.I3Particle.NuTau, + dataclasses.I3Particle.NuEBar, + dataclasses.I3Particle.NuMuBar, + dataclasses.I3Particle.NuTauBar, + ] + + self.cascade_service = self.GetParameter('CascadeService') + + if self.cascade_service is not None: + self.photonics = Photonics(self.cascade_service) + + self._counter = 0 + + def DAQ(self, frame): + """Apply Event-Generator model to physics frames. + + Parameters + ---------- + frame : I3Frame + The current Q-Frame. + """ + + self._counter += 1 + + # if self._counter >= 5: + # return + + # start timer + t_0 = timeit.default_timer() + + # get sources + cascades, tracks = self.get_light_sources(frame[self.mc_tree_name]) + + if len(tracks) > 0: + raise NotImplementedError('Tracks not yet supported') + + # convert cascade to source hypotheses + cascade_sources = self.convert_cascades_to_tensor(cascades, frame) + + # timer after source collection + t_1 = timeit.default_timer() + + # get result tensors from model + result_tensors = self.get_model_tensors(cascade_sources) + + # timer after NN evaluation + t_2 = timeit.default_timer() + + # sample pdf + self.sample_from_pdf(frame, result_tensors, validate_photonics=self.cascade_service is not None) + + if self._counter % 10 == 0: + self.plot_pdfs(frame, result_tensors, validate_photonics=self.cascade_service is not None) + + + # timer after Sampling + t_3 = timeit.default_timer() + + log_info('Simulation took: {:3.3f}ms'.format((t_3 - t_0) * 1000)) + log_info('\t Gathering Sources: {:3.3f}ms'.format((t_1 - t_0) * 1000)) + log_info('\t Evaluating NN model: {:3.3f}ms'.format((t_2 - t_1) * 1000)) + log_info('\t Sampling Pulses: {:3.3f}ms'.format((t_3 - t_2) * 1000)) + + # push frame to next modules + self.PushFrame(frame) + + + def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): + total_charge = result_tensors['dom_charges'].numpy() + eps = 1e-7 + mcpe_series_map = frame["I3MCPESeriesMap"] + + loss_event_generator = dataclasses.I3MapKeyDouble() + loss_event_generator_total = 0 + + if validate_photonics: + loss_photonics = dataclasses.I3MapKeyDouble() + loss_photonics_total = 0 + geo = frame["I3Geometry"] + particle = frame["I3MCTree"].get_head() + + for string in range(86): + for om in range(60): + omkey = icetray.OMKey(string + 1, om + 1) + + + if omkey in mcpe_series_map: + pulse_series = mcpe_series_map[omkey] + + dom_charges_true = np.array([pulse.npe for pulse in pulse_series]) + pulse_log_pdf_values = np.log([self.model.pdf_per_dom(pulse.time, result_tensors=result_tensors, + string=string, dom=om) + eps for pulse in pulse_series]) + + dom_charges_pred = total_charge[0, string, om, 0] + + time_log_likelihood = -dom_charges_true * pulse_log_pdf_values + llh_poisson = dom_charges_pred - np.sum(dom_charges_true) * np.log(dom_charges_pred + eps) + + loss = np.sum(time_log_likelihood) + np.sum(llh_poisson) + + loss_event_generator_total += loss + loss_event_generator[omkey] = loss + + if validate_photonics: + position = geo.omgeo[omkey].position + # for pulse in pulse_series: + table_pdf, dom_charges_pred = self.photonics.get_pdf(particle, position, [pulse.time for pulse in pulse_series]) + + time_log_likelihood = -dom_charges_true * np.log(np.array(table_pdf) + eps) + llh_poisson = dom_charges_pred - np.sum(dom_charges_true) * np.log(dom_charges_pred + eps) + + loss = np.sum(time_log_likelihood) + np.sum(llh_poisson) + + loss_photonics_total += loss + loss_photonics[omkey] = loss + + elif self.add_dark_doms: + expected_charge = total_charge[0, string, om, 0] + + loss = expected_charge - 0 * np.log(expected_charge + 1e-7) + loss_event_generator_total += loss + loss_event_generator[omkey] = loss + + if validate_photonics: + position = geo.omgeo[omkey].position + expected_charge, _ = self.photonics.get_PE(particle, position) + + loss = expected_charge - 0 * np.log(expected_charge + 1e-7) + loss_photonics_total += loss + loss_photonics[omkey] = loss + + else: + pass + + frame.Put("EventGeneratorLoss", loss_event_generator) + frame.Put("EventGeneratorLossTotal", dataclasses.I3Double(loss_event_generator_total)) + + if validate_photonics: + frame.Put("PhotonicsLoss", loss_photonics) + frame.Put("PhotonicsLossTotal", dataclasses.I3Double(loss_photonics_total)) + + # t0 = timeit.default_timer() + # pdf_values = self.model.pdf(x_times, result_tensors=result_tensors) + # t1 = timeit.default_timer() + # print(f"Predict PDF EG: {t1 - t0:.2f}s") + + # assert pdf_values.shape[0] == 1, "PDF result has unexpected shape" + # pdf_values = pdf_values[0] + # dt = x_times[1] - x_times[0] + # pdf_values = pdf_values * dt # For normalisation + + # pulse_series_map = dataclasses.I3RecoPulseSeriesMap() + + # if self.cascade_service is not None: + # pulse_series_map_photonics = dataclasses.I3RecoPulseSeriesMap() + + # total_charge = result_tensors['dom_charges'].numpy() + # assert total_charge.shape[0] == 1, "Total charge has unexpected shape" + # total_charge = np.squeeze(total_charge[0]) + + # geo = frame["I3Geometry"] + + # # walk through DOMs + # for string in range(86): + # for om in range(60): + # om_key = icetray.OMKey(string+1, om+1) + # position = geo.omgeo[om_key].position + + # if self.cascade_service is not None: + + # table_pdf, table_charge = self.photonics.get_pdf(frame["I3MCTree"].get_head(), position, x_times) + # if table_charge > 0.1: + # pulse_series_pht = dataclasses.I3RecoPulseSeries() + # for t, charge in zip(x_times, table_pdf * table_charge): + # pulse = dataclasses.I3RecoPulse() + # if charge <= 0: + # continue + # pulse.time = t + # pulse.charge = charge + # pulse_series_pht.append(pulse) + + # pulse_series_map_photonics[om_key] = pulse_series_pht + + # if total_charge[string, om] > 0.1: + # # create pulse series + # pulse_series = dataclasses.I3RecoPulseSeries() + # charge_distribution = pdf_values[string, om] * total_charge[string, om] + # for t, charge in zip(x_times, charge_distribution): + # if charge <= 0: + # continue + # pulse = dataclasses.I3RecoPulse() + # pulse.time = t + # pulse.charge = charge + # pulse_series.append(pulse) + + # # add to pulse series map + # pulse_series_map[om_key] = pulse_series + + # if self.cascade_service is not None: + # frame["Photonics_Prediction"] = pulse_series_map_photonics + + + def plot_pdfs(self, frame, result_tensors, validate_photonics=False): + + total_charge = result_tensors['dom_charges'].numpy() + mcpe_series_map = frame["I3MCPESeriesMap"] + + if validate_photonics: + geo = frame["I3Geometry"] + particle = frame["I3MCTree"].get_head() + + hom = None + max_charge = 0 + + for omkey, pulses in mcpe_series_map: + tot_charge = np.sum(p.npe for p in pulses) + + if tot_charge > max_charge: + max_charge = tot_charge + hom = omkey + + if hom is None: + return + + string = hom.string - 1 + om = hom.om - 1 + + times = np.arange(0, 1000, 5) + dt = np.diff(times)[0] + ctime = times[:-1] + dt / 2 + + pulse_log_pdf_values = np.array([self.model.pdf_per_dom(time, result_tensors=result_tensors, + string=string, dom=om) for time in ctime]) * dt + tot_charge = total_charge[0, string, om, 0] + fig, ax = plt.subplots() + ax.hist([p.time for p in mcpe_series_map[hom]], times, weights=[p.npe for p in mcpe_series_map[hom]], alpha=0.5, color="k", label="MC") + + ax.plot(ctime, pulse_log_pdf_values * tot_charge, label="EventGenerator", lw=1) + + if validate_photonics: + geo = frame["I3Geometry"] + particle = frame["I3MCTree"].get_head() + position = geo.omgeo[hom].position + table_pdf, dom_charges_pred = self.photonics._get_pdf2(particle, position, times) + + ax.plot(ctime, np.array(table_pdf) * dom_charges_pred, label="Photonics", lw=1, ls=":") + + + ax.set_xlabel("time / ns") + ax.set_ylabel("npe") + ax.legend() + ax.grid() + + plt.tight_layout() + plt.savefig(f"pdf_{self._counter}.png") + + + + def convert_cascades_to_tensor(self, cascades, frame): + """Convert a list of cascades to a source parameter tensor. + + Parameters + ---------- + cascades : list of I3Particles + The list of cascade I3Particles. + + frame : I3Frame + Necessary to get additional parameter from frame. + + Returns + ------- + tf.Tensor + The source hypothesis paramter tensor. + """ + # create parameter array + parameters = np.empty([len(cascades), self.model.num_parameters]) + + # Fill default values + for name, value in self.default_values.items(): + + # assume that a I3Double exists in frame + if isinstance(value, str): + value = frame[value].value + + # assign values + parameters[:, self.model.get_index(name)] = value + + # now fill cascade parameters (x, y, z, zenith, azimuth, E, t) + for i, cascade in enumerate(cascades): + for name in self.param_names: + + # remove prefix + name = name[len(self._prefix):] + + if name in ['x', 'y', 'z']: + value = getattr(cascade.pos, name) + elif name in ['azimuth', 'zenith']: + value = getattr(cascade.dir, name) + else: + value = getattr(cascade, name) + + index = self.model.get_index(self._prefix + name) + parameters[i, index] = value + + return tf.convert_to_tensor(parameters, dtype=self.param_dtype) + + def _get_light_sources(self, mc_tree, parent): + """Get a list of cascade and track light sources from an I3MCTree + + Recursive helper function for the method `get_light_sources`. + + Parameters + ---------- + mc_tree : I3MCTree + The I3MCTree from which to get the light sources + + Returns + ------- + array_like + The list of cascades. + array_like + The list of tracks. + """ + + # Stopping condition for recursion: either found a track or cascade + if (parent.energy < self.min_simulation_energy or + parent.pos.magnitude - parent.length + > self.max_simulation_distance): + return [], [] + + if parent.type in self.em_cascades: + if self.simulate_electron_daughters and len(mc_tree.get_daughters(parent)): + pass # get light sourced from daughters + else: + return [parent], [] + + elif parent.type in self.tracks: + return [], [parent] + + cascades = [] + tracks = [] + + # need further recursion + daughters = mc_tree.get_daughters(parent) + + # check if we have a branch that we can't simulate + if len(daughters) == 0 and parent.type not in self.dark_particles: + log_warn(f"Particle: {parent}") + log_warn(f"Tree: {mc_tree}") + raise NotImplementedError( + 'Particle can not be simulated: ', parent.type) + + if len(daughters) > 0: + if parent.type not in self.allowed_parent_particles: + raise ValueError('Unknown parent type:', parent.type) + + for daughter in daughters: + + # get list of cascades and tracks + cascades_i, tracks_i = self._get_light_sources(mc_tree, daughter) + + # extend lists + cascades.extend(cascades_i) + tracks.extend(tracks_i) + + return cascades, tracks + + def get_light_sources(self, mc_tree): + """Get a list of cascade and track light sources from an I3MCTree + + This method recursively walks through the I3MCTree by looking at + daughter particles. All daughter particles are collected, that need + to be simulated. + + Parameters + ---------- + mc_tree : I3MCTree + The I3MCTree from which to get the light sources + + Returns + ------- + array_like + The list of cascades. + array_like + The list of tracks. + """ + + cascades = [] + tracks = [] + for primary in mc_tree.get_primaries(): + + # get list of cascades and tracks + cascades_i, tracks_i = self._get_light_sources(mc_tree, primary) + + # extend lists + cascades.extend(cascades_i) + tracks.extend(tracks_i) + + return cascades, tracks + + + +def convert_params_to_particle(params): + """ + Converts a list of parameters defining a source into a I3Particle + + Parameters + ---------- + + params: list + List of parameters defining a source: [x, y, z, zenith, azimuth, energy, time] + + Returns + ------- + + cascade: icecube.dataclasses.I3Particle + Particle describing a cascade + """ + cascade = dataclasses.I3Particle() + + cascade.pos = dataclasses.I3Position(params[0], params[1], params[2]) + cascade.dir = dataclasses.I3Direction(params[3], params[4]) + + cascade.fit_status == dataclasses.I3Particle.NotSet + cascade.location_type = dataclasses.I3Particle.InIce + cascade.time = 0 + + # set particle type + cascade.type = dataclasses.I3Particle.ParticleType.EMinus + + # set energy for each particle + cascade.energy = params[5] * I3Units.GeV + + return cascade + + +class Photonics(object): + """ + Little wrapper around photonics_service + """ + + def __init__(self, cascadePhotonicsService=None): + + if cascadePhotonicsService is None: + + if os.environ.get('I3_DATA') is None or os.environ.get('I3_DATA') == "": + raise ValueError("The environment variable \"I3_DATA\" " + "is not set. Are you in an IceTray environment?") + + table_base = os.path.expandvars('$I3_DATA/photon-tables/splines/ems_spice1_z20_a10.%s.fits') + cascadePhotonicsService = photonics_service.I3PhotoSplineService( + table_base % 'abs', table_base % 'prob', timingSigma=0) + + self.cascade_pxs = cascadePhotonicsService + + + def _get_PE(self, source, position): + """ Returns the number of photons and geometrical distance for a source & reciever pair """ + + if not isinstance(source, dataclasses.I3Particle): + source = convert_params_to_particle(source) + + if source.energy == 0: + return 0, 0 + + self.cascade_pxs.SelectModuleCoordinates(*position) + pes, dist, _ = self.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(source)) + if pes < 0: + pes = 0 + + return pes, dist + + + def get_PE(self, sources, position): + """ Wrapper around self._get_PE to allow providing a list of sources """ + + if isinstance(sources, dataclasses.I3Particle): + return self._get_PE(sources, position) + + elif isinstance(sources, list) and isinstance(sources[0], float): + return self._get_PE(sources, position) + + elif isinstance(sources, list): + output = np.array([self._get_PE(source, position) for source in sources]) + pe_sum = np.sum(output[:, 0]) + mask = output[:, 0] > 0 + if not np.any(mask): + return 0, 0 + return pe_sum, np.amin(output[mask, 1]) + else: + raise TypeError(f"The type of sources \"{type(sources)}\" is not supported.") + + + def _get_pdf(self, source, position, times): + + if not isinstance(source, dataclasses.I3Particle): + source = convert_params_to_particle(source) + + if source.energy == 0: + return np.zeros_like(times), 0 + + self.cascade_pxs.SelectModuleCoordinates(*position) + pes, _, t0 = self.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(source)) + if pes <= 0: + return np.zeros_like(times), 0 + + pdf = [self.cascade_pxs.GetProbabilityDensity(t0 + t) for t in times] + + # quantiles = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0) # Reading charge distribution (normalized to 1) + # print(pdf, quantiles) + + return pdf, pes + + + def _get_pdf2(self, source, position, times): + + if not isinstance(source, dataclasses.I3Particle): + source = convert_params_to_particle(source) + + if source.energy == 0: + return np.zeros_like(times), 0 + + self.cascade_pxs.SelectModuleCoordinates(*position) + pes, _, t0 = self.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(source)) + if pes <= 0: + return np.zeros_like(times), 0 + + quantiles = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0) # Reading charge distribution (normalized to 1) + + return quantiles, pes + + + def get_pdf(self, sources, position, t): + if isinstance(sources, dataclasses.I3Particle): + return self._get_pdf(sources, position, t) + + elif isinstance(sources, list) and isinstance(sources[0], float): + return self._get_pdf(sources, position, t) + + else: + pdf_tot = np.zeros_like(t) + pdfs = [] + nps_tot = 0 + for source in sources: + pdf, nps = self._self._get_pdf(source, position, t) + pdfs.append(pdf) + pdf_tot += pdf * nps + nps_tot += nps + + pdfs.append(pdf_tot / nps_tot) + return pdfs \ No newline at end of file From 3229861d5903728b3b68ef35874a200b4338d4e6 Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Tue, 16 Apr 2024 10:38:53 -0500 Subject: [PATCH 15/27] Clean up. Add correct time call to photonics function --- egenerator/ic3/evaluate_mcpe.py | 324 +++++--------------------------- 1 file changed, 45 insertions(+), 279 deletions(-) diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py index 1630cc61..835c4fc7 100644 --- a/egenerator/ic3/evaluate_mcpe.py +++ b/egenerator/ic3/evaluate_mcpe.py @@ -12,14 +12,14 @@ from egenerator.utils.configurator import ManagerConfigurator from egenerator.utils import basis_functions - +from egenerator.ic3.simulation import EventGeneratorSimulation def calculate_loss(pred, true): assert true % 1 == 0, "Not a int value" return np.exp(-1 * pred) * pred ** true / math.factorial(true) -class CalculateLikelihood(icetray.I3ConditionalModule): +class CalculateLikelihood(EventGeneratorSimulation): """Class to evaluate predictions of MCPE with Event-Generator model. @@ -356,7 +356,9 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): if validate_photonics: position = geo.omgeo[omkey].position # for pulse in pulse_series: - table_pdf, dom_charges_pred = self.photonics.get_pdf(particle, position, [pulse.time for pulse in pulse_series]) + table_pdf, dom_charges_pred = self.photonics.get_pdf( + particle, position, [pulse.time for pulse in pulse_series], + quantiles=False) time_log_likelihood = -dom_charges_true * np.log(np.array(table_pdf) + eps) llh_poisson = dom_charges_pred - np.sum(dom_charges_true) * np.log(dom_charges_pred + eps) @@ -391,66 +393,6 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): frame.Put("PhotonicsLoss", loss_photonics) frame.Put("PhotonicsLossTotal", dataclasses.I3Double(loss_photonics_total)) - # t0 = timeit.default_timer() - # pdf_values = self.model.pdf(x_times, result_tensors=result_tensors) - # t1 = timeit.default_timer() - # print(f"Predict PDF EG: {t1 - t0:.2f}s") - - # assert pdf_values.shape[0] == 1, "PDF result has unexpected shape" - # pdf_values = pdf_values[0] - # dt = x_times[1] - x_times[0] - # pdf_values = pdf_values * dt # For normalisation - - # pulse_series_map = dataclasses.I3RecoPulseSeriesMap() - - # if self.cascade_service is not None: - # pulse_series_map_photonics = dataclasses.I3RecoPulseSeriesMap() - - # total_charge = result_tensors['dom_charges'].numpy() - # assert total_charge.shape[0] == 1, "Total charge has unexpected shape" - # total_charge = np.squeeze(total_charge[0]) - - # geo = frame["I3Geometry"] - - # # walk through DOMs - # for string in range(86): - # for om in range(60): - # om_key = icetray.OMKey(string+1, om+1) - # position = geo.omgeo[om_key].position - - # if self.cascade_service is not None: - - # table_pdf, table_charge = self.photonics.get_pdf(frame["I3MCTree"].get_head(), position, x_times) - # if table_charge > 0.1: - # pulse_series_pht = dataclasses.I3RecoPulseSeries() - # for t, charge in zip(x_times, table_pdf * table_charge): - # pulse = dataclasses.I3RecoPulse() - # if charge <= 0: - # continue - # pulse.time = t - # pulse.charge = charge - # pulse_series_pht.append(pulse) - - # pulse_series_map_photonics[om_key] = pulse_series_pht - - # if total_charge[string, om] > 0.1: - # # create pulse series - # pulse_series = dataclasses.I3RecoPulseSeries() - # charge_distribution = pdf_values[string, om] * total_charge[string, om] - # for t, charge in zip(x_times, charge_distribution): - # if charge <= 0: - # continue - # pulse = dataclasses.I3RecoPulse() - # pulse.time = t - # pulse.charge = charge - # pulse_series.append(pulse) - - # # add to pulse series map - # pulse_series_map[om_key] = pulse_series - - # if self.cascade_service is not None: - # frame["Photonics_Prediction"] = pulse_series_map_photonics - def plot_pdfs(self, frame, result_tensors, validate_photonics=False): @@ -477,25 +419,33 @@ def plot_pdfs(self, frame, result_tensors, validate_photonics=False): string = hom.string - 1 om = hom.om - 1 - times = np.arange(0, 1000, 5) + if validate_photonics: + geo = frame["I3Geometry"] + particle = frame["I3MCTree"].get_head() + position = geo.omgeo[hom].position + self.photonics.cascade_pxs.SelectModuleCoordinates(*position) + _, _, t0 = self.photonics.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(particle)) + else: + t0 = 0 + + times = np.arange(t0 - 50, t0 + 300, 5) dt = np.diff(times)[0] ctime = times[:-1] + dt / 2 - pulse_log_pdf_values = np.array([self.model.pdf_per_dom(time, result_tensors=result_tensors, - string=string, dom=om) for time in ctime]) * dt - tot_charge = total_charge[0, string, om, 0] fig, ax = plt.subplots() - ax.hist([p.time for p in mcpe_series_map[hom]], times, weights=[p.npe for p in mcpe_series_map[hom]], alpha=0.5, color="k", label="MC") + charges_mc = [p.npe for p in mcpe_series_map[hom]] + ax.hist([p.time for p in mcpe_series_map[hom]], times, + weights=charges_mc, alpha=0.5, color="k", label=f"MC {np.sum(charges_mc)}") - ax.plot(ctime, pulse_log_pdf_values * tot_charge, label="EventGenerator", lw=1) + eg_pdf_values = np.squeeze(self.model.get_probability_quantiles_per_dom(times, result_tensors=result_tensors, + string=string, dom=om)) + tot_charge = total_charge[0, string, om, 0] + ax.plot(ctime, eg_pdf_values * tot_charge, label=f"EventGenerator {tot_charge:.2f}", lw=1) if validate_photonics: - geo = frame["I3Geometry"] - particle = frame["I3MCTree"].get_head() - position = geo.omgeo[hom].position - table_pdf, dom_charges_pred = self.photonics._get_pdf2(particle, position, times) - ax.plot(ctime, np.array(table_pdf) * dom_charges_pred, label="Photonics", lw=1, ls=":") + table_pdf, dom_charges_pred = self.photonics.get_pdf(particle, position, times) + ax.plot(ctime, np.array(table_pdf) * dom_charges_pred, label=f"Photonics {dom_charges_pred:.2f}", lw=1, ls=":") ax.set_xlabel("time / ns") @@ -507,185 +457,6 @@ def plot_pdfs(self, frame, result_tensors, validate_photonics=False): plt.savefig(f"pdf_{self._counter}.png") - - def convert_cascades_to_tensor(self, cascades, frame): - """Convert a list of cascades to a source parameter tensor. - - Parameters - ---------- - cascades : list of I3Particles - The list of cascade I3Particles. - - frame : I3Frame - Necessary to get additional parameter from frame. - - Returns - ------- - tf.Tensor - The source hypothesis paramter tensor. - """ - # create parameter array - parameters = np.empty([len(cascades), self.model.num_parameters]) - - # Fill default values - for name, value in self.default_values.items(): - - # assume that a I3Double exists in frame - if isinstance(value, str): - value = frame[value].value - - # assign values - parameters[:, self.model.get_index(name)] = value - - # now fill cascade parameters (x, y, z, zenith, azimuth, E, t) - for i, cascade in enumerate(cascades): - for name in self.param_names: - - # remove prefix - name = name[len(self._prefix):] - - if name in ['x', 'y', 'z']: - value = getattr(cascade.pos, name) - elif name in ['azimuth', 'zenith']: - value = getattr(cascade.dir, name) - else: - value = getattr(cascade, name) - - index = self.model.get_index(self._prefix + name) - parameters[i, index] = value - - return tf.convert_to_tensor(parameters, dtype=self.param_dtype) - - def _get_light_sources(self, mc_tree, parent): - """Get a list of cascade and track light sources from an I3MCTree - - Recursive helper function for the method `get_light_sources`. - - Parameters - ---------- - mc_tree : I3MCTree - The I3MCTree from which to get the light sources - - Returns - ------- - array_like - The list of cascades. - array_like - The list of tracks. - """ - - # Stopping condition for recursion: either found a track or cascade - if (parent.energy < self.min_simulation_energy or - parent.pos.magnitude - parent.length - > self.max_simulation_distance): - return [], [] - - if parent.type in self.em_cascades: - if self.simulate_electron_daughters and len(mc_tree.get_daughters(parent)): - pass # get light sourced from daughters - else: - return [parent], [] - - elif parent.type in self.tracks: - return [], [parent] - - cascades = [] - tracks = [] - - # need further recursion - daughters = mc_tree.get_daughters(parent) - - # check if we have a branch that we can't simulate - if len(daughters) == 0 and parent.type not in self.dark_particles: - log_warn(f"Particle: {parent}") - log_warn(f"Tree: {mc_tree}") - raise NotImplementedError( - 'Particle can not be simulated: ', parent.type) - - if len(daughters) > 0: - if parent.type not in self.allowed_parent_particles: - raise ValueError('Unknown parent type:', parent.type) - - for daughter in daughters: - - # get list of cascades and tracks - cascades_i, tracks_i = self._get_light_sources(mc_tree, daughter) - - # extend lists - cascades.extend(cascades_i) - tracks.extend(tracks_i) - - return cascades, tracks - - def get_light_sources(self, mc_tree): - """Get a list of cascade and track light sources from an I3MCTree - - This method recursively walks through the I3MCTree by looking at - daughter particles. All daughter particles are collected, that need - to be simulated. - - Parameters - ---------- - mc_tree : I3MCTree - The I3MCTree from which to get the light sources - - Returns - ------- - array_like - The list of cascades. - array_like - The list of tracks. - """ - - cascades = [] - tracks = [] - for primary in mc_tree.get_primaries(): - - # get list of cascades and tracks - cascades_i, tracks_i = self._get_light_sources(mc_tree, primary) - - # extend lists - cascades.extend(cascades_i) - tracks.extend(tracks_i) - - return cascades, tracks - - - -def convert_params_to_particle(params): - """ - Converts a list of parameters defining a source into a I3Particle - - Parameters - ---------- - - params: list - List of parameters defining a source: [x, y, z, zenith, azimuth, energy, time] - - Returns - ------- - - cascade: icecube.dataclasses.I3Particle - Particle describing a cascade - """ - cascade = dataclasses.I3Particle() - - cascade.pos = dataclasses.I3Position(params[0], params[1], params[2]) - cascade.dir = dataclasses.I3Direction(params[3], params[4]) - - cascade.fit_status == dataclasses.I3Particle.NotSet - cascade.location_type = dataclasses.I3Particle.InIce - cascade.time = 0 - - # set particle type - cascade.type = dataclasses.I3Particle.ParticleType.EMinus - - # set energy for each particle - cascade.energy = params[5] * I3Units.GeV - - return cascade - - class Photonics(object): """ Little wrapper around photonics_service @@ -743,7 +514,7 @@ def get_PE(self, sources, position): raise TypeError(f"The type of sources \"{type(sources)}\" is not supported.") - def _get_pdf(self, source, position, times): + def _get_pdf(self, source, position, times, quantiles=True): if not isinstance(source, dataclasses.I3Particle): source = convert_params_to_particle(source) @@ -756,45 +527,40 @@ def _get_pdf(self, source, position, times): if pes <= 0: return np.zeros_like(times), 0 - pdf = [self.cascade_pxs.GetProbabilityDensity(t0 + t) for t in times] - - # quantiles = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0) # Reading charge distribution (normalized to 1) - # print(pdf, quantiles) + if quantiles: + pdf = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0) # Reading charge distribution (normalized to 1) - return pdf, pes + # dt = np.diff(times) + # ctime = times[:-1] + dt / 2 + # pdf2 = [self.cascade_pxs.GetProbabilityDensity(float(t) - t0) for t in ctime] + # pdf2 = pdf2 * dt + # for t, p1, p2 in zip(ctime, pdf, pdf2): + # print(f"{t:.1f}, {p1 / p2:.2f}") + # print() - def _get_pdf2(self, source, position, times): - - if not isinstance(source, dataclasses.I3Particle): - source = convert_params_to_particle(source) - - if source.energy == 0: - return np.zeros_like(times), 0 - - self.cascade_pxs.SelectModuleCoordinates(*position) - pes, _, t0 = self.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(source)) - if pes <= 0: - return np.zeros_like(times), 0 - - quantiles = self.cascade_pxs.GetProbabilityQuantiles(times, t0, 0) # Reading charge distribution (normalized to 1) - - return quantiles, pes + return pdf, pes + else: + if isinstance(times, list): + pdf = [self.cascade_pxs.GetProbabilityDensity(t - t0) for t in times] + else: + pdf = self.cascade_pxs.GetProbabilityDensity(times - t0) + return pdf, pes - def get_pdf(self, sources, position, t): + def get_pdf(self, sources, position, t, quantiles=True): if isinstance(sources, dataclasses.I3Particle): - return self._get_pdf(sources, position, t) + return self._get_pdf(sources, position, t, quantiles) elif isinstance(sources, list) and isinstance(sources[0], float): - return self._get_pdf(sources, position, t) + return self._get_pdf(sources, position, t, quantiles) else: pdf_tot = np.zeros_like(t) pdfs = [] nps_tot = 0 for source in sources: - pdf, nps = self._self._get_pdf(source, position, t) + pdf, nps = self._self._get_pdf(source, position, t, quantiles) pdfs.append(pdf) pdf_tot += pdf * nps nps_tot += nps From 252a91f4eafcf7fb5f272a94af543bde25caac78 Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Tue, 16 Apr 2024 10:40:11 -0500 Subject: [PATCH 16/27] Add cdf_per_dom function and get_probability_quantiles_per_dom --- egenerator/model/source/base.py | 128 +++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/egenerator/model/source/base.py b/egenerator/model/source/base.py index 688de28e..ea7051f2 100644 --- a/egenerator/model/source/base.py +++ b/egenerator/model/source/base.py @@ -511,6 +511,132 @@ def cdf(self, x, result_tensors, return cdf_values + def cdf_per_dom(self, x, result_tensors, string, dom, **kwargs): + """Compute CDF values at x for given result_tensors + + This is a numpy, i.e. not tensorflow, method to compute the CDF based + on a provided `result_tensors`. This can be used to investigate + the generated PDFs. + + Note: this function only works for sources that use asymmetric + Gaussians to parameterize the PDF. The latent values of the AG + must be included in the `result_tensors`. + + Parameters + ---------- + x : array_like + The times in ns at which to evaluate the result tensors. + Shape: () or [n_points] + result_tensors : dict of tf.tensor + The dictionary of output tensors as obtained from `get_tensors`. + string : int + String id of the DOM + dom : int + Module id of the DOM + **kwargs + Keyword arguments. + + Returns + ------- + array_like + The CDF values at times x for the given event hypothesis and + exclusions that were used to compute `result_tensors`. + Shape: [n_events, n_points] + + Raises + ------ + NotImplementedError + If assymetric Gaussian latent variables are not present in + `result_tensors` dictionary. + """ + + x_orig = np.atleast_1d(x) + assert len(x_orig.shape) == 1, x_orig.shape + n_points = len(x_orig) + + # shape: [1, 1, n_points] + x = np.reshape(x_orig, (1, 1, -1)) + + if result_tensors['time_offsets'] is not None: + # shape: [n_events] + t_offsets = result_tensors['time_offsets'].numpy() + # shape: [n_events, 1, 1] + t_offsets = np.reshape(t_offsets, [-1, 1, 1]) + # shape: [n_events, 1, n_points] + x = x - t_offsets + else: + t_offsets = 0. + + # internally we are working with different time units + x = x / self.time_unit_in_ns + + # Check if the asymmetric Gaussian latent variables exist + for latent_name in ['mu', 'scale', 'sigma', 'r']: + if 'latent_var_' + latent_name not in result_tensors: + msg = 'PDF evaluation is not supported for this model: {}' + raise NotImplementedError(msg.format( + self._untracked_data['name'])) + + # extract values + # shape: [n_events, n_components, 1] + mu = result_tensors['latent_var_mu'].numpy()[:, string, dom, :, np.newaxis] + scale = result_tensors['latent_var_scale'].numpy()[:, string, dom, :, np.newaxis] + sigma = result_tensors['latent_var_sigma'].numpy()[:, string, dom, :, np.newaxis] + r = result_tensors['latent_var_r'].numpy()[:, string, dom, :, np.newaxis] + + # shape: [n_events, n_components, n_points] + mixture_cdf = basis_functions.asymmetric_gauss_cdf( + x=x, mu=mu, sigma=sigma, r=r + ) + + # shape: [n_events, n_points] + cdf_values = np.sum(mixture_cdf * scale, axis=1) + + return cdf_values + + + def get_probability_quantiles_per_dom(self, x, result_tensors, string, dom, **kwargs): + """ + This is a numpy, i.e. not tensorflow, method to compute the CDF based + on a provided `result_tensors`. This can be used to investigate + the generated PDFs. + + Note: this function only works for sources that use asymmetric + Gaussians to parameterize the PDF. The latent values of the AG + must be included in the `result_tensors`. + + Parameters + ---------- + x : array_like + The times in ns at which to evaluate the result tensors. + Shape: () or [n_points] + result_tensors : dict of tf.tensor + The dictionary of output tensors as obtained from `get_tensors`. + string : int + String id of the DOM + dom : int + Module id of the DOM + **kwargs + Keyword arguments. + + Returns + ------- + array_like + Shape: [n_events, n_points - 1] + + Raises + ------ + NotImplementedError + If assymetric Gaussian latent variables are not present in + `result_tensors` dictionary. + """ + + cdf = self.cdf_per_dom(x, result_tensors, string, dom, **kwargs) + quantiles = cdf[..., 1:] - cdf[..., :-1] + + return quantiles + + def pdf(self, x, result_tensors, tw_exclusions=None, tw_exclusions_ids=None, **kwargs): """Compute PDF values at x for given result_tensors @@ -669,7 +795,7 @@ def pdf_per_dom(self, x, result_tensors, string, dom, **kwargs): array_like The PDF values at times x for the given event hypothesis and exclusions that were used to compute `result_tensors`. - Shape: [n_events, 86, 60, n_points] + Shape: [n_events, n_points] Raises ------ From b85d0eed1883d9950b6797d5ee6bb960d0087c22 Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Tue, 16 Apr 2024 10:40:31 -0500 Subject: [PATCH 17/27] Little clean up --- egenerator/model/source/cascade/default.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/egenerator/model/source/cascade/default.py b/egenerator/model/source/cascade/default.py index b02a52ae..accc618a 100644 --- a/egenerator/model/source/cascade/default.py +++ b/egenerator/model/source/cascade/default.py @@ -355,9 +355,11 @@ def get_tensors(self, data_batch_dict, is_training, # check if we have the right amount of filters in the latent dimension n_models = config['num_latent_models'] - if n_models*4 + n_charge != config['num_filters_list'][-1]: - raise ValueError('{!r} != {!r}'.format( - n_models*4 + n_charge, config['num_filters_list'][-1])) + exp_num_filts = n_models * 4 + n_charge + if exp_num_filts != config['num_filters_list'][-1]: + raise ValueError('{!r} (expected) != {!r} (configured)'.format( + exp_num_filts, config['num_filters_list'][-1])) + if n_models <= 1: raise ValueError('{!r} !> 1'.format(n_models)) From 8d627589d779bb9a8721fee5d899f8e72aaad46f Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Wed, 17 Apr 2024 11:44:14 -0500 Subject: [PATCH 18/27] Store total charge per dom as well. Improve filename for figures --- egenerator/ic3/evaluate_mcpe.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py index 835c4fc7..10642f65 100644 --- a/egenerator/ic3/evaluate_mcpe.py +++ b/egenerator/ic3/evaluate_mcpe.py @@ -325,21 +325,28 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): loss_event_generator = dataclasses.I3MapKeyDouble() loss_event_generator_total = 0 + event_generator_total_dom_charge = dataclasses.I3MapKeyDouble() + mc_total_dom_charge = dataclasses.I3MapKeyDouble() + if validate_photonics: loss_photonics = dataclasses.I3MapKeyDouble() loss_photonics_total = 0 geo = frame["I3Geometry"] particle = frame["I3MCTree"].get_head() + photonics_total_dom_charge = dataclasses.I3MapKeyDouble() + for string in range(86): for om in range(60): omkey = icetray.OMKey(string + 1, om + 1) + event_generator_total_dom_charge[omkey] = float(total_charge[0, string, om, 0]) if omkey in mcpe_series_map: pulse_series = mcpe_series_map[omkey] dom_charges_true = np.array([pulse.npe for pulse in pulse_series]) + mc_total_dom_charge[omkey] = float(np.sum(dom_charges_true)) pulse_log_pdf_values = np.log([self.model.pdf_per_dom(pulse.time, result_tensors=result_tensors, string=string, dom=om) + eps for pulse in pulse_series]) @@ -359,6 +366,7 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): table_pdf, dom_charges_pred = self.photonics.get_pdf( particle, position, [pulse.time for pulse in pulse_series], quantiles=False) + photonics_total_dom_charge[omkey] = float(dom_charges_pred) time_log_likelihood = -dom_charges_true * np.log(np.array(table_pdf) + eps) llh_poisson = dom_charges_pred - np.sum(dom_charges_true) * np.log(dom_charges_pred + eps) @@ -370,7 +378,7 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): elif self.add_dark_doms: expected_charge = total_charge[0, string, om, 0] - + mc_total_dom_charge[omkey] = 0 loss = expected_charge - 0 * np.log(expected_charge + 1e-7) loss_event_generator_total += loss loss_event_generator[omkey] = loss @@ -378,6 +386,7 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): if validate_photonics: position = geo.omgeo[omkey].position expected_charge, _ = self.photonics.get_PE(particle, position) + photonics_total_dom_charge[omkey] = float(expected_charge) loss = expected_charge - 0 * np.log(expected_charge + 1e-7) loss_photonics_total += loss @@ -386,10 +395,13 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): else: pass + frame.Put("MCTotalChargePerDOM", mc_total_dom_charge) + frame.Put("EventGeneratorTotalChargePerDOM", event_generator_total_dom_charge) frame.Put("EventGeneratorLoss", loss_event_generator) frame.Put("EventGeneratorLossTotal", dataclasses.I3Double(loss_event_generator_total)) if validate_photonics: + frame.Put("PhotonicsTotalChargePerDOM", photonics_total_dom_charge) frame.Put("PhotonicsLoss", loss_photonics) frame.Put("PhotonicsLossTotal", dataclasses.I3Double(loss_photonics_total)) @@ -453,8 +465,10 @@ def plot_pdfs(self, frame, result_tensors, validate_photonics=False): ax.legend() ax.grid() + header = frame["I3EventHeader"] + plt.tight_layout() - plt.savefig(f"pdf_{self._counter}.png") + plt.savefig(f"{self.model_name}_{header.run_id}_{header.event_id}_pdf.png") class Photonics(object): From 16f6cbc19b8d43352ff2639950b6589f6f592be2 Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Thu, 18 Apr 2024 04:44:06 -0500 Subject: [PATCH 19/27] Add function which only calculates total charge per dom --- egenerator/ic3/evaluate_mcpe.py | 55 ++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py index 10642f65..acaa559a 100644 --- a/egenerator/ic3/evaluate_mcpe.py +++ b/egenerator/ic3/evaluate_mcpe.py @@ -107,6 +107,7 @@ def __init__(self, context): 'high energy cascades.', False) self.AddParameter('AddDarkDOMs', '', True) + self.AddParameter('ValidateTimePDF', '', True) self.AddParameter('CascadeService', '', @@ -128,7 +129,7 @@ def Configure(self): self.random_service = self.GetParameter('random_service') self.simulate_electron_daughters = self.GetParameter('SimulateElectronDaughterParticles') self.add_dark_doms = self.GetParameter("AddDarkDOMs") - + self.validate_time_pdf = self.GetParameter("ValidateTimePDF") if isinstance(self.random_service, int): self.random_service = np.random.RandomState(self.random_service) @@ -274,8 +275,8 @@ def DAQ(self, frame): self._counter += 1 - # if self._counter >= 5: - # return + if not self._counter % 100: + print(self._counter) # start timer t_0 = timeit.default_timer() @@ -299,11 +300,12 @@ def DAQ(self, frame): t_2 = timeit.default_timer() # sample pdf - self.sample_from_pdf(frame, result_tensors, validate_photonics=self.cascade_service is not None) - - if self._counter % 10 == 0: - self.plot_pdfs(frame, result_tensors, validate_photonics=self.cascade_service is not None) - + if self.validate_time_pdf: + self.sample_from_pdf(frame, result_tensors, validate_photonics=self.cascade_service is not None) + if self._counter % 10 == 0: + self.plot_pdfs(frame, result_tensors, validate_photonics=self.cascade_service is not None) + else: + self.store_total_charge(frame, result_tensors, validate_photonics=self.cascade_service is not None) # timer after Sampling t_3 = timeit.default_timer() @@ -316,6 +318,43 @@ def DAQ(self, frame): # push frame to next modules self.PushFrame(frame) + def store_total_charge(self, frame, result_tensors, validate_photonics=False): + """ This function stores the predicted charge by EG, MC, Photonics """ + total_charge = result_tensors['dom_charges'].numpy() + mcpe_series_map = frame["I3MCPESeriesMap"] + + event_generator_total_dom_charge = dataclasses.I3MapKeyDouble() + mc_total_dom_charge = dataclasses.I3MapKeyDouble() + + if validate_photonics: + geo = frame["I3Geometry"] + particle = frame["I3MCTree"].get_head() + photonics_total_dom_charge = dataclasses.I3MapKeyDouble() + + + for string in range(86): + for om in range(60): + omkey = icetray.OMKey(string + 1, om + 1) + + event_generator_total_dom_charge[omkey] = float(total_charge[0, string, om, 0]) + + if validate_photonics: + position = geo.omgeo[omkey].position + self.photonics.cascade_pxs.SelectModuleCoordinates(*position) + charge, _, t0 = self.photonics.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(particle)) + photonics_total_dom_charge[omkey] = float(charge) + + if omkey in mcpe_series_map: + mc_total_dom_charge[omkey] = np.sum([pulse.npe for pulse in mcpe_series_map[omkey]], dtype="float") + else: + mc_total_dom_charge[omkey] = 0 + + frame.Put("MCTotalChargePerDOM", mc_total_dom_charge) + frame.Put("EventGeneratorTotalChargePerDOM", event_generator_total_dom_charge) + + if validate_photonics: + frame.Put("PhotonicsTotalChargePerDOM", photonics_total_dom_charge) + def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): total_charge = result_tensors['dom_charges'].numpy() From ea00a1d5ea60ac506cac50c403fb479ac5150190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Wed, 29 May 2024 11:49:01 +0200 Subject: [PATCH 20/27] fix ws --- egenerator/model/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egenerator/model/base.py b/egenerator/model/base.py index e6dcde5a..42a667d0 100644 --- a/egenerator/model/base.py +++ b/egenerator/model/base.py @@ -235,7 +235,7 @@ def save_weights(self, dir_path, max_keep=3, protected=False, in the output directory. If it does not exist yet, a new one will be created. Otherwise, its values will be updated The file contains meta data on the checkpoints and keeps track - of the most recents files. The structure and content of meta data: + of the most recents files. The structure and content of meta data: latest_checkpoint: int The number of the latest checkpoint. From f1fb9e85d8fadc242123c91a28564c8272cb749e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Fri, 29 Mar 2024 16:41:10 +0100 Subject: [PATCH 21/27] Remove hard-coded label_key LabelsDeepLearning. Allow to train on MCPEs. --- egenerator/data/modules/data/pulse_data.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py index 4592e93f..35564db2 100644 --- a/egenerator/data/modules/data/pulse_data.py +++ b/egenerator/data/modules/data/pulse_data.py @@ -118,6 +118,14 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key, time_exclusions_exist = time_exclusions_key is not None dom_exclusions_exist = dom_exclusions_key is not None + + self._hdf_time_index = 10 + # 11: npe, 12: charge + if pulse_is_mcpe: + self._hdf_charge_index = 11 + else: + self._hdf_charge_index = 12 + if add_charge_quantiles: pulse_dim = 3 else: @@ -383,7 +391,6 @@ def get_data_from_hdf(self, file, *args, **kwargs): # (t_start, t_end) x_time_exclusions[tw_index] = [row[hdf_time_index], row[11]] - # gather pulse ids (batch index, string, dom) x_time_exclusions_ids[tw_index] = [index, string-1, dom-1] From 0c719581dd1518bafbe9205273fe51616cf3e531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Mon, 1 Apr 2024 18:11:43 +0200 Subject: [PATCH 22/27] Fixed setting charge index. Slightly refactor code to avoid future warning --- egenerator/data/modules/data/pulse_data.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py index 35564db2..94ee4c59 100644 --- a/egenerator/data/modules/data/pulse_data.py +++ b/egenerator/data/modules/data/pulse_data.py @@ -118,14 +118,6 @@ def _configure(self, config_data, pulse_key, dom_exclusions_key, time_exclusions_exist = time_exclusions_key is not None dom_exclusions_exist = dom_exclusions_key is not None - - self._hdf_time_index = 10 - # 11: npe, 12: charge - if pulse_is_mcpe: - self._hdf_charge_index = 11 - else: - self._hdf_charge_index = 12 - if add_charge_quantiles: pulse_dim = 3 else: From bc591ae70ebbc7185cd6070e5d25fb98fefe1528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Thu, 4 Apr 2024 14:21:36 +0200 Subject: [PATCH 23/27] Add new config file for training on MCPEs --- configs/cascade_7param_MCPE.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/cascade_7param_MCPE.yaml b/configs/cascade_7param_MCPE.yaml index f3b5b76b..4d2b5e8f 100644 --- a/configs/cascade_7param_MCPE.yaml +++ b/configs/cascade_7param_MCPE.yaml @@ -62,7 +62,7 @@ training_settings: { # Number of training iterations to train the model for 'num_training_iterations': 1000000, - + # Additional keywords to the loss module used for training 'additional_loss_module_kwargs': { 'normalize_by_total_charge': False, @@ -400,7 +400,7 @@ model_settings: { 'scale_charge_by_relative_dom_efficiency': True, 'scale_charge_by_global_dom_efficiency': False, 'prevent_mixture_component_swapping': False, - 'estimate_charge_distribution': False, + 'estimate_charge_distribution': 'negative_binomial', 'num_latent_models': 10, # This is a list of labels in addition to @@ -410,7 +410,7 @@ model_settings: { # First convolutions 'filter_size_list' : [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]], - 'num_filters_list' : [25, 500, 500, 500, 41], # The last dimension should be 4 * num_asym_gauss + num_charge_pdf; num_asym_gauss = 10, num_charge_pdf = 1 (poisson) or 2 (binominal) + 'num_filters_list' : [25, 500, 500, 500, 42], # The last dimension should be 4 * num_asym_gauss + num_charge_pdf; num_asym_gauss = 10, num_charge_pdf = 1 (poisson) or 2 (binominal) 'method_list' : ['locally_connected', 'convolution', 'convolution', 'convolution', 'convolution', From 84a74bd853be19b19f766476c1193eb17d346b17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Mon, 3 Jun 2024 10:02:08 +0200 Subject: [PATCH 24/27] Export to hdf5 rather than i3 --- egenerator/ic3/evaluate_mcpe.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/egenerator/ic3/evaluate_mcpe.py b/egenerator/ic3/evaluate_mcpe.py index acaa559a..99dbae24 100644 --- a/egenerator/ic3/evaluate_mcpe.py +++ b/egenerator/ic3/evaluate_mcpe.py @@ -7,7 +7,7 @@ import matplotlib.pyplot as plt -from icecube import dataclasses, icetray, photonics_service +from icecube import dataclasses, icetray, photonics_service, simclasses from icecube.icetray.i3logging import log_info, log_warn from egenerator.utils.configurator import ManagerConfigurator @@ -323,31 +323,31 @@ def store_total_charge(self, frame, result_tensors, validate_photonics=False): total_charge = result_tensors['dom_charges'].numpy() mcpe_series_map = frame["I3MCPESeriesMap"] - event_generator_total_dom_charge = dataclasses.I3MapKeyDouble() - mc_total_dom_charge = dataclasses.I3MapKeyDouble() + # We have to use here I3MCPulseSeriesMap here because for a simple I3MapKeyDouble is no hdf5 converter implemented ... + + event_generator_total_dom_charge = dataclasses.I3MapKeyVectorDouble() + mc_total_dom_charge = dataclasses.I3MapKeyVectorDouble() if validate_photonics: geo = frame["I3Geometry"] particle = frame["I3MCTree"].get_head() - photonics_total_dom_charge = dataclasses.I3MapKeyDouble() - + photonics_total_dom_charge = dataclasses.I3MapKeyVectorDouble() for string in range(86): for om in range(60): omkey = icetray.OMKey(string + 1, om + 1) - - event_generator_total_dom_charge[omkey] = float(total_charge[0, string, om, 0]) + event_generator_total_dom_charge[omkey] = [float(total_charge[0, string, om, 0])] if validate_photonics: position = geo.omgeo[omkey].position self.photonics.cascade_pxs.SelectModuleCoordinates(*position) charge, _, t0 = self.photonics.cascade_pxs.SelectSource(photonics_service.PhotonicsSource(particle)) - photonics_total_dom_charge[omkey] = float(charge) + photonics_total_dom_charge[omkey] = [float(charge)] if omkey in mcpe_series_map: - mc_total_dom_charge[omkey] = np.sum([pulse.npe for pulse in mcpe_series_map[omkey]], dtype="float") + mc_total_dom_charge[omkey] = [np.sum([pulse.npe for pulse in mcpe_series_map[omkey]], dtype="float")] else: - mc_total_dom_charge[omkey] = 0 + mc_total_dom_charge[omkey] = [0] frame.Put("MCTotalChargePerDOM", mc_total_dom_charge) frame.Put("EventGeneratorTotalChargePerDOM", event_generator_total_dom_charge) @@ -374,7 +374,6 @@ def sample_from_pdf(self, frame, result_tensors, validate_photonics=False): particle = frame["I3MCTree"].get_head() photonics_total_dom_charge = dataclasses.I3MapKeyDouble() - for string in range(86): for om in range(60): omkey = icetray.OMKey(string + 1, om + 1) From d837934ae7eb704083c3bf8219abfd9e80b4523c Mon Sep 17 00:00:00 2001 From: mhuen Date: Mon, 3 Jun 2024 14:02:28 +0200 Subject: [PATCH 25/27] Allow evaluation in eager mode --- egenerator/utils/learning_rate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/egenerator/utils/learning_rate.py b/egenerator/utils/learning_rate.py index 5e2062bc..9ab84ced 100644 --- a/egenerator/utils/learning_rate.py +++ b/egenerator/utils/learning_rate.py @@ -58,7 +58,6 @@ def __init__(self, boundaries, scheduler_settings, name=None): self.schedulers = schedulers self.name = name - @tf.function def __call__(self, step): step = tf.convert_to_tensor(step) @@ -86,7 +85,10 @@ def __call__(self, step): pred_fn_pairs.append( ( - step > low and step <= high, + tf.math.logical_and( + tf.math.greater(step, low), + tf.math.less_equal(step, high), + ), lambda: scheduler(step - low), ) ) From de144f89b51f2e69587cc72b69b42ee256e3e3f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Mon, 3 Jun 2024 14:14:26 +0200 Subject: [PATCH 26/27] Code refactoring --- egenerator/manager/base.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/egenerator/manager/base.py b/egenerator/manager/base.py index a66d8e73..e7577a66 100644 --- a/egenerator/manager/base.py +++ b/egenerator/manager/base.py @@ -641,10 +641,7 @@ def perform_training_step( gradients = tape.gradient(combined_loss, variables) # remove nans in gradients and replace these with zeros - if "remove_nan_gradients" in opt_config: - remove_nan_gradients = opt_config["remove_nan_gradients"] - else: - remove_nan_gradients = False + remove_nan_gradients = opt_config.get("remove_nan_gradients", False) if remove_nan_gradients: gradients = [ tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad) @@ -652,10 +649,7 @@ def perform_training_step( if grad is not None ] - if "clip_gradients_value" in opt_config: - clip_gradients_value = opt_config["clip_gradients_value"] - else: - clip_gradients_value = None + clip_gradients_value = opt_config.get("clip_gradients_value", None) if clip_gradients_value is not None: capped_gradients, _ = tf.clip_by_global_norm( gradients, clip_gradients_value @@ -675,6 +669,7 @@ def perform_training_step( ], ) asserts.append(assert_finite) + with tf.control_dependencies(asserts): self.optimizer.apply_gradients(zip(capped_gradients, variables)) @@ -778,10 +773,8 @@ def train( scheduler_class = misc.load_class(lr_cfg["full_class_string"]) scheduler = scheduler_class(**lr_cfg["settings"]) optimizer_settings["learning_rate"] = scheduler - - optimizer = getattr(tf.optimizers, opt_config["optimizer_name"])( - **optimizer_settings - ) + + optimizer = getattr(tf.optimizers, opt_config["optimizer_name"])(**optimizer_settings) self._untracked_data["optimizer"] = optimizer # save new training step to model From dec3eb033f623735da5cd92111cee2000aa5eb9d Mon Sep 17 00:00:00 2001 From: Felix Schlueter Date: Tue, 4 Jun 2024 11:44:40 +0200 Subject: [PATCH 27/27] Improve code. access charge from hdf files by name and not index --- egenerator/data/modules/data/pulse_data.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/egenerator/data/modules/data/pulse_data.py b/egenerator/data/modules/data/pulse_data.py index dd1e05ad..739c9f98 100644 --- a/egenerator/data/modules/data/pulse_data.py +++ b/egenerator/data/modules/data/pulse_data.py @@ -252,11 +252,7 @@ def get_data_from_hdf(self, file, *args, **kwargs): if not self.is_configured: raise ValueError("Module not configured yet!") - # 11: npe, 12: charge - if self.configuration.config["pulse_is_mcpe"]: - hdf_charge_index = 11 - else: - hdf_charge_index = 12 + charge_str = "npe" if self.configuration.config["pulse_is_mcpe"] else "charge" # open file f = pd.HDFStore(file, 'r') @@ -366,20 +362,19 @@ def get_data_from_hdf(self, file, *args, **kwargs): index = event_dict[(row[1:5])] - # pulse charge: row[hdf_charge_index] # accumulate charge in DOMs - x_dom_charge[index, string - 1, dom - 1, 0] += row[hdf_charge_index] + x_dom_charge[index, string - 1, dom - 1, 0] += getattr(row, charge_str) # gather pulses if add_charge_quantiles: # (charge, time, quantile) cum_charge = float(x_dom_charge[index, string - 1, dom - 1, 0]) - x_pulses[pulse_index] = [row[hdf_charge_index], row.time, cum_charge] + x_pulses[pulse_index] = [getattr(row, charge_str), row.time, cum_charge] else: # (charge, time) - x_pulses[pulse_index] = [row[hdf_charge_index], row.time] + x_pulses[pulse_index] = [getattr(row, charge_str), row.time] # gather pulse ids (batch index, string, dom) x_pulses_ids[pulse_index] = [index, string - 1, dom - 1] @@ -591,7 +586,6 @@ def get_data_from_frame(self, frame, *args, **kwargs): for pulse in pulse_list: index = 0 - # pulse charge: row[hdf_charge_index], time: row.time # accumulate charge in DOMs x_dom_charge[index, string - 1, dom - 1, 0] += pulse.charge