From fbf5d55b3796c9d72858dafdfb5c11064ff415bd Mon Sep 17 00:00:00 2001
From: Cyan
Date: Tue, 10 Sep 2024 11:43:28 +0200
Subject: [PATCH 01/11] changed the variable name from sort_this to
 contingency_table for the sake of readability and fixed column selection

---
 src/graphnet/models/graphs/utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 9dd21ee60..f0a9a64f2 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -66,10 +66,11 @@ def gather_cluster_sequence(
         x[:, cluster_columns], return_counts=True, axis=0
     )
     # sort DOMs and pulse-counts
-    sort_this = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
-    sort_this = lex_sort(x=sort_this, cluster_columns=cluster_columns)
-    unique_sensors = sort_this[:, 0 : unique_sensors.shape[1]]
-    counts = sort_this[:, unique_sensors.shape[1] :].flatten().astype(int)
+    contingency_table = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
+    cluster_columns_contingency_table = np.arange(n_clusters = len(cluster_columns))
+    contingency_table = lex_sort(x = contingency_table, cluster_columns = cluster_columns_contingency_table)
+    unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]]
+    counts = contingency_table[:, unique_sensors.shape[1] :].flatten().astype(int)
 
     # Pad unique sensor columns with NaN's up until the maximum number of
     # Same pmt-pulses. Each of padded columns represents a pulse.

From d161280ff115fc6b7da1ec00b6074d89ce95e808 Mon Sep 17 00:00:00 2001
From: Cyan
Date: Tue, 10 Sep 2024 13:37:46 +0200
Subject: [PATCH 02/11] adjust fix

---
 src/graphnet/models/graphs/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index f0a9a64f2..b9e06b21f 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -67,7 +67,7 @@ def gather_cluster_sequence(
     )
     # sort DOMs and pulse-counts
     contingency_table = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
-    cluster_columns_contingency_table = np.arange(n_clusters = len(cluster_columns))
+    cluster_columns_contingency_table = np.arange(0, unique_sensors.shape[1], 1)
     contingency_table = lex_sort(x = contingency_table, cluster_columns = cluster_columns_contingency_table)
     unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]]
     counts = contingency_table[:, unique_sensors.shape[1] :].flatten().astype(int)

From 9713da9bceae376d248201b196d8b871b9c2956e Mon Sep 17 00:00:00 2001
From: Cyan
Date: Wed, 11 Sep 2024 08:59:35 +0200
Subject: [PATCH 03/11] removed spaces around assignment operators, and
 introduced a linebreak in a long code line (line 71)

---
 src/graphnet/models/graphs/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index b9e06b21f..de5c38e90 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -68,7 +68,8 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     contingency_table = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
     cluster_columns_contingency_table = np.arange(0, unique_sensors.shape[1], 1)
-    contingency_table = lex_sort(x = contingency_table, cluster_columns = cluster_columns_contingency_table)
+    contingency_table = lex_sort(x=contingency_table,
+                        cluster_columns=cluster_columns_contingency_table)
     unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]]
     counts = contingency_table[:, unique_sensors.shape[1] :].flatten().astype(int)
 

From 687d005d77d506dc7e0f0ee9e76f24fd856c35cd Mon Sep 17 00:00:00 2001
From: Cyan
Date: Wed, 11 Sep 2024 09:21:51 +0200
Subject: [PATCH 04/11] remove the space and add a transitional variable to
 keep code lines under 79 characters

---
 src/graphnet/models/graphs/utils.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index de5c38e90..74bd28737 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -66,12 +66,13 @@ def gather_cluster_sequence(
         x[:, cluster_columns], return_counts=True, axis=0
     )
     # sort DOMs and pulse-counts
-    contingency_table = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
-    cluster_columns_contingency_table = np.arange(0, unique_sensors.shape[1], 1)
-    contingency_table = lex_sort(x=contingency_table,
-                        cluster_columns=cluster_columns_contingency_table)
-    unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]]
-    counts = contingency_table[:, unique_sensors.shape[1] :].flatten().astype(int)
+    sensor_counts = counts.reshape(-1, 1)
+    contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1)
+    cluster_columns_contingency_table = np.arange(0,unique_sensors.shape[1],1)
+    contingency_table = lex_sort(x=contingency_table,
+                        cluster_columns=cluster_columns_contingency_table)
+    unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
+    counts = contingency_table[:, unique_sensors.shape[1]:].flatten().astype(int)
 
     # Pad unique sensor columns with NaN's up until the maximum number of
     # Same pmt-pulses. Each of padded columns represents a pulse.
@@ -88,8 +89,8 @@ def gather_cluster_sequence(
 
     # Insert pulse attribute in place of NaN.
     for k in range(len(counts)):
-        array[k, column_offset : (column_offset + counts[k])] = x[
-            cumsum[k] : cumsum[k + 1], feature_idx
+        array[k, column_offset:(column_offset + counts[k])] = x[
+            cumsum[k]:cumsum[k + 1], feature_idx
         ]
     return array, column_offset, counts
 
@@ -131,7 +132,8 @@ def cluster_summarize_with_percentiles(
     then each row in the returned array will correspond to a DOM,
     and the time and charge for each DOM will be summarized by percentiles.
     Returned output array has dimensions
-    `[n_clusters, len(percentiles)*len(summarization_indices) + len(cluster_indices)]`
+    `[n_clusters,
+    len(percentiles)*len(summarization_indices) + len(cluster_indices)]`
 
     Args:
         x: Array to be clustered

From aaf591874d2d0ea0e2e879175f1856d3ce3b94ad Mon Sep 17 00:00:00 2001
From: Cyan
Date: Wed, 11 Sep 2024 09:29:23 +0200
Subject: [PATCH 05/11] change variable name to keep the line below 79
 characters

---
 src/graphnet/models/graphs/utils.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 74bd28737..3d25a69c8 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -68,11 +68,13 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
     contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1)
-    cluster_columns_contingency_table = np.arange(0,unique_sensors.shape[1],1)
+    contingency_table = np.arange(0, unique_sensors.shape[1], 1)
     contingency_table = lex_sort(x=contingency_table,
-                        cluster_columns=cluster_columns_contingency_table)
+                        cluster_columns=contingency_table)
     unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
-    counts = contingency_table[:, unique_sensors.shape[1]:].flatten().astype(int)
+    count_part = contingency_table[:, unique_sensors.shape[1]:]
+    flattened_counts = count_part.flatten()
+    counts = flattened_counts.astype(int)
 
     # Pad unique sensor columns with NaN's up until the maximum number of
     # Same pmt-pulses. Each of padded columns represents a pulse.

From de83c1555c34457caab71302f1c7ce2798e55638 Mon Sep 17 00:00:00 2001
From: Cyan
Date: Thu, 12 Sep 2024 09:07:13 +0200
Subject: [PATCH 06/11] additional linebreak for long lines

---
 src/graphnet/models/graphs/utils.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 3d25a69c8..1c96539bc 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -67,10 +67,14 @@ def gather_cluster_sequence(
     )
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
-    contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1)
+    contingency_table = np.concatenate(
+        [unique_sensors, sensor_counts], axis=1
+        )
     contingency_table = np.arange(0, unique_sensors.shape[1], 1)
-    contingency_table = lex_sort(x=contingency_table,
-                        cluster_columns=contingency_table)
+    contingency_table = lex_sort(
+        x=contingency_table,
+        cluster_columns=contingency_table
+        )
     unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
     count_part = contingency_table[:, unique_sensors.shape[1]:]
     flattened_counts = count_part.flatten()

From 8773e32a29feae9cd100d605f9c1e58a01c7a397 Mon Sep 17 00:00:00 2001
From: Cyan
Date: Fri, 13 Sep 2024 14:05:58 +0200
Subject: [PATCH 07/11] change the argument arrangement

---
 src/graphnet/models/graphs/utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 1c96539bc..d80491097 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -68,13 +68,14 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
     contingency_table = np.concatenate(
-        [unique_sensors, sensor_counts], axis=1
+            [unique_sensors, sensor_counts], 
+            axis=1
         )
     contingency_table = np.arange(0, unique_sensors.shape[1], 1)
     contingency_table = lex_sort(
-        x=contingency_table,
-        cluster_columns=contingency_table
-        )
+            x=contingency_table,
+            cluster_columns=contingency_table
+            )
     unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
     count_part = contingency_table[:, unique_sensors.shape[1]:]
     flattened_counts = count_part.flatten()

From 92fc2c441103082bb327570e0ba95ef3cb5cf83c Mon Sep 17 00:00:00 2001
From: Cyan
Date: Fri, 13 Sep 2024 14:49:25 +0200
Subject: [PATCH 08/11] remove trailing space

---
 src/graphnet/models/graphs/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index d80491097..b46d9a3ed 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -68,7 +68,7 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
     contingency_table = np.concatenate(
-            [unique_sensors, sensor_counts], 
+            [unique_sensors, sensor_counts],
            axis=1
         )
     contingency_table = np.arange(0, unique_sensors.shape[1], 1)

From eddf06c2df2448e4996b9b1e4af3c6690f0b0b6b Mon Sep 17 00:00:00 2001
From: Cyan
Date: Thu, 19 Sep 2024 08:54:13 +0200
Subject: [PATCH 09/11] indentation matching

---
 src/graphnet/models/graphs/utils.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index b46d9a3ed..8e47286b2 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -51,7 +51,7 @@ def gather_cluster_sequence(
     Args:
         x: Array for clustering
         feature_idx: Index of the feature in `x` to
-        be gathered for each cluster.
+            be gathered for each cluster.
         cluster_columns: Index in `x` from which to build clusters.
 
     Returns:
@@ -68,14 +68,14 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
     contingency_table = np.concatenate(
-            [unique_sensors, sensor_counts],
-            axis=1
-        )
+        [unique_sensors, sensor_counts],
+        axis=1
+    )
     contingency_table = np.arange(0, unique_sensors.shape[1], 1)
     contingency_table = lex_sort(
-            x=contingency_table,
-            cluster_columns=contingency_table
-            )
+        x=contingency_table,
+        cluster_columns=contingency_table
+    )
     unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
     count_part = contingency_table[:, unique_sensors.shape[1]:]
     flattened_counts = count_part.flatten()
@@ -191,9 +191,9 @@ def ice_transparency(
 
     Returns:
         f_scattering: Function that takes a normalized depth and returns the
-        corresponding normalized scattering length.
+            corresponding normalized scattering length.
         f_absorption: Function that takes a normalized depth and returns the
-        corresponding normalized absorption length.
+            corresponding normalized absorption length.
""" # Data from page 31 of https://arxiv.org/pdf/1301.5361.pdf df = pd.read_parquet( From 12746f652ec300d693cf7ec6b189c3d0c0752149 Mon Sep 17 00:00:00 2001 From: Cyan Date: Fri, 20 Sep 2024 17:31:46 +0200 Subject: [PATCH 10/11] pre-commit run for new change --- src/graphnet/models/graphs/utils.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py index 8e47286b2..671808528 100644 --- a/src/graphnet/models/graphs/utils.py +++ b/src/graphnet/models/graphs/utils.py @@ -67,17 +67,13 @@ def gather_cluster_sequence( ) # sort DOMs and pulse-counts sensor_counts = counts.reshape(-1, 1) - contingency_table = np.concatenate( - [unique_sensors, sensor_counts], - axis=1 - ) + contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1) contingency_table = np.arange(0, unique_sensors.shape[1], 1) contingency_table = lex_sort( - x=contingency_table, - cluster_columns=contingency_table + x=contingency_table, cluster_columns=contingency_table ) - unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]] - count_part = contingency_table[:, unique_sensors.shape[1]:] + unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]] + count_part = contingency_table[:, unique_sensors.shape[1] :] flattened_counts = count_part.flatten() counts = flattened_counts.astype(int) @@ -96,8 +92,8 @@ def gather_cluster_sequence( # Insert pulse attribute in place of NaN. for k in range(len(counts)): - array[k, column_offset:(column_offset + counts[k])] = x[ - cumsum[k]:cumsum[k + 1], feature_idx + array[k, column_offset : (column_offset + counts[k])] = x[ + cumsum[k] : cumsum[k + 1], feature_idx ] return array, column_offset, counts From 6c58cda31bcf11e879e6339feeb5c1a26612f35a Mon Sep 17 00:00:00 2001 From: Cyan Date: Mon, 23 Sep 2024 10:10:57 +0200 Subject: [PATCH 11/11] correction on the choice of columns in the contingency table, the variable names are misused in the process of reducing the variable name --- src/graphnet/models/graphs/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py index 671808528..ea8445f90 100644 --- a/src/graphnet/models/graphs/utils.py +++ b/src/graphnet/models/graphs/utils.py @@ -68,9 +68,9 @@ def gather_cluster_sequence( # sort DOMs and pulse-counts sensor_counts = counts.reshape(-1, 1) contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1) - contingency_table = np.arange(0, unique_sensors.shape[1], 1) + sensors_in_contingency_table = np.arange(0, unique_sensors.shape[1], 1) contingency_table = lex_sort( - x=contingency_table, cluster_columns=contingency_table + x=contingency_table, cluster_columns=sensors_in_contingency_table ) unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]] count_part = contingency_table[:, unique_sensors.shape[1] :]