From fbf5d55b3796c9d72858dafdfb5c11064ff415bd Mon Sep 17 00:00:00 2001
From: Cyan
Date: Tue, 10 Sep 2024 11:43:28 +0200
Subject: [PATCH 01/11] changed the variable name from sort_this to
 contingency_table for the sake of readability and fixed column selection

---
 src/graphnet/models/graphs/utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 9dd21ee60..f0a9a64f2 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -66,10 +66,11 @@ def gather_cluster_sequence(
         x[:, cluster_columns], return_counts=True, axis=0
     )
     # sort DOMs and pulse-counts
-    sort_this = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
-    sort_this = lex_sort(x=sort_this, cluster_columns=cluster_columns)
-    unique_sensors = sort_this[:, 0 : unique_sensors.shape[1]]
-    counts = sort_this[:, unique_sensors.shape[1] :].flatten().astype(int)
+    contingency_table = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
+    cluster_columns_contingency_table = np.arange(n_clusters = len(cluster_columns))
+    contingency_table = lex_sort(x = contingency_table, cluster_columns = cluster_columns_contingency_table)
+    unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]]
+    counts = contingency_table[:, unique_sensors.shape[1] :].flatten().astype(int)
 
     # Pad unique sensor columns with NaN's up until the maximum number of
     # Same pmt-pulses. Each of padded columns represents a pulse.

From d161280ff115fc6b7da1ec00b6074d89ce95e808 Mon Sep 17 00:00:00 2001
From: Cyan
Date: Tue, 10 Sep 2024 13:37:46 +0200
Subject: [PATCH 02/11] adjust fix

---
 src/graphnet/models/graphs/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index f0a9a64f2..b9e06b21f 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -67,7 +67,7 @@ def gather_cluster_sequence(
     )
     # sort DOMs and pulse-counts
     contingency_table = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
-    cluster_columns_contingency_table = np.arange(n_clusters = len(cluster_columns))
+    cluster_columns_contingency_table = np.arange(0, unique_sensors.shape[1], 1)
     contingency_table = lex_sort(x = contingency_table, cluster_columns = cluster_columns_contingency_table)
     unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]]
     counts = contingency_table[:, unique_sensors.shape[1] :].flatten().astype(int)

From 9713da9bceae376d248201b196d8b871b9c2956e Mon Sep 17 00:00:00 2001
From: Cyan
Date: Wed, 11 Sep 2024 08:59:35 +0200
Subject: [PATCH 03/11] removed spaces around assignment operators, and
 introduced a linebreak in a long code line (line 71)

---
 src/graphnet/models/graphs/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index b9e06b21f..de5c38e90 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -68,7 +68,8 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     contingency_table = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
     cluster_columns_contingency_table = np.arange(0, unique_sensors.shape[1], 1)
-    contingency_table = lex_sort(x = contingency_table, cluster_columns = cluster_columns_contingency_table)
+    contingency_table = lex_sort(x=contingency_table,
+                        cluster_columns=cluster_columns_contingency_table)
     unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]]
     counts = contingency_table[:, unique_sensors.shape[1] :].flatten().astype(int)
 

From 687d005d77d506dc7e0f0ee9e76f24fd856c35cd Mon Sep 17 00:00:00 2001
From: Cyan
Date: Wed, 11 Sep 2024 09:21:51 +0200
Subject: [PATCH 04/11] remove the space and add a transitional variable to
 keep code lines under 79 characters

---
 src/graphnet/models/graphs/utils.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index de5c38e90..74bd28737 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -66,12 +66,13 @@ def gather_cluster_sequence(
         x[:, cluster_columns], return_counts=True, axis=0
     )
     # sort DOMs and pulse-counts
-    contingency_table = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
-    cluster_columns_contingency_table = np.arange(0, unique_sensors.shape[1], 1)
-    contingency_table = lex_sort(x=contingency_table,
-                        cluster_columns=cluster_columns_contingency_table)
-    unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]]
-    counts = contingency_table[:, unique_sensors.shape[1] :].flatten().astype(int)
+    sensor_counts = counts.reshape(-1, 1)
+    contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1)
+    cluster_columns_contingency_table = np.arange(0,unique_sensors.shape[1],1)
+    contingency_table = lex_sort(x=contingency_table,
+                        cluster_columns=cluster_columns_contingency_table)
+    unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
+    counts = contingency_table[:, unique_sensors.shape[1]:].flatten().astype(int)
 
     # Pad unique sensor columns with NaN's up until the maximum number of
     # Same pmt-pulses. Each of padded columns represents a pulse.
@@ -88,8 +89,8 @@ def gather_cluster_sequence(
 
     # Insert pulse attribute in place of NaN.
     for k in range(len(counts)):
-        array[k, column_offset : (column_offset + counts[k])] = x[
-            cumsum[k] : cumsum[k + 1], feature_idx
+        array[k, column_offset:(column_offset + counts[k])] = x[
+            cumsum[k]:cumsum[k + 1], feature_idx
         ]
     return array, column_offset, counts
 
@@ -131,7 +132,8 @@ def cluster_summarize_with_percentiles(
     then each row in the returned array will correspond to a DOM,
     and the time and charge for each DOM will be summarized by percentiles.
     Returned output array has dimensions
-    `[n_clusters, len(percentiles)*len(summarization_indices) + len(cluster_indices)]`
+    `[n_clusters,
+    len(percentiles)*len(summarization_indices) + len(cluster_indices)]`
 
     Args:
         x: Array to be clustered

From aaf591874d2d0ea0e2e879175f1856d3ce3b94ad Mon Sep 17 00:00:00 2001
From: Cyan
Date: Wed, 11 Sep 2024 09:29:23 +0200
Subject: [PATCH 05/11] change variable name to keep the line below 79
 characters

---
 src/graphnet/models/graphs/utils.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 74bd28737..3d25a69c8 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -68,11 +68,13 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
     contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1)
-    cluster_columns_contingency_table = np.arange(0,unique_sensors.shape[1],1)
+    contingency_table = np.arange(0, unique_sensors.shape[1], 1)
     contingency_table = lex_sort(x=contingency_table,
-                        cluster_columns=cluster_columns_contingency_table)
+                        cluster_columns=contingency_table)
     unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
-    counts = contingency_table[:, unique_sensors.shape[1]:].flatten().astype(int)
+    count_part = contingency_table[:, unique_sensors.shape[1]:]
+    flattened_counts = count_part.flatten()
+    counts = flattened_counts.astype(int)
 
     # Pad unique sensor columns with NaN's up until the maximum number of
     # Same pmt-pulses. Each of padded columns represents a pulse.

From de83c1555c34457caab71302f1c7ce2798e55638 Mon Sep 17 00:00:00 2001
From: Cyan
Date: Thu, 12 Sep 2024 09:07:13 +0200
Subject: [PATCH 06/11] additional linebreak for long lines

---
 src/graphnet/models/graphs/utils.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 3d25a69c8..1c96539bc 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -67,10 +67,14 @@ def gather_cluster_sequence(
     )
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
-    contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1)
+    contingency_table = np.concatenate(
+        [unique_sensors, sensor_counts], axis=1
+        )
     contingency_table = np.arange(0, unique_sensors.shape[1], 1)
-    contingency_table = lex_sort(x=contingency_table,
-                        cluster_columns=contingency_table)
+    contingency_table = lex_sort(
+        x=contingency_table,
+        cluster_columns=contingency_table
+        )
     unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
     count_part = contingency_table[:, unique_sensors.shape[1]:]
     flattened_counts = count_part.flatten()

From 8773e32a29feae9cd100d605f9c1e58a01c7a397 Mon Sep 17 00:00:00 2001
From: Cyan
Date: Fri, 13 Sep 2024 14:05:58 +0200
Subject: [PATCH 07/11] change the argument arrangement

---
 src/graphnet/models/graphs/utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 1c96539bc..d80491097 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -68,13 +68,14 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
     contingency_table = np.concatenate(
-        [unique_sensors, sensor_counts], axis=1
+            [unique_sensors, sensor_counts], 
+            axis=1
         )
     contingency_table = np.arange(0, unique_sensors.shape[1], 1)
     contingency_table = lex_sort(
-        x=contingency_table,
-        cluster_columns=contingency_table
-        )
+            x=contingency_table,
+            cluster_columns=contingency_table
+            )
     unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
     count_part = contingency_table[:, unique_sensors.shape[1]:]
     flattened_counts = count_part.flatten()

From 92fc2c441103082bb327570e0ba95ef3cb5cf83c Mon Sep 17 00:00:00 2001
From: Cyan
Date: Fri, 13 Sep 2024 14:49:25 +0200
Subject: [PATCH 08/11] remove trailing space

---
 src/graphnet/models/graphs/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index d80491097..b46d9a3ed 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -68,7 +68,7 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
     contingency_table = np.concatenate(
-            [unique_sensors, sensor_counts], 
+            [unique_sensors, sensor_counts],
            axis=1
         )
     contingency_table = np.arange(0, unique_sensors.shape[1], 1)

From eddf06c2df2448e4996b9b1e4af3c6690f0b0b6b Mon Sep 17 00:00:00 2001
From: Cyan
Date: Thu, 19 Sep 2024 08:54:13 +0200
Subject: [PATCH 09/11] indentation matching

---
 src/graphnet/models/graphs/utils.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index b46d9a3ed..8e47286b2 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -51,7 +51,7 @@ def gather_cluster_sequence(
     Args:
         x: Array for clustering
         feature_idx: Index of the feature in `x` to
-        be gathered for each cluster.
+            be gathered for each cluster.
         cluster_columns: Index in `x` from which to build clusters.
 
     Returns:
@@ -68,14 +68,14 @@ def gather_cluster_sequence(
     # sort DOMs and pulse-counts
     sensor_counts = counts.reshape(-1, 1)
     contingency_table = np.concatenate(
-            [unique_sensors, sensor_counts],
-            axis=1
-        )
+        [unique_sensors, sensor_counts],
+        axis=1
+    )
     contingency_table = np.arange(0, unique_sensors.shape[1], 1)
     contingency_table = lex_sort(
-            x=contingency_table,
-            cluster_columns=contingency_table
-            )
+        x=contingency_table,
+        cluster_columns=contingency_table
+    )
     unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]]
     count_part = contingency_table[:, unique_sensors.shape[1]:]
     flattened_counts = count_part.flatten()
@@ -191,9 +191,9 @@ def ice_transparency(
 
     Returns:
         f_scattering: Function that takes a normalized depth and returns the
-        corresponding normalized scattering length.
+            corresponding normalized scattering length.
         f_absorption: Function that takes a normalized depth and returns the
-        corresponding normalized absorption length.
+            corresponding normalized absorption length.
""" # Data from page 31 of https://arxiv.org/pdf/1301.5361.pdf df = pd.read_parquet( From 12746f652ec300d693cf7ec6b189c3d0c0752149 Mon Sep 17 00:00:00 2001 From: Cyan Date: Fri, 20 Sep 2024 17:31:46 +0200 Subject: [PATCH 10/11] pre-commit run for new change --- src/graphnet/models/graphs/utils.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py index 8e47286b2..671808528 100644 --- a/src/graphnet/models/graphs/utils.py +++ b/src/graphnet/models/graphs/utils.py @@ -67,17 +67,13 @@ def gather_cluster_sequence( ) # sort DOMs and pulse-counts sensor_counts = counts.reshape(-1, 1) - contingency_table = np.concatenate( - [unique_sensors, sensor_counts], - axis=1 - ) + contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1) contingency_table = np.arange(0, unique_sensors.shape[1], 1) contingency_table = lex_sort( - x=contingency_table, - cluster_columns=contingency_table + x=contingency_table, cluster_columns=contingency_table ) - unique_sensors = contingency_table[:, 0:unique_sensors.shape[1]] - count_part = contingency_table[:, unique_sensors.shape[1]:] + unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]] + count_part = contingency_table[:, unique_sensors.shape[1] :] flattened_counts = count_part.flatten() counts = flattened_counts.astype(int) @@ -96,8 +92,8 @@ def gather_cluster_sequence( # Insert pulse attribute in place of NaN. for k in range(len(counts)): - array[k, column_offset:(column_offset + counts[k])] = x[ - cumsum[k]:cumsum[k + 1], feature_idx + array[k, column_offset : (column_offset + counts[k])] = x[ + cumsum[k] : cumsum[k + 1], feature_idx ] return array, column_offset, counts From 6c58cda31bcf11e879e6339feeb5c1a26612f35a Mon Sep 17 00:00:00 2001 From: Cyan Date: Mon, 23 Sep 2024 10:10:57 +0200 Subject: [PATCH 11/11] correction on the choice of columns in the contingency table, the variable names are misused in the process of reducing the variable name --- src/graphnet/models/graphs/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py index 671808528..ea8445f90 100644 --- a/src/graphnet/models/graphs/utils.py +++ b/src/graphnet/models/graphs/utils.py @@ -68,9 +68,9 @@ def gather_cluster_sequence( # sort DOMs and pulse-counts sensor_counts = counts.reshape(-1, 1) contingency_table = np.concatenate([unique_sensors, sensor_counts], axis=1) - contingency_table = np.arange(0, unique_sensors.shape[1], 1) + sensors_in_contingency_table = np.arange(0, unique_sensors.shape[1], 1) contingency_table = lex_sort( - x=contingency_table, cluster_columns=contingency_table + x=contingency_table, cluster_columns=sensors_in_contingency_table ) unique_sensors = contingency_table[:, 0 : unique_sensors.shape[1]] count_part = contingency_table[:, unique_sensors.shape[1] :]