From 84f73f528fbe8c0e133819f2b8369ccf60b2d555 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Mon, 9 Oct 2023 14:32:57 +0200
Subject: [PATCH 01/21] copy-paste of code: add PercentileClusters node definition

---
 src/graphnet/models/graphs/nodes/nodes.py | 114 ++++++++++++++++++++--
 1 file changed, 107 insertions(+), 7 deletions(-)

diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index ce539ee80..8966d2891 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -1,6 +1,6 @@
 """Class(es) for building/connecting graphs."""
 
-from typing import List
+from typing import List, Tuple
 from abc import abstractmethod
 
 import torch
@@ -8,6 +8,11 @@
 
 from graphnet.utilities.decorators import final
 from graphnet.models import Model
+from graphnet.models.graphs.utils import (
+    cluster_summarize_with_percentiles,
+    identify_indices,
+)
+from copy import deepcopy
 
 
 class NodeDefinition(Model):  # pylint: disable=too-few-public-methods
@@ -19,18 +24,24 @@ def __init__(self) -> None:
         super().__init__(name=__name__, class_name=self.__class__.__name__)
 
     @final
-    def forward(self, x: torch.tensor) -> Data:
+    def forward(
+        self, x: torch.tensor, node_feature_names: List[str]
+    ) -> Tuple[Data, List[str]]:
         """Construct nodes from raw node features.
 
         Args:
             x: standardized node features with shape ´[num_pulses, d]´,
             where ´d´ is the number of node features.
+            node_feature_names: list of names for each column in ´x´.
 
         Returns:
             graph: a graph without edges
+            new_features_name: List of new feature names.
         """
-        graph = self._construct_nodes(x)
-        return graph
+        graph, new_feature_names = self._construct_nodes(
+            x=x, feature_names=node_feature_names
+        )
+        return graph, new_feature_names
 
     @property
     def nb_outputs(self) -> int:
@@ -51,20 +62,109 @@ def set_number_of_inputs(self, node_feature_names: List[str]) -> None:
         self.nb_inputs = len(node_feature_names)
 
     @abstractmethod
-    def _construct_nodes(self, x: torch.tensor) -> Data:
+    def _construct_nodes(
+        self, x: torch.tensor, feature_names: List[str]
+    ) -> Data:
         """Construct nodes from raw node features ´x´.
 
         Args:
             x: standardized node features with shape ´[num_pulses, d]´,
             where ´d´ is the number of node features.
+            feature_names: List of names for reach column in `x`. Identical
+            order of appearance. Length `d`.
 
         Returns:
             graph: graph without edges.
+            new_node_features: A list of node features names.
         """
 
 
 class NodesAsPulses(NodeDefinition):
     """Represent each measured pulse of Cherenkov Radiation as a node."""
 
-    def _construct_nodes(self, x: torch.Tensor) -> Data:
-        return Data(x=x)
+    def _construct_nodes(
+        self, x: torch.Tensor, feature_names: List[str]
+    ) -> Data:
+        return Data(x=x), feature_names
+
+
+class PercentileClusters(NodeDefinition):
+    """Represent nodes as clusters with percentile summary node features.
+
+    If `cluster_on` is set to the xyz coordinates of DOMs
+    e.g. `cluster_on = ['dom_x', 'dom_y', 'dom_z']`, each node will be a
+    unique DOM and the pulse information (charge, time) is summarized using
+    percentiles.
+    """
+
+    def __init__(
+        self,
+        cluster_on: List[str],
+        feature_names: List[str],
+        percentiles: List[int],
+        add_counts: bool = True,
+    ) -> None:
+        """Construct `PercentileClusters`.
+
+        Args:
+            cluster_on: Names of features to create clusters from.
+            feature_names: List of colum names for the input data.
+                           E.g. ['dom_x', 'dom_y', 'dom_z',..]
+            percentiles: List of percentiles. E.g. `[10, 50, 90]`.
+            add_counts: If True, number of duplicates is added to output array.
+        """
+        self._cluster_on = cluster_on
+        self._percentiles = percentiles
+        self._add_counts = add_counts
+        (
+            cluster_idx,
+            summ_idx,
+            new_feature_names,
+        ) = self._get_indices_and_feature_names(
+            feature_names, self._add_counts
+        )
+        self._cluster_indices = cluster_idx
+        self._summarization_indices = summ_idx
+        self._output_feature_names = new_feature_names
+        # Base class constructor
+        super().__init__()
+
+    def _get_indices_and_feature_names(
+        self,
+        feature_names: List[str],
+        add_counts: bool,
+    ) -> Tuple[List[int], List[int], List[str]]:
+        cluster_idx, summ_idx, summ_names = identify_indices(
+            feature_names, self._cluster_on
+        )
+        new_feature_names = deepcopy(self._cluster_on)
+        for feature in summ_names:
+            for pct in self._percentiles:
+                new_feature_names.append(f"{feature}_pct{pct}")
+        if add_counts:
+            # add "counts" as the last feature
+            new_feature_names.append("counts")
+        return cluster_idx, summ_idx, new_feature_names
+
+    def _construct_nodes(
+        self, x: torch.Tensor, feature_names: List[str]
+    ) -> Data:
+        # Cast to Numpy
+        x = x.numpy()
+        # Construct clusters with percentile-summarized features
+        array = cluster_summarize_with_percentiles(
+            x=x,
+            summarization_indices=self._summarization_indices,
+            cluster_indices=self._cluster_indices,
+            percentiles=self._percentiles,
+            add_counts=self._add_counts,
+        )
+
+        return Data(x=torch.tensor(array)), self._output_feature_names
+
+    def nb_outputs(self) -> int:
+        """Return number of output features.
+
+        This the default, but may be overridden by specific inheriting classes.
+        """
+        return len(self._output_feature_names)

From 776e3001548ea8df5c6240800610c83eebc42e8b Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Mon, 9 Oct 2023 14:34:43 +0200
Subject: [PATCH 02/21] copy-paste: unpack node definition output, enforce dtype

---
 src/graphnet/models/graphs/graph_definition.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py
index 9c4db4d47..f311340b9 100644
--- a/src/graphnet/models/graphs/graph_definition.py
+++ b/src/graphnet/models/graphs/graph_definition.py
@@ -139,7 +139,10 @@ def forward(  # type: ignore
         node_features = self._detector(node_features, node_feature_names)
 
         # Create graph
-        graph = self._node_definition(node_features)
+        graph, node_feature_names = self._node_definition(
+            node_features, node_feature_names
+        )
+        graph.x = graph.x.type(self.dtype)
 
         # Attach number of pulses as static attribute.
         graph.n_pulses = torch.tensor(len(node_features), dtype=torch.int32)

From f8577a47ee519b47eafb090ce0cab5ec5687fadb Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Mon, 9 Oct 2023 14:36:59 +0200
Subject: [PATCH 03/21] add comments; drop feature names from node definition call

---
 src/graphnet/models/graphs/graph_definition.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py
index f311340b9..8bd25759e 100644
--- a/src/graphnet/models/graphs/graph_definition.py
+++ b/src/graphnet/models/graphs/graph_definition.py
@@ -138,10 +138,10 @@ def forward(  # type: ignore
         # Standardize / Scale  node features
         node_features = self._detector(node_features, node_feature_names)
 
-        # Create graph
-        graph, node_feature_names = self._node_definition(
-            node_features, node_feature_names
-        )
+        # Create graph & get new node feature names
+        graph, node_feature_names = self._node_definition(node_features)
+
+        # Enforce dtype
         graph.x = graph.x.type(self.dtype)
 
         # Attach number of pulses as static attribute.

From 27d0b3ac36e1bd33cf9f78f33231769fdbab5d96 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Mon, 9 Oct 2023 16:19:20 +0200
Subject: [PATCH 04/21] introduce set function, refactor

---
 .../models/graphs/graph_definition.py         |  5 +
 src/graphnet/models/graphs/nodes/nodes.py     | 91 +++++++++++++------
 2 files changed, 70 insertions(+), 26 deletions(-)

diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py
index 8bd25759e..6e8d74715 100644
--- a/src/graphnet/models/graphs/graph_definition.py
+++ b/src/graphnet/models/graphs/graph_definition.py
@@ -67,6 +67,11 @@ def __init__(
             node_feature_names = list(self._detector.feature_map().keys())  # type: ignore
         self._node_feature_names = node_feature_names
 
+        # Set input data column names for node definition
+        self._node_definition.set_output_feature_names(
+            self._node_feature_names
+        )
+
         # Set data type
         self.to(dtype)
 
diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index 8966d2891..477751934 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -1,6 +1,6 @@
 """Class(es) for building/connecting graphs."""
 
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 from abc import abstractmethod
 
 import torch
@@ -18,15 +18,19 @@
 class NodeDefinition(Model):  # pylint: disable=too-few-public-methods
     """Base class for graph building."""
 
-    def __init__(self) -> None:
+    def __init__(
+        self, input_feature_names: Optional[List[str]] = None
+    ) -> None:
         """Construct `Detector`."""
         # Base class constructor
         super().__init__(name=__name__, class_name=self.__class__.__name__)
+        if input_feature_names is not None:
+            self.set_output_feature_names(
+                input_feature_names=input_feature_names
+            )
 
     @final
-    def forward(
-        self, x: torch.tensor, node_feature_names: List[str]
-    ) -> Tuple[Data, List[str]]:
+    def forward(self, x: torch.tensor) -> Tuple[Data, List[str]]:
         """Construct nodes from raw node features.
 
         Args:
@@ -38,10 +42,18 @@ def forward(
             graph: a graph without edges
             new_features_name: List of new feature names.
         """
-        graph, new_feature_names = self._construct_nodes(
-            x=x, feature_names=node_feature_names
-        )
-        return graph, new_feature_names
+        graph = self._construct_nodes(x=x)
+        try:
+            self._output_feature_names
+        except AttributeError as e:
+            self.error(
+                f"""{self.__class__.__name__} was instantiated without
+                       `input_feature_names` and it was not set prior to this
+                       forward call. If you are using this class outside a
+                       `GraphDefinition`, please instatiate with `input_feature_names`."""
+            )  # noqa
+            raise e
+        return graph, self._output_feature_names
 
     @property
     def nb_outputs(self) -> int:
@@ -61,10 +73,33 @@ def set_number_of_inputs(self, node_feature_names: List[str]) -> None:
         assert isinstance(node_feature_names, list)
         self.nb_inputs = len(node_feature_names)
 
+    @final
+    def set_output_feature_names(self, input_feature_names: List[str]) -> None:
+        """Set output features names as a member variable.
+
+        Args:
+            input_feature_names: List of column names of the input to the
+            node definition.
+        """
+        self._output_feature_names = self._define_output_feature_names(
+            input_feature_names
+        )
+
     @abstractmethod
-    def _construct_nodes(
-        self, x: torch.tensor, feature_names: List[str]
-    ) -> Data:
+    def _define_output_feature_names(
+        self, input_feature_names: List[str]
+    ) -> List[str]:
+        """Construct names of output columns.
+
+        Args:
+            input_feature_names: List of column names for the input data.
+
+        Returns:
+            A list of column names for each column in the node definition output.
+        """
+
+    @abstractmethod
+    def _construct_nodes(self, x: torch.tensor) -> Tuple[Data, List[str]]:
         """Construct nodes from raw node features ´x´.
 
         Args:
@@ -82,10 +117,13 @@ def _construct_nodes(
 class NodesAsPulses(NodeDefinition):
     """Represent each measured pulse of Cherenkov Radiation as a node."""
 
-    def _construct_nodes(
-        self, x: torch.Tensor, feature_names: List[str]
-    ) -> Data:
-        return Data(x=x), feature_names
+    def _define_output_feature_names(
+        self, input_feature_names: List[str]
+    ) -> List[str]:
+        return input_feature_names
+
+    def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]:
+        return Data(x=x)
 
 
 class PercentileClusters(NodeDefinition):
@@ -100,34 +138,37 @@ class PercentileClusters(NodeDefinition):
     def __init__(
         self,
         cluster_on: List[str],
-        feature_names: List[str],
         percentiles: List[int],
         add_counts: bool = True,
+        input_feature_names: Optional[List[str]] = None,
     ) -> None:
         """Construct `PercentileClusters`.
 
         Args:
             cluster_on: Names of features to create clusters from.
-            feature_names: List of colum names for the input data.
-                           E.g. ['dom_x', 'dom_y', 'dom_z',..]
             percentiles: List of percentiles. E.g. `[10, 50, 90]`.
             add_counts: If True, number of duplicates is added to output array.
+            input_feature_names: (Optional) column names for input features.
         """
         self._cluster_on = cluster_on
         self._percentiles = percentiles
         self._add_counts = add_counts
+        # Base class constructor
+        super().__init__(input_feature_names=input_feature_names)
+
+    def _define_output_feature_names(
+        self, input_feature_names: List[str]
+    ) -> List[str]:
         (
             cluster_idx,
             summ_idx,
             new_feature_names,
         ) = self._get_indices_and_feature_names(
-            feature_names, self._add_counts
+            input_feature_names, self._add_counts
         )
         self._cluster_indices = cluster_idx
         self._summarization_indices = summ_idx
-        self._output_feature_names = new_feature_names
-        # Base class constructor
-        super().__init__()
+        return new_feature_names
 
     def _get_indices_and_feature_names(
         self,
@@ -146,9 +187,7 @@ def _get_indices_and_feature_names(
             new_feature_names.append("counts")
         return cluster_idx, summ_idx, new_feature_names
 
-    def _construct_nodes(
-        self, x: torch.Tensor, feature_names: List[str]
-    ) -> Data:
+    def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]:
         # Cast to Numpy
         x = x.numpy()
         # Construct clusters with percentile-summarized features

From d41af7d0dc0e0078c1a5309b5d53f41e06f7a5c5 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Mon, 9 Oct 2023 16:20:42 +0200
Subject: [PATCH 05/21] copy-paste utils: add clustering/percentile helpers

---
 src/graphnet/models/graphs/utils.py | 158 ++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 src/graphnet/models/graphs/utils.py

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
new file mode 100644
index 000000000..72928befb
--- /dev/null
+++ b/src/graphnet/models/graphs/utils.py
@@ -0,0 +1,158 @@
+"""Utility functions for construction of graphs."""
+
+from typing import List, Tuple
+import numpy as np
+
+
+def lex_sort(x: np.ndarray, cluster_columns: List[int]) -> np.ndarray:
+    """Sort numpy arrays according to columns on ´cluster_columns´.
+
+    Note that `np.lexsort` uses its last key as the primary sort key, so
+    `x` is sorted along `cluster_columns` backwards. I.e.
+    `cluster_columns = [0,1,2]` means `x` is sorted along `[2,1,0]`.
+
+    Args:
+        x: 2D array whose rows are to be sorted.
+        cluster_columns: Columns of `x` to be sorted along.
+
+    Returns:
+        A sorted version of `x`.
+    """
+    tmp_list = []
+    for cluster_column in cluster_columns:
+        tmp_list.append(x[:, cluster_column])
+    return x[np.lexsort(tuple(tmp_list)), :]
+
+
+def gather_cluster_sequence(
+    x: np.ndarray, feature_idx: int, cluster_columns: List[int]
+) -> Tuple[np.ndarray, int, np.ndarray]:
+    """Turn `x` into rows of clusters with sequences along columns.
+
+    Sequences along columns are added which correspond to
+    gathered sequences of the feature in `x` specified by column index
+    `feature_idx` associated with each column. Sequences are padded with NaN to
+    be of same length. Dimension of clustered array is `[n_clusters, l +
+    len(cluster_columns)]`, where `l` is the largest sequence length.
+
+    **Example**:
+    Suppose `x` represents a neutrino event and we have chosen to cluster on
+    the PMT positions. Suppose also that `feature_idx` correspond to pulse time.
+
+    The resulting array will have dimensions `[n_pmts, m + 3]` where `m` is the
+    maximum number of same-pmt pulses found in `x`, and `+3`for the three
+    spatial directions  defining each cluster.
+
+    Args:
+        x:  Array for clustering
+        feature_idx: Index of the feature in `x` to be gathered for each cluster.
+        cluster_columns: Index in `x` from which to build clusters.
+
+    Returns:
+        array: Array with dimensions  `[n_clusters, l + len(cluster_columns)]`
+        column_offset: Number of cluster columns. counts: Rows per cluster.
+    """
+    # sort pulses according to cluster columns
+    x = lex_sort(x=x, cluster_columns=cluster_columns)
+
+    # Calculate clusters and counts
+    unique_sensors, counts = np.unique(
+        x[:, cluster_columns], return_counts=True, axis=0
+    )
+    # Re-sort clusters into row order of `x`; cluster columns lead `sort_this`
+    sort_this = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1)
+    sort_this = lex_sort(sort_this, list(range(len(cluster_columns))))
+    unique_sensors = sort_this[:, 0 : unique_sensors.shape[1]]
+    counts = sort_this[:, unique_sensors.shape[1] :].flatten().astype(int)
+
+    # Pad unique sensor columns with NaN's up until the maximum number of
+    # Same pmt-pulses. Each of padded columns represents a pulse.
+    pad = np.empty((unique_sensors.shape[0], max(counts)))
+    pad[:] = np.nan
+    array = np.concatenate([unique_sensors, pad], axis=1)
+    column_offset = unique_sensors.shape[1]
+
+    # Construct indices for loop
+    cumsum = np.zeros(len(np.cumsum(counts)) + 1)
+    cumsum[0] = 0
+    cumsum[1:] = np.cumsum(counts)
+    cumsum = cumsum.astype(int)
+
+    # Insert pulse attribute in place of NaN.
+    for k in range(len(counts)):
+        array[k, column_offset : (column_offset + counts[k])] = x[
+            cumsum[k] : cumsum[k + 1], feature_idx
+        ]
+    return array, column_offset, counts
+
+
+def identify_indices(
+    feature_names: List[str], cluster_on: List[str]
+) -> Tuple[List[int], List[int], List[str]]:
+    """Return cluster indices, summarization indices and summarized names."""
+    features_for_summarization = []
+    for feature in feature_names:
+        if feature not in cluster_on:
+            features_for_summarization.append(feature)
+    cluster_indices = [feature_names.index(column) for column in cluster_on]
+    summarization_indices = [
+        feature_names.index(column) for column in features_for_summarization
+    ]
+    return cluster_indices, summarization_indices, features_for_summarization
+
+
+def cluster_summarize_with_percentiles(
+    x: np.ndarray,
+    summarization_indices: List[int],
+    cluster_indices: List[int],
+    percentiles: List[int],
+    add_counts: bool,
+) -> np.ndarray:
+    """Turn `x` into clusters with percentile summary.
+
+    From variables specified by column indices `cluster_indices`, `x` is turned
+    into clusters. Information in columns of `x` specified by indices
+    `summarization_indices` with each cluster is summarized using percentiles.
+    It is assumed `x` represents a single event.
+
+    **Example use-case**:
+    Suppose `x` contains raw pulses from a neutrino event where some DOMs have
+    multiple measurements of Cherenkov radiation. If `cluster_indices` is set
+    to the columns corresponding to the xyz-position of the DOMs, and the
+    features specified in `summarization_indices` correspond to time, charge,
+    then each row in the returned array will correspond to a DOM,
+    and the time and charge for each DOM will be summarized by percentiles.
+    Returned output array has dimensions
+    `[n_clusters, len(percentiles)*len(summarization_indices) + len(cluster_indices)]`
+
+    Args:
+        x: Array to be clustered
+        summarization_indices: List of column indices that defines features that
+                                will be summarized with percentiles.
+        cluster_indices: List of column indices on which the clusters are constructed.
+        percentiles: percentiles used to summarize `x`. E.g. [10,50,90].
+
+    Returns:
+        Summarized array; last column is log10(counts) if `add_counts`.
+    """
+    pct_dict = {}
+    for feature_idx in summarization_indices:
+        summarized_array, column_offset, counts = gather_cluster_sequence(
+            x, feature_idx, cluster_indices
+        )
+        pct_dict[feature_idx] = np.nanpercentile(
+            summarized_array[:, column_offset:], percentiles, axis=1
+        ).T
+
+    for i, key in enumerate(pct_dict.keys()):
+        if i == 0:
+            array = summarized_array[:, 0:column_offset]
+
+        array = np.concatenate([array, pct_dict[key]], axis=1)
+
+    if add_counts:
+        array = np.concatenate(
+            [array, np.log10(counts).reshape(-1, 1)], axis=1
+        )
+
+    return array

From d7e9b821762074a7d3865e085bb86352ad2e038a Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Tue, 10 Oct 2023 09:00:25 +0200
Subject: [PATCH 06/21] add import statement

---
 src/graphnet/models/graphs/nodes/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/nodes/__init__.py b/src/graphnet/models/graphs/nodes/__init__.py
index 05194b61a..0119d2b98 100644
--- a/src/graphnet/models/graphs/nodes/__init__.py
+++ b/src/graphnet/models/graphs/nodes/__init__.py
@@ -5,4 +5,4 @@
 and their features.
 """
 
-from .nodes import NodeDefinition, NodesAsPulses
+from .nodes import NodeDefinition, NodesAsPulses, PercentileClusters

From a6010331b2089e8aba7b6b02171c5fe3f26412b3 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Tue, 10 Oct 2023 10:45:59 +0200
Subject: [PATCH 07/21] fix output of construct_nodes

---
 src/graphnet/models/graphs/nodes/nodes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index 477751934..b31857d2f 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -199,7 +199,7 @@ def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]:
             add_counts=self._add_counts,
         )
 
-        return Data(x=torch.tensor(array)), self._output_feature_names
+        return Data(x=torch.tensor(array))
 
     def nb_outputs(self) -> int:
         """Return number of output features.

From 4c7e121c82f9e65a14a865dee6b1b65be4c20806 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Tue, 10 Oct 2023 10:46:34 +0200
Subject: [PATCH 08/21] type hint

---
 src/graphnet/models/graphs/nodes/nodes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index b31857d2f..7131788e5 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -187,7 +187,7 @@ def _get_indices_and_feature_names(
             new_feature_names.append("counts")
         return cluster_idx, summ_idx, new_feature_names
 
-    def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]:
+    def _construct_nodes(self, x: torch.Tensor) -> Data:
         # Cast to Numpy
         x = x.numpy()
         # Construct clusters with percentile-summarized features

From 57571f2976c78109bda5cb701522e8e0c582f8e4 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Tue, 10 Oct 2023 11:14:52 +0200
Subject: [PATCH 09/21] nb_output property

---
 src/graphnet/models/graphs/nodes/nodes.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index 7131788e5..7c1d0d21c 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -61,7 +61,7 @@ def nb_outputs(self) -> int:
 
         This the default, but may be overridden by specific inheriting classes.
         """
-        return self.nb_inputs
+        return len(self._output_feature_names)
 
     @final
     def set_number_of_inputs(self, node_feature_names: List[str]) -> None:
@@ -200,10 +200,3 @@ def _construct_nodes(self, x: torch.Tensor) -> Data:
         )
 
         return Data(x=torch.tensor(array))
-
-    def nb_outputs(self) -> int:
-        """Return number of output features.
-
-        This the default, but may be overridden by specific inheriting classes.
-        """
-        return len(self._output_feature_names)

From a1f6b7e00addd3d6de31c1f99fb9f4615f9d483b Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Tue, 10 Oct 2023 12:15:28 +0200
Subject: [PATCH 10/21] add unit test of node definition

---
 src/graphnet/models/graphs/nodes/nodes.py | 20 ++++--
 tests/models/test_node_definition.py      | 80 +++++++++++++++++++++++
 2 files changed, 93 insertions(+), 7 deletions(-)
 create mode 100644 tests/models/test_node_definition.py

diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index 7c1d0d21c..2f5e0dde8 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -191,12 +191,18 @@ def _construct_nodes(self, x: torch.Tensor) -> Data:
         # Cast to Numpy
         x = x.numpy()
         # Construct clusters with percentile-summarized features
-        array = cluster_summarize_with_percentiles(
-            x=x,
-            summarization_indices=self._summarization_indices,
-            cluster_indices=self._cluster_indices,
-            percentiles=self._percentiles,
-            add_counts=self._add_counts,
-        )
+        if hasattr(self, "_summarization_indices"):
+            array = cluster_summarize_with_percentiles(
+                x=x,
+                summarization_indices=self._summarization_indices,
+                cluster_indices=self._cluster_indices,
+                percentiles=self._percentiles,
+                add_counts=self._add_counts,
+            )
+        else:
+            self.error(
+                f"""{self.__class__.__name__} was not instatiated with `input_feature_names` and has not been set later. Please instantiate this class with `input_feature_names` if you're using it outside `GraphDefinition`."""
+            )  # noqa
+            raise AttributeError
 
         return Data(x=torch.tensor(array))
diff --git a/tests/models/test_node_definition.py b/tests/models/test_node_definition.py
new file mode 100644
index 000000000..4c199abd6
--- /dev/null
+++ b/tests/models/test_node_definition.py
@@ -0,0 +1,80 @@
+"""Unit tests for node definitions."""
+import numpy as np
+import pandas as pd
+import sqlite3
+import torch
+from graphnet.models.graphs.nodes import PercentileClusters
+from graphnet.constants import EXAMPLE_DATA_DIR
+
+
+def test_percentile_cluster() -> None:
+    """Test percentiles outputted by `PercentileClusters`.
+
+    Here we check that it matches percentiles obtained from "traditional" ways.
+    """
+    # definitions
+    percentiles = [0, 10, 50, 90, 100]
+    database = f"{EXAMPLE_DATA_DIR}/sqlite/prometheus/prometheus-events.db"
+    #  Grab first event in database
+    with sqlite3.connect(database) as con:
+        query = "select event_no from mc_truth limit 1"
+        event_no = pd.read_sql(query, con)
+        query = f'select sensor_pos_x, sensor_pos_y, sensor_pos_z, t from total where event_no = {str(event_no["event_no"][0])}'
+        df = pd.read_sql(query, con)
+
+    # Save original feature names, create variables.
+    original_features = list(df.columns)
+    x = np.array(df)
+    tensor = torch.tensor(x)
+
+    # Construct node definition
+    # This defines each DOM as a cluster, and will summarize pulses seen by
+    # DOMs using percentiles.
+    node_definition = PercentileClusters(
+        cluster_on=["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"],
+        percentiles=percentiles,
+        input_feature_names=original_features,
+    )
+
+    # Apply node definition to torch tensor with raw pulses
+    graph, new_features = node_definition(tensor)
+    x_tilde = graph.x.numpy()
+
+    # Calculate percentiles "the normal way" and compare that output of
+    # node definition match.
+
+    unique_doms = (
+        df.groupby(["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"])
+        .size()
+        .reset_index()
+    )
+    for i in range(len(unique_doms)):
+        idx_original = (
+            (df["sensor_pos_x"] == unique_doms["sensor_pos_x"][i])
+            & ((df["sensor_pos_y"] == unique_doms["sensor_pos_y"][i]))
+            & (df["sensor_pos_z"] == unique_doms["sensor_pos_z"][i])
+        )
+        idx_tilde = (
+            (
+                x_tilde[:, new_features.index("sensor_pos_x")]
+                == unique_doms["sensor_pos_x"][i]
+            )
+            & (
+                x_tilde[:, new_features.index("sensor_pos_y")]
+                == unique_doms["sensor_pos_y"][i]
+            )
+            & (
+                x_tilde[:, new_features.index("sensor_pos_z")]
+                == unique_doms["sensor_pos_z"][i]
+            )
+        )
+        for percentile in percentiles:
+            pct_idx = new_features.index(f"t_pct{percentile}")
+            try:
+                assert np.isclose(
+                    x_tilde[idx_tilde, pct_idx],
+                    np.percentile(df.loc[idx_original, "t"], percentile),
+                )
+            except AssertionError as e:
+                print(f"Percentile {percentile} does not match.")
+                raise e

From 1e1ffc850a6c7d372a4570ac54a502441cb7b40d Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Tue, 10 Oct 2023 13:13:52 +0200
Subject: [PATCH 11/21] code-climate

---
 src/graphnet/models/graphs/nodes/nodes.py | 11 ++++++++---
 src/graphnet/models/graphs/utils.py       | 12 +++++++-----
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index 2f5e0dde8..abca07588 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -50,7 +50,8 @@ def forward(self, x: torch.tensor) -> Tuple[Data, List[str]]:
                 f"""{self.__class__.__name__} was instantiated without
                        `input_feature_names` and it was not set prior to this
                        forward call. If you are using this class outside a
-                       `GraphDefinition`, please instatiate with `input_feature_names`."""
+                       `GraphDefinition`, please instatiate
+                       with `input_feature_names`."""
             )  # noqa
             raise e
         return graph, self._output_feature_names
@@ -95,7 +96,8 @@ def _define_output_feature_names(
             input_feature_names: List of column names for the input data.
 
         Returns:
-            A list of column names for each column in the node definition output.
+            A list of column names for each column in
+            the node definition output.
         """
 
     @abstractmethod
@@ -201,7 +203,10 @@ def _construct_nodes(self, x: torch.Tensor) -> Data:
             )
         else:
             self.error(
-                f"""{self.__class__.__name__} was not instatiated with `input_feature_names` and has not been set later. Please instantiate this class with `input_feature_names` if you're using it outside `GraphDefinition`."""
+                f"""{self.__class__.__name__} was not instatiated with
+                `input_feature_names` and has not been set later.
+                Please instantiate this class with `input_feature_names`
+                if you're using it outside `GraphDefinition`."""
             )  # noqa
             raise AttributeError
 
diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 72928befb..ccd861783 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -37,7 +37,7 @@ def gather_cluster_sequence(
 
     **Example**:
     Suppose `x` represents a neutrino event and we have chosen to cluster on
-    the PMT positions. Suppose also that `feature_idx` correspond to pulse time.
+    the PMT positions and that `feature_idx` corresponds to pulse time.
 
     The resulting array will have dimensions `[n_pmts, m + 3]` where `m` is the
     maximum number of same-pmt pulses found in `x`, and `+3`for the three
@@ -45,7 +45,8 @@ def gather_cluster_sequence(
 
     Args:
         x:  Array for clustering
-        feature_idx: Index of the feature in `x` to be gathered for each cluster.
+        feature_idx: Index of the feature in `x` to
+                     be gathered for each cluster.
         cluster_columns: Index in `x` from which to build clusters.
 
     Returns:
@@ -127,9 +128,10 @@ def cluster_summarize_with_percentiles(
 
     Args:
         x: Array to be clustered
-        summarization_indices: List of column indices that defines features that
-                                will be summarized with percentiles.
-        cluster_indices: List of column indices on which the clusters are constructed.
+        summarization_indices: List of column indices that defines features
+                                that will be summarized with percentiles.
+        cluster_indices: List of column indices on which the clusters
+                        are constructed.
         percentiles: percentiles used to summarize `x`. E.g. [10,50,90].
 
     Returns:

From 2f0f21ad018e2cec92ee10faf8a90b4e90ff6afc Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 11:26:35 +0200
Subject: [PATCH 12/21] rename variables

---
 .../models/graphs/graph_definition.py         | 70 ++++++++++---------
 src/graphnet/models/graphs/nodes/nodes.py     |  8 +--
 2 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py
index 6e8d74715..f75f65b98 100644
--- a/src/graphnet/models/graphs/graph_definition.py
+++ b/src/graphnet/models/graphs/graph_definition.py
@@ -26,7 +26,7 @@ def __init__(
         detector: Detector,
         node_definition: NodeDefinition = NodesAsPulses(),
         edge_definition: Optional[EdgeDefinition] = None,
-        node_feature_names: Optional[List[str]] = None,
+        input_feature_names: Optional[List[str]] = None,
         dtype: Optional[torch.dtype] = torch.float,
         perturbation_dict: Optional[Dict[str, float]] = None,
         seed: Optional[Union[int, Generator]] = None,
@@ -44,7 +44,10 @@ def __init__(
             detector: The corresponding ´Detector´ representing the data.
             node_definition: Definition of nodes. Defaults to NodesAsPulses.
             edge_definition: Definition of edges. Defaults to None.
-            node_feature_names: Names of node feature columns. Defaults to None
+            input_feature_names: Names of each column in expected input data
+                that will be built into a graph. If not provided,
+                it is automatically assumed that all features in
+                `Detector` are used.
             dtype: data type used for node features. e.g. ´torch.float´
             perturbation_dict: Dictionary mapping a feature name to a standard
                                deviation according to which the values for this
@@ -62,14 +65,14 @@ def __init__(
         self._node_definition = node_definition
         self._perturbation_dict = perturbation_dict
 
-        if node_feature_names is None:
+        if input_feature_names is None:
             # Assume all features in Detector is used.
-            node_feature_names = list(self._detector.feature_map().keys())  # type: ignore
-        self._node_feature_names = node_feature_names
+            input_feature_names = list(self._detector.feature_map().keys())  # type: ignore
+        self._input_feature_names = input_feature_names
 
         # Set input data column names for node definition
         self._node_definition.set_output_feature_names(
-            self._node_feature_names
+            self._input_feature_names
         )
 
         # Set data type
@@ -77,15 +80,15 @@ def __init__(
 
         # Set Input / Output dimensions
         self._node_definition.set_number_of_inputs(
-            node_feature_names=node_feature_names
+            input_feature_names=input_feature_names
         )
-        self.nb_inputs = len(self._node_feature_names)
+        self.nb_inputs = len(self._input_feature_names)
         self.nb_outputs = self._node_definition.nb_outputs
 
         # Set perturbation_cols if needed
         if isinstance(self._perturbation_dict, dict):
             self._perturbation_cols = [
-                self._node_feature_names.index(key)
+                self._input_feature_names.index(key)
                 for key in self._perturbation_dict.keys()
             ]
         if seed is not None:
@@ -102,8 +105,8 @@ def __init__(
 
     def forward(  # type: ignore
         self,
-        node_features: np.ndarray,
-        node_feature_names: List[str],
+        input_features: np.ndarray,
+        input_feature_names: List[str],
         truth_dicts: Optional[List[Dict[str, Any]]] = None,
         custom_label_functions: Optional[Dict[str, Callable[..., Any]]] = None,
         loss_weight_column: Optional[str] = None,
@@ -114,8 +117,8 @@ def forward(  # type: ignore
         """Construct graph as ´Data´ object.
 
         Args:
-            node_features: node features for graph. Shape ´[num_nodes, d]´
-            node_feature_names: name of each column. Shape ´[,d]´.
+            input_features: Input features for graph construction. Shape ´[num_rows, d]´
+            input_feature_names: name of each column. Shape ´[,d]´.
             truth_dicts: Dictionary containing truth labels.
             custom_label_functions: Custom label functions. See https://github.com/graphnet-team/graphnet/blob/main/GETTING_STARTED.md#adding-custom-truth-labels.
             loss_weight_column: Name of column that holds loss weight.
@@ -131,26 +134,27 @@ def forward(  # type: ignore
         """
         # Checks
         self._validate_input(
-            node_features=node_features, node_feature_names=node_feature_names
+            input_features=input_features,
+            input_feature_names=input_feature_names,
         )
 
         # Gaussian perturbation of each column if perturbation dict is given
-        node_features = self._perturb_input(node_features)
+        input_features = self._perturb_input(input_features)
 
         # Transform to pytorch tensor
-        node_features = torch.tensor(node_features, dtype=self.dtype)
+        input_features = torch.tensor(input_features, dtype=self.dtype)
 
         # Standardize / Scale  node features
-        node_features = self._detector(node_features, node_feature_names)
+        input_features = self._detector(input_features, input_feature_names)
 
         # Create graph & get new node feature names
-        graph, node_feature_names = self._node_definition(node_features)
+        graph, node_feature_names = self._node_definition(input_features)
 
         # Enforce dtype
         graph.x = graph.x.type(self.dtype)
 
         # Attach number of pulses as static attribute.
-        graph.n_pulses = torch.tensor(len(node_features), dtype=torch.int32)
+        graph.n_pulses = torch.tensor(len(input_features), dtype=torch.int32)
 
         # Assign edges
         if self._edge_definition is not None:
@@ -194,26 +198,26 @@ def forward(  # type: ignore
         return graph
 
     def _validate_input(
-        self, node_features: np.array, node_feature_names: List[str]
+        self, input_features: np.array, input_feature_names: List[str]
     ) -> None:
         # node feature matrix dimension check
-        assert node_features.shape[1] == len(node_feature_names)
+        assert input_features.shape[1] == len(input_feature_names)
 
         # check that provided features for input is the same that the ´Graph´
         # was instantiated with.
-        assert len(node_feature_names) == len(
-            self._node_feature_names
-        ), f"""Input features ({node_feature_names}) is not what 
+        assert len(input_feature_names) == len(
+            self._input_feature_names
+        ), f"""Input features ({input_feature_names}) is not what 
                {self.__class__.__name__} was instatiated
-               with ({self._node_feature_names})"""  # noqa
-        for idx in range(len(node_feature_names)):
+               with ({self._input_feature_names})"""  # noqa
+        for idx in range(len(input_feature_names)):
             assert (
-                node_feature_names[idx] == self._node_feature_names[idx]
+                input_feature_names[idx] == self._input_feature_names[idx]
             ), f""" Order of node features in data
-                    are not the same as expected. Got {node_feature_names} 
-                    vs. {self._node_feature_names}"""  # noqa
+                    are not the same as expected. Got {input_feature_names} 
+                    vs. {self._input_feature_names}"""  # noqa
 
-    def _perturb_input(self, node_features: np.ndarray) -> np.ndarray:
+    def _perturb_input(self, input_features: np.ndarray) -> np.ndarray:
         if isinstance(self._perturbation_dict, dict):
             self.warning_once(
                 f"""Will randomly perturb
@@ -221,13 +225,13 @@ def _perturb_input(self, node_features: np.ndarray) -> np.ndarray:
                 using stds {self._perturbation_dict.values()}"""  # noqa
             )
             perturbed_features = self.rng.normal(
-                loc=node_features[:, self._perturbation_cols],
+                loc=input_features[:, self._perturbation_cols],
                 scale=np.array(
                     list(self._perturbation_dict.values()), dtype=float
                 ),
             )
-            node_features[:, self._perturbation_cols] = perturbed_features
-        return node_features
+            input_features[:, self._perturbation_cols] = perturbed_features
+        return input_features
 
     def _add_loss_weights(
         self,
diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index abca07588..fa0400b97 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -65,14 +65,14 @@ def nb_outputs(self) -> int:
         return len(self._output_feature_names)
 
     @final
-    def set_number_of_inputs(self, node_feature_names: List[str]) -> None:
+    def set_number_of_inputs(self, input_feature_names: List[str]) -> None:
         """Return number of inputs expected by node definition.
 
         Args:
-            node_feature_names: name of each node feature column.
+            input_feature_names: name of each input feature column.
         """
-        assert isinstance(node_feature_names, list)
-        self.nb_inputs = len(node_feature_names)
+        assert isinstance(input_feature_names, list)
+        self.nb_inputs = len(input_feature_names)
 
     @final
     def set_output_feature_names(self, input_feature_names: List[str]) -> None:

From d07115cdd1a78ff85673131ebed06fb4351378e6 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 11:36:21 +0200
Subject: [PATCH 13/21] rename

---
 .../dynedge_PID_classification_example.yml    |  2 +-
 ...ynedge_position_custom_scaling_example.yml |  2 +-
 configs/models/dynedge_position_example.yml   | 44 -------------------
 ...example_direction_reconstruction_model.yml |  2 +-
 .../example_energy_reconstruction_model.yml   |  2 +-
 ...e_vertex_position_reconstruction_model.yml |  2 +-
 tests/models/test_graph_definition.py         |  6 +--
 7 files changed, 8 insertions(+), 52 deletions(-)
 delete mode 100644 configs/models/dynedge_position_example.yml

diff --git a/configs/models/dynedge_PID_classification_example.yml b/configs/models/dynedge_PID_classification_example.yml
index 57fec3e88..f9b1509c4 100644
--- a/configs/models/dynedge_PID_classification_example.yml
+++ b/configs/models/dynedge_PID_classification_example.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+        input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: {eps: 0.001, lr: 0.001}
diff --git a/configs/models/dynedge_position_custom_scaling_example.yml b/configs/models/dynedge_position_custom_scaling_example.yml
index 195695a8d..013dab592 100644
--- a/configs/models/dynedge_position_custom_scaling_example.yml
+++ b/configs/models/dynedge_position_custom_scaling_example.yml
@@ -17,7 +17,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: null
+        input_feature_names: null
       class_name: KNNGraph
   gnn:
     ModelConfig:
diff --git a/configs/models/dynedge_position_example.yml b/configs/models/dynedge_position_example.yml
deleted file mode 100644
index c82223825..000000000
--- a/configs/models/dynedge_position_example.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-arguments:
-  coarsening: null
-  detector:
-    ModelConfig:
-      arguments:
-        graph_builder:
-          ModelConfig:
-            arguments: {columns: null, nb_nearest_neighbours: 8}
-            class_name: KNNGraphBuilder
-        scalers: null
-      class_name: IceCubeDeepCore
-  gnn:
-    ModelConfig:
-      arguments:
-        add_global_variables_after_pooling: false
-        dynedge_layer_sizes: null
-        features_subset: null
-        global_pooling_schemes: [min, max, mean, sum]
-        nb_inputs: 7
-        nb_neighbours: 8
-        post_processing_layer_sizes: null
-        readout_layer_sizes: null
-      class_name: DynEdge
-  optimizer_class: '!class torch.optim.adam Adam'
-  optimizer_kwargs: {eps: 0.001, lr: 1e-05}
-  scheduler_class: '!class torch.optim.lr_scheduler ReduceLROnPlateau'
-  scheduler_config: {frequency: 1, monitor: val_loss}
-  scheduler_kwargs: {patience: 5}
-  tasks:
-  - ModelConfig:
-      arguments:
-        hidden_size: 128
-        loss_function:
-          ModelConfig:
-            arguments: {}
-            class_name: MSELoss
-        loss_weight: null
-        target_labels: ["position_x", "position_y", "position_z"]
-        transform_inference: null
-        transform_prediction_and_target: null
-        transform_support: null
-        transform_target: null
-      class_name: PositionReconstruction
-class_name: StandardModel
diff --git a/configs/models/example_direction_reconstruction_model.yml b/configs/models/example_direction_reconstruction_model.yml
index cb1c4d841..faf168ed5 100644
--- a/configs/models/example_direction_reconstruction_model.yml
+++ b/configs/models/example_direction_reconstruction_model.yml
@@ -13,7 +13,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+        input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
       class_name: KNNGraph
   gnn:
     ModelConfig:
diff --git a/configs/models/example_energy_reconstruction_model.yml b/configs/models/example_energy_reconstruction_model.yml
index 827c84748..5983ef799 100644
--- a/configs/models/example_energy_reconstruction_model.yml
+++ b/configs/models/example_energy_reconstruction_model.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+        input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: {eps: 0.001, lr: 0.001}
diff --git a/configs/models/example_vertex_position_reconstruction_model.yml b/configs/models/example_vertex_position_reconstruction_model.yml
index 0522a1f2d..ce0a993c4 100644
--- a/configs/models/example_vertex_position_reconstruction_model.yml
+++ b/configs/models/example_vertex_position_reconstruction_model.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+        input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: {eps: 0.001, lr: 0.001}
diff --git a/tests/models/test_graph_definition.py b/tests/models/test_graph_definition.py
index bf16d7853..ec6c75e24 100644
--- a/tests/models/test_graph_definition.py
+++ b/tests/models/test_graph_definition.py
@@ -27,7 +27,7 @@ def test_graph_definition() -> None:
         detector=Prometheus(), perturbation_dict=perturbation_dict, seed=seed
     )
     original_output = graph_definition(
-        node_features=deepcopy(mock_data), node_feature_names=features
+        input_features=deepcopy(mock_data), input_feature_names=features
     )
 
     for _ in range(n_reps):
@@ -42,11 +42,11 @@ def test_graph_definition() -> None:
         )
 
         data = graph_definition(
-            node_features=deepcopy(mock_data), node_feature_names=features
+            input_features=deepcopy(mock_data), input_feature_names=features
         )
 
         perturbed_data = graph_definition_perturbed(
-            node_features=deepcopy(mock_data), node_feature_names=features
+            input_features=deepcopy(mock_data), input_feature_names=features
         )
 
         assert ~torch.equal(data.x, perturbed_data.x)  # should not be equal.

From b67ba0865b70ab8f320bedecbe6141dc1d98a42d Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 11:41:51 +0200
Subject: [PATCH 14/21] rename

---
 tests/training/test_dataloader_utilities.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/training/test_dataloader_utilities.py b/tests/training/test_dataloader_utilities.py
index 0fdaccf60..423b2f34b 100644
--- a/tests/training/test_dataloader_utilities.py
+++ b/tests/training/test_dataloader_utilities.py
@@ -22,7 +22,7 @@
     detector=IceCubeDeepCore(),
     node_definition=NodesAsPulses(),
     nb_nearest_neighbours=8,
-    node_feature_names=FEATURES.DEEPCORE,
+    input_feature_names=FEATURES.DEEPCORE,
 )
 
 

From 1049765f5ca5bc638208a84406702bb6d9e989c4 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 11:43:45 +0200
Subject: [PATCH 15/21] update pretrained configs

---
 .../SplitInIcePulses_cleaner_config.yml                         | 2 +-
 .../QUESO/neutrino_direction/neutrino_direction_config.yml      | 2 +-
 .../neutrino_vs_muon_classifier_config.yml                      | 2 +-
 .../upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml    | 2 +-
 .../total_neutrino_energy/total_neutrino_energy_config.yml      | 2 +-
 .../track_vs_cascade_classifier_config.yml                      | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml
index 281bda2f4..a13f11aa2 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml
@@ -19,7 +19,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: null
+        input_feature_names: null
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml
index 6cabc6985..b42e1fef8 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: null
+        input_feature_names: null
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml
index 3c0c7510a..326617c00 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: null
+        input_feature_names: null
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml
index fee57a531..c54f6ec5b 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: null
+        input_feature_names: null
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml
index 16d9ddde5..a35c0203a 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: null
+        input_feature_names: null
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml
index a49c60a22..5e88b510a 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        node_feature_names: null
+        input_feature_names: null
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null

From 1d190267975408121fbda9eb62c2a39ad1b83e97 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 11:49:22 +0200
Subject: [PATCH 16/21] rename arg in KNNGraph

---
 src/graphnet/models/graphs/graphs.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/graphnet/models/graphs/graphs.py b/src/graphnet/models/graphs/graphs.py
index 4ae53037a..bd52eaeae 100644
--- a/src/graphnet/models/graphs/graphs.py
+++ b/src/graphnet/models/graphs/graphs.py
@@ -17,7 +17,7 @@ def __init__(
         self,
         detector: Detector,
         node_definition: NodeDefinition = NodesAsPulses(),
-        node_feature_names: Optional[List[str]] = None,
+        input_feature_names: Optional[List[str]] = None,
         dtype: Optional[torch.dtype] = torch.float,
         perturbation_dict: Optional[Dict[str, float]] = None,
         seed: Optional[Union[int, Generator]] = None,
@@ -29,7 +29,7 @@ def __init__(
         Args:
             detector: Detector that represents your data.
             node_definition: Definition of nodes in the graph.
-            node_feature_names: Name of node features.
+            input_feature_names: Name of input feature columns.
             dtype: data type for node features.
             perturbation_dict: Dictionary mapping a feature name to a standard
                                deviation according to which the values for this
@@ -50,7 +50,7 @@ def __init__(
                 columns=columns,
             ),
             dtype=dtype,
-            node_feature_names=node_feature_names,
+            input_feature_names=input_feature_names,
             perturbation_dict=perturbation_dict,
             seed=seed,
         )

From 170c2b31171ed40001dd8d59ef96259252f948c7 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 12:12:23 +0200
Subject: [PATCH 17/21] arg update in dataset

---
 src/graphnet/data/dataset/dataset.py | 4 ++--
 tests/models/test_task.py            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/graphnet/data/dataset/dataset.py b/src/graphnet/data/dataset/dataset.py
index 4253788a8..0988c5793 100644
--- a/src/graphnet/data/dataset/dataset.py
+++ b/src/graphnet/data/dataset/dataset.py
@@ -629,8 +629,8 @@ def _create_graph(
         # Construct graph data object
         assert self._graph_definition is not None
         graph = self._graph_definition(
-            node_features=node_features,
-            node_feature_names=self._features[
+            input_features=node_features,
+            input_feature_names=self._features[
                 1:
             ],  # first entry is index column
             truth_dicts=truth_dicts,
diff --git a/tests/models/test_task.py b/tests/models/test_task.py
index 68e014f33..bfadb6263 100644
--- a/tests/models/test_task.py
+++ b/tests/models/test_task.py
@@ -18,7 +18,7 @@ def test_transform_prediction_and_target() -> None:
         detector=IceCube86(),
         node_definition=NodesAsPulses(),
         nb_nearest_neighbours=8,
-        node_feature_names=FEATURES.DEEPCORE,
+        input_feature_names=FEATURES.DEEPCORE,
     )
     gnn = DynEdge(
         nb_inputs=graph_definition.nb_outputs,

From fd279974622748057506b34821e8170742c00876 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 12:24:03 +0200
Subject: [PATCH 18/21] update args

---
 examples/02_data/04_ensemble_dataset.py        | 2 +-
 examples/05_pisa/02_make_pipeline_database.py  | 2 +-
 tests/data/test_dataconverters_and_datasets.py | 6 +++---
 tests/utilities/test_model_config.py           | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/02_data/04_ensemble_dataset.py b/examples/02_data/04_ensemble_dataset.py
index f1cc9de68..4ade95de6 100644
--- a/examples/02_data/04_ensemble_dataset.py
+++ b/examples/02_data/04_ensemble_dataset.py
@@ -24,7 +24,7 @@
     detector=IceCubeDeepCore(),
     node_definition=NodesAsPulses(),
     nb_nearest_neighbours=8,
-    node_feature_names=features,
+    input_feature_names=features,
 )
 
 
diff --git a/examples/05_pisa/02_make_pipeline_database.py b/examples/05_pisa/02_make_pipeline_database.py
index 17e86646d..722b997f3 100644
--- a/examples/05_pisa/02_make_pipeline_database.py
+++ b/examples/05_pisa/02_make_pipeline_database.py
@@ -65,7 +65,7 @@ def main() -> None:
         detector=IceCubeDeepCore(),
         node_definition=NodesAsPulses(),
         nb_nearest_neighbours=8,
-        node_feature_names=FEATURES.DEEPCORE,
+        input_feature_names=FEATURES.DEEPCORE,
     )
 
     # Remove `interaction_time` if it exists
diff --git a/tests/data/test_dataconverters_and_datasets.py b/tests/data/test_dataconverters_and_datasets.py
index 64fcd85c6..480f11d4d 100644
--- a/tests/data/test_dataconverters_and_datasets.py
+++ b/tests/data/test_dataconverters_and_datasets.py
@@ -115,7 +115,7 @@ def test_dataset(backend: str) -> None:
         detector=IceCubeDeepCore(),
         node_definition=NodesAsPulses(),
         nb_nearest_neighbours=8,
-        node_feature_names=FEATURES.DEEPCORE,
+        input_feature_names=FEATURES.DEEPCORE,
     )
 
     # Constructor DataConverter instance
@@ -168,7 +168,7 @@ def test_datasetquery_table(backend: str) -> None:
         detector=IceCubeDeepCore(),
         node_definition=NodesAsPulses(),
         nb_nearest_neighbours=8,
-        node_feature_names=FEATURES.DEEPCORE,
+        input_feature_names=FEATURES.DEEPCORE,
     )
     # Constructor DataConverter instance
     pulsemap = "SRTInIcePulses"
@@ -220,7 +220,7 @@ def test_parquet_to_sqlite_converter() -> None:
         detector=IceCubeDeepCore(),
         node_definition=NodesAsPulses(),
         nb_nearest_neighbours=8,
-        node_feature_names=FEATURES.DEEPCORE,
+        input_feature_names=FEATURES.DEEPCORE,
     )
     # Perform conversion from I3 to `backend`
     database_name = FILE_NAME + "_from_parquet"
diff --git a/tests/utilities/test_model_config.py b/tests/utilities/test_model_config.py
index 8979f0255..59eb6343a 100644
--- a/tests/utilities/test_model_config.py
+++ b/tests/utilities/test_model_config.py
@@ -49,7 +49,7 @@ def test_complete_model_config(path: str = "/tmp/complete_model.yml") -> None:
         detector=IceCubeDeepCore(),
         node_definition=NodesAsPulses(),
         nb_nearest_neighbours=8,
-        node_feature_names=FEATURES.DEEPCORE,
+        input_feature_names=FEATURES.DEEPCORE,
     )
     gnn = DynEdge(
         nb_inputs=graph_definition.nb_outputs,

From 7c921c3cb8ea25c0de3a93de44e7e5bbcc0316c3 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 12:40:39 +0200
Subject: [PATCH 19/21] update configs

---
 configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml     | 2 +-
 configs/datasets/test_data_sqlite.yml                           | 2 +-
 .../datasets/training_classification_example_data_sqlite.yml    | 2 +-
 configs/datasets/training_example_data_parquet.yml              | 2 +-
 configs/datasets/training_example_data_sqlite.yml               | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml b/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml
index d70de5294..523f4fa90 100644
--- a/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml
+++ b/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml
@@ -10,7 +10,7 @@ graph_definition:
     node_definition:
       arguments: {}
       class_name: NodesAsPulses
-    node_feature_names: [dom_x, dom_y, dom_z, dom_time, charge, rde, pmt_area]
+    input_feature_names: [dom_x, dom_y, dom_z, dom_time, charge, rde, pmt_area]
   class_name: KNNGraph
 pulsemaps:
   - SRTTWOfflinePulsesDC
diff --git a/configs/datasets/test_data_sqlite.yml b/configs/datasets/test_data_sqlite.yml
index 689e8af31..11ea4496d 100644
--- a/configs/datasets/test_data_sqlite.yml
+++ b/configs/datasets/test_data_sqlite.yml
@@ -10,7 +10,7 @@ graph_definition:
     node_definition:
       arguments: {}
       class_name: NodesAsPulses
-    node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+    input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
   class_name: KNNGraph
 index_column: event_no
 loss_weight_column: null
diff --git a/configs/datasets/training_classification_example_data_sqlite.yml b/configs/datasets/training_classification_example_data_sqlite.yml
index ae94420ee..3a13f8749 100644
--- a/configs/datasets/training_classification_example_data_sqlite.yml
+++ b/configs/datasets/training_classification_example_data_sqlite.yml
@@ -10,7 +10,7 @@ graph_definition:
     node_definition:
       arguments: {}
       class_name: NodesAsPulses
-    node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+    input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
   class_name: KNNGraph
 pulsemaps:
   - total
diff --git a/configs/datasets/training_example_data_parquet.yml b/configs/datasets/training_example_data_parquet.yml
index d8bde7e30..67abca0c4 100644
--- a/configs/datasets/training_example_data_parquet.yml
+++ b/configs/datasets/training_example_data_parquet.yml
@@ -10,7 +10,7 @@ graph_definition:
     node_definition:
       arguments: {}
       class_name: NodesAsPulses
-    node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+    input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
   class_name: KNNGraph
 pulsemaps:
   - total
diff --git a/configs/datasets/training_example_data_sqlite.yml b/configs/datasets/training_example_data_sqlite.yml
index b33a0ee0c..20c4aa8c0 100644
--- a/configs/datasets/training_example_data_sqlite.yml
+++ b/configs/datasets/training_example_data_sqlite.yml
@@ -10,7 +10,7 @@ graph_definition:
     node_definition:
       arguments: {}
       class_name: NodesAsPulses
-    node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+    input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
   class_name: KNNGraph
 pulsemaps:
   - total

From aaa8dc62d32ae9e32edb447b8e12c9e6f4adfb0e Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 13:03:48 +0200
Subject: [PATCH 20/21] update args in i3modules

---
 src/graphnet/deployment/i3modules/graphnet_module.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/graphnet/deployment/i3modules/graphnet_module.py b/src/graphnet/deployment/i3modules/graphnet_module.py
index 2c85600a3..dee0973b8 100644
--- a/src/graphnet/deployment/i3modules/graphnet_module.py
+++ b/src/graphnet/deployment/i3modules/graphnet_module.py
@@ -84,12 +84,12 @@ def _make_graph(
     ) -> Data:  # py-l-i-n-t-:- -d-i-s-able=invalid-name
         """Process Physics I3Frame into graph."""
         # Extract features
-        node_features = self._extract_feature_array_from_frame(frame)
+        input_features = self._extract_feature_array_from_frame(frame)
         # Prepare graph data
-        if len(node_features) > 0:
+        if len(input_features) > 0:
             data = self._graph_definition(
-                node_features=node_features,
-                node_feature_names=self._features,
+                input_features=input_features,
+                input_feature_names=self._features,
             )
             return Batch.from_data_list([data])
         else:

From 928c221a0060e4798e38f5783dca357f3be7d39d Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Wed, 18 Oct 2023 13:13:40 +0200
Subject: [PATCH 21/21] update args dataset config test

---
 tests/utilities/test_dataset_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utilities/test_dataset_config.py b/tests/utilities/test_dataset_config.py
index 5f7de5b6a..ca906d659 100644
--- a/tests/utilities/test_dataset_config.py
+++ b/tests/utilities/test_dataset_config.py
@@ -30,7 +30,7 @@
     detector=IceCubeDeepCore(),
     node_definition=NodesAsPulses(),
     nb_nearest_neighbours=8,
-    node_feature_names=FEATURES.DEEPCORE,
+    input_feature_names=FEATURES.DEEPCORE,
 )