From 84f73f528fbe8c0e133819f2b8369ccf60b2d555 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 14:32:57 +0200 Subject: [PATCH 01/21] copy-paste of code --- src/graphnet/models/graphs/nodes/nodes.py | 114 ++++++++++++++++++++-- 1 file changed, 107 insertions(+), 7 deletions(-) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index ce539ee80..8966d2891 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -1,6 +1,6 @@ """Class(es) for building/connecting graphs.""" -from typing import List +from typing import List, Tuple from abc import abstractmethod import torch @@ -8,6 +8,11 @@ from graphnet.utilities.decorators import final from graphnet.models import Model +from graphnet.models.graphs.utils import ( + cluster_summarize_with_percentiles, + identify_indices, +) +from copy import deepcopy class NodeDefinition(Model): # pylint: disable=too-few-public-methods @@ -19,18 +24,24 @@ def __init__(self) -> None: super().__init__(name=__name__, class_name=self.__class__.__name__) @final - def forward(self, x: torch.tensor) -> Data: + def forward( + self, x: torch.tensor, node_feature_names: List[str] + ) -> Tuple[Data, List[str]]: """Construct nodes from raw node features. Args: x: standardized node features with shape ´[num_pulses, d]´, where ´d´ is the number of node features. + node_feature_names: list of names for each column in ´x´. Returns: graph: a graph without edges + new_features_name: List of new feature names. """ - graph = self._construct_nodes(x) - return graph + graph, new_feature_names = self._construct_nodes( + x=x, feature_names=node_feature_names + ) + return graph, new_feature_names @property def nb_outputs(self) -> int: @@ -51,20 +62,109 @@ def set_number_of_inputs(self, node_feature_names: List[str]) -> None: self.nb_inputs = len(node_feature_names) @abstractmethod - def _construct_nodes(self, x: torch.tensor) -> Data: + def _construct_nodes( + self, x: torch.tensor, feature_names: List[str] + ) -> Data: """Construct nodes from raw node features ´x´. Args: x: standardized node features with shape ´[num_pulses, d]´, where ´d´ is the number of node features. + feature_names: List of names for reach column in `x`. Identical + order of appearance. Length `d`. Returns: graph: graph without edges. + new_node_features: A list of node features names. """ class NodesAsPulses(NodeDefinition): """Represent each measured pulse of Cherenkov Radiation as a node.""" - def _construct_nodes(self, x: torch.Tensor) -> Data: - return Data(x=x) + def _construct_nodes( + self, x: torch.Tensor, feature_names: List[str] + ) -> Data: + return Data(x=x), feature_names + + +class PercentileClusters(NodeDefinition): + """Represent nodes as clusters with percentile summary node features. + + If `cluster_on` is set to the xyz coordinates of DOMs + e.g. `cluster_on = ['dom_x', 'dom_y', 'dom_z']`, each node will be a + unique DOM and the pulse information (charge, time) is summarized using + percentiles. + """ + + def __init__( + self, + cluster_on: List[str], + feature_names: List[str], + percentiles: List[int], + add_counts: bool = True, + ) -> None: + """Construct `PercentileClusters`. + + Args: + cluster_on: Names of features to create clusters from. + feature_names: List of colum names for the input data. + E.g. ['dom_x', 'dom_y', 'dom_z',..] + percentiles: List of percentiles. E.g. `[10, 50, 90]`. + add_counts: If True, number of duplicates is added to output array. + """ + self._cluster_on = cluster_on + self._percentiles = percentiles + self._add_counts = add_counts + ( + cluster_idx, + summ_idx, + new_feature_names, + ) = self._get_indices_and_feature_names( + feature_names, self._add_counts + ) + self._cluster_indices = cluster_idx + self._summarization_indices = summ_idx + self._output_feature_names = new_feature_names + # Base class constructor + super().__init__() + + def _get_indices_and_feature_names( + self, + feature_names: List[str], + add_counts: bool, + ) -> Tuple[List[int], List[int], List[str]]: + cluster_idx, summ_idx, summ_names = identify_indices( + feature_names, self._cluster_on + ) + new_feature_names = deepcopy(self._cluster_on) + for feature in summ_names: + for pct in self._percentiles: + new_feature_names.append(f"{feature}_pct{pct}") + if add_counts: + # add "counts" as the last feature + new_feature_names.append("counts") + return cluster_idx, summ_idx, new_feature_names + + def _construct_nodes( + self, x: torch.Tensor, feature_names: List[str] + ) -> Data: + # Cast to Numpy + x = x.numpy() + # Construct clusters with percentile-summarized features + array = cluster_summarize_with_percentiles( + x=x, + summarization_indices=self._summarization_indices, + cluster_indices=self._cluster_indices, + percentiles=self._percentiles, + add_counts=self._add_counts, + ) + + return Data(x=torch.tensor(array)), self._output_feature_names + + def nb_outputs(self) -> int: + """Return number of output features. + + This the default, but may be overridden by specific inheriting classes. + """ + return len(self._output_feature_names) From 776e3001548ea8df5c6240800610c83eebc42e8b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 14:34:43 +0200 Subject: [PATCH 02/21] copy-paste --- src/graphnet/models/graphs/graph_definition.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 9c4db4d47..f311340b9 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -139,7 +139,10 @@ def forward( # type: ignore node_features = self._detector(node_features, node_feature_names) # Create graph - graph = self._node_definition(node_features) + graph, node_feature_names = self._node_definition( + node_features, node_feature_names + ) + graph.x = graph.x.type(self.dtype) # Attach number of pulses as static attribute. graph.n_pulses = torch.tensor(len(node_features), dtype=torch.int32) From f8577a47ee519b47eafb090ce0cab5ec5687fadb Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 14:36:59 +0200 Subject: [PATCH 03/21] add comment --- src/graphnet/models/graphs/graph_definition.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index f311340b9..8bd25759e 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -138,10 +138,10 @@ def forward( # type: ignore # Standardize / Scale node features node_features = self._detector(node_features, node_feature_names) - # Create graph - graph, node_feature_names = self._node_definition( - node_features, node_feature_names - ) + # Create graph & get new node feature names + graph, node_feature_names = self._node_definition(node_features) + + # Enforce dtype graph.x = graph.x.type(self.dtype) # Attach number of pulses as static attribute. From 27d0b3ac36e1bd33cf9f78f33231769fdbab5d96 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 16:19:20 +0200 Subject: [PATCH 04/21] introduce set function, refactor --- .../models/graphs/graph_definition.py | 5 + src/graphnet/models/graphs/nodes/nodes.py | 91 +++++++++++++------ 2 files changed, 70 insertions(+), 26 deletions(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 8bd25759e..6e8d74715 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -67,6 +67,11 @@ def __init__( node_feature_names = list(self._detector.feature_map().keys()) # type: ignore self._node_feature_names = node_feature_names + # Set input data column names for node definition + self._node_definition.set_output_feature_names( + self._node_feature_names + ) + # Set data type self.to(dtype) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 8966d2891..477751934 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -1,6 +1,6 @@ """Class(es) for building/connecting graphs.""" -from typing import List, Tuple +from typing import List, Tuple, Optional from abc import abstractmethod import torch @@ -18,15 +18,19 @@ class NodeDefinition(Model): # pylint: disable=too-few-public-methods """Base class for graph building.""" - def __init__(self) -> None: + def __init__( + self, input_feature_names: Optional[List[str]] = None + ) -> None: """Construct `Detector`.""" # Base class constructor super().__init__(name=__name__, class_name=self.__class__.__name__) + if input_feature_names is not None: + self.set_output_feature_names( + input_feature_names=input_feature_names + ) @final - def forward( - self, x: torch.tensor, node_feature_names: List[str] - ) -> Tuple[Data, List[str]]: + def forward(self, x: torch.tensor) -> Tuple[Data, List[str]]: """Construct nodes from raw node features. Args: @@ -38,10 +42,18 @@ def forward( graph: a graph without edges new_features_name: List of new feature names. """ - graph, new_feature_names = self._construct_nodes( - x=x, feature_names=node_feature_names - ) - return graph, new_feature_names + graph = self._construct_nodes(x=x) + try: + self._output_feature_names + except AttributeError as e: + self.error( + f"""{self.__class__.__name__} was instantiated without + `input_feature_names` and it was not set prior to this + forward call. If you are using this class outside a + `GraphDefinition`, please instatiate with `input_feature_names`.""" + ) # noqa + raise e + return graph, self._output_feature_names @property def nb_outputs(self) -> int: @@ -61,10 +73,33 @@ def set_number_of_inputs(self, node_feature_names: List[str]) -> None: assert isinstance(node_feature_names, list) self.nb_inputs = len(node_feature_names) + @final + def set_output_feature_names(self, input_feature_names: List[str]) -> None: + """Set output features names as a member variable. + + Args: + input_feature_names: List of column names of the input to the + node definition. + """ + self._output_feature_names = self._define_output_feature_names( + input_feature_names + ) + @abstractmethod - def _construct_nodes( - self, x: torch.tensor, feature_names: List[str] - ) -> Data: + def _define_output_feature_names( + self, input_feature_names: List[str] + ) -> List[str]: + """Construct names of output columns. + + Args: + input_feature_names: List of column names for the input data. + + Returns: + A list of column names for each column in the node definition output. + """ + + @abstractmethod + def _construct_nodes(self, x: torch.tensor) -> Tuple[Data, List[str]]: """Construct nodes from raw node features ´x´. Args: @@ -82,10 +117,13 @@ def _construct_nodes( class NodesAsPulses(NodeDefinition): """Represent each measured pulse of Cherenkov Radiation as a node.""" - def _construct_nodes( - self, x: torch.Tensor, feature_names: List[str] - ) -> Data: - return Data(x=x), feature_names + def _define_output_feature_names( + self, input_feature_names: List[str] + ) -> List[str]: + return input_feature_names + + def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: + return Data(x=x) class PercentileClusters(NodeDefinition): @@ -100,34 +138,37 @@ class PercentileClusters(NodeDefinition): def __init__( self, cluster_on: List[str], - feature_names: List[str], percentiles: List[int], add_counts: bool = True, + input_feature_names: Optional[List[str]] = None, ) -> None: """Construct `PercentileClusters`. Args: cluster_on: Names of features to create clusters from. - feature_names: List of colum names for the input data. - E.g. ['dom_x', 'dom_y', 'dom_z',..] percentiles: List of percentiles. E.g. `[10, 50, 90]`. add_counts: If True, number of duplicates is added to output array. + input_feature_names: (Optional) column names for input features. """ self._cluster_on = cluster_on self._percentiles = percentiles self._add_counts = add_counts + # Base class constructor + super().__init__(input_feature_names=input_feature_names) + + def _define_output_feature_names( + self, input_feature_names: List[str] + ) -> List[str]: ( cluster_idx, summ_idx, new_feature_names, ) = self._get_indices_and_feature_names( - feature_names, self._add_counts + input_feature_names, self._add_counts ) self._cluster_indices = cluster_idx self._summarization_indices = summ_idx - self._output_feature_names = new_feature_names - # Base class constructor - super().__init__() + return new_feature_names def _get_indices_and_feature_names( self, @@ -146,9 +187,7 @@ def _get_indices_and_feature_names( new_feature_names.append("counts") return cluster_idx, summ_idx, new_feature_names - def _construct_nodes( - self, x: torch.Tensor, feature_names: List[str] - ) -> Data: + def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: # Cast to Numpy x = x.numpy() # Construct clusters with percentile-summarized features From d41af7d0dc0e0078c1a5309b5d53f41e06f7a5c5 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 16:20:42 +0200 Subject: [PATCH 05/21] copy-paste utils --- src/graphnet/models/graphs/utils.py | 158 ++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 src/graphnet/models/graphs/utils.py diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py new file mode 100644 index 000000000..72928befb --- /dev/null +++ b/src/graphnet/models/graphs/utils.py @@ -0,0 +1,158 @@ +"""Utility functions for construction of graphs.""" + +from typing import List, Tuple +import numpy as np + + +def lex_sort(x: np.array, cluster_columns: List[int]) -> np.ndarray: + """Sort numpy arrays according to columns on ´cluster_columns´. + + Note that `x` is sorted along the dimensions in `cluster_columns` + backwards. I.e. `cluster_columns = [0,1,2]` + means `x` is sorted along `[2,1,0]`. + + Args: + x: array to be sorted. + cluster_columns: Columns of `x` to be sorted along. + + Returns: + A sorted version of `x`. + """ + tmp_list = [] + for cluster_column in cluster_columns: + tmp_list.append(x[:, cluster_column]) + return x[np.lexsort(tuple(tmp_list)), :] + + +def gather_cluster_sequence( + x: np.ndarray, feature_idx: int, cluster_columns: List[int] +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Turn `x` into rows of clusters with sequences along columns. + + Sequences along columns are added which correspond to + gathered sequences of the feature in `x` specified by column index + `feature_idx` associated with each column. Sequences are padded with NaN to + be of same length. Dimension of clustered array is `[n_clusters, l + + len(cluster_columns)]`,where l is the largest sequence length. + + **Example**: + Suppose `x` represents a neutrino event and we have chosen to cluster on + the PMT positions. Suppose also that `feature_idx` correspond to pulse time. + + The resulting array will have dimensions `[n_pmts, m + 3]` where `m` is the + maximum number of same-pmt pulses found in `x`, and `+3`for the three + spatial directions defining each cluster. + + Args: + x: Array for clustering + feature_idx: Index of the feature in `x` to be gathered for each cluster. + cluster_columns: Index in `x` from which to build clusters. + + Returns: + array: Array with dimensions `[n_clusters, l + len(cluster_columns)]` + column_offset: Indices of the columns in `array` that defines clusters. + """ + # sort pulses according to cluster columns + x = lex_sort(x=x, cluster_columns=cluster_columns) + + # Calculate clusters and counts + unique_sensors, counts = np.unique( + x[:, cluster_columns], return_counts=True, axis=0 + ) + # sort DOMs and pulse-counts + sort_this = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1) + sort_this = lex_sort(x=sort_this, cluster_columns=cluster_columns) + unique_sensors = sort_this[:, 0 : unique_sensors.shape[1]] + counts = sort_this[:, unique_sensors.shape[1] :].flatten().astype(int) + + # Pad unique sensor columns with NaN's up until the maximum number of + # Same pmt-pulses. Each of padded columns represents a pulse. + pad = np.empty((unique_sensors.shape[0], max(counts))) + pad[:] = np.nan + array = np.concatenate([unique_sensors, pad], axis=1) + column_offset = unique_sensors.shape[1] + + # Construct indices for loop + cumsum = np.zeros(len(np.cumsum(counts)) + 1) + cumsum[0] = 0 + cumsum[1:] = np.cumsum(counts) + cumsum = cumsum.astype(int) + + # Insert pulse attribute in place of NaN. + for k in range(len(counts)): + array[k, column_offset : (column_offset + counts[k])] = x[ + cumsum[k] : cumsum[k + 1], feature_idx + ] + return array, column_offset, counts + + +def identify_indices( + feature_names: List[str], cluster_on: List[str] +) -> Tuple[List[int], List[int], List[str]]: + """Identify indices for clustering and summarization.""" + features_for_summarization = [] + for feature in feature_names: + if feature not in cluster_on: + features_for_summarization.append(feature) + cluster_indices = [feature_names.index(column) for column in cluster_on] + summarization_indices = [ + feature_names.index(column) for column in features_for_summarization + ] + return cluster_indices, summarization_indices, features_for_summarization + + +def cluster_summarize_with_percentiles( + x: np.ndarray, + summarization_indices: List[int], + cluster_indices: List[int], + percentiles: List[int], + add_counts: bool, +) -> np.ndarray: + """Turn `x` into clusters with percentile summary. + + From variables specified by column indices `cluster_indices`, `x` is turned + into clusters. Information in columns of `x` specified by indices + `summarization_indices` with each cluster is summarized using percentiles. + It is assumed `x` represents a single event. + + **Example use-case**: + Suppose `x` contains raw pulses from a neutrino event where some DOMs have + multiple measurements of Cherenkov radiation. If `cluster_indices` is set + to the columns corresponding to the xyz-position of the DOMs, and the + features specified in `summarization_indices` correspond to time, charge, + then each row in the returned array will correspond to a DOM, + and the time and charge for each DOM will be summarized by percentiles. + Returned output array has dimensions + `[n_clusters, len(percentiles)*len(summarization_indices) + len(cluster_indices)]` + + Args: + x: Array to be clustered + summarization_indices: List of column indices that defines features that + will be summarized with percentiles. + cluster_indices: List of column indices on which the clusters are constructed. + percentiles: percentiles used to summarize `x`. E.g. [10,50,90]. + + Returns: + Percentile-summarized array + """ + pct_dict = {} + for feature_idx in summarization_indices: + summarized_array, column_offset, counts = gather_cluster_sequence( + x, feature_idx, cluster_indices + ) + pct_dict[feature_idx] = np.nanpercentile( + summarized_array[:, column_offset:], percentiles, axis=1 + ).T + + for i, key in enumerate(pct_dict.keys()): + if i == 0: + array = summarized_array[:, 0:column_offset] + + array = np.concatenate([array, pct_dict[key]], axis=1) + + if add_counts: + array = np.concatenate( + [array, np.log10(counts).reshape(-1, 1)], axis=1 + ) + + return array From d7e9b821762074a7d3865e085bb86352ad2e038a Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 09:00:25 +0200 Subject: [PATCH 06/21] add import statement --- src/graphnet/models/graphs/nodes/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/nodes/__init__.py b/src/graphnet/models/graphs/nodes/__init__.py index 05194b61a..0119d2b98 100644 --- a/src/graphnet/models/graphs/nodes/__init__.py +++ b/src/graphnet/models/graphs/nodes/__init__.py @@ -5,4 +5,4 @@ and their features. """ -from .nodes import NodeDefinition, NodesAsPulses +from .nodes import NodeDefinition, NodesAsPulses, PercentileClusters From a6010331b2089e8aba7b6b02171c5fe3f26412b3 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 10:45:59 +0200 Subject: [PATCH 07/21] fix output of construct_nodes --- src/graphnet/models/graphs/nodes/nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 477751934..b31857d2f 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -199,7 +199,7 @@ def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: add_counts=self._add_counts, ) - return Data(x=torch.tensor(array)), self._output_feature_names + return Data(x=torch.tensor(array)) def nb_outputs(self) -> int: """Return number of output features. From 4c7e121c82f9e65a14a865dee6b1b65be4c20806 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 10:46:34 +0200 Subject: [PATCH 08/21] type hint --- src/graphnet/models/graphs/nodes/nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index b31857d2f..7131788e5 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -187,7 +187,7 @@ def _get_indices_and_feature_names( new_feature_names.append("counts") return cluster_idx, summ_idx, new_feature_names - def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: + def _construct_nodes(self, x: torch.Tensor) -> Data: # Cast to Numpy x = x.numpy() # Construct clusters with percentile-summarized features From 57571f2976c78109bda5cb701522e8e0c582f8e4 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 11:14:52 +0200 Subject: [PATCH 09/21] nb_output property --- src/graphnet/models/graphs/nodes/nodes.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 7131788e5..7c1d0d21c 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -61,7 +61,7 @@ def nb_outputs(self) -> int: This the default, but may be overridden by specific inheriting classes. """ - return self.nb_inputs + return len(self._output_feature_names) @final def set_number_of_inputs(self, node_feature_names: List[str]) -> None: @@ -200,10 +200,3 @@ def _construct_nodes(self, x: torch.Tensor) -> Data: ) return Data(x=torch.tensor(array)) - - def nb_outputs(self) -> int: - """Return number of output features. - - This the default, but may be overridden by specific inheriting classes. - """ - return len(self._output_feature_names) From a1f6b7e00addd3d6de31c1f99fb9f4615f9d483b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 12:15:28 +0200 Subject: [PATCH 10/21] add unit test of node definition --- src/graphnet/models/graphs/nodes/nodes.py | 20 ++++-- tests/models/test_node_definition.py | 80 +++++++++++++++++++++++ 2 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 tests/models/test_node_definition.py diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 7c1d0d21c..2f5e0dde8 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -191,12 +191,18 @@ def _construct_nodes(self, x: torch.Tensor) -> Data: # Cast to Numpy x = x.numpy() # Construct clusters with percentile-summarized features - array = cluster_summarize_with_percentiles( - x=x, - summarization_indices=self._summarization_indices, - cluster_indices=self._cluster_indices, - percentiles=self._percentiles, - add_counts=self._add_counts, - ) + if hasattr(self, "_summarization_indices"): + array = cluster_summarize_with_percentiles( + x=x, + summarization_indices=self._summarization_indices, + cluster_indices=self._cluster_indices, + percentiles=self._percentiles, + add_counts=self._add_counts, + ) + else: + self.error( + f"""{self.__class__.__name__} was not instatiated with `input_feature_names` and has not been set later. Please instantiate this class with `input_feature_names` if you're using it outside `GraphDefinition`.""" + ) # noqa + raise AttributeError return Data(x=torch.tensor(array)) diff --git a/tests/models/test_node_definition.py b/tests/models/test_node_definition.py new file mode 100644 index 000000000..4c199abd6 --- /dev/null +++ b/tests/models/test_node_definition.py @@ -0,0 +1,80 @@ +"""Unit tests for node definitions.""" +import numpy as np +import pandas as pd +import sqlite3 +import torch +from graphnet.models.graphs.nodes import PercentileClusters +from graphnet.constants import EXAMPLE_DATA_DIR + + +def test_percentile_cluster() -> None: + """Test that percentiles outputted by PercentileCluster. + + Here we check that it matches percentiles obtained from "traditional" ways. + """ + # definitions + percentiles = [0, 10, 50, 90, 100] + database = f"{EXAMPLE_DATA_DIR}/sqlite/prometheus/prometheus-events.db" + # Grab first event in database + with sqlite3.connect(database) as con: + query = "select event_no from mc_truth limit 1" + event_no = pd.read_sql(query, con) + query = f'select sensor_pos_x, sensor_pos_y, sensor_pos_z, t from total where event_no = {str(event_no["event_no"][0])}' + df = pd.read_sql(query, con) + + # Save original feature names, create variables. + original_features = list(df.columns) + x = np.array(df) + tensor = torch.tensor(x) + + # Construct node definition + # This defines each DOM as a cluster, and will summarize pulses seen by + # DOMs using percentiles. + node_definition = PercentileClusters( + cluster_on=["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"], + percentiles=percentiles, + input_feature_names=original_features, + ) + + # Apply node definition to torch tensor with raw pulses + graph, new_features = node_definition(tensor) + x_tilde = graph.x.numpy() + + # Calculate percentiles "the normal way" and compare that output of + # node definition match. + + unique_doms = ( + df.groupby(["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"]) + .size() + .reset_index() + ) + for i in range(len(unique_doms)): + idx_original = ( + (df["sensor_pos_x"] == unique_doms["sensor_pos_x"][i]) + & ((df["sensor_pos_y"] == unique_doms["sensor_pos_y"][i])) + & (df["sensor_pos_z"] == unique_doms["sensor_pos_z"][i]) + ) + idx_tilde = ( + ( + x_tilde[:, new_features.index("sensor_pos_x")] + == unique_doms["sensor_pos_x"][i] + ) + & ( + x_tilde[:, new_features.index("sensor_pos_y")] + == unique_doms["sensor_pos_y"][i] + ) + & ( + x_tilde[:, new_features.index("sensor_pos_z")] + == unique_doms["sensor_pos_z"][i] + ) + ) + for percentile in percentiles: + pct_idx = new_features.index(f"t_pct{percentile}") + try: + assert np.isclose( + x_tilde[idx_tilde, pct_idx], + np.percentile(df.loc[idx_original, "t"], percentile), + ) + except AssertionError as e: + print(f"Percentile {percentile} does not match.") + raise e From 1e1ffc850a6c7d372a4570ac54a502441cb7b40d Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 13:13:52 +0200 Subject: [PATCH 11/21] code-climate --- src/graphnet/models/graphs/nodes/nodes.py | 11 ++++++++--- src/graphnet/models/graphs/utils.py | 12 +++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 2f5e0dde8..abca07588 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -50,7 +50,8 @@ def forward(self, x: torch.tensor) -> Tuple[Data, List[str]]: f"""{self.__class__.__name__} was instantiated without `input_feature_names` and it was not set prior to this forward call. If you are using this class outside a - `GraphDefinition`, please instatiate with `input_feature_names`.""" + `GraphDefinition`, please instatiate + with `input_feature_names`.""" ) # noqa raise e return graph, self._output_feature_names @@ -95,7 +96,8 @@ def _define_output_feature_names( input_feature_names: List of column names for the input data. Returns: - A list of column names for each column in the node definition output. + A list of column names for each column in + the node definition output. """ @abstractmethod @@ -201,7 +203,10 @@ def _construct_nodes(self, x: torch.Tensor) -> Data: ) else: self.error( - f"""{self.__class__.__name__} was not instatiated with `input_feature_names` and has not been set later. Please instantiate this class with `input_feature_names` if you're using it outside `GraphDefinition`.""" + f"""{self.__class__.__name__} was not instatiated with + `input_feature_names` and has not been set later. + Please instantiate this class with `input_feature_names` + if you're using it outside `GraphDefinition`.""" ) # noqa raise AttributeError diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py index 72928befb..ccd861783 100644 --- a/src/graphnet/models/graphs/utils.py +++ b/src/graphnet/models/graphs/utils.py @@ -37,7 +37,7 @@ def gather_cluster_sequence( **Example**: Suppose `x` represents a neutrino event and we have chosen to cluster on - the PMT positions. Suppose also that `feature_idx` correspond to pulse time. + the PMT positions and that `feature_idx` correspond to pulse time. The resulting array will have dimensions `[n_pmts, m + 3]` where `m` is the maximum number of same-pmt pulses found in `x`, and `+3`for the three @@ -45,7 +45,8 @@ def gather_cluster_sequence( Args: x: Array for clustering - feature_idx: Index of the feature in `x` to be gathered for each cluster. + feature_idx: Index of the feature in `x` to + be gathered for each cluster. cluster_columns: Index in `x` from which to build clusters. Returns: @@ -127,9 +128,10 @@ def cluster_summarize_with_percentiles( Args: x: Array to be clustered - summarization_indices: List of column indices that defines features that - will be summarized with percentiles. - cluster_indices: List of column indices on which the clusters are constructed. + summarization_indices: List of column indices that defines features + that will be summarized with percentiles. + cluster_indices: List of column indices on which the clusters + are constructed. percentiles: percentiles used to summarize `x`. E.g. [10,50,90]. Returns: From 2f0f21ad018e2cec92ee10faf8a90b4e90ff6afc Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 11:26:35 +0200 Subject: [PATCH 12/21] rename variables --- .../models/graphs/graph_definition.py | 70 ++++++++++--------- src/graphnet/models/graphs/nodes/nodes.py | 8 +-- 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 6e8d74715..f75f65b98 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -26,7 +26,7 @@ def __init__( detector: Detector, node_definition: NodeDefinition = NodesAsPulses(), edge_definition: Optional[EdgeDefinition] = None, - node_feature_names: Optional[List[str]] = None, + input_feature_names: Optional[List[str]] = None, dtype: Optional[torch.dtype] = torch.float, perturbation_dict: Optional[Dict[str, float]] = None, seed: Optional[Union[int, Generator]] = None, @@ -44,7 +44,10 @@ def __init__( detector: The corresponding ´Detector´ representing the data. node_definition: Definition of nodes. Defaults to NodesAsPulses. edge_definition: Definition of edges. Defaults to None. - node_feature_names: Names of node feature columns. Defaults to None + input_feature_names: Names of each column in expected input data + that will be built into a graph. If not provided, + it is automatically assumed that all features in `Detector` is + used. dtype: data type used for node features. e.g. ´torch.float´ perturbation_dict: Dictionary mapping a feature name to a standard deviation according to which the values for this @@ -62,14 +65,14 @@ def __init__( self._node_definition = node_definition self._perturbation_dict = perturbation_dict - if node_feature_names is None: + if input_feature_names is None: # Assume all features in Detector is used. - node_feature_names = list(self._detector.feature_map().keys()) # type: ignore - self._node_feature_names = node_feature_names + input_feature_names = list(self._detector.feature_map().keys()) # type: ignore + self._input_feature_names = input_feature_names # Set input data column names for node definition self._node_definition.set_output_feature_names( - self._node_feature_names + self._input_feature_names ) # Set data type @@ -77,15 +80,15 @@ def __init__( # Set Input / Output dimensions self._node_definition.set_number_of_inputs( - node_feature_names=node_feature_names + input_feature_names=input_feature_names ) - self.nb_inputs = len(self._node_feature_names) + self.nb_inputs = len(self._input_feature_names) self.nb_outputs = self._node_definition.nb_outputs # Set perturbation_cols if needed if isinstance(self._perturbation_dict, dict): self._perturbation_cols = [ - self._node_feature_names.index(key) + self._input_feature_names.index(key) for key in self._perturbation_dict.keys() ] if seed is not None: @@ -102,8 +105,8 @@ def __init__( def forward( # type: ignore self, - node_features: np.ndarray, - node_feature_names: List[str], + input_features: np.ndarray, + input_feature_names: List[str], truth_dicts: Optional[List[Dict[str, Any]]] = None, custom_label_functions: Optional[Dict[str, Callable[..., Any]]] = None, loss_weight_column: Optional[str] = None, @@ -114,8 +117,8 @@ def forward( # type: ignore """Construct graph as ´Data´ object. Args: - node_features: node features for graph. Shape ´[num_nodes, d]´ - node_feature_names: name of each column. Shape ´[,d]´. + input_features: Input features for graph construction. Shape ´[num_rows, d]´ + input_feature_names: name of each column. Shape ´[,d]´. truth_dicts: Dictionary containing truth labels. custom_label_functions: Custom label functions. See https://github.com/graphnet-team/graphnet/blob/main/GETTING_STARTED.md#adding-custom-truth-labels. loss_weight_column: Name of column that holds loss weight. @@ -131,26 +134,27 @@ def forward( # type: ignore """ # Checks self._validate_input( - node_features=node_features, node_feature_names=node_feature_names + input_features=input_features, + input_feature_names=input_feature_names, ) # Gaussian perturbation of each column if perturbation dict is given - node_features = self._perturb_input(node_features) + input_features = self._perturb_input(input_features) # Transform to pytorch tensor - node_features = torch.tensor(node_features, dtype=self.dtype) + input_features = torch.tensor(input_features, dtype=self.dtype) # Standardize / Scale node features - node_features = self._detector(node_features, node_feature_names) + input_features = self._detector(input_features, input_feature_names) # Create graph & get new node feature names - graph, node_feature_names = self._node_definition(node_features) + graph, node_feature_names = self._node_definition(input_features) # Enforce dtype graph.x = graph.x.type(self.dtype) # Attach number of pulses as static attribute. - graph.n_pulses = torch.tensor(len(node_features), dtype=torch.int32) + graph.n_pulses = torch.tensor(len(input_features), dtype=torch.int32) # Assign edges if self._edge_definition is not None: @@ -194,26 +198,26 @@ def forward( # type: ignore return graph def _validate_input( - self, node_features: np.array, node_feature_names: List[str] + self, input_features: np.array, input_feature_names: List[str] ) -> None: # node feature matrix dimension check - assert node_features.shape[1] == len(node_feature_names) + assert input_features.shape[1] == len(input_feature_names) # check that provided features for input is the same that the ´Graph´ # was instantiated with. - assert len(node_feature_names) == len( - self._node_feature_names - ), f"""Input features ({node_feature_names}) is not what + assert len(input_feature_names) == len( + self._input_feature_names + ), f"""Input features ({input_feature_names}) is not what {self.__class__.__name__} was instatiated - with ({self._node_feature_names})""" # noqa - for idx in range(len(node_feature_names)): + with ({self._input_feature_names})""" # noqa + for idx in range(len(input_feature_names)): assert ( - node_feature_names[idx] == self._node_feature_names[idx] + input_feature_names[idx] == self._input_feature_names[idx] ), f""" Order of node features in data - are not the same as expected. Got {node_feature_names} - vs. {self._node_feature_names}""" # noqa + are not the same as expected. Got {input_feature_names} + vs. {self._input_feature_names}""" # noqa - def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: + def _perturb_input(self, input_features: np.ndarray) -> np.ndarray: if isinstance(self._perturbation_dict, dict): self.warning_once( f"""Will randomly perturb @@ -221,13 +225,13 @@ def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: using stds {self._perturbation_dict.values()}""" # noqa ) perturbed_features = self.rng.normal( - loc=node_features[:, self._perturbation_cols], + loc=input_features[:, self._perturbation_cols], scale=np.array( list(self._perturbation_dict.values()), dtype=float ), ) - node_features[:, self._perturbation_cols] = perturbed_features - return node_features + input_features[:, self._perturbation_cols] = perturbed_features + return input_features def _add_loss_weights( self, diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index abca07588..fa0400b97 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -65,14 +65,14 @@ def nb_outputs(self) -> int: return len(self._output_feature_names) @final - def set_number_of_inputs(self, node_feature_names: List[str]) -> None: + def set_number_of_inputs(self, input_feature_names: List[str]) -> None: """Return number of inputs expected by node definition. Args: - node_feature_names: name of each node feature column. + input_feature_names: name of each input feature column. """ - assert isinstance(node_feature_names, list) - self.nb_inputs = len(node_feature_names) + assert isinstance(input_feature_names, list) + self.nb_inputs = len(input_feature_names) @final def set_output_feature_names(self, input_feature_names: List[str]) -> None: From d07115cdd1a78ff85673131ebed06fb4351378e6 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 11:36:21 +0200 Subject: [PATCH 13/21] rename --- .../dynedge_PID_classification_example.yml | 2 +- ...ynedge_position_custom_scaling_example.yml | 2 +- configs/models/dynedge_position_example.yml | 44 ------------------- ...example_direction_reconstruction_model.yml | 2 +- .../example_energy_reconstruction_model.yml | 2 +- ...e_vertex_position_reconstruction_model.yml | 2 +- tests/models/test_graph_definition.py | 6 +-- 7 files changed, 8 insertions(+), 52 deletions(-) delete mode 100644 configs/models/dynedge_position_example.yml diff --git a/configs/models/dynedge_PID_classification_example.yml b/configs/models/dynedge_PID_classification_example.yml index 57fec3e88..f9b1509c4 100644 --- a/configs/models/dynedge_PID_classification_example.yml +++ b/configs/models/dynedge_PID_classification_example.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: {eps: 0.001, lr: 0.001} diff --git a/configs/models/dynedge_position_custom_scaling_example.yml b/configs/models/dynedge_position_custom_scaling_example.yml index 195695a8d..013dab592 100644 --- a/configs/models/dynedge_position_custom_scaling_example.yml +++ b/configs/models/dynedge_position_custom_scaling_example.yml @@ -17,7 +17,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph gnn: ModelConfig: diff --git a/configs/models/dynedge_position_example.yml b/configs/models/dynedge_position_example.yml deleted file mode 100644 index c82223825..000000000 --- a/configs/models/dynedge_position_example.yml +++ /dev/null @@ -1,44 +0,0 @@ -arguments: - coarsening: null - detector: - ModelConfig: - arguments: - graph_builder: - ModelConfig: - arguments: {columns: null, nb_nearest_neighbours: 8} - class_name: KNNGraphBuilder - scalers: null - class_name: IceCubeDeepCore - gnn: - ModelConfig: - arguments: - add_global_variables_after_pooling: false - dynedge_layer_sizes: null - features_subset: null - global_pooling_schemes: [min, max, mean, sum] - nb_inputs: 7 - nb_neighbours: 8 - post_processing_layer_sizes: null - readout_layer_sizes: null - class_name: DynEdge - optimizer_class: '!class torch.optim.adam Adam' - optimizer_kwargs: {eps: 0.001, lr: 1e-05} - scheduler_class: '!class torch.optim.lr_scheduler ReduceLROnPlateau' - scheduler_config: {frequency: 1, monitor: val_loss} - scheduler_kwargs: {patience: 5} - tasks: - - ModelConfig: - arguments: - hidden_size: 128 - loss_function: - ModelConfig: - arguments: {} - class_name: MSELoss - loss_weight: null - target_labels: ["position_x", "position_y", "position_z"] - transform_inference: null - transform_prediction_and_target: null - transform_support: null - transform_target: null - class_name: PositionReconstruction -class_name: StandardModel diff --git a/configs/models/example_direction_reconstruction_model.yml b/configs/models/example_direction_reconstruction_model.yml index cb1c4d841..faf168ed5 100644 --- a/configs/models/example_direction_reconstruction_model.yml +++ b/configs/models/example_direction_reconstruction_model.yml @@ -13,7 +13,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph gnn: ModelConfig: diff --git a/configs/models/example_energy_reconstruction_model.yml b/configs/models/example_energy_reconstruction_model.yml index 827c84748..5983ef799 100644 --- a/configs/models/example_energy_reconstruction_model.yml +++ b/configs/models/example_energy_reconstruction_model.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: {eps: 0.001, lr: 0.001} diff --git a/configs/models/example_vertex_position_reconstruction_model.yml b/configs/models/example_vertex_position_reconstruction_model.yml index 0522a1f2d..ce0a993c4 100644 --- a/configs/models/example_vertex_position_reconstruction_model.yml +++ b/configs/models/example_vertex_position_reconstruction_model.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: {eps: 0.001, lr: 0.001} diff --git a/tests/models/test_graph_definition.py b/tests/models/test_graph_definition.py index bf16d7853..ec6c75e24 100644 --- a/tests/models/test_graph_definition.py +++ b/tests/models/test_graph_definition.py @@ -27,7 +27,7 @@ def test_graph_definition() -> None: detector=Prometheus(), perturbation_dict=perturbation_dict, seed=seed ) original_output = graph_definition( - node_features=deepcopy(mock_data), node_feature_names=features + input_features=deepcopy(mock_data), input_feature_names=features ) for _ in range(n_reps): @@ -42,11 +42,11 @@ def test_graph_definition() -> None: ) data = graph_definition( - node_features=deepcopy(mock_data), node_feature_names=features + input_features=deepcopy(mock_data), input_feature_names=features ) perturbed_data = graph_definition_perturbed( - node_features=deepcopy(mock_data), node_feature_names=features + input_features=deepcopy(mock_data), input_feature_names=features ) assert ~torch.equal(data.x, perturbed_data.x) # should not be equal. From b67ba0865b70ab8f320bedecbe6141dc1d98a42d Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 11:41:51 +0200 Subject: [PATCH 14/21] rename --- tests/training/test_dataloader_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/training/test_dataloader_utilities.py b/tests/training/test_dataloader_utilities.py index 0fdaccf60..423b2f34b 100644 --- a/tests/training/test_dataloader_utilities.py +++ b/tests/training/test_dataloader_utilities.py @@ -22,7 +22,7 @@ detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) From 1049765f5ca5bc638208a84406702bb6d9e989c4 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 11:43:45 +0200 Subject: [PATCH 15/21] update pretrained configs --- .../SplitInIcePulses_cleaner_config.yml | 2 +- .../QUESO/neutrino_direction/neutrino_direction_config.yml | 2 +- .../neutrino_vs_muon_classifier_config.yml | 2 +- .../upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml | 2 +- .../total_neutrino_energy/total_neutrino_energy_config.yml | 2 +- .../track_vs_cascade_classifier_config.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml index 281bda2f4..a13f11aa2 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml @@ -19,7 +19,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml index 6cabc6985..b42e1fef8 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml index 3c0c7510a..326617c00 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml index fee57a531..c54f6ec5b 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml index 16d9ddde5..a35c0203a 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml index a49c60a22..5e88b510a 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null From 1d190267975408121fbda9eb62c2a39ad1b83e97 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 11:49:22 +0200 Subject: [PATCH 16/21] rename arg in KNNGraph --- src/graphnet/models/graphs/graphs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/graphnet/models/graphs/graphs.py b/src/graphnet/models/graphs/graphs.py index 4ae53037a..bd52eaeae 100644 --- a/src/graphnet/models/graphs/graphs.py +++ b/src/graphnet/models/graphs/graphs.py @@ -17,7 +17,7 @@ def __init__( self, detector: Detector, node_definition: NodeDefinition = NodesAsPulses(), - node_feature_names: Optional[List[str]] = None, + input_feature_names: Optional[List[str]] = None, dtype: Optional[torch.dtype] = torch.float, perturbation_dict: Optional[Dict[str, float]] = None, seed: Optional[Union[int, Generator]] = None, @@ -29,7 +29,7 @@ def __init__( Args: detector: Detector that represents your data. node_definition: Definition of nodes in the graph. - node_feature_names: Name of node features. + input_feature_names: Name of input feature columns. dtype: data type for node features. perturbation_dict: Dictionary mapping a feature name to a standard deviation according to which the values for this @@ -50,7 +50,7 @@ def __init__( columns=columns, ), dtype=dtype, - node_feature_names=node_feature_names, + input_feature_names=input_feature_names, perturbation_dict=perturbation_dict, seed=seed, ) From 170c2b31171ed40001dd8d59ef96259252f948c7 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 12:12:23 +0200 Subject: [PATCH 17/21] arg update in dataset --- src/graphnet/data/dataset/dataset.py | 4 ++-- tests/models/test_task.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/graphnet/data/dataset/dataset.py b/src/graphnet/data/dataset/dataset.py index 4253788a8..0988c5793 100644 --- a/src/graphnet/data/dataset/dataset.py +++ b/src/graphnet/data/dataset/dataset.py @@ -629,8 +629,8 @@ def _create_graph( # Construct graph data object assert self._graph_definition is not None graph = self._graph_definition( - node_features=node_features, - node_feature_names=self._features[ + input_features=node_features, + input_feature_names=self._features[ 1: ], # first entry is index column truth_dicts=truth_dicts, diff --git a/tests/models/test_task.py b/tests/models/test_task.py index 68e014f33..bfadb6263 100644 --- a/tests/models/test_task.py +++ b/tests/models/test_task.py @@ -18,7 +18,7 @@ def test_transform_prediction_and_target() -> None: detector=IceCube86(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) gnn = DynEdge( nb_inputs=graph_definition.nb_outputs, From fd279974622748057506b34821e8170742c00876 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 12:24:03 +0200 Subject: [PATCH 18/21] update args --- examples/02_data/04_ensemble_dataset.py | 2 +- examples/05_pisa/02_make_pipeline_database.py | 2 +- tests/data/test_dataconverters_and_datasets.py | 6 +++--- tests/utilities/test_model_config.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/02_data/04_ensemble_dataset.py b/examples/02_data/04_ensemble_dataset.py index f1cc9de68..4ade95de6 100644 --- a/examples/02_data/04_ensemble_dataset.py +++ b/examples/02_data/04_ensemble_dataset.py @@ -24,7 +24,7 @@ detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=features, + input_feature_names=features, ) diff --git a/examples/05_pisa/02_make_pipeline_database.py b/examples/05_pisa/02_make_pipeline_database.py index 17e86646d..722b997f3 100644 --- a/examples/05_pisa/02_make_pipeline_database.py +++ b/examples/05_pisa/02_make_pipeline_database.py @@ -65,7 +65,7 @@ def main() -> None: detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) # Remove `interaction_time` if it exists diff --git a/tests/data/test_dataconverters_and_datasets.py b/tests/data/test_dataconverters_and_datasets.py index 64fcd85c6..480f11d4d 100644 --- a/tests/data/test_dataconverters_and_datasets.py +++ b/tests/data/test_dataconverters_and_datasets.py @@ -115,7 +115,7 @@ def test_dataset(backend: str) -> None: detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) # Constructor DataConverter instance @@ -168,7 +168,7 @@ def test_datasetquery_table(backend: str) -> None: detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) # Constructor DataConverter instance pulsemap = "SRTInIcePulses" @@ -220,7 +220,7 @@ def test_parquet_to_sqlite_converter() -> None: detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) # Perform conversion from I3 to `backend` database_name = FILE_NAME + "_from_parquet" diff --git a/tests/utilities/test_model_config.py b/tests/utilities/test_model_config.py index 8979f0255..59eb6343a 100644 --- a/tests/utilities/test_model_config.py +++ b/tests/utilities/test_model_config.py @@ -49,7 +49,7 @@ def test_complete_model_config(path: str = "/tmp/complete_model.yml") -> None: detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) gnn = DynEdge( nb_inputs=graph_definition.nb_outputs, From 7c921c3cb8ea25c0de3a93de44e7e5bbcc0316c3 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 12:40:39 +0200 Subject: [PATCH 19/21] update configs --- configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml | 2 +- configs/datasets/test_data_sqlite.yml | 2 +- .../datasets/training_classification_example_data_sqlite.yml | 2 +- configs/datasets/training_example_data_parquet.yml | 2 +- configs/datasets/training_example_data_sqlite.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml b/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml index d70de5294..523f4fa90 100644 --- a/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml +++ b/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml @@ -10,7 +10,7 @@ graph_definition: node_definition: arguments: {} class_name: NodesAsPulses - node_feature_names: [dom_x, dom_y, dom_z, dom_time, charge, rde, pmt_area] + input_feature_names: [dom_x, dom_y, dom_z, dom_time, charge, rde, pmt_area] class_name: KNNGraph pulsemaps: - SRTTWOfflinePulsesDC diff --git a/configs/datasets/test_data_sqlite.yml b/configs/datasets/test_data_sqlite.yml index 689e8af31..11ea4496d 100644 --- a/configs/datasets/test_data_sqlite.yml +++ b/configs/datasets/test_data_sqlite.yml @@ -10,7 +10,7 @@ graph_definition: node_definition: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph index_column: event_no loss_weight_column: null diff --git a/configs/datasets/training_classification_example_data_sqlite.yml b/configs/datasets/training_classification_example_data_sqlite.yml index ae94420ee..3a13f8749 100644 --- a/configs/datasets/training_classification_example_data_sqlite.yml +++ b/configs/datasets/training_classification_example_data_sqlite.yml @@ -10,7 +10,7 @@ graph_definition: node_definition: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph pulsemaps: - total diff --git a/configs/datasets/training_example_data_parquet.yml b/configs/datasets/training_example_data_parquet.yml index d8bde7e30..67abca0c4 100644 --- a/configs/datasets/training_example_data_parquet.yml +++ b/configs/datasets/training_example_data_parquet.yml @@ -10,7 +10,7 @@ graph_definition: node_definition: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph pulsemaps: - total diff --git a/configs/datasets/training_example_data_sqlite.yml b/configs/datasets/training_example_data_sqlite.yml index b33a0ee0c..20c4aa8c0 100644 --- a/configs/datasets/training_example_data_sqlite.yml +++ b/configs/datasets/training_example_data_sqlite.yml @@ -10,7 +10,7 @@ graph_definition: node_definition: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph pulsemaps: - total From aaa8dc62d32ae9e32edb447b8e12c9e6f4adfb0e Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 13:03:48 +0200 Subject: [PATCH 20/21] update args in i3modules --- src/graphnet/deployment/i3modules/graphnet_module.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/graphnet/deployment/i3modules/graphnet_module.py b/src/graphnet/deployment/i3modules/graphnet_module.py index 2c85600a3..dee0973b8 100644 --- a/src/graphnet/deployment/i3modules/graphnet_module.py +++ b/src/graphnet/deployment/i3modules/graphnet_module.py @@ -84,12 +84,12 @@ def _make_graph( ) -> Data: # py-l-i-n-t-:- -d-i-s-able=invalid-name """Process Physics I3Frame into graph.""" # Extract features - node_features = self._extract_feature_array_from_frame(frame) + input_features = self._extract_feature_array_from_frame(frame) # Prepare graph data - if len(node_features) > 0: + if len(input_features) > 0: data = self._graph_definition( - node_features=node_features, - node_feature_names=self._features, + input_features=input_features, + input_feature_names=self._features, ) return Batch.from_data_list([data]) else: From 928c221a0060e4798e38f5783dca357f3be7d39d Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 13:13:40 +0200 Subject: [PATCH 21/21] update args dataset config test --- tests/utilities/test_dataset_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utilities/test_dataset_config.py b/tests/utilities/test_dataset_config.py index 5f7de5b6a..ca906d659 100644 --- a/tests/utilities/test_dataset_config.py +++ b/tests/utilities/test_dataset_config.py @@ -30,7 +30,7 @@ detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, )