From 45f885df5fb3604def376ea5863d218dbc5589fb Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Tue, 30 May 2023 12:52:00 +0900 Subject: [PATCH 001/156] optional check of I3Filters in dataconverter --- src/graphnet/data/dataconverter.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index dc0deabd0..3d9304f9f 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -107,6 +107,7 @@ def __init__( workers: int = 1, index_column: str = "event_no", icetray_verbose: int = 0, + I3filters: List[str] = [], ): """Construct DataConverter. @@ -166,6 +167,7 @@ def __init__( self._sequential_batch_pattern = sequential_batch_pattern self._input_file_batch_pattern = input_file_batch_pattern self._workers = workers + self._I3filters = I3filters # Create I3Extractors self._extractors = I3ExtractorCollection(*extractors) @@ -435,6 +437,8 @@ def _extract_data(self, fileset: FileSet) -> List[OrderedDict]: continue if self._skip_frame(frame): continue + if self._filter_mask(frame, self._I3filters): + continue # Try to extract data from I3Frame results = self._extractors(frame) @@ -566,3 +570,17 @@ def _skip_frame(self, frame: "icetray.I3Frame") -> bool: if frame["I3EventHeader"].sub_event_stream == "NullSplit": return True return False + + def _filter_mask( + self, frame: "icetray.I3Frame", I3filters: List[str] + ) -> bool: + """Check if specified condition(s) are met. + + Args: + frame: I3Frame to check. + I3filters: List of I3Filters to check for pass. + """ + for filter in I3filters: + if frame["FilterMask"][filter].condition_passed is False: + return True + return False From 2c344ebbbb82a76ffbc9d883b5a8aace315119fe Mon Sep 17 00:00:00 2001 From: ArturoLlorente Date: Sat, 19 Aug 2023 13:35:12 +0200 Subject: [PATCH 002/156] collate function applied. Standard model to be tested --- src/graphnet/models/standard_model.py | 11 +++++++++-- src/graphnet/training/utils.py | 20 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py index 844f4f55b..9d529718f 100644 --- a/src/graphnet/models/standard_model.py +++ b/src/graphnet/models/standard_model.py @@ -99,10 +99,17 @@ def configure_optimizers(self) -> Dict[str, Any]: def forward(self, data: Data) -> List[Union[Tensor, Data]]: """Forward pass, chaining model components.""" + if isinstance(data, Data): + data = [data] if self._coarsening: data = self._coarsening(data) - data = self._detector(data) - x = self._gnn(data) + x_list = [] + for d in data: + d = self._detector(d) + x = self._gnn(d) + x_list.append(x) + x = torch.cat(x_list, dim=0) + preds = [task(x) for task in self._tasks] return preds diff --git a/src/graphnet/training/utils.py b/src/graphnet/training/utils.py index 52b7634e8..704f6745c 100644 --- a/src/graphnet/training/utils.py +++ b/src/graphnet/training/utils.py @@ -27,6 +27,26 @@ def collate_fn(graphs: List[Data]) -> Batch: return Batch.from_data_list(graphs) +def collator_sequence_buckleting(graphs: List[Data]) -> List[Batch]: + """Remove graphs with less than two DOM hits. + + Should not occur in "production. 
+ """ + graphs = [g for g in graphs if g.n_pulses > 1] + graphs.sort(key=lambda x: x.n_pulses) + batch_list = [] + + splits_end = [0.8, 1] + for minp, maxp in zip([0] + splits_end[:-1], splits_end): + min_idx = int(minp * len(graphs)) + max_idx = int(maxp * len(graphs)) + this_graphs = graphs[min_idx:max_idx] + if len(this_graphs) > 0: + this_batch = Batch.from_data_list(this_graphs) + batch_list.append(this_batch) + return batch_list + + # @TODO: Remove in favour of DataLoader{,.from_dataset_config} def make_dataloader( db: str, From a3b722ede15810d0cf373bfb37826cc37a6d385e Mon Sep 17 00:00:00 2001 From: AMHermansen Date: Wed, 13 Sep 2023 10:41:38 +0200 Subject: [PATCH 003/156] Create MetaClasses to save Model/Dataset configs --- src/graphnet/utilities/config/__init__.py | 4 +- .../utilities/config/dataset_config.py | 50 ++++++++----------- src/graphnet/utilities/config/model_config.py | 47 ++++++++--------- 3 files changed, 45 insertions(+), 56 deletions(-) diff --git a/src/graphnet/utilities/config/__init__.py b/src/graphnet/utilities/config/__init__.py index 5e37c6a00..426eae788 100644 --- a/src/graphnet/utilities/config/__init__.py +++ b/src/graphnet/utilities/config/__init__.py @@ -1,6 +1,6 @@ """Modules for configuration files for use across `graphnet`.""" from .configurable import Configurable -from .dataset_config import DatasetConfig, save_dataset_config -from .model_config import ModelConfig, save_model_config +from .dataset_config import DatasetConfig, DatasetConfigSaverMeta +from .model_config import ModelConfig, ModelConfigSaverMeta from .training_config import TrainingConfig diff --git a/src/graphnet/utilities/config/dataset_config.py b/src/graphnet/utilities/config/dataset_config.py index 34d92fc3c..b48459315 100644 --- a/src/graphnet/utilities/config/dataset_config.py +++ b/src/graphnet/utilities/config/dataset_config.py @@ -178,39 +178,33 @@ def _parse_torch(self, obj: Any) -> Any: return obj -def save_dataset_config(init_fn: Callable) -> Callable: - """Save the arguments to `__init__` functions as member `DatasetConfig`.""" - - def _replace_model_instance_with_config( - obj: Union["Model", Any] - ) -> Union[ModelConfig, Any]: - """Replace `Model` instances in `obj` with their `ModelConfig`.""" - from graphnet.models import Model - import torch +class DatasetConfigSaverMeta(type): + """Metaclass for `DatasetConfig` that saves the config after `__init__`.""" - if isinstance(obj, Model): - return obj.config + def __call__(cls: Any, *args: Any, **kwargs: Any) -> object: + """Catch object construction and save config after `__init__`.""" - if isinstance(obj, torch.dtype): - return obj.__str__() + def _replace_model_instance_with_config( + obj: Union["Model", Any] + ) -> Union[ModelConfig, Any]: + """Replace `Model` instances in `obj` with their `ModelConfig`.""" + from graphnet.models import Model - else: - return obj + if isinstance(obj, Model): + return obj.config + else: + return obj - @wraps(init_fn) - def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: - """Set `DatasetConfig` after calling `init_fn`.""" - # Call wrapped method - ret = init_fn(self, *args, **kwargs) + # Create object + created_obj = super().__call__(*args, **kwargs) # Get all argument values, including defaults - cfg = get_all_argument_values(init_fn, *args, **kwargs) - - # Handle nested `Model`s, etc. 
+ cfg = get_all_argument_values(created_obj.__init__, *args, **kwargs) cfg = traverse_and_apply(cfg, _replace_model_instance_with_config) - # Add `DatasetConfig` as member variables - self._config = DatasetConfig(**cfg) - return ret - - return wrapper + # Store config in + created_obj._config = DatasetConfig( + class_name=str(created_obj.__class__.__name__), + arguments=dict(**cfg), + ) + return created_obj diff --git a/src/graphnet/utilities/config/model_config.py b/src/graphnet/utilities/config/model_config.py index 9c4d21d26..e811a1ab0 100644 --- a/src/graphnet/utilities/config/model_config.py +++ b/src/graphnet/utilities/config/model_config.py @@ -248,38 +248,33 @@ def as_dict(self) -> Dict[str, Dict[str, Any]]: return {self.__class__.__name__: config_dict} -def save_model_config(init_fn: Callable) -> Callable: - """Save the arguments to `__init__` functions as a member `ModelConfig`.""" +class ModelConfigSaverMeta(type): + """Metaclass for saving `ModelConfig` to `Model` instances.""" - def _replace_model_instance_with_config( - obj: Union["Model", Any] - ) -> Union[ModelConfig, Any]: - """Replace `Model` instances in `obj` with their `ModelConfig`.""" - from graphnet.models import Model + def __call__(cls: Any, *args: Any, **kwargs: Any) -> object: + """Catch object construction and save config after `__init__`.""" - if isinstance(obj, Model): - return obj.config - else: - return obj + def _replace_model_instance_with_config( + obj: Union["Model", Any] + ) -> Union[ModelConfig, Any]: + """Replace `Model` instances in `obj` with their `ModelConfig`.""" + from graphnet.models import Model + + if isinstance(obj, Model): + return obj.config + else: + return obj - @wraps(init_fn) - def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: - """Set `ModelConfig` after calling `init_fn`.""" - # Call wrapped method - ret = init_fn(self, *args, **kwargs) + # Create object + created_obj = super().__call__(*args, **kwargs) # Get all argument values, including defaults - cfg = get_all_argument_values(init_fn, *args, **kwargs) - - # Handle nested `Model`s, etc. 
+ cfg = get_all_argument_values(created_obj.__init__, *args, **kwargs) cfg = traverse_and_apply(cfg, _replace_model_instance_with_config) - # Add `ModelConfig` as member variables - self._config = ModelConfig( - class_name=str(self.__class__.__name__), + # Store config in + created_obj._config = ModelConfig( + class_name=str(created_obj.__class__.__name__), arguments=dict(**cfg), ) - - return ret - - return wrapper + return created_obj From 682213caba71602aac2c7b3a1a37924790d5c0ac Mon Sep 17 00:00:00 2001 From: AMHermansen Date: Wed, 13 Sep 2023 11:21:48 +0200 Subject: [PATCH 004/156] Remove usage of save_model_config and save_dataset_config --- src/graphnet/data/dataset/dataset.py | 11 ++++++++--- src/graphnet/models/coarsening.py | 3 --- src/graphnet/models/detector/detector.py | 2 -- src/graphnet/models/gnn/convnet.py | 2 -- src/graphnet/models/gnn/dynedge.py | 2 -- src/graphnet/models/gnn/dynedge_jinst.py | 2 -- src/graphnet/models/gnn/dynedge_kaggle_tito.py | 2 -- src/graphnet/models/gnn/gnn.py | 2 -- src/graphnet/models/graphs/edges/edges.py | 4 ---- src/graphnet/models/graphs/graph_definition.py | 3 --- src/graphnet/models/graphs/graphs.py | 2 -- src/graphnet/models/graphs/nodes/nodes.py | 2 -- src/graphnet/models/model.py | 10 ++++++++-- src/graphnet/models/standard_model.py | 2 -- src/graphnet/models/task/task.py | 3 --- src/graphnet/training/loss_functions.py | 3 --- src/graphnet/utilities/config/__init__.py | 14 ++++++++++++-- src/graphnet/utilities/config/dataset_config.py | 14 +++++++++++++- src/graphnet/utilities/config/model_config.py | 13 +++++++++++++ 19 files changed, 54 insertions(+), 42 deletions(-) diff --git a/src/graphnet/data/dataset/dataset.py b/src/graphnet/data/dataset/dataset.py index c1f785bc9..c1e4aad16 100644 --- a/src/graphnet/data/dataset/dataset.py +++ b/src/graphnet/data/dataset/dataset.py @@ -27,7 +27,7 @@ from graphnet.utilities.config import ( Configurable, DatasetConfig, - save_dataset_config, + DatasetConfigSaverABCMeta, ) from graphnet.utilities.config.parsing import traverse_and_apply from graphnet.utilities.logging import Logger @@ -85,7 +85,13 @@ def parse_graph_definition(cfg: dict) -> GraphDefinition: return graph_definition -class Dataset(Logger, Configurable, torch.utils.data.Dataset, ABC): +class Dataset( + Logger, + Configurable, + torch.utils.data.Dataset, + ABC, + metaclass=DatasetConfigSaverABCMeta, +): """Base Dataset class for reading from any intermediate file format.""" # Class method(s) @@ -188,7 +194,6 @@ def _resolve_graphnet_paths( .replace("${GRAPHNET}", GRAPHNET_ROOT_DIR) ) - @save_dataset_config def __init__( self, path: Union[str, List[str]], diff --git a/src/graphnet/models/coarsening.py b/src/graphnet/models/coarsening.py index 68eab50b9..d40f0c009 100644 --- a/src/graphnet/models/coarsening.py +++ b/src/graphnet/models/coarsening.py @@ -22,7 +22,6 @@ std_pool_x, ) from graphnet.models import Model -from graphnet.utilities.config import save_model_config # Utility method(s) from torch_geometric.utils import degree @@ -63,7 +62,6 @@ class Coarsening(Model): "sum": (sum_pool, sum_pool_x), } - @save_model_config def __init__( self, reduce: str = "avg", @@ -198,7 +196,6 @@ def _add_inc_dict(self, original: Data, pooled: Data) -> Data: class AttributeCoarsening(Coarsening): """Coarsen pulses based on specified attributes.""" - @save_model_config def __init__( self, attributes: List[str], diff --git a/src/graphnet/models/detector/detector.py b/src/graphnet/models/detector/detector.py index e1b1cc6ef..a7fb25f1d 100644 --- 
a/src/graphnet/models/detector/detector.py +++ b/src/graphnet/models/detector/detector.py @@ -8,13 +8,11 @@ from graphnet.models import Model from graphnet.utilities.decorators import final -from graphnet.utilities.config import save_model_config class Detector(Model): """Base class for all detector-specific read-ins in graphnet.""" - @save_model_config def __init__(self) -> None: """Construct `Detector`.""" # Base class constructor diff --git a/src/graphnet/models/gnn/convnet.py b/src/graphnet/models/gnn/convnet.py index 9c03c96f7..dcffd0c50 100644 --- a/src/graphnet/models/gnn/convnet.py +++ b/src/graphnet/models/gnn/convnet.py @@ -10,14 +10,12 @@ from torch_geometric.nn import TAGConv, global_add_pool, global_max_pool from torch_geometric.data import Data -from graphnet.utilities.config import save_model_config from graphnet.models.gnn.gnn import GNN class ConvNet(GNN): """ConvNet (convolutional network) model.""" - @save_model_config def __init__( self, nb_inputs: int, diff --git a/src/graphnet/models/gnn/dynedge.py b/src/graphnet/models/gnn/dynedge.py index 4e9e07b65..9ea93f9ce 100644 --- a/src/graphnet/models/gnn/dynedge.py +++ b/src/graphnet/models/gnn/dynedge.py @@ -7,7 +7,6 @@ from torch_scatter import scatter_max, scatter_mean, scatter_min, scatter_sum from graphnet.models.components.layers import DynEdgeConv -from graphnet.utilities.config import save_model_config from graphnet.models.gnn.gnn import GNN from graphnet.models.utils import calculate_xyzt_homophily @@ -22,7 +21,6 @@ class DynEdge(GNN): """DynEdge (dynamical edge convolutional) model.""" - @save_model_config def __init__( self, nb_inputs: int, diff --git a/src/graphnet/models/gnn/dynedge_jinst.py b/src/graphnet/models/gnn/dynedge_jinst.py index 36a0f1303..23c630fa9 100644 --- a/src/graphnet/models/gnn/dynedge_jinst.py +++ b/src/graphnet/models/gnn/dynedge_jinst.py @@ -10,7 +10,6 @@ from torch_scatter import scatter_max, scatter_mean, scatter_min, scatter_sum from graphnet.models.components.layers import DynEdgeConv -from graphnet.utilities.config import save_model_config from graphnet.models.gnn.gnn import GNN from graphnet.models.utils import calculate_xyzt_homophily @@ -18,7 +17,6 @@ class DynEdgeJINST(GNN): """DynEdge (dynamical edge convolutional) model used in [2209.03042].""" - @save_model_config def __init__( self, nb_inputs: int, diff --git a/src/graphnet/models/gnn/dynedge_kaggle_tito.py b/src/graphnet/models/gnn/dynedge_kaggle_tito.py index 2e07a4e72..d3196dd30 100644 --- a/src/graphnet/models/gnn/dynedge_kaggle_tito.py +++ b/src/graphnet/models/gnn/dynedge_kaggle_tito.py @@ -18,7 +18,6 @@ from torch_scatter import scatter_max, scatter_mean, scatter_min, scatter_sum from graphnet.models.components.layers import DynTrans -from graphnet.utilities.config import save_model_config from graphnet.models.gnn.gnn import GNN from graphnet.models.utils import calculate_xyzt_homophily @@ -33,7 +32,6 @@ class DynEdgeTITO(GNN): """DynEdge (dynamical edge convolutional) model.""" - @save_model_config def __init__( self, nb_inputs: int, diff --git a/src/graphnet/models/gnn/gnn.py b/src/graphnet/models/gnn/gnn.py index de155cb4a..5fd933d84 100644 --- a/src/graphnet/models/gnn/gnn.py +++ b/src/graphnet/models/gnn/gnn.py @@ -6,13 +6,11 @@ from torch_geometric.data import Data from graphnet.models import Model -from graphnet.utilities.config import save_model_config class GNN(Model): """Base class for all core GNN models in graphnet.""" - @save_model_config def __init__(self, nb_inputs: int, nb_outputs: int) -> None: 
"""Construct `GNN`.""" # Base class constructor diff --git a/src/graphnet/models/graphs/edges/edges.py b/src/graphnet/models/graphs/edges/edges.py index 28507058b..cb9bf9112 100644 --- a/src/graphnet/models/graphs/edges/edges.py +++ b/src/graphnet/models/graphs/edges/edges.py @@ -7,7 +7,6 @@ from torch_geometric.nn import knn_graph, radius_graph from torch_geometric.data import Data -from graphnet.utilities.config import save_model_config from graphnet.models.utils import calculate_distance_matrix from graphnet.models import Model @@ -48,7 +47,6 @@ def _construct_edges(self, graph: Data) -> Data: class KNNEdges(EdgeDefinition): # pylint: disable=too-few-public-methods """Builds edges from the k-nearest neighbours.""" - @save_model_config def __init__( self, nb_nearest_neighbours: int, @@ -85,7 +83,6 @@ def _construct_edges(self, graph: Data) -> Data: class RadialEdges(EdgeDefinition): """Builds graph from a sphere of chosen radius centred at each node.""" - @save_model_config def __init__( self, radius: float, @@ -126,7 +123,6 @@ class EuclideanEdges(EdgeDefinition): # pylint: disable=too-few-public-methods See https://arxiv.org/pdf/1809.06166.pdf. """ - @save_model_config def __init__( self, sigma: float, diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 48394ab73..6f41f739d 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -11,8 +11,6 @@ from torch_geometric.data import Data import numpy as np -from graphnet.utilities.config import save_model_config - from graphnet.models.detector import Detector from .edges import EdgeDefinition from .nodes import NodeDefinition @@ -22,7 +20,6 @@ class GraphDefinition(Model): """An Abstract class to create graph definitions from.""" - @save_model_config def __init__( self, detector: Detector, diff --git a/src/graphnet/models/graphs/graphs.py b/src/graphnet/models/graphs/graphs.py index dc2ded022..1cae33a5d 100644 --- a/src/graphnet/models/graphs/graphs.py +++ b/src/graphnet/models/graphs/graphs.py @@ -3,7 +3,6 @@ from typing import List, Optional import torch -from graphnet.utilities.config import save_model_config from .graph_definition import GraphDefinition from graphnet.models.detector import Detector from graphnet.models.graphs.edges import EdgeDefinition, KNNEdges @@ -13,7 +12,6 @@ class KNNGraph(GraphDefinition): """A Graph representation where Edges are drawn to nearest neighbours.""" - @save_model_config def __init__( self, detector: Detector, diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 6b3443e0c..ce539ee80 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -7,14 +7,12 @@ from torch_geometric.data import Data from graphnet.utilities.decorators import final -from graphnet.utilities.config import save_model_config from graphnet.models import Model class NodeDefinition(Model): # pylint: disable=too-few-public-methods """Base class for graph building.""" - @save_model_config def __init__(self) -> None: """Construct `Detector`.""" # Base class constructor diff --git a/src/graphnet/models/model.py b/src/graphnet/models/model.py index 193746919..7c34f0152 100644 --- a/src/graphnet/models/model.py +++ b/src/graphnet/models/model.py @@ -18,11 +18,17 @@ from torch_geometric.data import Data from graphnet.utilities.logging import Logger -from graphnet.utilities.config import Configurable, ModelConfig +from 
graphnet.utilities.config import ( + Configurable, + ModelConfig, + ModelConfigSaverABC, +) from graphnet.training.callbacks import ProgressBar -class Model(Logger, Configurable, LightningModule, ABC): +class Model( + Logger, Configurable, LightningModule, ABC, metaclass=ModelConfigSaverABC +): """Base class for all models in graphnet.""" @abstractmethod diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py index 1d439133f..0f4f6895b 100644 --- a/src/graphnet/models/standard_model.py +++ b/src/graphnet/models/standard_model.py @@ -10,7 +10,6 @@ from torch_geometric.data import Data import pandas as pd -from graphnet.utilities.config import save_model_config from graphnet.models.graphs import GraphDefinition from graphnet.models.gnn.gnn import GNN from graphnet.models.model import Model @@ -24,7 +23,6 @@ class StandardModel(Model): model (detector read-in, GNN architecture, and task-specific read-outs). """ - @save_model_config def __init__( self, *, diff --git a/src/graphnet/models/task/task.py b/src/graphnet/models/task/task.py index 094071a7c..0d7379f00 100644 --- a/src/graphnet/models/task/task.py +++ b/src/graphnet/models/task/task.py @@ -15,7 +15,6 @@ from graphnet.training.loss_functions import LossFunction # type: ignore[attr-defined] from graphnet.models import Model -from graphnet.utilities.config import save_model_config from graphnet.utilities.decorators import final @@ -39,7 +38,6 @@ def default_prediction_labels(self) -> List[str]: """Return default prediction labels.""" return self._default_prediction_labels - @save_model_config def __init__( self, *, @@ -264,7 +262,6 @@ def _validate_and_set_transforms( class IdentityTask(Task): """Identity, or trivial, task.""" - @save_model_config def __init__( self, nb_outputs: int, diff --git a/src/graphnet/training/loss_functions.py b/src/graphnet/training/loss_functions.py index 740f0b912..624a5fa53 100644 --- a/src/graphnet/training/loss_functions.py +++ b/src/graphnet/training/loss_functions.py @@ -19,7 +19,6 @@ softplus, ) -from graphnet.utilities.config import save_model_config from graphnet.models.model import Model from graphnet.utilities.decorators import final @@ -27,7 +26,6 @@ class LossFunction(Model): """Base class for loss functions in `graphnet`.""" - @save_model_config def __init__(self, **kwargs: Any) -> None: """Construct `LossFunction`, saving model config.""" super().__init__(**kwargs) @@ -120,7 +118,6 @@ class CrossEntropyLoss(LossFunction): (0, num_classes - 1). 
""" - @save_model_config def __init__( self, options: Union[int, List[Any], Dict[Any, int]], diff --git a/src/graphnet/utilities/config/__init__.py b/src/graphnet/utilities/config/__init__.py index 426eae788..b53feb28e 100644 --- a/src/graphnet/utilities/config/__init__.py +++ b/src/graphnet/utilities/config/__init__.py @@ -1,6 +1,16 @@ """Modules for configuration files for use across `graphnet`.""" from .configurable import Configurable -from .dataset_config import DatasetConfig, DatasetConfigSaverMeta -from .model_config import ModelConfig, ModelConfigSaverMeta +from .dataset_config import ( + DatasetConfig, + DatasetConfigSaverMeta, + DatasetConfigSaverABCMeta, + DatasetConfigSaver, +) +from .model_config import ( + ModelConfig, + ModelConfigSaverMeta, + ModelConfigSaver, + ModelConfigSaverABC, +) from .training_config import TrainingConfig diff --git a/src/graphnet/utilities/config/dataset_config.py b/src/graphnet/utilities/config/dataset_config.py index b48459315..bb2dd1595 100644 --- a/src/graphnet/utilities/config/dataset_config.py +++ b/src/graphnet/utilities/config/dataset_config.py @@ -1,5 +1,5 @@ """Config classes for the `graphnet.data.dataset` module.""" - +from abc import ABCMeta from functools import wraps from typing import ( TYPE_CHECKING, @@ -208,3 +208,15 @@ def _replace_model_instance_with_config( arguments=dict(**cfg), ) return created_obj + + +class DatasetConfigSaverABCMeta(DatasetConfigSaverMeta, ABCMeta): + """Common interface between DatasetConfigSaver and ABC Metaclasses.""" + + pass + + +class DatasetConfigSaver(metaclass=DatasetConfigSaverMeta): + """Baseclass for DatasetConfig saving.""" + + pass diff --git a/src/graphnet/utilities/config/model_config.py b/src/graphnet/utilities/config/model_config.py index e811a1ab0..32978c4e1 100644 --- a/src/graphnet/utilities/config/model_config.py +++ b/src/graphnet/utilities/config/model_config.py @@ -1,4 +1,5 @@ """Config classes for the `graphnet.models` module.""" +from abc import ABCMeta from functools import wraps import inspect import re @@ -278,3 +279,15 @@ def _replace_model_instance_with_config( arguments=dict(**cfg), ) return created_obj + + +class ModelConfigSaverABC(ModelConfigSaverMeta, ABCMeta): + """Common interface between ModelConfigSaver and ABC Metaclasses.""" + + pass + + +class ModelConfigSaver(metaclass=ModelConfigSaverMeta): + """Base class for ModelConfig saving.""" + + pass From e4c06fdf085eb7f011b488650a533ca2773783dc Mon Sep 17 00:00:00 2001 From: AMHermansen Date: Wed, 13 Sep 2023 11:27:53 +0200 Subject: [PATCH 005/156] Remove redundant config saving baseclasses. 
--- src/graphnet/utilities/config/dataset_config.py | 15 +++++---------- src/graphnet/utilities/config/model_config.py | 6 ------ 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/graphnet/utilities/config/dataset_config.py b/src/graphnet/utilities/config/dataset_config.py index bb2dd1595..145776e06 100644 --- a/src/graphnet/utilities/config/dataset_config.py +++ b/src/graphnet/utilities/config/dataset_config.py @@ -189,9 +189,13 @@ def _replace_model_instance_with_config( ) -> Union[ModelConfig, Any]: """Replace `Model` instances in `obj` with their `ModelConfig`.""" from graphnet.models import Model + import torch if isinstance(obj, Model): return obj.config + + if isinstance(obj, torch.dtype): + return obj.__str__() else: return obj @@ -203,10 +207,7 @@ def _replace_model_instance_with_config( cfg = traverse_and_apply(cfg, _replace_model_instance_with_config) # Store config in - created_obj._config = DatasetConfig( - class_name=str(created_obj.__class__.__name__), - arguments=dict(**cfg), - ) + created_obj._config = DatasetConfig(**cfg) return created_obj @@ -214,9 +215,3 @@ class DatasetConfigSaverABCMeta(DatasetConfigSaverMeta, ABCMeta): """Common interface between DatasetConfigSaver and ABC Metaclasses.""" pass - - -class DatasetConfigSaver(metaclass=DatasetConfigSaverMeta): - """Baseclass for DatasetConfig saving.""" - - pass diff --git a/src/graphnet/utilities/config/model_config.py b/src/graphnet/utilities/config/model_config.py index 32978c4e1..3fdc7d307 100644 --- a/src/graphnet/utilities/config/model_config.py +++ b/src/graphnet/utilities/config/model_config.py @@ -285,9 +285,3 @@ class ModelConfigSaverABC(ModelConfigSaverMeta, ABCMeta): """Common interface between ModelConfigSaver and ABC Metaclasses.""" pass - - -class ModelConfigSaver(metaclass=ModelConfigSaverMeta): - """Base class for ModelConfig saving.""" - - pass From 63a87153ed385575bb2cc9f54ac11eced35fcc6d Mon Sep 17 00:00:00 2001 From: AMHermansen Date: Wed, 13 Sep 2023 11:29:35 +0200 Subject: [PATCH 006/156] Remove redundant config saving baseclasses. --- src/graphnet/utilities/config/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/graphnet/utilities/config/__init__.py b/src/graphnet/utilities/config/__init__.py index b53feb28e..15df77a24 100644 --- a/src/graphnet/utilities/config/__init__.py +++ b/src/graphnet/utilities/config/__init__.py @@ -5,12 +5,10 @@ DatasetConfig, DatasetConfigSaverMeta, DatasetConfigSaverABCMeta, - DatasetConfigSaver, ) from .model_config import ( ModelConfig, ModelConfigSaverMeta, - ModelConfigSaver, ModelConfigSaverABC, ) from .training_config import TrainingConfig From 9321927ee658225fb76b556849aac88555764879 Mon Sep 17 00:00:00 2001 From: AMHermansen Date: Thu, 14 Sep 2023 11:17:31 +0200 Subject: [PATCH 007/156] Reintroduce save_model_config and save_dataset_config but added deprecation warning. To not break backwards compatibility. 
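For reference, a pared-down, self-contained sketch of the warning
mechanism used here (toy name `save_config`; the real decorators also
still perform the config capture). Note that the warning fires when the
decorator is applied, i.e. at import/class-definition time, not when
instances are created:

    import warnings
    from functools import wraps

    def save_config(init_fn):
        warnings.warn(
            "`save_config` is deprecated; config saving is now automatic.",
            DeprecationWarning,
        )

        @wraps(init_fn)
        def wrapper(self, *args, **kwargs):
            return init_fn(self, *args, **kwargs)

        return wrapper

Since `DeprecationWarning` is ignored by default outside `__main__`,
downstream users typically need `python -W default` or an explicit
`warnings.simplefilter("default")` to actually see the message.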
--- src/graphnet/utilities/config/__init__.py | 2 + .../utilities/config/dataset_config.py | 46 ++++++++++++++++++- src/graphnet/utilities/config/model_config.py | 45 +++++++++++++++++- 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/src/graphnet/utilities/config/__init__.py b/src/graphnet/utilities/config/__init__.py index 15df77a24..1520ca68d 100644 --- a/src/graphnet/utilities/config/__init__.py +++ b/src/graphnet/utilities/config/__init__.py @@ -5,10 +5,12 @@ DatasetConfig, DatasetConfigSaverMeta, DatasetConfigSaverABCMeta, + save_dataset_config, ) from .model_config import ( ModelConfig, ModelConfigSaverMeta, ModelConfigSaverABC, + save_model_config, ) from .training_config import TrainingConfig diff --git a/src/graphnet/utilities/config/dataset_config.py b/src/graphnet/utilities/config/dataset_config.py index 145776e06..57739b667 100644 --- a/src/graphnet/utilities/config/dataset_config.py +++ b/src/graphnet/utilities/config/dataset_config.py @@ -1,4 +1,5 @@ """Config classes for the `graphnet.data.dataset` module.""" +import warnings from abc import ABCMeta from functools import wraps from typing import ( @@ -178,11 +179,54 @@ def _parse_torch(self, obj: Any) -> Any: return obj +def save_dataset_config(init_fn: Callable) -> Callable: + """Save the arguments to `__init__` functions as member `DatasetConfig`.""" + warnings.warn( + "Warning: `save_dataset_config` is deprecated. Config saving " + "is now done automatically, for all classes inheriting from Dataset", + DeprecationWarning, + ) + + def _replace_model_instance_with_config( + obj: Union["Model", Any] + ) -> Union[ModelConfig, Any]: + """Replace `Model` instances in `obj` with their `ModelConfig`.""" + from graphnet.models import Model + import torch + + if isinstance(obj, Model): + return obj.config + + if isinstance(obj, torch.dtype): + return obj.__str__() + + else: + return obj + + @wraps(init_fn) + def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: + """Set `DatasetConfig` after calling `init_fn`.""" + # Call wrapped method + ret = init_fn(self, *args, **kwargs) + + # Get all argument values, including defaults + cfg = get_all_argument_values(init_fn, *args, **kwargs) + + # Handle nested `Model`s, etc. + cfg = traverse_and_apply(cfg, _replace_model_instance_with_config) + # Add `DatasetConfig` as member variables + self._config = DatasetConfig(**cfg) + + return ret + + return wrapper + + class DatasetConfigSaverMeta(type): """Metaclass for `DatasetConfig` that saves the config after `__init__`.""" def __call__(cls: Any, *args: Any, **kwargs: Any) -> object: - """Catch object construction and save config after `__init__`.""" + """Catch object after construction and save config.""" def _replace_model_instance_with_config( obj: Union["Model", Any] diff --git a/src/graphnet/utilities/config/model_config.py b/src/graphnet/utilities/config/model_config.py index 3fdc7d307..23b4c9b58 100644 --- a/src/graphnet/utilities/config/model_config.py +++ b/src/graphnet/utilities/config/model_config.py @@ -3,6 +3,7 @@ from functools import wraps import inspect import re +import warnings from typing import ( TYPE_CHECKING, Any, @@ -249,6 +250,48 @@ def as_dict(self) -> Dict[str, Dict[str, Any]]: return {self.__class__.__name__: config_dict} +def save_model_config(init_fn: Callable) -> Callable: + """Save the arguments to `__init__` functions as a member `ModelConfig`.""" + warnings.warn( + "Warning: `save_model_config` is deprecated. 
Config saving is" + "now done automatically for all classes inheriting from Model", + DeprecationWarning, + ) + + def _replace_model_instance_with_config( + obj: Union["Model", Any] + ) -> Union[ModelConfig, Any]: + """Replace `Model` instances in `obj` with their `ModelConfig`.""" + from graphnet.models import Model + + if isinstance(obj, Model): + return obj.config + else: + return obj + + @wraps(init_fn) + def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: + """Set `ModelConfig` after calling `init_fn`.""" + # Call wrapped method + ret = init_fn(self, *args, **kwargs) + + # Get all argument values, including defaults + cfg = get_all_argument_values(init_fn, *args, **kwargs) + + # Handle nested `Model`s, etc. + cfg = traverse_and_apply(cfg, _replace_model_instance_with_config) + + # Add `ModelConfig` as member variables + self._config = ModelConfig( + class_name=str(self.__class__.__name__), + arguments=dict(**cfg), + ) + + return ret + + return wrapper + + class ModelConfigSaverMeta(type): """Metaclass for saving `ModelConfig` to `Model` instances.""" @@ -275,7 +318,7 @@ def _replace_model_instance_with_config( # Store config in created_obj._config = ModelConfig( - class_name=str(created_obj.__class__.__name__), + class_name=str(cls.__name__), arguments=dict(**cfg), ) return created_obj From dfbf7d986052d3a21620cf19984adf703052bb9e Mon Sep 17 00:00:00 2001 From: samadpls Date: Sat, 16 Sep 2023 21:38:04 +0500 Subject: [PATCH 008/156] docs: Updated _get_all_indices docstring --- src/graphnet/data/dataset/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/data/dataset/dataset.py b/src/graphnet/data/dataset/dataset.py index c1f785bc9..d6fe1d019 100644 --- a/src/graphnet/data/dataset/dataset.py +++ b/src/graphnet/data/dataset/dataset.py @@ -369,7 +369,7 @@ def _post_init(self) -> None: @abstractmethod def _get_all_indices(self) -> List[int]: - """Return a list of all available values in `self._index_column`.""" + """Return a list of all unique values in `self._index_column`.""" @abstractmethod def _get_event_index( From 4af2872ad67abca7d99b50bfcee5c09892441827 Mon Sep 17 00:00:00 2001 From: amhermansen Date: Tue, 19 Sep 2023 18:00:15 +0200 Subject: [PATCH 009/156] Fixed typehints for make_(train_validation)_dataloader --- src/graphnet/training/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graphnet/training/utils.py b/src/graphnet/training/utils.py index f7d5249f9..1bb2d89e1 100644 --- a/src/graphnet/training/utils.py +++ b/src/graphnet/training/utils.py @@ -32,7 +32,7 @@ def collate_fn(graphs: List[Data]) -> Batch: def make_dataloader( db: str, pulsemaps: Union[str, List[str]], - graph_definition: Optional[GraphDefinition], + graph_definition: GraphDefinition, features: List[str], truth: List[str], *, @@ -92,7 +92,7 @@ def make_dataloader( # @TODO: Remove in favour of DataLoader{,.from_dataset_config} def make_train_validation_dataloader( db: str, - graph_definition: Optional[GraphDefinition], + graph_definition: GraphDefinition, selection: Optional[List[int]], pulsemaps: Union[str, List[str]], features: List[str], From 368b9a8c7c9771a03a8e5d78fa8b13851abc9757 Mon Sep 17 00:00:00 2001 From: amhermansen Date: Tue, 19 Sep 2023 18:30:17 +0200 Subject: [PATCH 010/156] Fixed typehints for make_(train_validation)_dataloader --- src/graphnet/training/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/training/utils.py b/src/graphnet/training/utils.py index 1bb2d89e1..1befa9f77 
100644
--- a/src/graphnet/training/utils.py
+++ b/src/graphnet/training/utils.py
@@ -22,7 +22,7 @@ def collate_fn(graphs: List[Data]) -> Batch:
     """Remove graphs with less than two DOM hits.
 
-    Should not occur in "production.
+    Should not occur in "productio"n.
     """
     graphs = [g for g in graphs if g.n_pulses > 1]
     return Batch.from_data_list(graphs)

From 396eb77a8dd5e90d00f5dfe1c369788c53a17b33 Mon Sep 17 00:00:00 2001
From: Andreas Michael Hermansen <97125645+AMHermansen@users.noreply.github.com>
Date: Tue, 19 Sep 2023 18:36:09 +0200
Subject: [PATCH 011/156] Update utils.py

fixed typo
---
 src/graphnet/training/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graphnet/training/utils.py b/src/graphnet/training/utils.py
index 1befa9f77..ec3b4c461 100644
--- a/src/graphnet/training/utils.py
+++ b/src/graphnet/training/utils.py
@@ -22,7 +22,7 @@ def collate_fn(graphs: List[Data]) -> Batch:
     """Remove graphs with less than two DOM hits.
 
-    Should not occur in "productio"n.
+    Should not occur in "production".
     """
     graphs = [g for g in graphs if g.n_pulses > 1]
     return Batch.from_data_list(graphs)

From 04de8eab7ba2cfad85ac84bf1db4daf4effc91d6 Mon Sep 17 00:00:00 2001
From: ArturoLlorente
Date: Wed, 20 Sep 2023 21:32:44 +0200
Subject: [PATCH 012/156] implemented Data as a List to be compatible with
 sequence bucketing

---
 src/graphnet/models/standard_model.py | 37 +++++++++++++++++++--------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py
index da7c49a1b..f84f75f93 100644
--- a/src/graphnet/models/standard_model.py
+++ b/src/graphnet/models/standard_model.py
@@ -96,13 +96,11 @@ def configure_optimizers(self) -> Dict[str, Any]:
         )
         return config
 
-    def forward(self, data: Data) -> List[Union[Tensor, Data]]:
+    def forward(self, data: List[Data]) -> List[Union[Tensor, Data]]:
         """Forward pass, chaining model components."""
-        if isinstance(data, Data):
-            data = [data]
+
         x_list = []
         for d in data:
-            d = self._detector(d)
             x = self._gnn(d)
             x_list.append(x)
         x = torch.cat(x_list, dim=0)
@@ -110,7 +108,7 @@ def forward(self, data: Data) -> List[Union[Tensor, Data]]:
         preds = [task(x) for task in self._tasks]
         return preds
 
-    def shared_step(self, batch: Data, batch_idx: int) -> Tensor:
+    def shared_step(self, batch: List[Data], batch_idx: int) -> Tensor:
        """Perform shared step.
Applies the forward pass and the following loss calculation, shared @@ -120,8 +118,10 @@ def shared_step(self, batch: Data, batch_idx: int) -> Tensor: loss = self.compute_loss(preds, batch) return loss - def training_step(self, train_batch: Data, batch_idx: int) -> Tensor: + def training_step(self, train_batch: Union[Data,List[Data]], batch_idx: int) -> Tensor: """Perform training step.""" + if isinstance(train_batch, Data): + train_batch = [train_batch] loss = self.shared_step(train_batch, batch_idx) self.log( "train_loss", @@ -134,8 +134,10 @@ def training_step(self, train_batch: Data, batch_idx: int) -> Tensor: ) return loss - def validation_step(self, val_batch: Data, batch_idx: int) -> Tensor: + def validation_step(self, val_batch: Union[Data,List[Data]], batch_idx: int) -> Tensor: """Perform validation step.""" + if isinstance(val_batch, Data): + val_batch = [val_batch] loss = self.shared_step(val_batch, batch_idx) self.log( "val_loss", @@ -149,11 +151,24 @@ def validation_step(self, val_batch: Data, batch_idx: int) -> Tensor: return loss def compute_loss( - self, preds: Tensor, data: Data, verbose: bool = False + self, preds: Tensor, data: List[Data], verbose: bool = False ) -> Tensor: """Compute and sum losses across tasks.""" + + data_merged = {} + target_labels_merged = list(set(self.target_labels)) + for label in target_labels_merged: + data_merged[label] = torch.cat( + [d[label] for d in data], dim=0 + ) + for task in self._tasks: + if task._loss_weight is not None: + data_merged[task._loss_weight] = torch.cat( + [d[task._loss_weight] for d in data], dim=0 + ) + losses = [ - task.compute_loss(pred, data) + task.compute_loss(pred, data_merged) for task, pred in zip(self._tasks, preds) ] if verbose: @@ -163,8 +178,8 @@ def compute_loss( ), "Please reduce loss for each task separately" return torch.sum(torch.stack(losses)) - def _get_batch_size(self, data: Data) -> int: - return torch.numel(torch.unique(data.batch)) + def _get_batch_size(self, data: List[Data]) -> int: + return sum([torch.numel(torch.unique(d.batch)) for d in data]) def inference(self) -> None: """Activate inference mode.""" From 54baac7dbd7ce6e9b30164d3bf8afdbf168adffd Mon Sep 17 00:00:00 2001 From: ArturoLlorente Date: Thu, 21 Sep 2023 13:07:04 +0200 Subject: [PATCH 013/156] solve code quality issues --- src/graphnet/models/standard_model.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py index f84f75f93..ca7fd775b 100644 --- a/src/graphnet/models/standard_model.py +++ b/src/graphnet/models/standard_model.py @@ -98,7 +98,6 @@ def configure_optimizers(self) -> Dict[str, Any]: def forward(self, data: List[Data]) -> List[Union[Tensor, Data]]: """Forward pass, chaining model components.""" - x_list = [] for d in data: x = self._gnn(d) @@ -118,7 +117,9 @@ def shared_step(self, batch: List[Data], batch_idx: int) -> Tensor: loss = self.compute_loss(preds, batch) return loss - def training_step(self, train_batch: Union[Data,List[Data]], batch_idx: int) -> Tensor: + def training_step( + self, train_batch: Union[Data, List[Data]], batch_idx: int + ) -> Tensor: """Perform training step.""" if isinstance(train_batch, Data): train_batch = [train_batch] @@ -134,7 +135,9 @@ def training_step(self, train_batch: Union[Data,List[Data]], batch_idx: int) -> ) return loss - def validation_step(self, val_batch: Union[Data,List[Data]], batch_idx: int) -> Tensor: + def validation_step( + self, val_batch: Union[Data, 
List[Data]], batch_idx: int + ) -> Tensor: """Perform validation step.""" if isinstance(val_batch, Data): val_batch = [val_batch] @@ -154,19 +157,16 @@ def compute_loss( self, preds: Tensor, data: List[Data], verbose: bool = False ) -> Tensor: """Compute and sum losses across tasks.""" - data_merged = {} target_labels_merged = list(set(self.target_labels)) for label in target_labels_merged: - data_merged[label] = torch.cat( - [d[label] for d in data], dim=0 - ) + data_merged[label] = torch.cat([d[label] for d in data], dim=0) for task in self._tasks: if task._loss_weight is not None: data_merged[task._loss_weight] = torch.cat( [d[task._loss_weight] for d in data], dim=0 ) - + losses = [ task.compute_loss(pred, data_merged) for task, pred in zip(self._tasks, preds) From 287c721e4b3db1d8478e00931a08b8f01b261c3f Mon Sep 17 00:00:00 2001 From: ArturoLlorente Date: Thu, 21 Sep 2023 20:20:52 +0200 Subject: [PATCH 014/156] create class for collator_sequence_bucketing to be able to specify the cutting points for the different mini-batches --- src/graphnet/training/utils.py | 38 ++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/graphnet/training/utils.py b/src/graphnet/training/utils.py index 740c9c5dc..2293d44e0 100644 --- a/src/graphnet/training/utils.py +++ b/src/graphnet/training/utils.py @@ -28,24 +28,30 @@ def collate_fn(graphs: List[Data]) -> Batch: return Batch.from_data_list(graphs) -def collator_sequence_buckleting(graphs: List[Data]) -> List[Batch]: - """Remove graphs with less than two DOM hits. +class collator_sequence_buckleting(): + """Perform the sequence bucketing for the graphs in the batch. + + batch_splits: list of floats, each element is the fraction of the total + number of graphs. only the cutting points should be provided, the first + element will be 0 and the last element should be 1. - Should not occur in "production. """ - graphs = [g for g in graphs if g.n_pulses > 1] - graphs.sort(key=lambda x: x.n_pulses) - batch_list = [] - - splits_end = [0.8, 1] - for minp, maxp in zip([0] + splits_end[:-1], splits_end): - min_idx = int(minp * len(graphs)) - max_idx = int(maxp * len(graphs)) - this_graphs = graphs[min_idx:max_idx] - if len(this_graphs) > 0: - this_batch = Batch.from_data_list(this_graphs) - batch_list.append(this_batch) - return batch_list + def __init__(self, batch_splits: List[float] = [0.8]): + self.batch_splits = batch_splits + + def __call__(self, graphs: List[Data]): + graphs = [g for g in graphs if g.n_pulses > 1] + graphs.sort(key=lambda x: x.n_pulses) + batch_list = [] + + for minp, maxp in zip([0] + self.batch_splits, self.batch_splits + [1]): + min_idx = int(minp * len(graphs)) + max_idx = int(maxp * len(graphs)) + this_graphs = graphs[min_idx:max_idx] + if len(this_graphs) > 0: + this_batch = Batch.from_data_list(this_graphs) + batch_list.append(this_batch) + return batch_list # @TODO: Remove in favour of DataLoader{,.from_dataset_config} From 4a347bffad474827177abd8316bf1cabf01699b4 Mon Sep 17 00:00:00 2001 From: Andreas Michael Hermansen <97125645+AMHermansen@users.noreply.github.com> Date: Fri, 22 Sep 2023 08:40:38 +0200 Subject: [PATCH 015/156] Fix warnonce bug Change warnonce to warning_once, which is the correct method. 
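Aside from this one-line fix, the bucketing collator introduced in PATCH
014 above is easiest to understand from a usage sketch. The following toy
example is hypothetical (it assumes `torch` and `torch_geometric` are
installed and uses the `n_pulses` attribute the collator sorts on):

    import torch
    from torch_geometric.data import Data

    from graphnet.training.utils import collator_sequence_buckleting

    # Toy events of increasing length.
    events = []
    for n_pulses in (3, 7, 48, 96):
        event = Data(x=torch.randn(n_pulses, 4))
        event.n_pulses = n_pulses
        events.append(event)

    collate = collator_sequence_buckleting(batch_splits=[0.5])
    batches = collate(events)
    print([b.num_graphs for b in batches])  # [2, 2]

Each call yields a `List[Batch]`: here one mini-batch holding the two
shortest events and one holding the two longest, which is why PATCH 012
changed `StandardModel.forward` to accept a list and concatenate the
per-batch GNN outputs.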
--- src/graphnet/models/graphs/edges/edges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/edges/edges.py b/src/graphnet/models/graphs/edges/edges.py index 28507058b..9f7844375 100644 --- a/src/graphnet/models/graphs/edges/edges.py +++ b/src/graphnet/models/graphs/edges/edges.py @@ -27,7 +27,7 @@ def forward(self, graph: Data) -> Data: graph: a graph with edges """ if graph.edge_index is not None: - self.warnonce( + self.warning_once( "GraphBuilder received graph with pre-existing " "structure. Will overwrite." ) From 431f3de8b54e03736368bbf848677cfb838118ee Mon Sep 17 00:00:00 2001 From: Andreas Michael Hermansen <97125645+AMHermansen@users.noreply.github.com> Date: Fri, 22 Sep 2023 08:42:14 +0200 Subject: [PATCH 016/156] Fix warnonce Change warnonce to warning_once which is the correct method. --- src/graphnet/models/graphs/graph_definition.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 48394ab73..275c56e57 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -116,7 +116,7 @@ def forward( # type: ignore if self._edge_definition is not None: graph = self._edge_definition(graph) else: - self.warnonce( + self.warning_once( "No EdgeDefinition provided. Graphs will not have edges defined!" ) @@ -252,7 +252,7 @@ def _add_features_individually( if feature not in ["x"]: # reserved for node features. graph[feature] = graph.x[:, index].detach() else: - self.warnonce( + self.warning_once( """Cannot assign graph['x']. This field is reserved for node features. Please rename your input feature.""" ) return graph From b846b4067fccbbdc0a3b236fbf750cecf3f76e0a Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 09:39:13 +0200 Subject: [PATCH 017/156] fix example 02-02 --- .../02_data/02_plot_feature_distributions.py | 38 +++---------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/examples/02_data/02_plot_feature_distributions.py b/examples/02_data/02_plot_feature_distributions.py index 88ffef576..b46be0623 100644 --- a/examples/02_data/02_plot_feature_distributions.py +++ b/examples/02_data/02_plot_feature_distributions.py @@ -1,4 +1,4 @@ -"""Example of plotting feature distributions from SQLite database.""" +"""Example of visualization of input data from a configured Dataset.""" import os.path @@ -8,8 +8,6 @@ from graphnet.constants import CONFIG_DIR from graphnet.data.dataset import Dataset -from graphnet.models.detector.icecube import IceCubeDeepCore -from graphnet.models.graph_builders import KNNGraphBuilder from graphnet.utilities.logging import Logger from graphnet.utilities.argparse import ArgumentParser @@ -27,46 +25,20 @@ def main() -> None: assert isinstance(dataset, Dataset) features = dataset._features[1:] - # Building model - detector = IceCubeDeepCore( - graph_builder=KNNGraphBuilder(nb_nearest_neighbours=8), - ) - # Get feature matrix - x_original_list = [] x_preprocessed_list = [] for batch in tqdm(dataset, colour="green"): - x_original_list.append(batch.x.numpy()) - x_preprocessed_list.append(detector(batch).x.numpy()) + x_preprocessed_list.append(batch.x.numpy()) - x_original = np.concatenate(x_original_list, axis=0) x_preprocessed = np.concatenate(x_preprocessed_list, axis=0) - - logger.info(f"Number of NaNs: {np.sum(np.isnan(x_original))}") - logger.info(f"Number of infs: {np.sum(np.isinf(x_original))}") + 
logger.info(f"Number of NaNs: {np.sum(np.isnan(x_preprocessed))}") + logger.info(f"Number of infs: {np.sum(np.isinf(x_preprocessed))}") # Plot feature distributions - nb_features_original = x_original.shape[1] nb_features_preprocessed = x_preprocessed.shape[1] dim = int(np.ceil(np.sqrt(nb_features_preprocessed))) axis_size = 4 - bins = 100 - - # -- Original - fig, axes = plt.subplots( - dim, dim, figsize=(dim * axis_size, dim * axis_size) - ) - for ix, ax in enumerate(axes.ravel()[:nb_features_original]): - ax.hist(x_original[:, ix], bins=bins) - ax.set_xlabel( - f"x{ix}: {features[ix] if ix < len(features) else 'N/A'}" - ) - ax.set_yscale("log") - - fig.tight_layout - figure_name_original = "feature_distribution_original.png" - fig.savefig(figure_name_original) - logger.info(f"Figure written to {figure_name_original}") + bins = 50 # -- Preprocessed fig, axes = plt.subplots( From ae226e0b0fde149b0fad3539f940302a160ecd08 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 10:13:05 +0200 Subject: [PATCH 018/156] default arguments, fix 02-01 --- examples/02_data/01_read_dataset.py | 21 ++++++++++++------- .../models/graphs/graph_definition.py | 6 +++--- src/graphnet/models/graphs/graphs.py | 4 ++-- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/examples/02_data/01_read_dataset.py b/examples/02_data/01_read_dataset.py index 302529050..be913e190 100644 --- a/examples/02_data/01_read_dataset.py +++ b/examples/02_data/01_read_dataset.py @@ -17,7 +17,10 @@ from graphnet.data.dataset import ParquetDataset from graphnet.utilities.argparse import ArgumentParser from graphnet.utilities.logging import Logger - +from graphnet.models.graphs import KNNGraph +from graphnet.models.detector.icecube import ( + IceCubeDeepCore, +) DATASET_CLASS = { "sqlite": SQLiteDataset, @@ -44,6 +47,9 @@ def main(backend: str) -> None: num_workers = 30 wait_time = 0.00 # sec. 
+ # Define graph representation + graph_definition = KNNGraph(detector=IceCubeDeepCore()) + for table in [pulsemap, truth_table]: # Get column names from backend if backend == "sqlite": @@ -62,15 +68,16 @@ def main(backend: str) -> None: # Common variables dataset = DATASET_CLASS[backend]( - path, - pulsemap, - features, - truth, + path=path, + pulsemaps=pulsemap, + features=features, + truth=truth, truth_table=truth_table, + graph_definition=graph_definition, ) assert isinstance(dataset, Dataset) - logger.info(dataset[1]) + logger.info(str(dataset[1])) logger.info(dataset[1].x) if backend == "sqlite": assert isinstance(dataset, SQLiteDataset) @@ -92,7 +99,7 @@ def main(backend: str) -> None: for batch in tqdm(dataloader, unit=" batches", colour="green"): time.sleep(wait_time) - logger.info(batch) + logger.info(str(batch)) logger.info(batch.size()) logger.info(batch.num_graphs) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 6f41f739d..0a87a301e 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -13,7 +13,7 @@ from graphnet.models.detector import Detector from .edges import EdgeDefinition -from .nodes import NodeDefinition +from .nodes import NodeDefinition, NodesAsPulses from graphnet.models import Model @@ -23,7 +23,7 @@ class GraphDefinition(Model): def __init__( self, detector: Detector, - node_definition: NodeDefinition, + node_definition: NodeDefinition = NodesAsPulses(), edge_definition: Optional[EdgeDefinition] = None, node_feature_names: Optional[List[str]] = None, dtype: Optional[torch.dtype] = torch.float, @@ -39,7 +39,7 @@ def __init__( Args: detector: The corresponding ´Detector´ representing the data. - node_definition: Definition of nodes. + node_definition: Definition of nodes. Defaults to NodesAsPulses. edge_definition: Definition of edges. Defaults to None. node_feature_names: Names of node feature columns. Defaults to None dtype: data type used for node features. e.g. 
´torch.float´ diff --git a/src/graphnet/models/graphs/graphs.py b/src/graphnet/models/graphs/graphs.py index 1cae33a5d..a48b33a0d 100644 --- a/src/graphnet/models/graphs/graphs.py +++ b/src/graphnet/models/graphs/graphs.py @@ -6,7 +6,7 @@ from .graph_definition import GraphDefinition from graphnet.models.detector import Detector from graphnet.models.graphs.edges import EdgeDefinition, KNNEdges -from graphnet.models.graphs.nodes import NodeDefinition +from graphnet.models.graphs.nodes import NodeDefinition, NodesAsPulses class KNNGraph(GraphDefinition): @@ -15,7 +15,7 @@ class KNNGraph(GraphDefinition): def __init__( self, detector: Detector, - node_definition: NodeDefinition, + node_definition: NodeDefinition = NodesAsPulses(), node_feature_names: Optional[List[str]] = None, dtype: Optional[torch.dtype] = torch.float, nb_nearest_neighbours: int = 8, From 8e04af206bb6a287fb8ca28dfd377e3de099a386 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 10:46:47 +0200 Subject: [PATCH 019/156] tito_example update --- .../04_training/04_train_tito_model_without_configs.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/examples/04_training/04_train_tito_model_without_configs.py b/examples/04_training/04_train_tito_model_without_configs.py index 858aa45f4..40ab926e3 100644 --- a/examples/04_training/04_train_tito_model_without_configs.py +++ b/examples/04_training/04_train_tito_model_without_configs.py @@ -28,7 +28,6 @@ # Constants features = FEATURES.PROMETHEUS truth = TRUTH.PROMETHEUS -DYNTRANS_LAYER_SIZES = [(256, 256), (256, 256), (256, 256)] def main( @@ -76,12 +75,7 @@ def main( }, } - graph_definition = KNNGraph( - detector=Prometheus(), - node_definition=NodesAsPulses(), - nb_nearest_neighbours=8, - node_feature_names=features, - ) + graph_definition = KNNGraph(detector=Prometheus()) archive = os.path.join(EXAMPLE_OUTPUT_DIR, "train_tito_model") run_name = "dynedgeTITO_{}_example".format(config["target"]) if wandb: @@ -115,7 +109,6 @@ def main( gnn = DynEdgeTITO( nb_inputs=graph_definition.nb_outputs, global_pooling_schemes=["max"], - dyntrans_layer_sizes=DYNTRANS_LAYER_SIZES, ) task = DirectionReconstructionWithKappa( hidden_size=gnn.nb_outputs, From 15a14f7071d8e1b5927c475be4d67999cbef01fa Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 11:33:23 +0200 Subject: [PATCH 020/156] Polish examples --- .../04_training/02_train_model_without_configs.py | 15 ++++++++------- .../04_train_tito_model_without_configs.py | 8 +++++++- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/examples/04_training/02_train_model_without_configs.py b/examples/04_training/02_train_model_without_configs.py index 6d9c5746e..6a8bf2f15 100644 --- a/examples/04_training/02_train_model_without_configs.py +++ b/examples/04_training/02_train_model_without_configs.py @@ -79,12 +79,7 @@ def main( wandb_logger.experiment.config.update(config) # Define graph representation - graph_definition = KNNGraph( - detector=Prometheus(), - node_definition=NodesAsPulses(), - nb_nearest_neighbours=8, - node_feature_names=features, - ) + graph_definition = KNNGraph(detector=Prometheus()) ( training_dataloader, @@ -166,10 +161,16 @@ def main( logger.info(f"Writing results to {path}") os.makedirs(path, exist_ok=True) + # Save results as .csv results.to_csv(f"{path}/results.csv") - model.save_state_dict(f"{path}/state_dict.pth") + + # Save full model (including weights) to .pth file - Not version proof model.save(f"{path}/model.pth") + # Save model config and state dict 
- Version safe save method. + model.save_state_dict(f"{path}/state_dict.pth") + model.save_config(f"{path}/model_config.yml") + if __name__ == "__main__": diff --git a/examples/04_training/04_train_tito_model_without_configs.py b/examples/04_training/04_train_tito_model_without_configs.py index 40ab926e3..60b19d392 100644 --- a/examples/04_training/04_train_tito_model_without_configs.py +++ b/examples/04_training/04_train_tito_model_without_configs.py @@ -175,10 +175,16 @@ def main( logger.info(f"Writing results to {path}") os.makedirs(path, exist_ok=True) + # Save results as .csv results.to_csv(f"{path}/results.csv") - model.save_state_dict(f"{path}/state_dict.pth") + + # Save full model (including weights) to .pth file - Not version proof model.save(f"{path}/model.pth") + # Save model config and state dict - Version safe save method. + model.save_state_dict(f"{path}/state_dict.pth") + model.save_config(f"{path}/model_config.yml") + if __name__ == "__main__": From a11014c220691f8012996198d90fd0d5d474c1fa Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 11:36:06 +0200 Subject: [PATCH 021/156] delete shell script example --- .../04_training/03_train_multiple_models.sh | 62 ------------------- .../04_train_tito_model_without_configs.py | 1 - 2 files changed, 63 deletions(-) delete mode 100644 examples/04_training/03_train_multiple_models.sh diff --git a/examples/04_training/03_train_multiple_models.sh b/examples/04_training/03_train_multiple_models.sh deleted file mode 100644 index 931b574ca..000000000 --- a/examples/04_training/03_train_multiple_models.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash - -#### This script enables the user to run multiple trainings in sequence on the same database but for different model configs. -# To execute this file, copy the file path and write in the terminal; $ bash - - -# execution of bash file in same directory as the script -bash_directory=$(dirname -- "$(readlink -f "${BASH_SOURCE}")") - -## Global; applies to all models -# path to dataset configuration file in the GraphNeT directory -dataset_config=$(realpath "$bash_directory/../../configs/datasets/training_example_data_sqlite.yml") -# what GPU to use; more information can be gained with the module nvitop -gpus=0 -# the maximum number of epochs; if used, this greatly affect learning rate scheduling -max_epochs=5 -# early stopping threshold -early_stopping_patience=5 -# events in a batch -batch_size=16 -# number of CPUs to use -num_workers=2 - -## Model dependent; applies to each model in sequence -# path to model files in the GraphNeT directory -model_directory=$(realpath "$bash_directory/../../configs/models") -# list of model configurations to train -declare -a model_configs=( - "${model_directory}/example_direction_reconstruction_model.yml" - "${model_directory}/example_energy_reconstruction_model.yml" - "${model_directory}/example_vertex_position_reconstruction_model.yml" -) - -# suffix ending on the created directory -declare -a suffixs=( - "direction" - "energy" - "position" -) - -# prediction name outputs per model -declare -a prediction_names=( - "zenith_pred zenith_kappa_pred azimuth_pred azimuth_kappa_pred" - "energy_pred" - "position_x_pred position_y_pred position_z_pred" -) - -for i in "${!model_configs[@]}"; do - echo "training iteration ${i} on ${model_configs[$i]} with output variables ${prediction_names[i][@]}" - python ${bash_directory}/01_train_model.py \ - --dataset-config ${dataset_config} \ - --model-config ${model_configs[$i]} \ - --gpus ${gpus} \ - 
--max-epochs ${max_epochs} \ - --early-stopping-patience ${early_stopping_patience} \ - --batch-size ${batch_size} \ - --num-workers ${num_workers} \ - --prediction-names ${prediction_names[i][@]} \ - --suffix ${suffixs[i]} - wait -done -echo "all trainings are done." \ No newline at end of file diff --git a/examples/04_training/04_train_tito_model_without_configs.py b/examples/04_training/04_train_tito_model_without_configs.py index 60b19d392..ee3d89760 100644 --- a/examples/04_training/04_train_tito_model_without_configs.py +++ b/examples/04_training/04_train_tito_model_without_configs.py @@ -14,7 +14,6 @@ from graphnet.models.detector.prometheus import Prometheus from graphnet.models.gnn import DynEdgeTITO from graphnet.models.graphs import KNNGraph -from graphnet.models.graphs.nodes import NodesAsPulses from graphnet.models.task.reconstruction import ( DirectionReconstructionWithKappa, ) From c840b44180ede370241c5727f48c2920e5a5342b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 11:48:09 +0200 Subject: [PATCH 022/156] rename examples, update readme.md --- ...without_configs.py => 01_train_dynedge.py} | 0 ...hout_configs.py => 02_train_tito_model.py} | 0 ...del.py => 03_train_dynedge_from_config.py} | 7 ++++--- ... 04_train_multiclassifier_from_configs.py} | 2 +- examples/04_training/README.md | 20 +++++++++---------- 5 files changed, 15 insertions(+), 14 deletions(-) rename examples/04_training/{02_train_model_without_configs.py => 01_train_dynedge.py} (100%) rename examples/04_training/{04_train_tito_model_without_configs.py => 02_train_tito_model.py} (100%) rename examples/04_training/{01_train_model.py => 03_train_dynedge_from_config.py} (94%) rename examples/04_training/{03_train_classification_model.py => 04_train_multiclassifier_from_configs.py} (98%) diff --git a/examples/04_training/02_train_model_without_configs.py b/examples/04_training/01_train_dynedge.py similarity index 100% rename from examples/04_training/02_train_model_without_configs.py rename to examples/04_training/01_train_dynedge.py diff --git a/examples/04_training/04_train_tito_model_without_configs.py b/examples/04_training/02_train_tito_model.py similarity index 100% rename from examples/04_training/04_train_tito_model_without_configs.py rename to examples/04_training/02_train_tito_model.py diff --git a/examples/04_training/01_train_model.py b/examples/04_training/03_train_dynedge_from_config.py similarity index 94% rename from examples/04_training/01_train_model.py rename to examples/04_training/03_train_dynedge_from_config.py index 0250f6602..9a6df73dd 100644 --- a/examples/04_training/01_train_model.py +++ b/examples/04_training/03_train_dynedge_from_config.py @@ -1,4 +1,4 @@ -"""Simplified example of training Model.""" +"""Simplified example of training DynEdge from pre-defined config files.""" from typing import List, Optional import os @@ -46,7 +46,7 @@ def main( log_model=True, ) - # Build model + # Build model from pre-defined config file made from Model.save_config model_config = ModelConfig.load(model_config_path) model: StandardModel = StandardModel.from_config(model_config, trust=True) @@ -69,7 +69,8 @@ def main( archive = os.path.join(EXAMPLE_OUTPUT_DIR, "train_model") run_name = "dynedge_{}_example".format("_".join(config.target)) - # Construct dataloaders + # Construct dataloaders from pre-defined dataset config files. + # i.e. 
from Dataset.save_config dataset_config = DatasetConfig.load(dataset_config_path) dataloaders = DataLoader.from_dataset_config( dataset_config, diff --git a/examples/04_training/03_train_classification_model.py b/examples/04_training/04_train_multiclassifier_from_configs.py similarity index 98% rename from examples/04_training/03_train_classification_model.py rename to examples/04_training/04_train_multiclassifier_from_configs.py index 65b1acf2a..6937b01b1 100644 --- a/examples/04_training/03_train_classification_model.py +++ b/examples/04_training/04_train_multiclassifier_from_configs.py @@ -1,4 +1,4 @@ -"""Simplified example of multi-class classification training Model.""" +"""Multi-class classification using DynEdge from pre-defined config files.""" import os from typing import List, Optional, Dict, Any diff --git a/examples/04_training/README.md b/examples/04_training/README.md index e60aef3ae..1849515be 100644 --- a/examples/04_training/README.md +++ b/examples/04_training/README.md @@ -2,44 +2,44 @@ This subfolder contains two main training scripts: -**`01_train_model.py`** Shows how to train a GNN on neutrino telescope data **using configuration files** to construct the dataset that loads the data and the model that is trained. This is the recommended way to configure standard dataset and models, as it is easier to ready and share than doing so in pure code. This example can be run using a few different models targeting different physics use cases. For instance, you can try running: +**`01_train_dynedge.py`** ** Shows how to train a GNN on neutrino telescope data **without configuration files,** i.e., by programatically constructing the dataset and model used. This is good for debugging and experimenting with different dataset settings and model configurations, as it is easier to build the model using the API than by writing configuration files from scratch. **This is our recommended way of getting started with the library**. For instance, try running: ```bash # Show the CLI -(graphnet) $ python examples/04_training/01_train_model.py --help +(graphnet) $ python examples/04_training/01_train_dynedge.py --help # Train energy regression model -(graphnet) $ python examples/04_training/01_train_model.py +(graphnet) $ python examples/04_training/01_train_dynedge.py # Same as above, as this is the default model config. (graphnet) $ python examples/04_training/01_train_model.py \ --model-config configs/models/example_energy_reconstruction_model.yml # Train using a single GPU -(graphnet) $ python examples/04_training/01_train_model.py --gpus 0 +(graphnet) $ python examples/04_training/01_train_dynedge.py --gpus 0 # Train using multiple GPUs -(graphnet) $ python examples/04_training/01_train_model.py --gpus 0 1 +(graphnet) $ python examples/04_training/01_train_dynedge.py --gpus 0 1 # Train a vertex position reconstruction model -(graphnet) $ python examples/04_training/01_train_model.py \ +(graphnet) $ python examples/04_training/01_train_dynedge.py \ --model-config configs/models/example_vertex_position_reconstruction_model.yml # Trains a direction (zenith, azimuth) reconstruction model. Note that the # chosen `Task` in the model config file also returns estimated "kappa" values, # i.e. inverse variance, for each predicted feature, meaning that we need to # manually specify the names of these. 
-(graphnet) $ python examples/04_training/01_train_model.py --gpus 0 \ +(graphnet) $ python examples/04_training/01_train_model_dynedge.py --gpus 0 \ --model-config configs/models/example_direction_reconstruction_model.yml \ --prediction-names zenith_pred zenith_kappa_pred azimuth_pred azimuth_kappa_pred ``` -**`02_train_model_without_configs.py`** Shows how to train a GNN on neutrino telescope data **without configuration files,** i.e., by programatically constructing the dataset and model used. This is good for debugging and experimenting with different dataset settings and model configurations, as it is easier to build the model using the API than by writing configuration files from scratch. For instance, try running: +**`02_train_model_dynedge_from_config.py** Shows how to train a GNN on neutrino telescope data **using configuration files** to construct the dataset that loads the data and the model that is trained. This is the recommended way to configure standard dataset and models, as it is easier to ready and share than doing so in pure code. This example can be run using a few different models targeting different physics use cases. For instance, you can try running: ```bash # Show the CLI -(graphnet) $ python examples/04_training/02_train_model_without_configs.py --help +(graphnet) $ python examples/04_training/02_train_dynedge_from_config.py --help # Train energy regression model -(graphnet) $ python examples/04_training/02_train_model_without_configs.py +(graphnet) $ python examples/04_training/02_train_dynedge_from_config.py ``` From 1fbf534c81bddf146ca4da2fe61d6bee94db50c2 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 13:08:43 +0200 Subject: [PATCH 023/156] Move perturbations to graph_definition --- .../models/graphs/graph_definition.py | 48 ++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 0a87a301e..8ad95ce58 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -6,10 +6,11 @@ """ -from typing import Any, List, Optional, Dict, Callable +from typing import Any, List, Optional, Dict, Callable, Union import torch from torch_geometric.data import Data import numpy as np +from numpy.random import default_rng, Generator from graphnet.models.detector import Detector from .edges import EdgeDefinition @@ -27,6 +28,8 @@ def __init__( edge_definition: Optional[EdgeDefinition] = None, node_feature_names: Optional[List[str]] = None, dtype: Optional[torch.dtype] = torch.float, + perturbation_dict: Optional[Dict[str, float]] = None, + seed: Optional[Union[int, Generator]] = None, ): """Construct ´GraphDefinition´. The ´detector´ holds. @@ -43,6 +46,12 @@ def __init__( edge_definition: Definition of edges. Defaults to None. node_feature_names: Names of node feature columns. Defaults to None dtype: data type used for node features. e.g. ´torch.float´ + perturbation_dict: Dictionary mapping a feature name to a standard + deviation according to which the values for this + feature should be randomly perturbed. Defaults + to None. + seed: seed or Generator used to randomly sample perturbations. + Defaults to None. 
""" # Base class constructor super().__init__(name=__name__, class_name=self.__class__.__name__) @@ -51,6 +60,8 @@ def __init__( self._detector = detector self._edge_definition = edge_definition self._node_definition = node_definition + self._perturbation_dict = perturbation_dict + if node_feature_names is None: # Assume all features in Detector is used. node_feature_names = list(self._detector.feature_map().keys()) # type: ignore @@ -66,6 +77,24 @@ def __init__( self.nb_inputs = len(self._node_feature_names) self.nb_outputs = self._node_definition.nb_outputs + # Set perturbation_cols if needed + if isinstance(self._perturbation_dict, dict): + self._perturbation_cols = [ + self._node_feature_names.index(key) + for key in self._perturbation_dict.keys() + ] + if seed is not None: + if isinstance(seed, int): + self.rng = default_rng(seed) + elif isinstance(seed, Generator): + self.rng = seed + else: + raise ValueError( + "Invalid seed. Must be an int or a numpy Generator." + ) + else: + self.rng = default_rng() + def forward( # type: ignore self, node_features: np.ndarray, @@ -97,6 +126,9 @@ def forward( # type: ignore node_features=node_features, node_feature_names=node_feature_names ) + # Gaussian perturbation of each column if perturbation dict is given + node_features = self._perturb_input(node_features) + # Transform to pytorch tensor node_features = torch.tensor(node_features, dtype=self.dtype) @@ -164,6 +196,20 @@ def _validate_input( node_feature_names[idx] == self._node_feature_names[idx] ), f""" Order of node features in data are not the same as expected. Got {node_feature_names} vs. {self._node_feature_names}""" + def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: + if isinstance(self._perturbation_dict, dict): + self.warning_once( + f"""Will randomly perturb {list(self._perturbation_dict.keys())} using standard diviations {self._perturbation_dict.values}""" + ) + perturbed_features = self.rng.normal( + loc=node_features[:, self._perturbation_cols], + scale=np.array( + list(self._perturbation_dict.values()), dtype=np.float + ), + ) + node_features[:, self._perturbation_cols] = perturbed_features + return node_features + def _add_loss_weights( self, graph: Data, From d4e166a19724bb2b885d234248b87a4eae791184 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 13:58:23 +0200 Subject: [PATCH 024/156] minor adjustments, unit test --- src/graphnet/models/graphs/graph_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 8ad95ce58..01197fc46 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -199,7 +199,7 @@ def _validate_input( def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: if isinstance(self._perturbation_dict, dict): self.warning_once( - f"""Will randomly perturb {list(self._perturbation_dict.keys())} using standard diviations {self._perturbation_dict.values}""" + f"""Will randomly perturb {list(self._perturbation_dict.keys())} using stds {self._perturbation_dict.values}""" ) perturbed_features = self.rng.normal( loc=node_features[:, self._perturbation_cols], From e93f5147d1568484799c36c125f5f4f3434e8f50 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 14:00:33 +0200 Subject: [PATCH 025/156] Unit tests --- .../models/graphs/graph_definition.py | 2 +- src/graphnet/models/graphs/graphs.py | 13 ++++- 
tests/models/test_graph_definition.py | 53 +++++++++++++++++++ 3 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 tests/models/test_graph_definition.py diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 01197fc46..4741be7eb 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -199,7 +199,7 @@ def _validate_input( def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: if isinstance(self._perturbation_dict, dict): self.warning_once( - f"""Will randomly perturb {list(self._perturbation_dict.keys())} using stds {self._perturbation_dict.values}""" + f"""Will randomly perturb {list(self._perturbation_dict.keys())} using stds {self._perturbation_dict.values()}""" ) perturbed_features = self.rng.normal( loc=node_features[:, self._perturbation_cols], diff --git a/src/graphnet/models/graphs/graphs.py b/src/graphnet/models/graphs/graphs.py index a48b33a0d..4ae53037a 100644 --- a/src/graphnet/models/graphs/graphs.py +++ b/src/graphnet/models/graphs/graphs.py @@ -1,7 +1,8 @@ """A module containing different graph representations in GraphNeT.""" -from typing import List, Optional +from typing import List, Optional, Dict, Union import torch +from numpy.random import Generator from .graph_definition import GraphDefinition from graphnet.models.detector import Detector @@ -18,6 +19,8 @@ def __init__( node_definition: NodeDefinition = NodesAsPulses(), node_feature_names: Optional[List[str]] = None, dtype: Optional[torch.dtype] = torch.float, + perturbation_dict: Optional[Dict[str, float]] = None, + seed: Optional[Union[int, Generator]] = None, nb_nearest_neighbours: int = 8, columns: List[int] = [0, 1, 2], ) -> None: @@ -28,6 +31,12 @@ def __init__( node_definition: Definition of nodes in the graph. node_feature_names: Name of node features. dtype: data type for node features. + perturbation_dict: Dictionary mapping a feature name to a standard + deviation according to which the values for this + feature should be randomly perturbed. Defaults + to None. + seed: seed or Generator used to randomly sample perturbations. + Defaults to None. nb_nearest_neighbours: Number of edges for each node. Defaults to 8. columns: node feature columns used for distance calculation . Defaults to [0, 1, 2]. 
@@ -42,4 +51,6 @@
             ),
             dtype=dtype,
             node_feature_names=node_feature_names,
+            perturbation_dict=perturbation_dict,
+            seed=seed,
         )
diff --git a/tests/models/test_graph_definition.py b/tests/models/test_graph_definition.py
new file mode 100644
index 000000000..bf16d7853
--- /dev/null
+++ b/tests/models/test_graph_definition.py
@@ -0,0 +1,53 @@
+"""Unit tests for GraphDefinition."""
+
+from graphnet.models.graphs import KNNGraph
+from graphnet.models.detector.prometheus import Prometheus
+from graphnet.data.constants import FEATURES
+
+import numpy as np
+from copy import deepcopy
+import torch
+
+
+def test_graph_definition() -> None:
+    """Test the forward pass of GraphDefinition."""
+    # Test configuration
+    features = FEATURES.PROMETHEUS
+    perturbation_dict = {
+        "sensor_pos_x": 1.4,
+        "sensor_pos_y": 2.2,
+        "sensor_pos_z": 3.7,
+        "t": 1.2,
+    }
+    mock_data = np.array([[1, 5, 2, 3], [2, 9, 6, 2]])
+    seed = 42
+    n_reps = 5
+
+    graph_definition = KNNGraph(
+        detector=Prometheus(), perturbation_dict=perturbation_dict, seed=seed
+    )
+    original_output = graph_definition(
+        node_features=deepcopy(mock_data), node_feature_names=features
+    )
+
+    for _ in range(n_reps):
+        graph_definition_perturbed = KNNGraph(
+            detector=Prometheus(), perturbation_dict=perturbation_dict
+        )
+
+        graph_definition = KNNGraph(
+            detector=Prometheus(),
+            perturbation_dict=perturbation_dict,
+            seed=seed,
+        )
+
+        data = graph_definition(
+            node_features=deepcopy(mock_data), node_feature_names=features
+        )
+
+        perturbed_data = graph_definition_perturbed(
+            node_features=deepcopy(mock_data), node_feature_names=features
+        )
+
+        assert not torch.equal(data.x, perturbed_data.x)  # should not be equal
+        assert torch.equal(data.x, original_output.x)  # should be equal.

From b567581a5ee250df3f6ba2fe326def165ec5a95e Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Fri, 22 Sep 2023 14:01:14 +0200
Subject: [PATCH 026/156] delete perturbedsqlitedataset

---
 .../sqlite/sqlite_dataset_perturbed.py        | 152 ------------------
 1 file changed, 152 deletions(-)
 delete mode 100644 src/graphnet/data/dataset/sqlite/sqlite_dataset_perturbed.py

diff --git a/src/graphnet/data/dataset/sqlite/sqlite_dataset_perturbed.py b/src/graphnet/data/dataset/sqlite/sqlite_dataset_perturbed.py
deleted file mode 100644
index b951e6916..000000000
--- a/src/graphnet/data/dataset/sqlite/sqlite_dataset_perturbed.py
+++ /dev/null
@@ -1,152 +0,0 @@
-"""`Dataset` class(es) for reading perturbed data from SQLite databases."""
-
-from typing import Dict, List, Optional, Tuple, Union
-
-import numpy as np
-from numpy.random import default_rng, Generator
-import torch
-from torch_geometric.data import Data
-
-from .sqlite_dataset import SQLiteDataset
-
-
-class SQLiteDatasetPerturbed(SQLiteDataset):
-    """Pytorch dataset for reading perturbed data from SQLite databases.
-
-    This including a pre-processing step, where the input data is randomly
-    perturbed according to given per-feature "noise" levels. This is intended
-    to test the stability of a trained model under small changes to the input
-    parameters.
- """ - - def __init__( - self, - path: Union[str, List[str]], - pulsemaps: Union[str, List[str]], - features: List[str], - truth: List[str], - *, - perturbation_dict: Dict[str, float], - node_truth: Optional[List[str]] = None, - index_column: str = "event_no", - truth_table: str = "truth", - node_truth_table: Optional[str] = None, - string_selection: Optional[List[int]] = None, - selection: Optional[List[int]] = None, - dtype: torch.dtype = torch.float32, - loss_weight_table: Optional[str] = None, - loss_weight_column: Optional[str] = None, - loss_weight_default_value: Optional[float] = None, - seed: Optional[Union[int, Generator]] = None, - ): - """Construct SQLiteDatasetPerturbed. - - Args: - path: Path to the file(s) from which this `Dataset` should read. - pulsemaps: Name(s) of the pulse map series that should be used to - construct the nodes on the individual graph objects, and their - features. Multiple pulse series maps can be used, e.g., when - different DOM types are stored in different maps. - features: List of columns in the input files that should be used as - node features on the graph objects. - truth: List of event-level columns in the input files that should - be used added as attributes on the graph objects. - perturbation_dict (Dict[str, float]): Dictionary mapping a feature - name to a standard deviation according to which the values for - this feature should be randomly perturbed. - node_truth: List of node-level columns in the input files that - should be used added as attributes on the graph objects. - index_column: Name of the column in the input files that contains - unique indicies to identify and map events across tables. - truth_table: Name of the table containing event-level truth - information. - node_truth_table: Name of the table containing node-level truth - information. - string_selection: Subset of strings for which data should be read - and used to construct graph objects. Defaults to None, meaning - all strings for which data exists are used. - selection: List of indicies (in `index_column`) of the events in - the input files that should be read. Defaults to None, meaning - that all events in the input files are read. - dtype: Type of the feature tensor on the graph objects returned. - loss_weight_table: Name of the table containing per-event loss - weights. - loss_weight_column: Name of the column in `loss_weight_table` - containing per-event loss weights. This is also the name of the - corresponding attribute assigned to the graph object. - loss_weight_default_value: Default per-event loss weight. - NOTE: This default value is only applied when - `loss_weight_table` and `loss_weight_column` are specified, and - in this case to events with no value in the corresponding - table/column. That is, if no per-event loss weight table/column - is provided, this value is ignored. Defaults to None. - seed: Optional seed for random number generation. Defaults to None. 
- """ - # Base class constructor - super().__init__( - path=path, - pulsemaps=pulsemaps, - features=features, - truth=truth, - node_truth=node_truth, - index_column=index_column, - truth_table=truth_table, - node_truth_table=node_truth_table, - string_selection=string_selection, - selection=selection, - dtype=dtype, - loss_weight_table=loss_weight_table, - loss_weight_column=loss_weight_column, - loss_weight_default_value=loss_weight_default_value, - ) - - # Custom member variables - assert isinstance(perturbation_dict, dict) - assert len(set(perturbation_dict.keys())) == len( - perturbation_dict.keys() - ) - self._perturbation_dict = perturbation_dict - - self._perturbation_cols = [ - self._features.index(key) for key in self._perturbation_dict.keys() - ] - - if seed is not None: - if isinstance(seed, int): - self.rng = default_rng(seed) - elif isinstance(seed, Generator): - self.rng = seed - else: - raise ValueError( - "Invalid seed. Must be an int or a numpy Generator." - ) - else: - self.rng = default_rng() - - def __getitem__(self, sequential_index: int) -> Data: - """Return graph `Data` object at `index`.""" - if not (0 <= sequential_index < len(self)): - raise IndexError( - f"Index {sequential_index} not in range [0, {len(self) - 1}]" - ) - features, truth, node_truth, loss_weight = self._query( - sequential_index - ) - perturbed_features = self._perturb_features(features) - graph = self._create_graph( - perturbed_features, truth, node_truth, loss_weight - ) - return graph - - def _perturb_features( - self, features: List[Tuple[float, ...]] - ) -> List[Tuple[float, ...]]: - features_array = np.array(features) - perturbed_features = self.rng.normal( - loc=features_array[:, self._perturbation_cols], - scale=np.array( - list(self._perturbation_dict.values()), dtype=np.float - ), - ) - features_array[:, self._perturbation_cols] = perturbed_features - return features_array.tolist() From 8c54c77c5d77516734388631ca861738a22ed23b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 14:13:06 +0200 Subject: [PATCH 027/156] remove old import statements --- src/graphnet/data/dataset/__init__.py | 1 - src/graphnet/data/dataset/sqlite/__init__.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/graphnet/data/dataset/__init__.py b/src/graphnet/data/dataset/__init__.py index 3ccdd9642..f6eafee94 100644 --- a/src/graphnet/data/dataset/__init__.py +++ b/src/graphnet/data/dataset/__init__.py @@ -7,7 +7,6 @@ from .dataset import EnsembleDataset, Dataset, ColumnMissingException from .parquet.parquet_dataset import ParquetDataset from .sqlite.sqlite_dataset import SQLiteDataset - from .sqlite.sqlite_dataset_perturbed import SQLiteDatasetPerturbed torch.multiprocessing.set_sharing_strategy("file_system") diff --git a/src/graphnet/data/dataset/sqlite/__init__.py b/src/graphnet/data/dataset/sqlite/__init__.py index 84d67a921..c44d66184 100644 --- a/src/graphnet/data/dataset/sqlite/__init__.py +++ b/src/graphnet/data/dataset/sqlite/__init__.py @@ -3,6 +3,5 @@ if has_torch_package(): from .sqlite_dataset import SQLiteDataset - from .sqlite_dataset_perturbed import SQLiteDatasetPerturbed del has_torch_package From 6f014fa6f650bd1e98f11fee69b56c195ee509ba Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 19:28:54 +0200 Subject: [PATCH 028/156] replace np.float with float --- src/graphnet/models/graphs/graph_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/graph_definition.py 
b/src/graphnet/models/graphs/graph_definition.py index 4741be7eb..4bac67cd5 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -204,7 +204,7 @@ def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: perturbed_features = self.rng.normal( loc=node_features[:, self._perturbation_cols], scale=np.array( - list(self._perturbation_dict.values()), dtype=np.float + list(self._perturbation_dict.values()), dtype=float ), ) node_features[:, self._perturbation_cols] = perturbed_features From 726e65367a602aeabd1f1b828b9e74567f30f990 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 19:55:31 +0200 Subject: [PATCH 029/156] shorten warning --- src/graphnet/models/graphs/graph_definition.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 4bac67cd5..35eefd5f3 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -199,7 +199,9 @@ def _validate_input( def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: if isinstance(self._perturbation_dict, dict): self.warning_once( - f"""Will randomly perturb {list(self._perturbation_dict.keys())} using stds {self._perturbation_dict.values()}""" + f"""Will randomly perturb +{list(self._perturbation_dict.keys())} +using stds {self._perturbation_dict.values()}""" ) perturbed_features = self.rng.normal( loc=node_features[:, self._perturbation_cols], From 822c0d7fde3c6f1f5834e0159178d1b6368e5a89 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 19:57:52 +0200 Subject: [PATCH 030/156] shorten doc string --- src/graphnet/models/graphs/graph_definition.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 35eefd5f3..931f5e398 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -200,8 +200,8 @@ def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: if isinstance(self._perturbation_dict, dict): self.warning_once( f"""Will randomly perturb -{list(self._perturbation_dict.keys())} -using stds {self._perturbation_dict.values()}""" + {list(self._perturbation_dict.keys())} + using stds {self._perturbation_dict.values()}""" # noqa ) perturbed_features = self.rng.normal( loc=node_features[:, self._perturbation_cols], From 1758b740d2fe21e501196ab2a08d2e2fed0e9325 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 20:01:00 +0200 Subject: [PATCH 031/156] shorten error strings --- .../models/graphs/graph_definition.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 931f5e398..089eee008 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -113,9 +113,12 @@ def forward( # type: ignore node_feature_names: name of each column. Shape ´[,d]´. truth_dicts: Dictionary containing truth labels. custom_label_functions: Custom label functions. See https://github.com/graphnet-team/graphnet/blob/main/GETTING_STARTED.md#adding-custom-truth-labels. - loss_weight_column: Name of column that holds loss weight. Defaults to None. + loss_weight_column: Name of column that holds loss weight. 
+ Defaults to None. loss_weight: Loss weight associated with event. Defaults to None. - loss_weight_default_value: default value for loss weight. Used in instances where some events have no pre-defined loss weight. Defaults to None. + loss_weight_default_value: default value for loss weight. + Used in instances where some events have + no pre-defined loss weight. Defaults to None. data_path: Path to dataset data files. Defaults to None. Returns: @@ -146,7 +149,8 @@ def forward( # type: ignore graph = self._edge_definition(graph) else: self.warnonce( - "No EdgeDefinition provided. Graphs will not have edges defined!" + """No EdgeDefinition provided. + Graphs will not have edges defined!""" # noqa ) # Attach data path - useful for Ensemble datasets. @@ -190,11 +194,15 @@ def _validate_input( # was instantiated with. assert len(node_feature_names) == len( self._node_feature_names - ), f"""Input features ({node_feature_names}) is not what {self.__class__.__name__} was instatiated with ({self._node_feature_names})""" + ), f"""Input features ({node_feature_names}) is not what + {self.__class__.__name__} was instatiated + with ({self._node_feature_names})""" # noqa for idx in range(len(node_feature_names)): assert ( node_feature_names[idx] == self._node_feature_names[idx] - ), f""" Order of node features in data are not the same as expected. Got {node_feature_names} vs. {self._node_feature_names}""" + ), f""" Order of node features in data + are not the same as expected. Got {node_feature_names} + vs. {self._node_feature_names}""" # noqa def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: if isinstance(self._perturbation_dict, dict): @@ -298,7 +306,8 @@ def _add_features_individually( graph[feature] = graph.x[:, index].detach() else: self.warnonce( - """Cannot assign graph['x']. This field is reserved for node features. Please rename your input feature.""" + """Cannot assign graph['x']. This field is reserved + for node features. 
Please rename your input feature.""" # noqa ) return graph From afeec3e4dfec9dcd0e6df06e09e97763d0140f52 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 20:17:53 +0200 Subject: [PATCH 032/156] Replace GenericExtractor in 01-03 for FeatureExtractor --- examples/01_icetray/01_convert_i3_files.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/examples/01_icetray/01_convert_i3_files.py b/examples/01_icetray/01_convert_i3_files.py index 03517f51b..495c9a13c 100644 --- a/examples/01_icetray/01_convert_i3_files.py +++ b/examples/01_icetray/01_convert_i3_files.py @@ -5,6 +5,7 @@ from graphnet.constants import EXAMPLE_OUTPUT_DIR, TEST_DATA_DIR from graphnet.data.extractors import ( I3FeatureExtractorIceCubeUpgrade, + I3FeatureExtractorIceCube86, I3RetroExtractor, I3TruthExtractor, I3GenericExtractor, @@ -34,12 +35,7 @@ def main_icecube86(backend: str) -> None: converter: DataConverter = CONVERTER_CLASS[backend]( [ - I3GenericExtractor( - keys=[ - "SRTInIcePulses", - "I3MCTree", - ] - ), + I3FeatureExtractorIceCube86("SRTInIcePulses"), I3TruthExtractor(), ], outdir, From 999e26d885a8590bd4fddf41b860c36747f3c73e Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 22 Sep 2023 21:07:36 +0200 Subject: [PATCH 033/156] fix typo in readme.md --- examples/04_training/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/04_training/README.md b/examples/04_training/README.md index 1849515be..934767247 100644 --- a/examples/04_training/README.md +++ b/examples/04_training/README.md @@ -34,7 +34,7 @@ This subfolder contains two main training scripts: --prediction-names zenith_pred zenith_kappa_pred azimuth_pred azimuth_kappa_pred ``` -**`02_train_model_dynedge_from_config.py** Shows how to train a GNN on neutrino telescope data **using configuration files** to construct the dataset that loads the data and the model that is trained. This is the recommended way to configure standard dataset and models, as it is easier to ready and share than doing so in pure code. This example can be run using a few different models targeting different physics use cases. For instance, you can try running: +**`03_train_model_dynedge_from_config.py** Shows how to train a GNN on neutrino telescope data **using configuration files** to construct the dataset that loads the data and the model that is trained. This is the recommended way to configure standard dataset and models, as it is easier to ready and share than doing so in pure code. This example can be run using a few different models targeting different physics use cases. For instance, you can try running: ```bash # Show the CLI From 03dc5d2f8a54a21f65f1844967215bd0c02befd8 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sat, 23 Sep 2023 11:18:36 +0200 Subject: [PATCH 034/156] Update code comment in 04-01 --- examples/04_training/01_train_dynedge.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/04_training/01_train_dynedge.py b/examples/04_training/01_train_dynedge.py index 6a8bf2f15..58e513ec2 100644 --- a/examples/04_training/01_train_dynedge.py +++ b/examples/04_training/01_train_dynedge.py @@ -164,10 +164,13 @@ def main( # Save results as .csv results.to_csv(f"{path}/results.csv") - # Save full model (including weights) to .pth file - Not version proof + # Save full model (including weights) to .pth file - not version safe + # Note: Models saved as .pth files in one version of graphnet + # may not be compatible with a different version of graphnet. 
model.save(f"{path}/model.pth") # Save model config and state dict - Version safe save method. + # This method of saving models is the safest way. model.save_state_dict(f"{path}/state_dict.pth") model.save_config(f"{path}/model_config.yml") From 41e0164df609a243be23c79f5e25ad4cdc372c17 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 24 Sep 2023 10:28:44 +0200 Subject: [PATCH 035/156] fix github mistake --- tests/utilities/test_model_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utilities/test_model_config.py b/tests/utilities/test_model_config.py index 8979f0255..68ebd5c2a 100644 --- a/tests/utilities/test_model_config.py +++ b/tests/utilities/test_model_config.py @@ -18,7 +18,7 @@ def test_simple_model_config(path: str = "/tmp/simple_model.yml") -> None: - """Test saving, loading, and reconstructing simple model.""" + """Test saving, loading, and reconstructing using a simple model.""" # Construct single Model model = DynEdge( nb_inputs=9, From 5e87317b1bc004cdfc306e6a60e3fb6ebdd1be63 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 24 Sep 2023 10:32:37 +0200 Subject: [PATCH 036/156] Fix bracket --- src/graphnet/models/graphs/graph_definition.py | 4 +++- tests/utilities/test_model_config.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 042e54c58..aa73693ef 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -306,7 +306,9 @@ def _add_features_individually( graph[feature] = graph.x[:, index].detach() else: self.warning_once( - """Cannot assign graph['x']. This field is reserved for node features. Please rename your input feature.""" + """Cannot assign graph['x']. This field is reserved for + node features. Please rename your input feature.""" + ) # noqa return graph diff --git a/tests/utilities/test_model_config.py b/tests/utilities/test_model_config.py index 68ebd5c2a..8979f0255 100644 --- a/tests/utilities/test_model_config.py +++ b/tests/utilities/test_model_config.py @@ -18,7 +18,7 @@ def test_simple_model_config(path: str = "/tmp/simple_model.yml") -> None: - """Test saving, loading, and reconstructing using a simple model.""" + """Test saving, loading, and reconstructing simple model.""" # Construct single Model model = DynEdge( nb_inputs=9, From 9e2be1440e5eba2345bfb4303a03d47beede5582 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 24 Sep 2023 10:37:02 +0200 Subject: [PATCH 037/156] code climate --- src/graphnet/models/graphs/graph_definition.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index aa73693ef..9c4db4d47 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -150,7 +150,8 @@ def forward( # type: ignore else: self.warning_once( - "No EdgeDefinition provided. Graphs will not have edges defined!" + """No EdgeDefinition provided. + Graphs will not have edges defined!""" # noqa ) # Attach data path - useful for Ensemble datasets. 
From faf38bd05dfe3edc94b6db9ecb6c99e2b5a13af1 Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Mon, 25 Sep 2023 09:19:01 +0900 Subject: [PATCH 038/156] checks existence of filters --- src/graphnet/data/dataconverter.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index 3d9304f9f..616030ae1 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -580,7 +580,17 @@ def _filter_mask( frame: I3Frame to check. I3filters: List of I3Filters to check for pass. """ + if "FilterMask" not in frame: + self.warning_once( + "FilterMask not found in frame. Skipping filter checks." + ) + return False for filter in I3filters: + if filter not in frame["FilterMask"]: + self.warning_once( + f"Filter {filter} not found in frame. Skipping." + ) + continue if frame["FilterMask"][filter].condition_passed is False: return True return False From 4e3e391057148b4022221b151db8fbee532f3bee Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Mon, 25 Sep 2023 09:38:45 +0900 Subject: [PATCH 039/156] refactoring --- src/graphnet/data/dataconverter.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index 616030ae1..54cbbf418 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -580,17 +580,19 @@ def _filter_mask( frame: I3Frame to check. I3filters: List of I3Filters to check for pass. """ - if "FilterMask" not in frame: + if "FilterMask" in frame: + for filter in I3filters: + if filter in frame["FilterMask"]: + if frame["FilterMask"][filter].condition_passed is False: + return True + else: + self.warning_once( + f"Filter {filter} not found in frame. Skipping filter check." + ) + continue + else: self.warning_once( "FilterMask not found in frame. Skipping filter checks." ) return False - for filter in I3filters: - if filter not in frame["FilterMask"]: - self.warning_once( - f"Filter {filter} not found in frame. Skipping." - ) - continue - if frame["FilterMask"][filter].condition_passed is False: - return True return False From 5d57ff623e32a53d3118b1d4bd88c4db9b49b6b1 Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Mon, 25 Sep 2023 12:44:00 +0900 Subject: [PATCH 040/156] revert --- src/graphnet/data/dataconverter.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index 54cbbf418..616030ae1 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -580,19 +580,17 @@ def _filter_mask( frame: I3Frame to check. I3filters: List of I3Filters to check for pass. """ - if "FilterMask" in frame: - for filter in I3filters: - if filter in frame["FilterMask"]: - if frame["FilterMask"][filter].condition_passed is False: - return True - else: - self.warning_once( - f"Filter {filter} not found in frame. Skipping filter check." - ) - continue - else: + if "FilterMask" not in frame: self.warning_once( "FilterMask not found in frame. Skipping filter checks." ) return False + for filter in I3filters: + if filter not in frame["FilterMask"]: + self.warning_once( + f"Filter {filter} not found in frame. Skipping." 
+ ) + continue + if frame["FilterMask"][filter].condition_passed is False: + return True return False From 7f48bfcf2389d21d751bf7d1b2a9c9401350b845 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 25 Sep 2023 20:55:00 +0200 Subject: [PATCH 041/156] change num_workers to 1 instead of 10 --- examples/01_icetray/02_compare_sqlite_and_parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/01_icetray/02_compare_sqlite_and_parquet.py b/examples/01_icetray/02_compare_sqlite_and_parquet.py index 252f1a346..91bbe6f1d 100644 --- a/examples/01_icetray/02_compare_sqlite_and_parquet.py +++ b/examples/01_icetray/02_compare_sqlite_and_parquet.py @@ -34,7 +34,7 @@ def convert_data() -> None: I3FeatureExtractorIceCube86(PULSEMAP), ], outdir=OUTPUT_DIR, - workers=10, + workers=1, ) # Run data converters. From c6ee6b37c58b6c7758f8ac9aeceec1ac5bd20afd Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 25 Sep 2023 21:07:41 +0200 Subject: [PATCH 042/156] remove redundant import --- examples/01_icetray/01_convert_i3_files.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/01_icetray/01_convert_i3_files.py b/examples/01_icetray/01_convert_i3_files.py index 495c9a13c..40b304327 100644 --- a/examples/01_icetray/01_convert_i3_files.py +++ b/examples/01_icetray/01_convert_i3_files.py @@ -8,7 +8,6 @@ I3FeatureExtractorIceCube86, I3RetroExtractor, I3TruthExtractor, - I3GenericExtractor, ) from graphnet.data.dataconverter import DataConverter from graphnet.data.parquet import ParquetDataConverter From 124f67a81b01f10e43ca445576b166c20b05daa4 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 25 Sep 2023 22:08:31 +0200 Subject: [PATCH 043/156] add unit tests for examples, add default args. --- .github/workflows/build.yml | 6 +++--- examples/02_data/01_read_dataset.py | 8 +++++++- examples/04_training/01_train_dynedge.py | 2 +- examples/04_training/02_train_tito_model.py | 2 +- examples/04_training/03_train_dynedge_from_config.py | 2 +- .../04_training/04_train_multiclassifier_from_configs.py | 2 +- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 080146149..589afb9b3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,7 +55,7 @@ jobs: editable: true - name: Run unit tests and generate coverage report run: | - coverage run --source=graphnet -m pytest tests/ + coverage run --source=graphnet -m pytest tests/ examples/ --ignore=05_pisa coverage xml -o coverage.xml - name: Work around permission issue run: | @@ -88,7 +88,7 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ + coverage run --source=graphnet -m pytest tests/ examples/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=examples/01_icetray --ignore=examples/05_pisa coverage report -m build-macos: @@ -108,5 +108,5 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ + coverage run --source=graphnet -m pytest tests/ examples/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=examples/01_icetray --ignore=examples/05_pisa coverage report -m diff --git a/examples/02_data/01_read_dataset.py b/examples/02_data/01_read_dataset.py index be913e190..de8fecb18 100644 --- 
a/examples/02_data/01_read_dataset.py +++ b/examples/02_data/01_read_dataset.py @@ -113,7 +113,13 @@ def main(backend: str) -> None: """ ) - parser.add_argument("backend", choices=["sqlite", "parquet"]) + parser.add_argument( + "backend", + choices=["sqlite", "parquet"], + default="sqlite", + const="sqlite", + nargs="?", + ) args = parser.parse_args() diff --git a/examples/04_training/01_train_dynedge.py b/examples/04_training/01_train_dynedge.py index 58e513ec2..f9af11998 100644 --- a/examples/04_training/01_train_dynedge.py +++ b/examples/04_training/01_train_dynedge.py @@ -213,7 +213,7 @@ def main( parser.with_standard_arguments( "gpus", - ("max-epochs", 5), + ("max-epochs", 1), "early-stopping-patience", ("batch-size", 16), "num-workers", diff --git a/examples/04_training/02_train_tito_model.py b/examples/04_training/02_train_tito_model.py index ee3d89760..f3d60d553 100644 --- a/examples/04_training/02_train_tito_model.py +++ b/examples/04_training/02_train_tito_model.py @@ -223,7 +223,7 @@ def main( parser.with_standard_arguments( "gpus", - ("max-epochs", 5), + ("max-epochs", 1), ("early-stopping-patience", 2), ("batch-size", 16), "num-workers", diff --git a/examples/04_training/03_train_dynedge_from_config.py b/examples/04_training/03_train_dynedge_from_config.py index 9a6df73dd..1dec95961 100644 --- a/examples/04_training/03_train_dynedge_from_config.py +++ b/examples/04_training/03_train_dynedge_from_config.py @@ -138,7 +138,7 @@ def main( "dataset-config", "model-config", "gpus", - ("max-epochs", 5), + ("max-epochs", 1), "early-stopping-patience", ("batch-size", 16), "num-workers", diff --git a/examples/04_training/04_train_multiclassifier_from_configs.py b/examples/04_training/04_train_multiclassifier_from_configs.py index 6937b01b1..6385c010c 100644 --- a/examples/04_training/04_train_multiclassifier_from_configs.py +++ b/examples/04_training/04_train_multiclassifier_from_configs.py @@ -175,7 +175,7 @@ def main( ), ), "gpus", - ("max-epochs", 5), + ("max-epochs", 1), "early-stopping-patience", ("batch-size", 16), "num-workers", From 496215bcff4d8d0d2aa22afbaf1f196a931601c9 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 25 Sep 2023 22:49:28 +0200 Subject: [PATCH 044/156] update build.yml --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 589afb9b3..5339d6d10 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,7 +55,7 @@ jobs: editable: true - name: Run unit tests and generate coverage report run: | - coverage run --source=graphnet -m pytest tests/ examples/ --ignore=05_pisa + coverage run --source=graphnet -m pytest tests/ examples/ --ignore=examples/05_pisa/ coverage xml -o coverage.xml - name: Work around permission issue run: | @@ -88,7 +88,7 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ examples/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=examples/01_icetray --ignore=examples/05_pisa + coverage run --source=graphnet -m pytest tests/ examples/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=examples/01_icetray/ --ignore=examples/05_pisa/ coverage report -m build-macos: @@ -108,5 +108,5 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ examples/ 
--ignore=tests/data/ --ignore=tests/deployment/ --ignore=examples/01_icetray --ignore=examples/05_pisa + coverage run --source=graphnet -m pytest tests/ examples/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=examples/01_icetray/ --ignore=examples/05_pisa/ coverage report -m From 3f3451fb9c6b15a09b95bd190dbc9b62b4ada6b3 Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Tue, 26 Sep 2023 10:37:22 +0900 Subject: [PATCH 045/156] refactoring --- src/graphnet/data/dataconverter.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index 616030ae1..92d7ae59b 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -437,7 +437,7 @@ def _extract_data(self, fileset: FileSet) -> List[OrderedDict]: continue if self._skip_frame(frame): continue - if self._filter_mask(frame, self._I3filters): + if self._filter_mask(frame): continue # Try to extract data from I3Frame @@ -571,11 +571,11 @@ def _skip_frame(self, frame: "icetray.I3Frame") -> bool: return True return False - def _filter_mask( - self, frame: "icetray.I3Frame", I3filters: List[str] - ) -> bool: + def _filter_mask(self, frame: "icetray.I3Frame") -> bool: """Check if specified condition(s) are met. + A 'True' return will skip the frame. + Args: frame: I3Frame to check. I3filters: List of I3Filters to check for pass. @@ -585,7 +585,7 @@ def _filter_mask( "FilterMask not found in frame. Skipping filter checks." ) return False - for filter in I3filters: + for filter in self._I3filters: if filter not in frame["FilterMask"]: self.warning_once( f"Filter {filter} not found in frame. Skipping." From 96264d7187fe77b067936dced05d6d818cdcfbc1 Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Tue, 26 Sep 2023 15:53:22 +0900 Subject: [PATCH 046/156] create filter class --- src/graphnet/data/dataconverter.py | 52 +++++------- src/graphnet/data/filters.py | 124 +++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 34 deletions(-) create mode 100644 src/graphnet/data/filters.py diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index 92d7ae59b..51a69d36a 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -21,6 +21,7 @@ TypeVar, Union, cast, + Sequence, ) import numpy as np @@ -39,6 +40,7 @@ from graphnet.utilities.filesys import find_i3_files from graphnet.utilities.imports import has_icecube_package from graphnet.utilities.logging import Logger +from graphnet.data.filters import I3Filter, NullSplitI3Filter if has_icecube_package(): from icecube import icetray, dataio # pyright: reportMissingImports=false @@ -107,7 +109,7 @@ def __init__( workers: int = 1, index_column: str = "event_no", icetray_verbose: int = 0, - I3filters: List[str] = [], + i3_Filters: Union[I3Filter, List[Callable]] = [NullSplitI3Filter], ): """Construct DataConverter. 
@@ -167,7 +169,13 @@ def __init__( self._sequential_batch_pattern = sequential_batch_pattern self._input_file_batch_pattern = input_file_batch_pattern self._workers = workers - self._I3filters = I3filters + if isinstance(i3_Filters, I3Filter): + I3_Filters = [i3_Filters] + self._I3filters = I3_Filters + for filter in self._I3filters: + assert isinstance( + filter, I3Filter + ), f"{type(filter)} is not a subclass of I3Filter" # Create I3Extractors self._extractors = I3ExtractorCollection(*extractors) @@ -435,10 +443,9 @@ def _extract_data(self, fileset: FileSet) -> List[OrderedDict]: except Exception as e: if "I3" in str(e): continue + # check if frame should be skipped if self._skip_frame(frame): continue - if self._filter_mask(frame): - continue # Try to extract data from I3Frame results = self._extractors(frame) @@ -559,38 +566,15 @@ def _get_output_file(self, input_file: str) -> str: return output_file def _skip_frame(self, frame: "icetray.I3Frame") -> bool: - """Check if frame should be skipped. - - Args: - frame: I3Frame to check. + """Check the user defined filters. Returns: - True if frame is a null split frame, else False. + bool: True if frame should be skipped, False otherwise. """ - if frame["I3EventHeader"].sub_event_stream == "NullSplit": - return True - return False - - def _filter_mask(self, frame: "icetray.I3Frame") -> bool: - """Check if specified condition(s) are met. + if self._I3filters is None: + return False # No filters defined, so we keep the frame - A 'True' return will skip the frame. - - Args: - frame: I3Frame to check. - I3filters: List of I3Filters to check for pass. - """ - if "FilterMask" not in frame: - self.warning_once( - "FilterMask not found in frame. Skipping filter checks." - ) - return False for filter in self._I3filters: - if filter not in frame["FilterMask"]: - self.warning_once( - f"Filter {filter} not found in frame. Skipping." - ) - continue - if frame["FilterMask"][filter].condition_passed is False: - return True - return False + if not filter(frame): + return True # A filter keep_frame call has returned false, so we skip the frame + return False # All filter keep_frame calls have returned true, so we keep the frame diff --git a/src/graphnet/data/filters.py b/src/graphnet/data/filters.py new file mode 100644 index 000000000..2697e0a53 --- /dev/null +++ b/src/graphnet/data/filters.py @@ -0,0 +1,124 @@ +"""Filter classes for filtering I3-frames when converting I3-files.""" +from abc import abstractmethod +from graphnet.utilities.logging import Logger +from typing import List +from icecube import icetray + + +class I3Filter(Logger): + """A generic filter for I3-frames.""" + + @abstractmethod + def _pass_frame(self, frame: icetray.I3Frame) -> bool: + """Return True if the frame passes the filter, False otherwise. + + Args: + frame: I3-frame + The I3-frame to check. + + Returns: + bool: True if the frame passes the filter, False otherwise. + """ + raise NotImplementedError + + def __call__(self, frame: icetray.I3Frame) -> bool: + """Return True if the frame passes the filter, False otherwise. + + Args: + frame: I3-frame + The I3-frame to check. + + Returns: + bool: True if the frame passes the filter, False otherwise. + """ + pass_flag = self._pass_frame(frame) + try: + assert isinstance(pass_flag, bool) + except AssertionError: + raise TypeError( + f"Expected _pass_frame to return a bool, got {type(pass_flag)}." 
+ ) + return pass_flag + + +class NullSplitI3Filter(I3Filter): + """A filter that skips all null-split frames.""" + + def _keep_frame(self, frame: icetray.I3Frame) -> bool: + """Check that frame is not a null-split frame. + + returns False if the frame is a null-split frame, True otherwise. + + Args: + frame: I3-frame + The I3-frame to check. + """ + if frame.Has("I3EventHeader"): + if frame["I3EventHeader"].sub_event_stream == "NullSplit": + return False + return True + + +class I3FilterMask(I3Filter): + """checks list of filters from the FilterMask in I3 frames.""" + + def __init__(self, filter_names: List[str], filter_any: bool = True): + """Initialize I3FilterMask. + + Args: + filter_names: List[str] + A list of filter names to check for. + filter_any: bool + standard: True + If True, the frame is kept if any of the filter names are present. + If False, the frame is kept if all of the filter names are present. + """ + self._filter_names = filter_names + self._filter_any = filter_any + + def _keep_frame(self, frame: icetray.I3Frame) -> bool: + """Check if current frame should be kept. + + Args: + frame: I3-frame + The I3-frame to check. + """ + if "FilterMask" in frame: + if ( + self._filter_any is True + ): # Require any of the filters to pass to keep the frame + bool_list = [] + for filter_name in self._filter_names: + if filter_name not in frame["FilterMask"]: + self.warning_once( + f"FilterMask {filter_name} not found in frame.skipping filter." + ) + continue + elif frame["FilterMask"][filter].condition_passed is True: + bool_list.append(True) + else: + bool_list.append(False) + if len(bool_list) == 0: + self.warning_once( + "None of the FilterMask filters found in frame, FilterMask filters will not be applied." + ) + return any(bool_list) or len(bool_list) == 0 + else: # Require all filters to pass in order to keep the frame. + for filter_name in self._filter_names: + if filter_name not in frame["FilterMask"]: + self.warning_once( + f"FilterMask {filter_name} not found in frame, skipping filter." + ) + continue + elif frame["FilterMask"][filter].condition_passed is True: + continue # current filter is passed, continue to next filter + else: + return ( + False # current filter failed so frame is skipped. + ) + return True + else: + self.warning_once( + "FilterMask not found in frame, FilterMask filters will not be applied." + ) + return True From 53d4a09abb3d48108316ebf20f88c4d282c2d0af Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Tue, 26 Sep 2023 16:02:12 +0900 Subject: [PATCH 047/156] code_climate_fixes --- src/graphnet/data/dataconverter.py | 4 ++-- src/graphnet/data/filters.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index 51a69d36a..1e8b64271 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -576,5 +576,5 @@ def _skip_frame(self, frame: "icetray.I3Frame") -> bool: for filter in self._I3filters: if not filter(frame): - return True # A filter keep_frame call has returned false, so we skip the frame - return False # All filter keep_frame calls have returned true, so we keep the frame + return True # keep_frame call false, skip the frame. + return False # All filter keep_frame calls true, keep the frame. 
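With `_skip_frame` delegating to the filter objects, a frame survives conversion only if every configured filter's keep-call returns `True`. Below is a rough usage sketch based on the classes defined in `src/graphnet/data/filters.py` in this patch, runnable only inside an IceTray environment; the FilterMask key "MuonFilter_13" is a placeholder, since the real keys depend on the processing pass of the input i3 files.

```python
# Sketch only: combining the filters introduced above when converting
# i3 files. "MuonFilter_13" is a hypothetical FilterMask key.
from graphnet.data.filters import I3FilterMask, NullSplitI3Filter

frame_filters = [
    NullSplitI3Filter(),  # drop frames from the NullSplit sub-event stream
    I3FilterMask(
        filter_names=["MuonFilter_13"],
        filter_any=True,  # keep a frame if any of the listed filters passed
    ),
]

# The list would then be handed to a DataConverter subclass through the
# new `i3_Filters` argument added earlier in this series.
```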
diff --git a/src/graphnet/data/filters.py b/src/graphnet/data/filters.py index 2697e0a53..f862ea197 100644 --- a/src/graphnet/data/filters.py +++ b/src/graphnet/data/filters.py @@ -36,7 +36,7 @@ def __call__(self, frame: icetray.I3Frame) -> bool: assert isinstance(pass_flag, bool) except AssertionError: raise TypeError( - f"Expected _pass_frame to return a bool, got {type(pass_flag)}." + f"Expected _pass_frame to return bool, got {type(pass_flag)}." ) return pass_flag @@ -91,7 +91,7 @@ def _keep_frame(self, frame: icetray.I3Frame) -> bool: for filter_name in self._filter_names: if filter_name not in frame["FilterMask"]: self.warning_once( - f"FilterMask {filter_name} not found in frame.skipping filter." + f"FilterMask {filter_name} not found in frame. skipping filter." ) continue elif frame["FilterMask"][filter].condition_passed is True: @@ -111,7 +111,7 @@ def _keep_frame(self, frame: icetray.I3Frame) -> bool: ) continue elif frame["FilterMask"][filter].condition_passed is True: - continue # current filter is passed, continue to next filter + continue # current filter passed, continue to next filter else: return ( False # current filter failed so frame is skipped. From 55cdf515231ce97dda6801a5eeeaf0109d45d2e9 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 10:17:38 +0200 Subject: [PATCH 048/156] cmon pytest --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5339d6d10..8fa657cd2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,7 +55,7 @@ jobs: editable: true - name: Run unit tests and generate coverage report run: | - coverage run --source=graphnet -m pytest tests/ examples/ --ignore=examples/05_pisa/ + coverage run --source=graphnet -m pytest examples/ --ignore=examples/05_pisa/ coverage xml -o coverage.xml - name: Work around permission issue run: | From 2f4934a7abe3a29f666add1874254ebfac819d8f Mon Sep 17 00:00:00 2001 From: ArturoLlorente Date: Tue, 26 Sep 2023 10:44:29 +0200 Subject: [PATCH 049/156] add class for sequence bucketing --- src/graphnet/training/utils.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/graphnet/training/utils.py b/src/graphnet/training/utils.py index 2293d44e0..523a5237b 100644 --- a/src/graphnet/training/utils.py +++ b/src/graphnet/training/utils.py @@ -28,23 +28,36 @@ def collate_fn(graphs: List[Data]) -> Batch: return Batch.from_data_list(graphs) -class collator_sequence_buckleting(): - """Perform the sequence bucketing for the graphs in the batch. +class collator_sequence_buckleting: + """Perform the sequence bucketing for the graphs in the batch.""" - batch_splits: list of floats, each element is the fraction of the total - number of graphs. only the cutting points should be provided, the first - element will be 0 and the last element should be 1. - - """ def __init__(self, batch_splits: List[float] = [0.8]): + """Set cutting points of the different mini-batches. + + batch_splits: list of floats, each element is the fraction of the total + number of graphs. This list should not explicitly define the first and + last elements, which will always be 0 and 1 respectively. + """ self.batch_splits = batch_splits - def __call__(self, graphs: List[Data]): + def __call__(self, graphs: List[Data]) -> Batch: + """Execute sequence bucketing on the input list of graphs and sort them + by the number of pulses for each mini-batch. 
+ + Args: + graphs: A list of Data objects representing the input graphs. + + Returns: + A list of Batch objects, each containing a mini-batch of the input graphs + sorted by their number of pulses. + """ graphs = [g for g in graphs if g.n_pulses > 1] graphs.sort(key=lambda x: x.n_pulses) batch_list = [] - for minp, maxp in zip([0] + self.batch_splits, self.batch_splits + [1]): + for minp, maxp in zip( + [0] + self.batch_splits, self.batch_splits + [1] + ): min_idx = int(minp * len(graphs)) max_idx = int(maxp * len(graphs)) this_graphs = graphs[min_idx:max_idx] From 122f99852f5888a1773fa4bfadf51345a72ddff0 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 11:45:04 +0200 Subject: [PATCH 050/156] add unit tests --- .../01_icetray/test_icetray_examples.py | 32 +++++++++++++++++++ tests/examples/02_data/test_data_examples.py | 30 +++++++++++++++++ .../03_weights/test_weights_examples.py | 16 ++++++++++ .../04_training/test_training_examples.py | 30 +++++++++++++++++ 4 files changed, 108 insertions(+) create mode 100644 tests/examples/01_icetray/test_icetray_examples.py create mode 100644 tests/examples/02_data/test_data_examples.py create mode 100644 tests/examples/03_weights/test_weights_examples.py create mode 100644 tests/examples/04_training/test_training_examples.py diff --git a/tests/examples/01_icetray/test_icetray_examples.py b/tests/examples/01_icetray/test_icetray_examples.py new file mode 100644 index 000000000..538a67d36 --- /dev/null +++ b/tests/examples/01_icetray/test_icetray_examples.py @@ -0,0 +1,32 @@ +"""Test for examples in 01_icetray.""" +import runpy +import os +from graphnet.constants import GRAPHNET_ROOT_DIR + +EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/01_icetray") + + +def test_01_convert_i3_files() -> None: + """Test for 01_convert_i3_files.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, "01_convert_i3_files.py")) + + +def test_02_compare_sqlite_and_parquet() -> None: + """Test for 02_compare_sqlite_and_parquet.""" + runpy.run_path( + os.path.join(EXAMPLE_PATH, "02_compare_sqlite_and_parquet.py") + ) + + +def test_03_i3_deployer_example() -> None: + """Test for 03_i3_deployer_example.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, "03_i3_deployer_example.py")) + + +def test_04_i3_module_in_native_icetray_example() -> None: + """Test for 04_i3_module_in_native_icetray_example.""" + runpy.run_path( + os.path.join( + EXAMPLE_PATH, "04_i3_module_in_naticve_icetray_example.py" + ) + ) diff --git a/tests/examples/02_data/test_data_examples.py b/tests/examples/02_data/test_data_examples.py new file mode 100644 index 000000000..ba7f4a072 --- /dev/null +++ b/tests/examples/02_data/test_data_examples.py @@ -0,0 +1,30 @@ +"""Tests for examples in 02_data.""" +import runpy +import os +from graphnet.constants import GRAPHNET_ROOT_DIR + +EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/02_data") + + +def test_01_read_dataset() -> None: + """Test for 01_read_dataset.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, "01_read_dataset.py")) + + +def test_02_plot_feature_distribution() -> None: + """Test for 02_plot_feature_distribution.""" + runpy.run_path( + os.path.join(EXAMPLE_PATH, "02_plot_feature_distribution.py") + ) + + +def test_03_convert_parquet_to_sqlite() -> None: + """Test for 03_convert_parquet_to_sqlite.""" + runpy.run_path( + os.path.join(EXAMPLE_PATH, "03_convert_parquet_to_sqlite.py") + ) + + +def test_04_ensemble_dataset() -> None: + """Test for 04_ensemble_dataset.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, 
"04_ensemble_dataset.py")) diff --git a/tests/examples/03_weights/test_weights_examples.py b/tests/examples/03_weights/test_weights_examples.py new file mode 100644 index 000000000..8715fefc6 --- /dev/null +++ b/tests/examples/03_weights/test_weights_examples.py @@ -0,0 +1,16 @@ +"""Test for examples in 03_weights.""" +import runpy +import os +from graphnet.constants import GRAPHNET_ROOT_DIR + +EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/03_weights") + + +def test_01_fit_uniform_weights() -> None: + """Test for 01_fit_uniform_weights.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, "01_fit_uniform_weights.py")) + + +def test_02_fit_bjoern_low_weights() -> None: + """Test for 02_fit_bjoern_low_weights.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, "02_fit_bjoern_low_weights.py")) diff --git a/tests/examples/04_training/test_training_examples.py b/tests/examples/04_training/test_training_examples.py new file mode 100644 index 000000000..3c049e6bb --- /dev/null +++ b/tests/examples/04_training/test_training_examples.py @@ -0,0 +1,30 @@ +"""Test for examples in 04_training.""" +import runpy +import os +from graphnet.constants import GRAPHNET_ROOT_DIR + +EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/04_data") + + +def test_01_train_dynedge() -> None: + """Test for 01_train_dynedge.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, "01_train_dynedge.py")) + + +def test_02_train_tito_model() -> None: + """Test for 02_train_tito_model.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, "02_train_tito_model.py")) + + +def test_03_train_dynedge_from_config() -> None: + """Test for 03_train_dynedge_from_config.""" + runpy.run_path( + os.path.join(EXAMPLE_PATH, "03_train_dynedge_from_config.py") + ) + + +def test_04_train_multiclassifier_from_configs() -> None: + """Test for 04_train_multiclassifier_from_configs.""" + runpy.run_path( + os.path.join(EXAMPLE_PATH, "04_train_multiclassifier_from_configs.py") + ) From 494e1146d9f111935bb218b91b8098346a4287cc Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 11:46:55 +0200 Subject: [PATCH 051/156] update build.yml --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8fa657cd2..6998ce22f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,7 +55,7 @@ jobs: editable: true - name: Run unit tests and generate coverage report run: | - coverage run --source=graphnet -m pytest examples/ --ignore=examples/05_pisa/ + coverage run --source=graphnet -m pytest tests/ coverage xml -o coverage.xml - name: Work around permission issue run: | @@ -88,7 +88,7 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ examples/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=examples/01_icetray/ --ignore=examples/05_pisa/ + coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/01_icetray/ coverage report -m build-macos: @@ -108,5 +108,5 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ examples/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=examples/01_icetray/ --ignore=examples/05_pisa/ + coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ 
--ignore=tests/examples/01_icetray/
          coverage report -m

From 069314c984bc21cea9870e36c23c074d6f9eefdf Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Tue, 26 Sep 2023 11:56:21 +0200
Subject: [PATCH 052/156] fix unit test paths

---
 tests/examples/02_data/test_data_examples.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/examples/02_data/test_data_examples.py b/tests/examples/02_data/test_data_examples.py
index ba7f4a072..2d076151a 100644
--- a/tests/examples/02_data/test_data_examples.py
+++ b/tests/examples/02_data/test_data_examples.py
@@ -14,7 +14,7 @@ def test_01_read_dataset() -> None:
 def test_02_plot_feature_distribution() -> None:
     """Test for 02_plot_feature_distribution."""
     runpy.run_path(
-        os.path.join(EXAMPLE_PATH, "02_plot_feature_distribution.py")
+        os.path.join(EXAMPLE_PATH, "02_plot_feature_distributions.py")
     )
 

From 4b24ed5e45b07c5c63b23626c7da1acc688022f0 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Tue, 26 Sep 2023 11:56:50 +0200
Subject: [PATCH 053/156] fix unit test paths

---
 tests/examples/04_training/test_training_examples.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/examples/04_training/test_training_examples.py b/tests/examples/04_training/test_training_examples.py
index 3c049e6bb..086f0fed7 100644
--- a/tests/examples/04_training/test_training_examples.py
+++ b/tests/examples/04_training/test_training_examples.py
@@ -3,7 +3,7 @@
 import os
 from graphnet.constants import GRAPHNET_ROOT_DIR
 
-EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/04_data")
+EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/04_training")
 
 
 def test_01_train_dynedge() -> None:

From 6774e1e7cd9ae9125683311ec3c25434adfcdd07 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Tue, 26 Sep 2023 12:11:55 +0200
Subject: [PATCH 054/156] remove _common_icetray

---
 examples/01_icetray/03_i3_deployer_example.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/examples/01_icetray/03_i3_deployer_example.py b/examples/01_icetray/03_i3_deployer_example.py
index 5180fc7b7..c7c5358cf 100644
--- a/examples/01_icetray/03_i3_deployer_example.py
+++ b/examples/01_icetray/03_i3_deployer_example.py
@@ -23,7 +23,17 @@
     I3InferenceModule,
 )
 
-from _common_icetray import ERROR_MESSAGE_MISSING_ICETRAY
+ERROR_MESSAGE_MISSING_ICETRAY = (
+    "This example requires IceTray to be installed, which doesn't seem to be "
+    "the case. Please install IceTray; run this example in the GraphNeT "
+    "Docker container which comes with IceTray installed; or run an example "
+    "script in one of the other folders:"
+    "\n * examples/02_data/"
+    "\n * examples/03_weights/"
+    "\n * examples/04_training/"
+    "\n * examples/05_pisa/"
+    "\nExiting."
+) # Constants features = FEATURES.UPGRADE From 40b36eff90b64fb65d98392751867cd166294901 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 12:12:34 +0200 Subject: [PATCH 055/156] add error message --- examples/01_icetray/01_convert_i3_files.py | 12 +++++++++++- .../04_i3_module_in_native_icetray_example.py | 12 +++++++++++- examples/01_icetray/_common_icetray.py | 11 ----------- tests/examples/01_icetray/test_icetray_examples.py | 4 +--- 4 files changed, 23 insertions(+), 16 deletions(-) delete mode 100644 examples/01_icetray/_common_icetray.py diff --git a/examples/01_icetray/01_convert_i3_files.py b/examples/01_icetray/01_convert_i3_files.py index 40b304327..9d9f80033 100644 --- a/examples/01_icetray/01_convert_i3_files.py +++ b/examples/01_icetray/01_convert_i3_files.py @@ -16,7 +16,17 @@ from graphnet.utilities.imports import has_icecube_package from graphnet.utilities.logging import Logger -from _common_icetray import ERROR_MESSAGE_MISSING_ICETRAY +ERROR_MESSAGE_MISSING_ICETRAY = ( + "This example requires IceTray to be installed, which doesn't seem to be " + "the case. Please install IceTray; run this example in the GraphNeT " + "Docker container which comes with IceTray installed; or run an example " + "script in one of the other folders:" + "\n * examples/02_data/" + "\n * examples/03_weights/" + "\n * examples/04_training/" + "\n * examples/05_pisa/" + "\nExiting." +) CONVERTER_CLASS = { "sqlite": SQLiteDataConverter, diff --git a/examples/01_icetray/04_i3_module_in_native_icetray_example.py b/examples/01_icetray/04_i3_module_in_native_icetray_example.py index 730eb5bc1..957fb108d 100644 --- a/examples/01_icetray/04_i3_module_in_native_icetray_example.py +++ b/examples/01_icetray/04_i3_module_in_native_icetray_example.py @@ -26,7 +26,17 @@ GraphNeTI3Module, ) -from _common_icetray import ERROR_MESSAGE_MISSING_ICETRAY +ERROR_MESSAGE_MISSING_ICETRAY = ( + "This example requires IceTray to be installed, which doesn't seem to be " + "the case. Please install IceTray; run this example in the GraphNeT " + "Docker container which comes with IceTray installed; or run an example " + "script in one of the other folders:" + "\n * examples/02_data/" + "\n * examples/03_weights/" + "\n * examples/04_training/" + "\n * examples/05_pisa/" + "\nExiting." +) def apply_to_files( diff --git a/examples/01_icetray/_common_icetray.py b/examples/01_icetray/_common_icetray.py deleted file mode 100644 index c1d3b10a0..000000000 --- a/examples/01_icetray/_common_icetray.py +++ /dev/null @@ -1,11 +0,0 @@ -ERROR_MESSAGE_MISSING_ICETRAY = ( - "This example requires IceTray to be installed, which doesn't seem to be " - "the case. Please install IceTray; run this example in the GraphNeT " - "Docker container which comes with IceTray installed; or run an example " - "script in one of the other folders:" - "\n * examples/02_data/" - "\n * examples/03_weights/" - "\n * examples/04_training/" - "\n * examples/05_pisa/" - "\nExiting." 
-) diff --git a/tests/examples/01_icetray/test_icetray_examples.py b/tests/examples/01_icetray/test_icetray_examples.py index 538a67d36..ddd99423e 100644 --- a/tests/examples/01_icetray/test_icetray_examples.py +++ b/tests/examples/01_icetray/test_icetray_examples.py @@ -26,7 +26,5 @@ def test_03_i3_deployer_example() -> None: def test_04_i3_module_in_native_icetray_example() -> None: """Test for 04_i3_module_in_native_icetray_example.""" runpy.run_path( - os.path.join( - EXAMPLE_PATH, "04_i3_module_in_naticve_icetray_example.py" - ) + os.path.join(EXAMPLE_PATH, "04_i3_module_in_native_icetray_example.py") ) From 67621c087e8a248d0db8bf6ea8cd6ba287fc36bb Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 12:17:56 +0200 Subject: [PATCH 056/156] add error message to 02 --- examples/01_icetray/02_compare_sqlite_and_parquet.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/examples/01_icetray/02_compare_sqlite_and_parquet.py b/examples/01_icetray/02_compare_sqlite_and_parquet.py index 91bbe6f1d..75d6c18ba 100644 --- a/examples/01_icetray/02_compare_sqlite_and_parquet.py +++ b/examples/01_icetray/02_compare_sqlite_and_parquet.py @@ -16,7 +16,17 @@ from graphnet.utilities.imports import has_icecube_package from graphnet.utilities.logging import Logger -from _common_icetray import ERROR_MESSAGE_MISSING_ICETRAY +ERROR_MESSAGE_MISSING_ICETRAY = ( + "This example requires IceTray to be installed, which doesn't seem to be " + "the case. Please install IceTray; run this example in the GraphNeT " + "Docker container which comes with IceTray installed; or run an example " + "script in one of the other folders:" + "\n * examples/02_data/" + "\n * examples/03_weights/" + "\n * examples/04_training/" + "\n * examples/05_pisa/" + "\nExiting." 
+) OUTPUT_DIR = f"{EXAMPLE_OUTPUT_DIR}/compare_sqlite_and_parquet" PULSEMAP = "SRTInIcePulses" From 9d2eb14695965569cede9fc4d4cb5b8270320cbb Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 12:29:13 +0200 Subject: [PATCH 057/156] add func calls to new unit tests --- tests/examples/01_icetray/test_icetray_examples.py | 7 +++++++ tests/examples/02_data/test_data_examples.py | 7 +++++++ tests/examples/03_weights/test_weights_examples.py | 5 +++++ tests/examples/04_training/test_training_examples.py | 7 +++++++ 4 files changed, 26 insertions(+) diff --git a/tests/examples/01_icetray/test_icetray_examples.py b/tests/examples/01_icetray/test_icetray_examples.py index ddd99423e..613e90bf9 100644 --- a/tests/examples/01_icetray/test_icetray_examples.py +++ b/tests/examples/01_icetray/test_icetray_examples.py @@ -28,3 +28,10 @@ def test_04_i3_module_in_native_icetray_example() -> None: runpy.run_path( os.path.join(EXAMPLE_PATH, "04_i3_module_in_native_icetray_example.py") ) + + +if __name__ == "__main__": + test_01_convert_i3_files() + test_02_compare_sqlite_and_parquet() + test_03_i3_deployer_example() + test_04_i3_module_in_native_icetray_example() diff --git a/tests/examples/02_data/test_data_examples.py b/tests/examples/02_data/test_data_examples.py index 2d076151a..434e60e23 100644 --- a/tests/examples/02_data/test_data_examples.py +++ b/tests/examples/02_data/test_data_examples.py @@ -28,3 +28,10 @@ def test_03_convert_parquet_to_sqlite() -> None: def test_04_ensemble_dataset() -> None: """Test for 04_ensemble_dataset.""" runpy.run_path(os.path.join(EXAMPLE_PATH, "04_ensemble_dataset.py")) + + +if __name__ == "__main__": + test_01_read_dataset() + test_02_plot_feature_distribution() + test_03_convert_parquet_to_sqlite() + test_04_ensemble_dataset() diff --git a/tests/examples/03_weights/test_weights_examples.py b/tests/examples/03_weights/test_weights_examples.py index 8715fefc6..a810d0236 100644 --- a/tests/examples/03_weights/test_weights_examples.py +++ b/tests/examples/03_weights/test_weights_examples.py @@ -14,3 +14,8 @@ def test_01_fit_uniform_weights() -> None: def test_02_fit_bjoern_low_weights() -> None: """Test for 02_fit_bjoern_low_weights.""" runpy.run_path(os.path.join(EXAMPLE_PATH, "02_fit_bjoern_low_weights.py")) + + +if __name__ == "__main__": + test_01_fit_uniform_weights() + test_02_fit_bjoern_low_weights() diff --git a/tests/examples/04_training/test_training_examples.py b/tests/examples/04_training/test_training_examples.py index 086f0fed7..0fbaee52c 100644 --- a/tests/examples/04_training/test_training_examples.py +++ b/tests/examples/04_training/test_training_examples.py @@ -28,3 +28,10 @@ def test_04_train_multiclassifier_from_configs() -> None: runpy.run_path( os.path.join(EXAMPLE_PATH, "04_train_multiclassifier_from_configs.py") ) + + +if __name__ == "__main__": + test_01_train_dynedge() + test_02_train_tito_model() + test_03_train_dynedge_from_config() + test_04_train_multiclassifier_from_configs() From 1ca3e879421e6a11193570da2b996ed849170423 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 12:48:45 +0200 Subject: [PATCH 058/156] set run_name to __main__ for runpy --- .../01_icetray/test_icetray_examples.py | 18 ++++++++++++++---- tests/examples/02_data/test_data_examples.py | 15 +++++++++++---- .../03_weights/test_weights_examples.py | 10 ++++++++-- .../04_training/test_training_examples.py | 15 +++++++++++---- 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/tests/examples/01_icetray/test_icetray_examples.py 
b/tests/examples/01_icetray/test_icetray_examples.py index 613e90bf9..630f43f6f 100644 --- a/tests/examples/01_icetray/test_icetray_examples.py +++ b/tests/examples/01_icetray/test_icetray_examples.py @@ -8,25 +8,35 @@ def test_01_convert_i3_files() -> None: """Test for 01_convert_i3_files.""" - runpy.run_path(os.path.join(EXAMPLE_PATH, "01_convert_i3_files.py")) + runpy.run_path( + os.path.join(EXAMPLE_PATH, "01_convert_i3_files.py"), + run_name="__main__", + ) def test_02_compare_sqlite_and_parquet() -> None: """Test for 02_compare_sqlite_and_parquet.""" runpy.run_path( - os.path.join(EXAMPLE_PATH, "02_compare_sqlite_and_parquet.py") + os.path.join(EXAMPLE_PATH, "02_compare_sqlite_and_parquet.py"), + run_name="__main__", ) def test_03_i3_deployer_example() -> None: """Test for 03_i3_deployer_example.""" - runpy.run_path(os.path.join(EXAMPLE_PATH, "03_i3_deployer_example.py")) + runpy.run_path( + os.path.join(EXAMPLE_PATH, "03_i3_deployer_example.py"), + run_name="__main__", + ) def test_04_i3_module_in_native_icetray_example() -> None: """Test for 04_i3_module_in_native_icetray_example.""" runpy.run_path( - os.path.join(EXAMPLE_PATH, "04_i3_module_in_native_icetray_example.py") + os.path.join( + EXAMPLE_PATH, "04_i3_module_in_native_icetray_example.py" + ), + run_name="__main__", ) diff --git a/tests/examples/02_data/test_data_examples.py b/tests/examples/02_data/test_data_examples.py index 434e60e23..4e1a9b40e 100644 --- a/tests/examples/02_data/test_data_examples.py +++ b/tests/examples/02_data/test_data_examples.py @@ -8,26 +8,33 @@ def test_01_read_dataset() -> None: """Test for 01_read_dataset.""" - runpy.run_path(os.path.join(EXAMPLE_PATH, "01_read_dataset.py")) + runpy.run_path( + os.path.join(EXAMPLE_PATH, "01_read_dataset.py"), run_name="__main__" + ) def test_02_plot_feature_distribution() -> None: """Test for 02_plot_feature_distribution.""" runpy.run_path( - os.path.join(EXAMPLE_PATH, "02_plot_feature_distributions.py") + os.path.join(EXAMPLE_PATH, "02_plot_feature_distributions.py"), + run_name="__main__", ) def test_03_convert_parquet_to_sqlite() -> None: """Test for 03_convert_parquet_to_sqlite.""" runpy.run_path( - os.path.join(EXAMPLE_PATH, "03_convert_parquet_to_sqlite.py") + os.path.join(EXAMPLE_PATH, "03_convert_parquet_to_sqlite.py"), + run_name="__main__", ) def test_04_ensemble_dataset() -> None: """Test for 04_ensemble_dataset.""" - runpy.run_path(os.path.join(EXAMPLE_PATH, "04_ensemble_dataset.py")) + runpy.run_path( + os.path.join(EXAMPLE_PATH, "04_ensemble_dataset.py"), + run_name="__main__", + ) if __name__ == "__main__": diff --git a/tests/examples/03_weights/test_weights_examples.py b/tests/examples/03_weights/test_weights_examples.py index a810d0236..7ab580502 100644 --- a/tests/examples/03_weights/test_weights_examples.py +++ b/tests/examples/03_weights/test_weights_examples.py @@ -8,12 +8,18 @@ def test_01_fit_uniform_weights() -> None: """Test for 01_fit_uniform_weights.""" - runpy.run_path(os.path.join(EXAMPLE_PATH, "01_fit_uniform_weights.py")) + runpy.run_path( + os.path.join(EXAMPLE_PATH, "01_fit_uniform_weights.py"), + run_name="__main__", + ) def test_02_fit_bjoern_low_weights() -> None: """Test for 02_fit_bjoern_low_weights.""" - runpy.run_path(os.path.join(EXAMPLE_PATH, "02_fit_bjoern_low_weights.py")) + runpy.run_path( + os.path.join(EXAMPLE_PATH, "02_fit_bjoern_low_weights.py"), + run_name="__main__", + ) if __name__ == "__main__": diff --git a/tests/examples/04_training/test_training_examples.py 
b/tests/examples/04_training/test_training_examples.py index 0fbaee52c..4df5ba636 100644 --- a/tests/examples/04_training/test_training_examples.py +++ b/tests/examples/04_training/test_training_examples.py @@ -8,25 +8,32 @@ def test_01_train_dynedge() -> None: """Test for 01_train_dynedge.""" - runpy.run_path(os.path.join(EXAMPLE_PATH, "01_train_dynedge.py")) + runpy.run_path( + os.path.join(EXAMPLE_PATH, "01_train_dynedge.py"), run_name="__main__" + ) def test_02_train_tito_model() -> None: """Test for 02_train_tito_model.""" - runpy.run_path(os.path.join(EXAMPLE_PATH, "02_train_tito_model.py")) + runpy.run_path( + os.path.join(EXAMPLE_PATH, "02_train_tito_model.py"), + run_name="__main__", + ) def test_03_train_dynedge_from_config() -> None: """Test for 03_train_dynedge_from_config.""" runpy.run_path( - os.path.join(EXAMPLE_PATH, "03_train_dynedge_from_config.py") + os.path.join(EXAMPLE_PATH, "03_train_dynedge_from_config.py"), + run_name="__main__", ) def test_04_train_multiclassifier_from_configs() -> None: """Test for 04_train_multiclassifier_from_configs.""" runpy.run_path( - os.path.join(EXAMPLE_PATH, "04_train_multiclassifier_from_configs.py") + os.path.join(EXAMPLE_PATH, "04_train_multiclassifier_from_configs.py"), + run_name="__main__", ) From 2b22ce78bc8f82990bfd2c27c22932b8cbdc3faa Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 13:23:46 +0200 Subject: [PATCH 059/156] simplify unit tests --- .../01_icetray/test_icetray_examples.py | 46 ++++--------------- tests/examples/02_data/test_data_examples.py | 44 ++++-------------- .../03_weights/test_weights_examples.py | 25 +++------- .../04_training/test_training_examples.py | 42 ++++------------- 4 files changed, 31 insertions(+), 126 deletions(-) diff --git a/tests/examples/01_icetray/test_icetray_examples.py b/tests/examples/01_icetray/test_icetray_examples.py index 630f43f6f..cbdf470fd 100644 --- a/tests/examples/01_icetray/test_icetray_examples.py +++ b/tests/examples/01_icetray/test_icetray_examples.py @@ -1,47 +1,17 @@ """Test for examples in 01_icetray.""" import runpy import os +import pytest +from glob import glob + from graphnet.constants import GRAPHNET_ROOT_DIR EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/01_icetray") - -def test_01_convert_i3_files() -> None: - """Test for 01_convert_i3_files.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "01_convert_i3_files.py"), - run_name="__main__", - ) - - -def test_02_compare_sqlite_and_parquet() -> None: - """Test for 02_compare_sqlite_and_parquet.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "02_compare_sqlite_and_parquet.py"), - run_name="__main__", - ) - - -def test_03_i3_deployer_example() -> None: - """Test for 03_i3_deployer_example.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "03_i3_deployer_example.py"), - run_name="__main__", - ) - - -def test_04_i3_module_in_native_icetray_example() -> None: - """Test for 04_i3_module_in_native_icetray_example.""" - runpy.run_path( - os.path.join( - EXAMPLE_PATH, "04_i3_module_in_native_icetray_example.py" - ), - run_name="__main__", - ) +examples = glob(EXAMPLE_PATH + "/*.py") -if __name__ == "__main__": - test_01_convert_i3_files() - test_02_compare_sqlite_and_parquet() - test_03_i3_deployer_example() - test_04_i3_module_in_native_icetray_example() +@pytest.mark.parametrize("example", examples) +def test_script_execution(example: str) -> None: + """Test function that executes example.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, example)) diff --git 
a/tests/examples/02_data/test_data_examples.py b/tests/examples/02_data/test_data_examples.py index 4e1a9b40e..8faaca5c0 100644 --- a/tests/examples/02_data/test_data_examples.py +++ b/tests/examples/02_data/test_data_examples.py @@ -1,44 +1,16 @@ """Tests for examples in 02_data.""" import runpy import os +import pytest +from glob import glob + from graphnet.constants import GRAPHNET_ROOT_DIR EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/02_data") +examples = glob(EXAMPLE_PATH + "/*.py") -def test_01_read_dataset() -> None: - """Test for 01_read_dataset.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "01_read_dataset.py"), run_name="__main__" - ) - - -def test_02_plot_feature_distribution() -> None: - """Test for 02_plot_feature_distribution.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "02_plot_feature_distributions.py"), - run_name="__main__", - ) - - -def test_03_convert_parquet_to_sqlite() -> None: - """Test for 03_convert_parquet_to_sqlite.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "03_convert_parquet_to_sqlite.py"), - run_name="__main__", - ) - - -def test_04_ensemble_dataset() -> None: - """Test for 04_ensemble_dataset.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "04_ensemble_dataset.py"), - run_name="__main__", - ) - - -if __name__ == "__main__": - test_01_read_dataset() - test_02_plot_feature_distribution() - test_03_convert_parquet_to_sqlite() - test_04_ensemble_dataset() +@pytest.mark.parametrize("example", examples) +def test_script_execution(example: str) -> None: + """Test function that executes example.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, example)) diff --git a/tests/examples/03_weights/test_weights_examples.py b/tests/examples/03_weights/test_weights_examples.py index 7ab580502..5dddee264 100644 --- a/tests/examples/03_weights/test_weights_examples.py +++ b/tests/examples/03_weights/test_weights_examples.py @@ -2,26 +2,15 @@ import runpy import os from graphnet.constants import GRAPHNET_ROOT_DIR +from glob import glob +import pytest EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/03_weights") +examples = glob(EXAMPLE_PATH + "/*.py") -def test_01_fit_uniform_weights() -> None: - """Test for 01_fit_uniform_weights.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "01_fit_uniform_weights.py"), - run_name="__main__", - ) - -def test_02_fit_bjoern_low_weights() -> None: - """Test for 02_fit_bjoern_low_weights.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "02_fit_bjoern_low_weights.py"), - run_name="__main__", - ) - - -if __name__ == "__main__": - test_01_fit_uniform_weights() - test_02_fit_bjoern_low_weights() +@pytest.mark.parametrize("example", examples) +def test_script_execution(example: str) -> None: + """Test function that executes example.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, example)) diff --git a/tests/examples/04_training/test_training_examples.py b/tests/examples/04_training/test_training_examples.py index 4df5ba636..87cb955e4 100644 --- a/tests/examples/04_training/test_training_examples.py +++ b/tests/examples/04_training/test_training_examples.py @@ -1,44 +1,18 @@ """Test for examples in 04_training.""" import runpy import os +from glob import glob +import pytest + from graphnet.constants import GRAPHNET_ROOT_DIR EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/04_training") -def test_01_train_dynedge() -> None: - """Test for 01_train_dynedge.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "01_train_dynedge.py"), run_name="__main__" - ) - - -def test_02_train_tito_model() -> None: - 
"""Test for 02_train_tito_model.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "02_train_tito_model.py"), - run_name="__main__", - ) - - -def test_03_train_dynedge_from_config() -> None: - """Test for 03_train_dynedge_from_config.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "03_train_dynedge_from_config.py"), - run_name="__main__", - ) - - -def test_04_train_multiclassifier_from_configs() -> None: - """Test for 04_train_multiclassifier_from_configs.""" - runpy.run_path( - os.path.join(EXAMPLE_PATH, "04_train_multiclassifier_from_configs.py"), - run_name="__main__", - ) +examples = glob(EXAMPLE_PATH + "/*.py") -if __name__ == "__main__": - test_01_train_dynedge() - test_02_train_tito_model() - test_03_train_dynedge_from_config() - test_04_train_multiclassifier_from_configs() +@pytest.mark.parametrize("example", examples) +def test_script_execution(example: str) -> None: + """Test function that executes example.""" + runpy.run_path(os.path.join(EXAMPLE_PATH, example)) From d3e34fd96a78ddf3be9f36868361cf532179aeb9 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 13:57:06 +0200 Subject: [PATCH 060/156] parse only known args in examples --- examples/01_icetray/01_convert_i3_files.py | 2 +- examples/01_icetray/02_compare_sqlite_and_parquet.py | 2 +- examples/01_icetray/03_i3_deployer_example.py | 2 +- examples/01_icetray/04_i3_module_in_native_icetray_example.py | 2 +- examples/02_data/01_read_dataset.py | 2 +- examples/02_data/02_plot_feature_distributions.py | 2 +- examples/02_data/03_convert_parquet_to_sqlite.py | 2 +- examples/02_data/04_ensemble_dataset.py | 1 + examples/03_weights/01_fit_uniform_weights.py | 2 +- examples/03_weights/02_fit_bjoern_low_weights.py | 2 +- examples/04_training/01_train_dynedge.py | 2 +- examples/04_training/02_train_tito_model.py | 2 +- examples/04_training/03_train_dynedge_from_config.py | 2 +- examples/04_training/04_train_multiclassifier_from_configs.py | 2 +- tests/examples/04_training/test_training_examples.py | 2 +- 15 files changed, 15 insertions(+), 14 deletions(-) diff --git a/examples/01_icetray/01_convert_i3_files.py b/examples/01_icetray/01_convert_i3_files.py index 9d9f80033..88dcf714a 100644 --- a/examples/01_icetray/01_convert_i3_files.py +++ b/examples/01_icetray/01_convert_i3_files.py @@ -99,7 +99,7 @@ def main_icecube_upgrade(backend: str) -> None: "detector", choices=["icecube-86", "icecube-upgrade"] ) - args = parser.parse_args() + args, unknown = parser.parse_known_args() # Run example script if args.detector == "icecube-86": diff --git a/examples/01_icetray/02_compare_sqlite_and_parquet.py b/examples/01_icetray/02_compare_sqlite_and_parquet.py index 75d6c18ba..99250d4b0 100644 --- a/examples/01_icetray/02_compare_sqlite_and_parquet.py +++ b/examples/01_icetray/02_compare_sqlite_and_parquet.py @@ -95,7 +95,7 @@ def load_data() -> None: """ ) - args = parser.parse_args() + args, unknown = parser.parse_known_args() # Run example script(s) convert_data() diff --git a/examples/01_icetray/03_i3_deployer_example.py b/examples/01_icetray/03_i3_deployer_example.py index c7c5358cf..f55aa769c 100644 --- a/examples/01_icetray/03_i3_deployer_example.py +++ b/examples/01_icetray/03_i3_deployer_example.py @@ -93,7 +93,7 @@ def main() -> None: """ ) - args = parser.parse_args() + args, unknown = parser.parse_known_args() # Run example script main() diff --git a/examples/01_icetray/04_i3_module_in_native_icetray_example.py b/examples/01_icetray/04_i3_module_in_native_icetray_example.py index 957fb108d..74da5e499 100644 --- 
a/examples/01_icetray/04_i3_module_in_native_icetray_example.py
+++ b/examples/01_icetray/04_i3_module_in_native_icetray_example.py
@@ -126,7 +126,7 @@ def main() -> None:
     """
     )
 
-    args = parser.parse_args()
+    args, unknown = parser.parse_known_args()
 
     # Run example script
     main()
diff --git a/examples/02_data/01_read_dataset.py b/examples/02_data/01_read_dataset.py
index de8fecb18..77d7c1438 100644
--- a/examples/02_data/01_read_dataset.py
+++ b/examples/02_data/01_read_dataset.py
@@ -121,6 +121,6 @@ def main(backend: str) -> None:
         nargs="?",
     )
 
-    args = parser.parse_args()
+    args, unknown = parser.parse_known_args()
 
     main(args.backend)
diff --git a/examples/02_data/02_plot_feature_distributions.py b/examples/02_data/02_plot_feature_distributions.py
index b46be0623..ac08ae9fb 100644
--- a/examples/02_data/02_plot_feature_distributions.py
+++ b/examples/02_data/02_plot_feature_distributions.py
@@ -66,6 +66,6 @@ def main() -> None:
     """
     )
 
-    args = parser.parse_args()
+    args, unknown = parser.parse_known_args()
 
     main()
diff --git a/examples/02_data/03_convert_parquet_to_sqlite.py b/examples/02_data/03_convert_parquet_to_sqlite.py
index 12a33d181..5757bb5c9 100644
--- a/examples/02_data/03_convert_parquet_to_sqlite.py
+++ b/examples/02_data/03_convert_parquet_to_sqlite.py
@@ -57,6 +57,6 @@ def main(parquet_path: str, mc_truth_table: str) -> None:
         default="truth",
     )
 
-    args = parser.parse_args()
+    args, unknown = parser.parse_known_args()
 
     main(args.parquet_path, args.mc_truth_table)
diff --git a/examples/02_data/04_ensemble_dataset.py b/examples/02_data/04_ensemble_dataset.py
index 13998c3e7..f1cc9de68 100644
--- a/examples/02_data/04_ensemble_dataset.py
+++ b/examples/02_data/04_ensemble_dataset.py
@@ -88,4 +88,5 @@ def main() -> None:
         Combine multiple Datasets using EnsembleDataset.
""" ) + args, unknown = parser.parse_known_args() main() diff --git a/examples/03_weights/01_fit_uniform_weights.py b/examples/03_weights/01_fit_uniform_weights.py index 629e8497b..e2f68487a 100644 --- a/examples/03_weights/01_fit_uniform_weights.py +++ b/examples/03_weights/01_fit_uniform_weights.py @@ -37,6 +37,6 @@ def main() -> None: """ ) - args = parser.parse_args() + args, unknown = parser.parse_known_args() main() diff --git a/examples/03_weights/02_fit_bjoern_low_weights.py b/examples/03_weights/02_fit_bjoern_low_weights.py index cde075812..4d54e8c7d 100644 --- a/examples/03_weights/02_fit_bjoern_low_weights.py +++ b/examples/03_weights/02_fit_bjoern_low_weights.py @@ -45,6 +45,6 @@ def main() -> None: """ ) - args = parser.parse_args() + args, unknown = parser.parse_known_args() main() diff --git a/examples/04_training/01_train_dynedge.py b/examples/04_training/01_train_dynedge.py index f9af11998..7cde873b2 100644 --- a/examples/04_training/01_train_dynedge.py +++ b/examples/04_training/01_train_dynedge.py @@ -225,7 +225,7 @@ def main( help="If True, Weights & Biases are used to track the experiment.", ) - args = parser.parse_args() + args, unknown = parser.parse_known_args() main( args.path, diff --git a/examples/04_training/02_train_tito_model.py b/examples/04_training/02_train_tito_model.py index f3d60d553..735dea055 100644 --- a/examples/04_training/02_train_tito_model.py +++ b/examples/04_training/02_train_tito_model.py @@ -235,7 +235,7 @@ def main( help="If True, Weights & Biases are used to track the experiment.", ) - args = parser.parse_args() + args, unknown = parser.parse_known_args() main( args.path, diff --git a/examples/04_training/03_train_dynedge_from_config.py b/examples/04_training/03_train_dynedge_from_config.py index 1dec95961..4b4db17f8 100644 --- a/examples/04_training/03_train_dynedge_from_config.py +++ b/examples/04_training/03_train_dynedge_from_config.py @@ -157,7 +157,7 @@ def main( help="If True, Weights & Biases are used to track the experiment.", ) - args = parser.parse_args() + args, unknown = parser.parse_known_args() main( args.dataset_config, diff --git a/examples/04_training/04_train_multiclassifier_from_configs.py b/examples/04_training/04_train_multiclassifier_from_configs.py index 6385c010c..e71813b24 100644 --- a/examples/04_training/04_train_multiclassifier_from_configs.py +++ b/examples/04_training/04_train_multiclassifier_from_configs.py @@ -188,7 +188,7 @@ def main( default=None, ) - args = parser.parse_args() + args, unknown = parser.parse_known_args() main( args.dataset_config, diff --git a/tests/examples/04_training/test_training_examples.py b/tests/examples/04_training/test_training_examples.py index 87cb955e4..e97b4bb3c 100644 --- a/tests/examples/04_training/test_training_examples.py +++ b/tests/examples/04_training/test_training_examples.py @@ -15,4 +15,4 @@ @pytest.mark.parametrize("example", examples) def test_script_execution(example: str) -> None: """Test function that executes example.""" - runpy.run_path(os.path.join(EXAMPLE_PATH, example)) + runpy.run_path(os.path.join(EXAMPLE_PATH, example), run_name="__main__") From 6e1cf3c8959b459fe0495caee6cf9c1a901cf4b6 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 14:17:50 +0200 Subject: [PATCH 061/156] fix multiclassification config --- configs/models/dynedge_PID_classification_example.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/configs/models/dynedge_PID_classification_example.yml b/configs/models/dynedge_PID_classification_example.yml index 
4b2fd0246..57fec3e88 100644 --- a/configs/models/dynedge_PID_classification_example.yml +++ b/configs/models/dynedge_PID_classification_example.yml @@ -38,6 +38,7 @@ arguments: - ModelConfig: arguments: nb_outputs: 3 # number of classes + prediction_labels: ['noise', 'muon', 'neutrino'] hidden_size: 128 loss_function: ModelConfig: From 4376fb049b406434c20f233c3973a75d2f01b55e Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 14:25:58 +0200 Subject: [PATCH 062/156] Comment out save_config statement in 02_train_tito --- examples/04_training/02_train_tito_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/04_training/02_train_tito_model.py b/examples/04_training/02_train_tito_model.py index 735dea055..f586b0581 100644 --- a/examples/04_training/02_train_tito_model.py +++ b/examples/04_training/02_train_tito_model.py @@ -182,7 +182,8 @@ def main( # Save model config and state dict - Version safe save method. model.save_state_dict(f"{path}/state_dict.pth") - model.save_config(f"{path}/model_config.yml") + # model.save_config(f"{path}/model_config.yml") + # Pending https://github.com/graphnet-team/graphnet/issues/606 if __name__ == "__main__": From 123831a17c5c3c86dc37349bcb042ecb493cdb33 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 14:50:57 +0200 Subject: [PATCH 063/156] seperate coverage run for utilities --- .github/workflows/build.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6998ce22f..988f6a4e9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,7 +55,8 @@ jobs: editable: true - name: Run unit tests and generate coverage report run: | - coverage run --source=graphnet -m pytest tests/ + coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities + coverage run --source=graphnet -m pytest tests/utilities coverage xml -o coverage.xml - name: Work around permission issue run: | @@ -88,7 +89,8 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/01_icetray/ + coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/01_icetray/ + coverage run --source=graphnet -m pytest tests/utilities coverage report -m build-macos: @@ -108,5 +110,6 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/01_icetray/ + coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/01_icetray/ + coverage run --source=graphnet -m pytest tests/utilities coverage report -m From 9f912a682c83fb487e96edfe8a14883dff3c003f Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 15:08:50 +0200 Subject: [PATCH 064/156] change run priorities --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 988f6a4e9..465c1bc1b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,8 +55,8 @@ jobs: editable: true - name: Run unit tests and generate coverage 
report run: | - coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities - coverage run --source=graphnet -m pytest tests/utilities + coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples + coverage run --source=graphnet -m pytest tests/examples/01_icetray coverage xml -o coverage.xml - name: Work around permission issue run: | @@ -89,7 +89,7 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/01_icetray/ + coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/01_icetray/ coverage run --source=graphnet -m pytest tests/utilities coverage report -m @@ -110,6 +110,6 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/01_icetray/ + coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/01_icetray/ coverage run --source=graphnet -m pytest tests/utilities coverage report -m From 5a5317bc21868d2ceaf318fb8be22b6c561b6138 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 26 Sep 2023 15:43:49 +0200 Subject: [PATCH 065/156] remove unit tests of examples for macos. --- .github/workflows/build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 465c1bc1b..001b8bed4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -110,6 +110,5 @@ jobs: - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/01_icetray/ - coverage run --source=graphnet -m pytest tests/utilities + coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/ coverage report -m From 5c4260f8fd3569b8d96506084afc4975a0d759da Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Wed, 27 Sep 2023 09:56:05 +0900 Subject: [PATCH 066/156] add to init --- src/graphnet/data/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/graphnet/data/__init__.py b/src/graphnet/data/__init__.py index 1eca4f6cd..fbb1ee095 100644 --- a/src/graphnet/data/__init__.py +++ b/src/graphnet/data/__init__.py @@ -3,3 +3,4 @@ `graphnet.data` enables converting domain-specific data to industry-standard, intermediate file formats and reading this data. 
""" +from .filters import I3Filter, I3FilterMask From 83cce2a1de1ecd4312ade1d6a24a8e265728818b Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Wed, 27 Sep 2023 10:11:59 +0900 Subject: [PATCH 067/156] optional icetray dependency --- src/graphnet/data/filters.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/graphnet/data/filters.py b/src/graphnet/data/filters.py index f862ea197..21a31e429 100644 --- a/src/graphnet/data/filters.py +++ b/src/graphnet/data/filters.py @@ -2,14 +2,18 @@ from abc import abstractmethod from graphnet.utilities.logging import Logger from typing import List -from icecube import icetray + +from graphnet.utilities.imports import has_icecube_package + +if has_icecube_package(): + from icecube import icetray class I3Filter(Logger): """A generic filter for I3-frames.""" @abstractmethod - def _pass_frame(self, frame: icetray.I3Frame) -> bool: + def _pass_frame(self, frame: "icetray.I3Frame") -> bool: """Return True if the frame passes the filter, False otherwise. Args: @@ -21,7 +25,7 @@ def _pass_frame(self, frame: icetray.I3Frame) -> bool: """ raise NotImplementedError - def __call__(self, frame: icetray.I3Frame) -> bool: + def __call__(self, frame: "icetray.I3Frame") -> bool: """Return True if the frame passes the filter, False otherwise. Args: @@ -44,7 +48,7 @@ def __call__(self, frame: icetray.I3Frame) -> bool: class NullSplitI3Filter(I3Filter): """A filter that skips all null-split frames.""" - def _keep_frame(self, frame: icetray.I3Frame) -> bool: + def _keep_frame(self, frame: "icetray.I3Frame") -> bool: """Check that frame is not a null-split frame. returns False if the frame is a null-split frame, True otherwise. @@ -76,7 +80,7 @@ def __init__(self, filter_names: List[str], filter_any: bool = True): self._filter_names = filter_names self._filter_any = filter_any - def _keep_frame(self, frame: icetray.I3Frame) -> bool: + def _keep_frame(self, frame: "icetray.I3Frame") -> bool: """Check if current frame should be kept. Args: From b091cbc84978c965d811a3f55c682ebd805e4d73 Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Thu, 28 Sep 2023 09:58:31 +0900 Subject: [PATCH 068/156] typo-fix --- src/graphnet/data/dataconverter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index 1e8b64271..eb42dc7a1 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -109,7 +109,7 @@ def __init__( workers: int = 1, index_column: str = "event_no", icetray_verbose: int = 0, - i3_Filters: Union[I3Filter, List[Callable]] = [NullSplitI3Filter], + I3_Filters: Union[I3Filter, List[Callable]] = [NullSplitI3Filter], ): """Construct DataConverter. @@ -169,10 +169,10 @@ def __init__( self._sequential_batch_pattern = sequential_batch_pattern self._input_file_batch_pattern = input_file_batch_pattern self._workers = workers - if isinstance(i3_Filters, I3Filter): - I3_Filters = [i3_Filters] - self._I3filters = I3_Filters - for filter in self._I3filters: + if isinstance(I3_Filters, I3Filter): + I3_Filters = [I3_Filters] + self._I3Filters = I3_Filters + for filter in self._I3Filters: assert isinstance( filter, I3Filter ), f"{type(filter)} is not a subclass of I3Filter" @@ -571,10 +571,10 @@ def _skip_frame(self, frame: "icetray.I3Frame") -> bool: Returns: bool: True if frame should be skipped, False otherwise. 
""" - if self._I3filters is None: + if self._I3Filters is None: return False # No filters defined, so we keep the frame - for filter in self._I3filters: + for filter in self._I3Filters: if not filter(frame): return True # keep_frame call false, skip the frame. return False # All filter keep_frame calls true, keep the frame. From 12aec80e01acb2dd88362f8391b85de653d7b98d Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Thu, 28 Sep 2023 11:22:12 +0900 Subject: [PATCH 069/156] refactoring/bugfixing --- src/graphnet/data/dataconverter.py | 10 +++++----- src/graphnet/data/filters.py | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index eb42dc7a1..0f12e7ab5 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -21,7 +21,6 @@ TypeVar, Union, cast, - Sequence, ) import numpy as np @@ -109,7 +108,7 @@ def __init__( workers: int = 1, index_column: str = "event_no", icetray_verbose: int = 0, - I3_Filters: Union[I3Filter, List[Callable]] = [NullSplitI3Filter], + I3_Filters: List[I3Filter] = [], ): """Construct DataConverter. @@ -169,9 +168,10 @@ def __init__( self._sequential_batch_pattern = sequential_batch_pattern self._input_file_batch_pattern = input_file_batch_pattern self._workers = workers - if isinstance(I3_Filters, I3Filter): - I3_Filters = [I3_Filters] - self._I3Filters = I3_Filters + + # I3Filters (NullSplitI3Filter is always included) + self._I3Filters = [NullSplitI3Filter()] + I3_Filters + for filter in self._I3Filters: assert isinstance( filter, I3Filter diff --git a/src/graphnet/data/filters.py b/src/graphnet/data/filters.py index 21a31e429..ca83f4217 100644 --- a/src/graphnet/data/filters.py +++ b/src/graphnet/data/filters.py @@ -13,15 +13,15 @@ class I3Filter(Logger): """A generic filter for I3-frames.""" @abstractmethod - def _pass_frame(self, frame: "icetray.I3Frame") -> bool: - """Return True if the frame passes the filter, False otherwise. + def _keep_frame(self, frame: "icetray.I3Frame") -> bool: + """Return True if the frame is kept, False otherwise. Args: frame: I3-frame The I3-frame to check. Returns: - bool: True if the frame passes the filter, False otherwise. + bool: True if the frame is kept, False otherwise. """ raise NotImplementedError @@ -35,7 +35,7 @@ def __call__(self, frame: "icetray.I3Frame") -> bool: Returns: bool: True if the frame passes the filter, False otherwise. """ - pass_flag = self._pass_frame(frame) + pass_flag = self._keep_frame(frame) try: assert isinstance(pass_flag, bool) except AssertionError: From 8c21d2e57738d12c75d19eb2c159b2d43ab429cd Mon Sep 17 00:00:00 2001 From: Aske-Rosted Date: Thu, 28 Sep 2023 16:44:29 +0900 Subject: [PATCH 070/156] snake-case --- src/graphnet/data/dataconverter.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/graphnet/data/dataconverter.py b/src/graphnet/data/dataconverter.py index 0f12e7ab5..41cec5eec 100644 --- a/src/graphnet/data/dataconverter.py +++ b/src/graphnet/data/dataconverter.py @@ -108,7 +108,7 @@ def __init__( workers: int = 1, index_column: str = "event_no", icetray_verbose: int = 0, - I3_Filters: List[I3Filter] = [], + i3_filters: List[I3Filter] = [], ): """Construct DataConverter. 
@@ -170,9 +170,9 @@ def __init__( self._workers = workers # I3Filters (NullSplitI3Filter is always included) - self._I3Filters = [NullSplitI3Filter()] + I3_Filters + self._i3filters = [NullSplitI3Filter()] + i3_filters - for filter in self._I3Filters: + for filter in self._i3filters: assert isinstance( filter, I3Filter ), f"{type(filter)} is not a subclass of I3Filter" @@ -571,10 +571,10 @@ def _skip_frame(self, frame: "icetray.I3Frame") -> bool: Returns: bool: True if frame should be skipped, False otherwise. """ - if self._I3Filters is None: + if self._i3filters is None: return False # No filters defined, so we keep the frame - for filter in self._I3Filters: + for filter in self._i3filters: if not filter(frame): return True # keep_frame call false, skip the frame. return False # All filter keep_frame calls true, keep the frame. From 27ebae94c6261b2cfbb521628ca462acc2bce303 Mon Sep 17 00:00:00 2001 From: AMHermansen Date: Fri, 29 Sep 2023 10:10:21 +0200 Subject: [PATCH 071/156] bugfix tito features_subset. --- src/graphnet/models/gnn/dynedge_kaggle_tito.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graphnet/models/gnn/dynedge_kaggle_tito.py b/src/graphnet/models/gnn/dynedge_kaggle_tito.py index d3196dd30..4a4662256 100644 --- a/src/graphnet/models/gnn/dynedge_kaggle_tito.py +++ b/src/graphnet/models/gnn/dynedge_kaggle_tito.py @@ -35,7 +35,7 @@ class DynEdgeTITO(GNN): def __init__( self, nb_inputs: int, - features_subset: slice = slice(0, 4), + features_subset: List[int] = None, dyntrans_layer_sizes: Optional[List[Tuple[int, ...]]] = None, global_pooling_schemes: List[str] = ["max"], ): @@ -120,7 +120,7 @@ def __init__( self._activation = torch.nn.LeakyReLU() self._nb_inputs = nb_inputs self._nb_global_variables = 5 + nb_inputs - self._features_subset = features_subset + self._features_subset = features_subset or [0, 1, 2, 3] self._construct_layers() def _construct_layers(self) -> None: From d5fc33fa99bc30e767741a21527b97f541d0020b Mon Sep 17 00:00:00 2001 From: AMHermansen Date: Fri, 29 Sep 2023 10:16:55 +0200 Subject: [PATCH 072/156] Add save_config in tito example script --- examples/04_training/02_train_tito_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/04_training/02_train_tito_model.py b/examples/04_training/02_train_tito_model.py index f586b0581..735dea055 100644 --- a/examples/04_training/02_train_tito_model.py +++ b/examples/04_training/02_train_tito_model.py @@ -182,8 +182,7 @@ def main( # Save model config and state dict - Version safe save method. 
model.save_state_dict(f"{path}/state_dict.pth") - # model.save_config(f"{path}/model_config.yml") - # Pending https://github.com/graphnet-team/graphnet/issues/606 + model.save_config(f"{path}/model_config.yml") if __name__ == "__main__": From 9d005985d88b22049872f65d615985b7c0c589d2 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 15:23:07 +0200 Subject: [PATCH 073/156] introduce prints of free disk space --- .github/workflows/build.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 001b8bed4..3158a51f4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,6 +44,8 @@ jobs: echo "PYTHONPATH=/usr/local/icetray/lib:$PYTHONPATH" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=/usr/local/icetray/lib:/usr/local/icetray/cernroot/lib:/usr/local/icetray/lib/tools:$LD_LIBRARY_PATH" >> $GITHUB_ENV - uses: actions/checkout@v3 + - name: Print available disk space before graphnet install + run: df -h - name: Upgrade packages already installed on icecube/icetray run: | pip install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] @@ -82,16 +84,22 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + - name: Print available disk space before graphnet install + run: df -h - name: Install package uses: ./.github/actions/install with: editable: true + - name: Print available disk space after graphnet install + run: df -h - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/01_icetray/ coverage run --source=graphnet -m pytest tests/utilities coverage report -m + - name: Print available disk space after unit tests + run: df -h build-macos: name: Unit tests - macOS From c0f7f063298abd0470ed5eac7070331be6a356a7 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 15:44:16 +0200 Subject: [PATCH 074/156] copy paste weird space freeing commands --- .github/workflows/build.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3158a51f4..d0e53d786 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -46,6 +46,13 @@ jobs: - uses: actions/checkout@v3 - name: Print available disk space before graphnet install run: df -h + - name: free disk space + run: | + sudo swapoff -a + sudo rm -f /swapfile + sudo apt clean + docker rmi $(docker image ls -aq) + df -h - name: Upgrade packages already installed on icecube/icetray run: | pip install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] From 0a1c15321b2bc8e1d9f1baa055d7c8653c7710a0 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 15:54:49 +0200 Subject: [PATCH 075/156] remove sudo commands --- .github/workflows/build.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d0e53d786..f408b4e87 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,9 +48,6 @@ jobs: run: df -h - name: free disk space run: | - sudo swapoff -a - sudo rm -f /swapfile - sudo apt clean docker rmi $(docker image ls -aq) df -h - name: Upgrade packages already installed on icecube/icetray From 
4b70150b2319cfb6273303748f1262d5152c6a62 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 15:57:03 +0200 Subject: [PATCH 076/156] change icetray docker image to "stable-slim" --- .github/workflows/build.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f408b4e87..4b2ba6fe7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: name: Unit tests - IceTray needs: [ check-codeclimate-credentials ] runs-on: ubuntu-latest - container: icecube/icetray:combo-stable + container: icecube/icetray:stable-slim steps: - name: Set environment variables run: | @@ -46,10 +46,6 @@ jobs: - uses: actions/checkout@v3 - name: Print available disk space before graphnet install run: df -h - - name: free disk space - run: | - docker rmi $(docker image ls -aq) - df -h - name: Upgrade packages already installed on icecube/icetray run: | pip install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] From 4490ac04ff73e4642627f8e208ce6160417895dd Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 16:00:48 +0200 Subject: [PATCH 077/156] changed docker image to "combo-stable-slim" --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4b2ba6fe7..f26d9571a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: name: Unit tests - IceTray needs: [ check-codeclimate-credentials ] runs-on: ubuntu-latest - container: icecube/icetray:stable-slim + container: icecube/icetray:combo-stable-slim steps: - name: Set environment variables run: | From 4971ad608f51a8acb89fb3aeb99fd5d17aead4fd Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 16:05:46 +0200 Subject: [PATCH 078/156] try pip3... --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f26d9571a..3dd90c223 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,9 +48,9 @@ jobs: run: df -h - name: Upgrade packages already installed on icecube/icetray run: | - pip install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] - pip install --ignore-installed PyYAML # Distutils installed [https://github.com/pypa/pip/issues/5247] - pip install --upgrade psutil # Original version from IceTray Environment incompatible + pip3 install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] + pip3 install --ignore-installed PyYAML # Distutils installed [https://github.com/pypa/pip/issues/5247] + pip3 install --upgrade psutil # Original version from IceTray Environment incompatible - name: Install package uses: ./.github/actions/install with: From c643ac1f03fc923cdc51693c3d959021a5d24d6a Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 16:11:42 +0200 Subject: [PATCH 079/156] "stable-prod".. 
--- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3dd90c223..282fc5570 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: name: Unit tests - IceTray needs: [ check-codeclimate-credentials ] runs-on: ubuntu-latest - container: icecube/icetray:combo-stable-slim + container: icecube/icetray:stable-prod steps: - name: Set environment variables run: | @@ -48,9 +48,9 @@ jobs: run: df -h - name: Upgrade packages already installed on icecube/icetray run: | - pip3 install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] - pip3 install --ignore-installed PyYAML # Distutils installed [https://github.com/pypa/pip/issues/5247] - pip3 install --upgrade psutil # Original version from IceTray Environment incompatible + pip install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] + pip install --ignore-installed PyYAML # Distutils installed [https://github.com/pypa/pip/issues/5247] + pip install --upgrade psutil # Original version from IceTray Environment incompatible - name: Install package uses: ./.github/actions/install with: From a919de85d42b8491138d99d04dcf10ebe7ff2559 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 16:19:26 +0200 Subject: [PATCH 080/156] "icetray-prod-v1.8.1-ubuntu20.04-X64" --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 282fc5570..3d5ad6575 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: name: Unit tests - IceTray needs: [ check-codeclimate-credentials ] runs-on: ubuntu-latest - container: icecube/icetray:stable-prod + container: icecube/icetray:icetray-prod-v1.8.1-ubuntu20.04-X64 steps: - name: Set environment variables run: | From eb695258ae099274d378e1583d00a5feb812db07 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 16:32:13 +0200 Subject: [PATCH 081/156] remove permission workaround --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3d5ad6575..3fddcfaef 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -60,9 +60,9 @@ jobs: coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples coverage run --source=graphnet -m pytest tests/examples/01_icetray coverage xml -o coverage.xml - - name: Work around permission issue - run: | - git config --global --add safe.directory /__w/graphnet/graphnet + #- name: Work around permission issue + # run: | + # git config --global --add safe.directory /__w/graphnet/graphnet - name: Publish code coverage uses: paambaati/codeclimate-action@v3.0.0 if: needs.check-codeclimate-credentials.outputs.has_credentials == 'true' From 31647315d994afdd1e7a600fbec76e9ae4475f3b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 16:45:16 +0200 Subject: [PATCH 082/156] install git --- .github/workflows/build.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3fddcfaef..b2bdfd635 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -60,9 +60,10 @@ jobs: coverage run 
--source=graphnet -m pytest tests/ --ignore=tests/examples coverage run --source=graphnet -m pytest tests/examples/01_icetray coverage xml -o coverage.xml - #- name: Work around permission issue - # run: | - # git config --global --add safe.directory /__w/graphnet/graphnet + - name: Work around permission issue + run: | + conda install -c anaconda git + git config --global --add safe.directory /__w/graphnet/graphnet - name: Publish code coverage uses: paambaati/codeclimate-action@v3.0.0 if: needs.check-codeclimate-credentials.outputs.has_credentials == 'true' From 013b47e65b41e958337cff9352a90a623867e1fe Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 16:51:47 +0200 Subject: [PATCH 083/156] comment out permission fix --- .github/workflows/build.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b2bdfd635..3fddcfaef 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -60,10 +60,9 @@ jobs: coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples coverage run --source=graphnet -m pytest tests/examples/01_icetray coverage xml -o coverage.xml - - name: Work around permission issue - run: | - conda install -c anaconda git - git config --global --add safe.directory /__w/graphnet/graphnet + #- name: Work around permission issue + # run: | + # git config --global --add safe.directory /__w/graphnet/graphnet - name: Publish code coverage uses: paambaati/codeclimate-action@v3.0.0 if: needs.check-codeclimate-credentials.outputs.has_credentials == 'true' From d0515a97fd0c6e71b6173eae8727f6a406ff6491 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 17:19:19 +0200 Subject: [PATCH 084/156] benedikt's trick --- .github/workflows/build.yml | 39 ++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3fddcfaef..f79fdb10c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: name: Unit tests - IceTray needs: [ check-codeclimate-credentials ] runs-on: ubuntu-latest - container: icecube/icetray:icetray-prod-v1.8.1-ubuntu20.04-X64 + container: icecube/icetray:combo-stable steps: - name: Set environment variables run: | @@ -44,7 +44,36 @@ jobs: echo "PYTHONPATH=/usr/local/icetray/lib:$PYTHONPATH" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=/usr/local/icetray/lib:/usr/local/icetray/cernroot/lib:/usr/local/icetray/lib/tools:$LD_LIBRARY_PATH" >> $GITHUB_ENV - uses: actions/checkout@v3 - - name: Print available disk space before graphnet install + - name: Print available disk space before clean-up + run: df -h + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + - name: manually remove gcloud + shell: bash + run: sudo apt-get remove google-cloud-cli + - name: same as 'large-packages' but without 'google-cloud-sdk' + shell: bash + run: | + sudo apt-get remove -y '^dotnet-.*' + sudo apt-get remove -y '^llvm-.*' + sudo apt-get remove -y 'php.*' + sudo apt-get remove -y '^mongodb-.*' + sudo apt-get remove -y '^mysql-.*' + sudo apt-get remove -y azure-cli google-chrome-stable 
firefox powershell mono-devel libgl1-mesa-dri + sudo apt-get autoremove -y + sudo apt-get clean + - name: Print available disk space after disk clean-up run: df -h - name: Upgrade packages already installed on icecube/icetray run: | @@ -60,9 +89,9 @@ jobs: coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples coverage run --source=graphnet -m pytest tests/examples/01_icetray coverage xml -o coverage.xml - #- name: Work around permission issue - # run: | - # git config --global --add safe.directory /__w/graphnet/graphnet + - name: Work around permission issue + run: | + git config --global --add safe.directory /__w/graphnet/graphnet - name: Publish code coverage uses: paambaati/codeclimate-action@v3.0.0 if: needs.check-codeclimate-credentials.outputs.has_credentials == 'true' From c32d6a2943c79fb8103319b289ab63b0e3d960cf Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 17:24:30 +0200 Subject: [PATCH 085/156] remove google cli reference --- .github/workflows/build.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f79fdb10c..ce8c7b82f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -59,9 +59,6 @@ jobs: haskell: true large-packages: false swap-storage: true - - name: manually remove gcloud - shell: bash - run: sudo apt-get remove google-cloud-cli - name: same as 'large-packages' but without 'google-cloud-sdk' shell: bash run: | From 82d7c83e346b882dd4e88c1f2c4d0ef75ebe7c2b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 17:29:29 +0200 Subject: [PATCH 086/156] more edits.. --- .github/workflows/build.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ce8c7b82f..de669d907 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -62,13 +62,6 @@ jobs: - name: same as 'large-packages' but without 'google-cloud-sdk' shell: bash run: | - sudo apt-get remove -y '^dotnet-.*' - sudo apt-get remove -y '^llvm-.*' - sudo apt-get remove -y 'php.*' - sudo apt-get remove -y '^mongodb-.*' - sudo apt-get remove -y '^mysql-.*' - sudo apt-get remove -y azure-cli google-chrome-stable firefox powershell mono-devel libgl1-mesa-dri - sudo apt-get autoremove -y sudo apt-get clean - name: Print available disk space after disk clean-up run: df -h From e0ba0755db2db8f859335a74eb1eee470e3a0e19 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 17:37:32 +0200 Subject: [PATCH 087/156] try to install git --- .github/workflows/build.yml | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index de669d907..6220c7c59 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,7 +44,7 @@ jobs: echo "PYTHONPATH=/usr/local/icetray/lib:$PYTHONPATH" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=/usr/local/icetray/lib:/usr/local/icetray/cernroot/lib:/usr/local/icetray/lib/tools:$LD_LIBRARY_PATH" >> $GITHUB_ENV - uses: actions/checkout@v3 - - name: Print available disk space before clean-up + - name: Print available disk space before graphnet install run: df -h - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@main @@ -59,12 +59,20 @@ jobs: haskell: true large-packages: false swap-storage: true + - name: manually remove gcloud + shell: bash + run: sudo apt-get remove google-cloud-cli - name: same as 'large-packages' but without 'google-cloud-sdk' shell: 
bash run: | + sudo apt-get remove -y '^dotnet-.*' + sudo apt-get remove -y '^llvm-.*' + sudo apt-get remove -y 'php.*' + sudo apt-get remove -y '^mongodb-.*' + sudo apt-get remove -y '^mysql-.*' + sudo apt-get remove -y azure-cli google-chrome-stable firefox powershell mono-devel libgl1-mesa-dri + sudo apt-get autoremove -y sudo apt-get clean - - name: Print available disk space after disk clean-up - run: df -h - name: Upgrade packages already installed on icecube/icetray run: | pip install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] @@ -79,9 +87,13 @@ jobs: coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples coverage run --source=graphnet -m pytest tests/examples/01_icetray coverage xml -o coverage.xml + - name: install git + shell: bash + run: | + sudo apt-get install git-all - name: Work around permission issue run: | - git config --global --add safe.directory /__w/graphnet/graphnet + # git config --global --add safe.directory /__w/graphnet/graphnet - name: Publish code coverage uses: paambaati/codeclimate-action@v3.0.0 if: needs.check-codeclimate-credentials.outputs.has_credentials == 'true' From 6226542d8d64bc614490e44ecd89f6ba68b29bb1 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 17:37:50 +0200 Subject: [PATCH 088/156] uncomment --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6220c7c59..29b4227eb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -93,7 +93,7 @@ jobs: sudo apt-get install git-all - name: Work around permission issue run: | - # git config --global --add safe.directory /__w/graphnet/graphnet + git config --global --add safe.directory /__w/graphnet/graphnet - name: Publish code coverage uses: paambaati/codeclimate-action@v3.0.0 if: needs.check-codeclimate-credentials.outputs.has_credentials == 'true' From 876db26b7b79a09ca4edaf9928a0b9133c7bb689 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 17:38:32 +0200 Subject: [PATCH 089/156] remove benedikt's trick --- .github/workflows/build.yml | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 29b4227eb..ef6850aa8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: name: Unit tests - IceTray needs: [ check-codeclimate-credentials ] runs-on: ubuntu-latest - container: icecube/icetray:combo-stable + container: icecube/icetray:icetray-prod-v1.8.1-ubuntu22.04-x64 steps: - name: Set environment variables run: | @@ -46,33 +46,6 @@ jobs: - uses: actions/checkout@v3 - name: Print available disk space before graphnet install run: df -h - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: false - swap-storage: true - - name: manually remove gcloud - shell: bash - run: sudo apt-get remove google-cloud-cli - - name: same as 'large-packages' but without 'google-cloud-sdk' - shell: bash - run: | - sudo apt-get remove -y '^dotnet-.*' - sudo apt-get remove -y '^llvm-.*' - sudo apt-get remove -y 'php.*' - sudo 
apt-get remove -y '^mongodb-.*' - sudo apt-get remove -y '^mysql-.*' - sudo apt-get remove -y azure-cli google-chrome-stable firefox powershell mono-devel libgl1-mesa-dri - sudo apt-get autoremove -y - sudo apt-get clean - name: Upgrade packages already installed on icecube/icetray run: | pip install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] From b742c9aa208ad13fa96c12e0b08b1f227593bbf9 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 17:41:23 +0200 Subject: [PATCH 090/156] cmon --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ef6850aa8..9cc81f300 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: name: Unit tests - IceTray needs: [ check-codeclimate-credentials ] runs-on: ubuntu-latest - container: icecube/icetray:icetray-prod-v1.8.1-ubuntu22.04-x64 + container: icecube/icetray:icetray-prod-v1.8.1-ubuntu20.04-X64 steps: - name: Set environment variables run: | From 677cb50d77d9ec11f46ad3f04b17cd5dcbda62d7 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 29 Sep 2023 17:55:05 +0200 Subject: [PATCH 091/156] return to normal --- .github/workflows/build.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9cc81f300..75be67797 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -60,13 +60,10 @@ jobs: coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples coverage run --source=graphnet -m pytest tests/examples/01_icetray coverage xml -o coverage.xml - - name: install git - shell: bash - run: | - sudo apt-get install git-all - - name: Work around permission issue - run: | - git config --global --add safe.directory /__w/graphnet/graphnet + + #- name: Work around permission issue + # run: | + # git config --global --add safe.directory /__w/graphnet/graphnet - name: Publish code coverage uses: paambaati/codeclimate-action@v3.0.0 if: needs.check-codeclimate-credentials.outputs.has_credentials == 'true' From 33bb913a28b753222ddd9a8b940a20c8d7c4d37e Mon Sep 17 00:00:00 2001 From: ArturoLlorente Date: Mon, 2 Oct 2023 12:08:00 +0200 Subject: [PATCH 092/156] add assertion when data in forward method is not a list --- src/graphnet/models/standard_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py index c5fd828b4..ea9aed35f 100644 --- a/src/graphnet/models/standard_model.py +++ b/src/graphnet/models/standard_model.py @@ -96,6 +96,7 @@ def configure_optimizers(self) -> Dict[str, Any]: def forward(self, data: List[Data]) -> List[Union[Tensor, Data]]: """Forward pass, chaining model components.""" + assert isinstance(data, List[Data]) x_list = [] for d in data: x = self._gnn(d) From b0b559bb011593accc3af99d3466d0bfa478fc05 Mon Sep 17 00:00:00 2001 From: ArturoLlorente Date: Mon, 2 Oct 2023 12:38:51 +0200 Subject: [PATCH 093/156] solve pre-commit issues --- src/graphnet/models/standard_model.py | 2 +- src/graphnet/training/utils.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py index ea9aed35f..d6506d933 100644 --- a/src/graphnet/models/standard_model.py +++ b/src/graphnet/models/standard_model.py @@ -96,7 +96,7 @@ def 
configure_optimizers(self) -> Dict[str, Any]:
 
     def forward(self, data: List[Data]) -> List[Union[Tensor, Data]]:
         """Forward pass, chaining model components."""
-        assert isinstance(data, List[Data])
+        assert isinstance(data, List)
         x_list = []
         for d in data:
             x = self._gnn(d)
diff --git a/src/graphnet/training/utils.py b/src/graphnet/training/utils.py
index ea03d6cc3..1cded30d2 100644
--- a/src/graphnet/training/utils.py
+++ b/src/graphnet/training/utils.py
@@ -41,8 +41,7 @@ def __init__(self, batch_splits: List[float] = [0.8]):
         self.batch_splits = batch_splits
 
     def __call__(self, graphs: List[Data]) -> Batch:
-        """Execute sequence bucketing on the input list of graphs and sort them
-        by the number of pulses for each mini-batch.
+        """Execute sequence bucketing on the input list of graphs.
 
         Args:
             graphs: A list of Data objects representing the input graphs.

From 4f04ffcacab0822ee334a3959d6911c2aa8375f9 Mon Sep 17 00:00:00 2001
From: ArturoLlorente
Date: Tue, 3 Oct 2023 09:50:52 +0200
Subject: [PATCH 094/156] prediction gives a Data object instead of a List
 object. Now the forward method also checks whether the object is a list

---
 src/graphnet/models/standard_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py
index d6506d933..467bfc462 100644
--- a/src/graphnet/models/standard_model.py
+++ b/src/graphnet/models/standard_model.py
@@ -94,9 +94,10 @@ def configure_optimizers(self) -> Dict[str, Any]:
         )
         return config
 
-    def forward(self, data: List[Data]) -> List[Union[Tensor, Data]]:
+    def forward(
+        self, data: Union[Data, List[Data]]
+    ) -> List[Union[Tensor, Data]]:
         """Forward pass, chaining model components."""
+        if isinstance(data, Data):
+            data = [data]
         x_list = []
         for d in data:
             x = self._gnn(d)

From 919b6781309dad532f3c202ffd85bb2913b84878 Mon Sep 17 00:00:00 2001
From: ArturoLlorente
Date: Tue, 3 Oct 2023 10:06:18 +0200
Subject: [PATCH 095/156] changes were not staged for commit

---
 src/graphnet/models/standard_model.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py
index 467bfc462..25a6e5107 100644
--- a/src/graphnet/models/standard_model.py
+++ b/src/graphnet/models/standard_model.py
@@ -94,7 +94,9 @@ def configure_optimizers(self) -> Dict[str, Any]:
         )
         return config
 
-    def forward(self, data: Union[Data, List[Data]]) -> List[Union[Tensor, Data]]:
+    def forward(
+        self, data: Union[Data, List[Data]]
+    ) -> List[Union[Tensor, Data]]:
         """Forward pass, chaining model components."""
         if isinstance(data, Data):
             data = [data]

From a49fa892d67abcb83bc6ecb43dfb853259dc6222 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Wed, 4 Oct 2023 13:58:14 +0200
Subject: [PATCH 096/156] test removal of stuff in workflow

---
 .github/workflows/build.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 75be67797..325de3efc 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -80,6 +80,19 @@ jobs:
       matrix:
         python-version: [3.8, 3.9, '3.10', '3.11']
     steps:
+      - name: Maximize build space
+        uses: easimon/maximize-build-space@master
+        with:
+          root-reserve-mb: 512
+          swap-size-mb: 1024
+          remove-dotnet:
'true' + - name: Checkout + uses: actions/checkout@v3 + + - name: Build + run: | + echo "Free space:" + df -h - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 From 70407a521b81faf3798687801bb2937e9e4ac08b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 13:58:58 +0200 Subject: [PATCH 097/156] add print statements of df --- .github/workflows/build.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 325de3efc..2be9d5eec 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -80,6 +80,10 @@ jobs: matrix: python-version: [3.8, 3.9, '3.10', '3.11'] steps: + - name: Before Clean-up + run: | + echo "Free space:" + df -h - name: Maximize build space uses: easimon/maximize-build-space@master with: @@ -89,7 +93,7 @@ jobs: - name: Checkout uses: actions/checkout@v3 - - name: Build + - name: After Clean-up run: | echo "Free space:" df -h From c185f567c1017afe7a0114f2550f8db50e9a410d Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 14:05:04 +0200 Subject: [PATCH 098/156] 2nd test --- .github/workflows/build.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2be9d5eec..e6037e985 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -84,14 +84,19 @@ jobs: run: | echo "Free space:" df -h - - name: Maximize build space - uses: easimon/maximize-build-space@master + + - name: Free Disk Space + uses: jlumbroso/free-disk-space@main with: - root-reserve-mb: 512 - swap-size-mb: 1024 - remove-dotnet: 'true' - - name: Checkout - uses: actions/checkout@v3 + tool-cache: true + + # all of these default to true, but feel free to set to + # false if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true - name: After Clean-up run: | From ad4c01b420b3bd4a580045bea29fd0fe38a4f2a1 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 14:12:15 +0200 Subject: [PATCH 099/156] try for icetray... 
--- .github/workflows/build.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e6037e985..ed4792fb4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -38,6 +38,28 @@ jobs: runs-on: ubuntu-latest container: icecube/icetray:icetray-prod-v1.8.1-ubuntu20.04-X64 steps: + - name: Before Clean-up + run: | + echo "Free space:" + df -h + + - name: Free Disk Space + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + + # all of these default to true, but feel free to set to + # false if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + + - name: After Clean-up + run: | + echo "Free space:" + df -h - name: Set environment variables run: | echo "PATH=/usr/local/icetray/bin:$PATH" >> $GITHUB_ENV From 6a61bf0f9935c7cb6e96146dfe4be3ae258fc160 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 14:24:53 +0200 Subject: [PATCH 100/156] try to install git in icetray cvmfs --- .github/workflows/build.yml | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ed4792fb4..1f7614798 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,30 +36,12 @@ jobs: name: Unit tests - IceTray needs: [ check-codeclimate-credentials ] runs-on: ubuntu-latest - container: icecube/icetray:icetray-prod-v1.8.1-ubuntu20.04-X64 + container: + image: icecube/icetray:icetray-prod-v1.8.1-ubuntu20.04-X64 + options: --user root steps: - - name: Before Clean-up - run: | - echo "Free space:" - df -h - - - name: Free Disk Space - uses: jlumbroso/free-disk-space@main - with: - tool-cache: true - - # all of these default to true, but feel free to set to - # false if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: true - - - name: After Clean-up - run: | - echo "Free space:" - df -h + - name: install git + run: sudo apt-get install git-all - name: Set environment variables run: | echo "PATH=/usr/local/icetray/bin:$PATH" >> $GITHUB_ENV @@ -83,9 +65,9 @@ jobs: coverage run --source=graphnet -m pytest tests/examples/01_icetray coverage xml -o coverage.xml - #- name: Work around permission issue - # run: | - # git config --global --add safe.directory /__w/graphnet/graphnet + - name: Work around permission issue + run: | + git config --global --add safe.directory /__w/graphnet/graphnet - name: Publish code coverage uses: paambaati/codeclimate-action@v3.0.0 if: needs.check-codeclimate-credentials.outputs.has_credentials == 'true' From 05494d4592862bd35db477e1e09cc8ba366a6805 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 14:32:36 +0200 Subject: [PATCH 101/156] install sudo.. 
--- .github/workflows/build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1f7614798..bcc845095 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,7 +41,9 @@ jobs: options: --user root steps: - name: install git - run: sudo apt-get install git-all + run: | + apt-get install sudo + sudo apt-get install git-all - name: Set environment variables run: | echo "PATH=/usr/local/icetray/bin:$PATH" >> $GITHUB_ENV From fa44ad9d928263af9daa166ae4e3ba66f5d5a5ae Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 14:35:28 +0200 Subject: [PATCH 102/156] force "yes" --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bcc845095..0dac4d4ea 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,7 +42,7 @@ jobs: steps: - name: install git run: | - apt-get install sudo + apt-get -y install sudo sudo apt-get install git-all - name: Set environment variables run: | From cda283ae424e104f6210240364b78ae31267818e Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 14:38:26 +0200 Subject: [PATCH 103/156] really force yes --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0dac4d4ea..6f46e81f3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,7 +42,7 @@ jobs: steps: - name: install git run: | - apt-get -y install sudo + apt-get --yes --force-yes install sudo sudo apt-get install git-all - name: Set environment variables run: | From c3181ef529a320d49556caeffd0ba69cbe0b5ea8 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 14:51:56 +0200 Subject: [PATCH 104/156] linux sucks --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6f46e81f3..611a31bfc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,7 +42,7 @@ jobs: steps: - name: install git run: | - apt-get --yes --force-yes install sudo + apt-get --allow-change-held-packages install sudo sudo apt-get install git-all - name: Set environment variables run: | From 6455b8f7e6e16001c954962571ac30437f0f7897 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 14:55:39 +0200 Subject: [PATCH 105/156] liiiinux --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 611a31bfc..eb750164e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,7 +42,7 @@ jobs: steps: - name: install git run: | - apt-get --allow-change-held-packages install sudo + apt-get -y --allow-change-held-packages install sudo sudo apt-get install git-all - name: Set environment variables run: | From 77966eccaaf83587c8441df5abde6cd3e1dcee45 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 14:59:32 +0200 Subject: [PATCH 106/156] lets go --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index eb750164e..e8f655e2a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,8 +42,8 @@ jobs: steps: - name: install git run: | - apt-get -y 
--allow-change-held-packages install sudo - sudo apt-get install git-all + apt-get --yes install sudo + sudo apt-get install --yes git-all - name: Set environment variables run: | echo "PATH=/usr/local/icetray/bin:$PATH" >> $GITHUB_ENV From 1edeb5bbb7489a673d8e75035eb5330be9366ace Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 15:07:45 +0200 Subject: [PATCH 107/156] update docker workflow to remove space --- .github/workflows/build.yml | 22 ---------------------- .github/workflows/docker.yml | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e8f655e2a..1a23eea64 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -86,28 +86,6 @@ jobs: matrix: python-version: [3.8, 3.9, '3.10', '3.11'] steps: - - name: Before Clean-up - run: | - echo "Free space:" - df -h - - - name: Free Disk Space - uses: jlumbroso/free-disk-space@main - with: - tool-cache: true - - # all of these default to true, but feel free to set to - # false if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: true - - - name: After Clean-up - run: | - echo "Free space:" - df -h - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 7d5ad7964..09cb04bf8 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -19,6 +19,28 @@ jobs: packages: write contents: read steps: + - name: Before Clean-up + run: | + echo "Free space:" + df -h + + - name: Free Disk Space + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + + # all of these default to true, but feel free to set to + # false if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + + - name: After Clean-up + run: | + echo "Free space:" + df -h - name: Checkout uses: actions/checkout@v3 - name: Set up QEMU From 366cdaec16ed19e0bd36ac84d7a64808f3ba0a06 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 16:09:43 +0200 Subject: [PATCH 108/156] remove macos as supported OS --- .github/workflows/build.yml | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1a23eea64..3b959584a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -108,22 +108,22 @@ jobs: - name: Print available disk space after unit tests run: df -h - build-macos: - name: Unit tests - macOS - runs-on: macos-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Install package - uses: ./.github/actions/install - with: - editable: true - hardware: "macos" - - name: Run unit tests and generate coverage report - run: | - set -o pipefail # To propagate exit code from pytest - coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/ - coverage report -m +# build-macos: +# name: Unit tests - macOS +# runs-on: macos-latest +# steps: +# - uses: actions/checkout@v3 +# - name: Set up Python 3.8 +# uses: actions/setup-python@v4 +# with: +# python-version: 3.8 +# - name: Install package +# uses: ./.github/actions/install +# with: +# editable: true +# hardware: "macos" +# - name: 
Run unit tests and generate coverage report +# run: | +# set -o pipefail # To propagate exit code from pytest +# coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/ +# coverage report -m From a0bec79ebbd7b74bab675c741aa6bf25cbddca6c Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 16:10:30 +0200 Subject: [PATCH 109/156] remove macos from readme.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 8afc9d773..7a2e69af9 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,6 @@ $ conda create --name graphnet python=3.8 gcc_linux-64 gxx_linux-64 libgcc cudat $ conda activate graphnet # Optional (graphnet) $ pip install -r requirements/torch_cpu.txt -e .[develop,torch] # CPU-only torch (graphnet) $ pip install -r requirements/torch_gpu.txt -e .[develop,torch] # GPU support -(graphnet) $ pip install -r requirements/torch_macos.txt -e .[develop,torch] # On macOS ``` This should allow you to e.g. run the scripts in [examples/](./examples/) out of the box. From d84bdef7977f129858d0929bd14e6bb8a13ee754 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 21:03:03 +0200 Subject: [PATCH 110/156] remove macos lines --- .github/workflows/build.yml | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3b959584a..ad5a9b6a9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -107,23 +107,3 @@ jobs: coverage report -m - name: Print available disk space after unit tests run: df -h - -# build-macos: -# name: Unit tests - macOS -# runs-on: macos-latest -# steps: -# - uses: actions/checkout@v3 -# - name: Set up Python 3.8 -# uses: actions/setup-python@v4 -# with: -# python-version: 3.8 -# - name: Install package -# uses: ./.github/actions/install -# with: -# editable: true -# hardware: "macos" -# - name: Run unit tests and generate coverage report -# run: | -# set -o pipefail # To propagate exit code from pytest -# coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/ -# coverage report -m From f3d47b5d85828437dd55632b63cedb1f8590ebec Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 4 Oct 2023 21:19:10 +0200 Subject: [PATCH 111/156] test --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ad5a9b6a9..49ac2fbbb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -106,4 +106,4 @@ jobs: coverage run --source=graphnet -m pytest tests/utilities coverage report -m - name: Print available disk space after unit tests - run: df -h + run: df -h \ No newline at end of file From aa537ebedb2727455ff95a058610ccc9893df19b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 5 Oct 2023 12:33:25 +0200 Subject: [PATCH 112/156] torch version test --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ef1b48e52..84a731bf8 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "versioneer", ], "torch": [ - "torch>=2.0", + "torch=2.0", "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", From d7ea48b44fdb7e2c97a5b9bbe411212bf566e5d1 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 5 Oct 2023 13:29:32 +0200 Subject: [PATCH 113/156] == --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/setup.py b/setup.py index 84a731bf8..133dac79e 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "versioneer", ], "torch": [ - "torch=2.0", + "torch==2.0", "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", From 6ad414756e0a38d04efa60415f001aae973d64c7 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 5 Oct 2023 15:26:36 +0200 Subject: [PATCH 114/156] fix linux --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 49ac2fbbb..5d0578d5e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,6 +43,8 @@ jobs: - name: install git run: | apt-get --yes install sudo + sudo apt update --fix-missing + sudo apt upgrade sudo apt-get install --yes git-all - name: Set environment variables run: | From dc6bdfc885a00913594bfeb03925b051e779dcb5 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 5 Oct 2023 15:38:11 +0200 Subject: [PATCH 115/156] --yes --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5d0578d5e..ef509972d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,8 +43,8 @@ jobs: - name: install git run: | apt-get --yes install sudo - sudo apt update --fix-missing - sudo apt upgrade + sudo apt update --fix-missing --yes + sudo apt upgrade --yes sudo apt-get install --yes git-all - name: Set environment variables run: | From 260b7840c758d8cdfff6be8931af8ac411755977 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 5 Oct 2023 16:00:48 +0200 Subject: [PATCH 116/156] test macos --- .github/workflows/build.yml | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ef509972d..2f7086b8d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -108,4 +108,24 @@ jobs: coverage run --source=graphnet -m pytest tests/utilities coverage report -m - name: Print available disk space after unit tests - run: df -h \ No newline at end of file + run: df -h + + build-macos: + name: Unit tests - macOS + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.8 + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install package + uses: ./.github/actions/install + with: + editable: true + hardware: "macos" + - name: Run unit tests and generate coverage report + run: | + set -o pipefail # To propagate exit code from pytest + coverage run --source=graphnet -m pytest tests/ --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/ + coverage report -m \ No newline at end of file From ad1f1492530a979f091e15166618efcc7942a19d Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sat, 7 Oct 2023 14:34:42 +0200 Subject: [PATCH 117/156] bump to torch 2.1 --- requirements/torch_cpu.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/torch_cpu.txt b/requirements/torch_cpu.txt index 6f68e3600..59e273288 100644 --- a/requirements/torch_cpu.txt +++ b/requirements/torch_cpu.txt @@ -1,2 +1,2 @@ --find-links https://download.pytorch.org/whl/cpu ---find-links https://data.pyg.org/whl/torch-2.0.0+cpu.html \ No newline at end of file +--find-links https://data.pyg.org/whl/torch-2.1.0+cpu.html \ No newline at end of file diff --git a/setup.py b/setup.py index 133dac79e..ef1b48e52 100644 --- a/setup.py +++ 
b/setup.py @@ -47,7 +47,7 @@ "versioneer", ], "torch": [ - "torch==2.0", + "torch>=2.0", "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", From b50224d835ee97d424642996274d170a48ad7283 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sat, 7 Oct 2023 14:49:04 +0200 Subject: [PATCH 118/156] bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ef1b48e52..671bc626b 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "versioneer", ], "torch": [ - "torch>=2.0", + "torch>=2.1", "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", From e73df14160d0b9aa2629b645e6af6ac28b7227b9 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 8 Oct 2023 09:01:01 +0200 Subject: [PATCH 119/156] print versions --- .github/workflows/build.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2f7086b8d..116c7a85b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -101,6 +101,13 @@ jobs: editable: true - name: Print available disk space after graphnet install run: df -h + - name: Print packages in pip + run: | + pip show torch + pip show torch-geometric + pip show torch-cluster + pip show torch-sparse + pip show torch-scatter - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest @@ -124,6 +131,13 @@ jobs: with: editable: true hardware: "macos" + - name: Print packages in pip + run: | + pip show torch + pip show torch-geometric + pip show torch-cluster + pip show torch-sparse + pip show torch-scatter - name: Run unit tests and generate coverage report run: | set -o pipefail # To propagate exit code from pytest From 9af0e49c32fd7f11c3b00821e20b232716b45045 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 8 Oct 2023 09:29:46 +0200 Subject: [PATCH 120/156] bump pyg to 2.4 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 671bc626b..1253108c1 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", - "torch-geometric>=2.1", + "torch-geometric>=2.4", "pytorch-lightning>=2.0", ], } From 239dd47521ca5af38e1295e90d197fe565bb0659 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 8 Oct 2023 09:38:02 +0200 Subject: [PATCH 121/156] bump to pyg 2.3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1253108c1..3b70233ab 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", - "torch-geometric>=2.4", + "torch-geometric>=2.3", "pytorch-lightning>=2.0", ], } From 3230b073eec5518b94fe7d7a1208d693f5208dcf Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 8 Oct 2023 10:06:28 +0200 Subject: [PATCH 122/156] fix torch to 2.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3b70233ab..938daf694 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "versioneer", ], "torch": [ - "torch>=2.1", + "torch==2.1", "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", From 12f1fd817532d59a9750e0160db3e2d11f3f6664 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 8 Oct 2023 10:15:12 +0200 Subject: [PATCH 123/156] fix to 2.0 --- requirements/torch_cpu.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/torch_cpu.txt 
b/requirements/torch_cpu.txt index 59e273288..6f68e3600 100644 --- a/requirements/torch_cpu.txt +++ b/requirements/torch_cpu.txt @@ -1,2 +1,2 @@ --find-links https://download.pytorch.org/whl/cpu ---find-links https://data.pyg.org/whl/torch-2.1.0+cpu.html \ No newline at end of file +--find-links https://data.pyg.org/whl/torch-2.0.0+cpu.html \ No newline at end of file diff --git a/setup.py b/setup.py index 938daf694..d4d5f3f45 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "versioneer", ], "torch": [ - "torch==2.1", + "torch==2.0", "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", From c3501d3fa796f102083b4319cb7cdf857d3ab78f Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 8 Oct 2023 10:17:05 +0200 Subject: [PATCH 124/156] macos --- requirements/torch_cpu.txt | 2 +- requirements/torch_macos.txt | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/torch_cpu.txt b/requirements/torch_cpu.txt index 6f68e3600..59e273288 100644 --- a/requirements/torch_cpu.txt +++ b/requirements/torch_cpu.txt @@ -1,2 +1,2 @@ --find-links https://download.pytorch.org/whl/cpu ---find-links https://data.pyg.org/whl/torch-2.0.0+cpu.html \ No newline at end of file +--find-links https://data.pyg.org/whl/torch-2.1.0+cpu.html \ No newline at end of file diff --git a/requirements/torch_macos.txt b/requirements/torch_macos.txt index be7a35257..3e9d75df4 100644 --- a/requirements/torch_macos.txt +++ b/requirements/torch_macos.txt @@ -1,2 +1,2 @@ --find-links https://download.pytorch.org/whl/torch_stable.html ---find-links https://data.pyg.org/whl/torch-2.0.0+cpu.html \ No newline at end of file +--find-links https://data.pyg.org/whl/torch-2.1.0+cpu.html \ No newline at end of file diff --git a/setup.py b/setup.py index d4d5f3f45..3b70233ab 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "versioneer", ], "torch": [ - "torch==2.0", + "torch>=2.1", "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", From c88cbd5e63ea1ba9f4f7ae36e12bebbb039abf0c Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 8 Oct 2023 10:36:29 +0200 Subject: [PATCH 125/156] bump gpu to 2.1 --- requirements/torch_gpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/torch_gpu.txt b/requirements/torch_gpu.txt index 553d306e5..3ccedd562 100644 --- a/requirements/torch_gpu.txt +++ b/requirements/torch_gpu.txt @@ -1,4 +1,4 @@ # Contains packages recommended for functional performance --find-links https://download.pytorch.org/whl/torch_stable.html torch==2.0.1+cu117 ---find-links https://data.pyg.org/whl/torch-2.0.0+cu117.html +--find-links https://data.pyg.org/whl/torch-2.1.0+cu117.html From 1a3c4b93b0a733083a24a5f289ab9fa97cad71d9 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Sun, 8 Oct 2023 10:39:32 +0200 Subject: [PATCH 126/156] bump gpu to torch 2.1 --- requirements/torch_gpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/torch_gpu.txt b/requirements/torch_gpu.txt index 3ccedd562..03e2a6906 100644 --- a/requirements/torch_gpu.txt +++ b/requirements/torch_gpu.txt @@ -1,4 +1,4 @@ # Contains packages recommended for functional performance --find-links https://download.pytorch.org/whl/torch_stable.html -torch==2.0.1+cu117 +torch==2.1.1+cu117 --find-links https://data.pyg.org/whl/torch-2.1.0+cu117.html From 15e765df50b3f1077aba9126f85fdf9d9c4cb715 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 13:15:17 +0200 Subject: [PATCH 127/156] adjust gpu versions in 
requirements.txt --- requirements/torch_gpu.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/torch_gpu.txt b/requirements/torch_gpu.txt index 03e2a6906..cc9f55588 100644 --- a/requirements/torch_gpu.txt +++ b/requirements/torch_gpu.txt @@ -1,4 +1,4 @@ # Contains packages recommended for functional performance --find-links https://download.pytorch.org/whl/torch_stable.html -torch==2.1.1+cu117 ---find-links https://data.pyg.org/whl/torch-2.1.0+cu117.html +torch==2.1.0+cu118 +--find-links https://data.pyg.org/whl/torch-2.1.0+cu118.html From 0370069a4c1c03802461596b11bc38fa1f93c2d2 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 13:17:11 +0200 Subject: [PATCH 128/156] update comment --- requirements/torch_gpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/torch_gpu.txt b/requirements/torch_gpu.txt index cc9f55588..1f1abba3f 100644 --- a/requirements/torch_gpu.txt +++ b/requirements/torch_gpu.txt @@ -1,4 +1,4 @@ -# Contains packages recommended for functional performance +# Contains packages requirements for GPU installation --find-links https://download.pytorch.org/whl/torch_stable.html torch==2.1.0+cu118 --find-links https://data.pyg.org/whl/torch-2.1.0+cu118.html From 84f73f528fbe8c0e133819f2b8369ccf60b2d555 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 14:32:57 +0200 Subject: [PATCH 129/156] copy-paste of code --- src/graphnet/models/graphs/nodes/nodes.py | 114 ++++++++++++++++++++-- 1 file changed, 107 insertions(+), 7 deletions(-) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index ce539ee80..8966d2891 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -1,6 +1,6 @@ """Class(es) for building/connecting graphs.""" -from typing import List +from typing import List, Tuple from abc import abstractmethod import torch @@ -8,6 +8,11 @@ from graphnet.utilities.decorators import final from graphnet.models import Model +from graphnet.models.graphs.utils import ( + cluster_summarize_with_percentiles, + identify_indices, +) +from copy import deepcopy class NodeDefinition(Model): # pylint: disable=too-few-public-methods @@ -19,18 +24,24 @@ def __init__(self) -> None: super().__init__(name=__name__, class_name=self.__class__.__name__) @final - def forward(self, x: torch.tensor) -> Data: + def forward( + self, x: torch.tensor, node_feature_names: List[str] + ) -> Tuple[Data, List[str]]: """Construct nodes from raw node features. Args: x: standardized node features with shape ´[num_pulses, d]´, where ´d´ is the number of node features. + node_feature_names: list of names for each column in ´x´. Returns: graph: a graph without edges + new_features_name: List of new feature names. """ - graph = self._construct_nodes(x) - return graph + graph, new_feature_names = self._construct_nodes( + x=x, feature_names=node_feature_names + ) + return graph, new_feature_names @property def nb_outputs(self) -> int: @@ -51,20 +62,109 @@ def set_number_of_inputs(self, node_feature_names: List[str]) -> None: self.nb_inputs = len(node_feature_names) @abstractmethod - def _construct_nodes(self, x: torch.tensor) -> Data: + def _construct_nodes( + self, x: torch.tensor, feature_names: List[str] + ) -> Data: """Construct nodes from raw node features ´x´. Args: x: standardized node features with shape ´[num_pulses, d]´, where ´d´ is the number of node features. 
+            feature_names: List of names for each column in `x`. Identical
+                order of appearance. Length `d`.
 
         Returns:
             graph: graph without edges.
+            new_node_features: A list of node feature names.
         """
 
 
 class NodesAsPulses(NodeDefinition):
     """Represent each measured pulse of Cherenkov Radiation as a node."""
 
-    def _construct_nodes(self, x: torch.Tensor) -> Data:
-        return Data(x=x)
+    def _construct_nodes(
+        self, x: torch.Tensor, feature_names: List[str]
+    ) -> Data:
+        return Data(x=x), feature_names
+
+
+class PercentileClusters(NodeDefinition):
+    """Represent nodes as clusters with percentile summary node features.
+
+    If `cluster_on` is set to the xyz coordinates of DOMs
+    e.g. `cluster_on = ['dom_x', 'dom_y', 'dom_z']`, each node will be a
+    unique DOM and the pulse information (charge, time) is summarized using
+    percentiles.
+    """
+
+    def __init__(
+        self,
+        cluster_on: List[str],
+        feature_names: List[str],
+        percentiles: List[int],
+        add_counts: bool = True,
+    ) -> None:
+        """Construct `PercentileClusters`.
+
+        Args:
+            cluster_on: Names of features to create clusters from.
+            feature_names: List of column names for the input data.
+                E.g. ['dom_x', 'dom_y', 'dom_z',..]
+            percentiles: List of percentiles. E.g. `[10, 50, 90]`.
+            add_counts: If True, number of duplicates is added to output array.
+        """
+        self._cluster_on = cluster_on
+        self._percentiles = percentiles
+        self._add_counts = add_counts
+        (
+            cluster_idx,
+            summ_idx,
+            new_feature_names,
+        ) = self._get_indices_and_feature_names(
+            feature_names, self._add_counts
+        )
+        self._cluster_indices = cluster_idx
+        self._summarization_indices = summ_idx
+        self._output_feature_names = new_feature_names
+        # Base class constructor
+        super().__init__()
+
+    def _get_indices_and_feature_names(
+        self,
+        feature_names: List[str],
+        add_counts: bool,
+    ) -> Tuple[List[int], List[int], List[str]]:
+        cluster_idx, summ_idx, summ_names = identify_indices(
+            feature_names, self._cluster_on
+        )
+        new_feature_names = deepcopy(self._cluster_on)
+        for feature in summ_names:
+            for pct in self._percentiles:
+                new_feature_names.append(f"{feature}_pct{pct}")
+        if add_counts:
+            # add "counts" as the last feature
+            new_feature_names.append("counts")
+        return cluster_idx, summ_idx, new_feature_names
+
+    def _construct_nodes(
+        self, x: torch.Tensor, feature_names: List[str]
+    ) -> Data:
+        # Cast to Numpy
+        x = x.numpy()
+        # Construct clusters with percentile-summarized features
+        array = cluster_summarize_with_percentiles(
+            x=x,
+            summarization_indices=self._summarization_indices,
+            cluster_indices=self._cluster_indices,
+            percentiles=self._percentiles,
+            add_counts=self._add_counts,
+        )
+
+        return Data(x=torch.tensor(array)), self._output_feature_names
+
+    def nb_outputs(self) -> int:
+        """Return number of output features.
+
+        This is the default, but may be overridden by specific inheriting classes.
+ """ + return len(self._output_feature_names) From 776e3001548ea8df5c6240800610c83eebc42e8b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 14:34:43 +0200 Subject: [PATCH 130/156] copy-paste --- src/graphnet/models/graphs/graph_definition.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index 9c4db4d47..f311340b9 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -139,7 +139,10 @@ def forward( # type: ignore node_features = self._detector(node_features, node_feature_names) # Create graph - graph = self._node_definition(node_features) + graph, node_feature_names = self._node_definition( + node_features, node_feature_names + ) + graph.x = graph.x.type(self.dtype) # Attach number of pulses as static attribute. graph.n_pulses = torch.tensor(len(node_features), dtype=torch.int32) From f8577a47ee519b47eafb090ce0cab5ec5687fadb Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 14:36:59 +0200 Subject: [PATCH 131/156] add comment --- src/graphnet/models/graphs/graph_definition.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py index f311340b9..8bd25759e 100644 --- a/src/graphnet/models/graphs/graph_definition.py +++ b/src/graphnet/models/graphs/graph_definition.py @@ -138,10 +138,10 @@ def forward( # type: ignore # Standardize / Scale node features node_features = self._detector(node_features, node_feature_names) - # Create graph - graph, node_feature_names = self._node_definition( - node_features, node_feature_names - ) + # Create graph & get new node feature names + graph, node_feature_names = self._node_definition(node_features) + + # Enforce dtype graph.x = graph.x.type(self.dtype) # Attach number of pulses as static attribute. 
From 27d0b3ac36e1bd33cf9f78f33231769fdbab5d96 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Mon, 9 Oct 2023 16:19:20 +0200
Subject: [PATCH 132/156] introduce set function, refactor

---
 .../models/graphs/graph_definition.py     |  5 +
 src/graphnet/models/graphs/nodes/nodes.py | 91 +++++++++++++------
 2 files changed, 70 insertions(+), 26 deletions(-)

diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py
index 8bd25759e..6e8d74715 100644
--- a/src/graphnet/models/graphs/graph_definition.py
+++ b/src/graphnet/models/graphs/graph_definition.py
@@ -67,6 +67,11 @@ def __init__(
             node_feature_names = list(self._detector.feature_map().keys())  # type: ignore
         self._node_feature_names = node_feature_names
 
+        # Set input data column names for node definition
+        self._node_definition.set_output_feature_names(
+            self._node_feature_names
+        )
+
         # Set data type
         self.to(dtype)
 
diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index 8966d2891..477751934 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -1,6 +1,6 @@
 """Class(es) for building/connecting graphs."""
 
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 from abc import abstractmethod
 
 import torch
@@ -18,15 +18,19 @@
 class NodeDefinition(Model):  # pylint: disable=too-few-public-methods
     """Base class for graph building."""
 
-    def __init__(self) -> None:
+    def __init__(
+        self, input_feature_names: Optional[List[str]] = None
+    ) -> None:
         """Construct `Detector`."""
         # Base class constructor
         super().__init__(name=__name__, class_name=self.__class__.__name__)
+        if input_feature_names is not None:
+            self.set_output_feature_names(
+                input_feature_names=input_feature_names
+            )
 
     @final
-    def forward(
-        self, x: torch.tensor, node_feature_names: List[str]
-    ) -> Tuple[Data, List[str]]:
+    def forward(self, x: torch.tensor) -> Tuple[Data, List[str]]:
         """Construct nodes from raw node features.
 
         Args:
@@ -38,10 +42,18 @@ def forward(
             graph: a graph without edges
             new_feature_names: List of new feature names.
         """
-        graph, new_feature_names = self._construct_nodes(
-            x=x, feature_names=node_feature_names
-        )
-        return graph, new_feature_names
+        graph = self._construct_nodes(x=x)
+        try:
+            self._output_feature_names
+        except AttributeError as e:
+            self.error(
+                f"""{self.__class__.__name__} was instantiated without
+                `input_feature_names` and it was not set prior to this
+                forward call. If you are using this class outside a
+                `GraphDefinition`, please instantiate with `input_feature_names`."""
+            )  # noqa
+            raise e
+        return graph, self._output_feature_names
 
     @property
     def nb_outputs(self) -> int:
@@ -61,7 +73,33 @@ def set_number_of_inputs(self, node_feature_names: List[str]) -> None:
         assert isinstance(node_feature_names, list)
         self.nb_inputs = len(node_feature_names)
 
+    @final
+    def set_output_feature_names(self, input_feature_names: List[str]) -> None:
+        """Set output feature names as a member variable.
+
+        Args:
+            input_feature_names: List of column names of the input to the
+                node definition.
+        """
+        self._output_feature_names = self._define_output_feature_names(
+            input_feature_names
+        )
+
+    @abstractmethod
+    def _define_output_feature_names(
+        self, input_feature_names: List[str]
+    ) -> List[str]:
+        """Construct names of output columns.
+
+        Args:
+            input_feature_names: List of column names for the input data.
+ + Returns: + A list of column names for each column in the node definition output. + """ + + @abstractmethod + def _construct_nodes(self, x: torch.tensor) -> Tuple[Data, List[str]]: """Construct nodes from raw node features ´x´. Args: @@ -82,10 +117,13 @@ def _construct_nodes( class NodesAsPulses(NodeDefinition): """Represent each measured pulse of Cherenkov Radiation as a node.""" - def _construct_nodes( - self, x: torch.Tensor, feature_names: List[str] - ) -> Data: - return Data(x=x), feature_names + def _define_output_feature_names( + self, input_feature_names: List[str] + ) -> List[str]: + return input_feature_names + + def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: + return Data(x=x) class PercentileClusters(NodeDefinition): @@ -100,34 +138,37 @@ class PercentileClusters(NodeDefinition): def __init__( self, cluster_on: List[str], - feature_names: List[str], percentiles: List[int], add_counts: bool = True, + input_feature_names: Optional[List[str]] = None, ) -> None: """Construct `PercentileClusters`. Args: cluster_on: Names of features to create clusters from. - feature_names: List of colum names for the input data. - E.g. ['dom_x', 'dom_y', 'dom_z',..] percentiles: List of percentiles. E.g. `[10, 50, 90]`. add_counts: If True, number of duplicates is added to output array. + input_feature_names: (Optional) column names for input features. """ self._cluster_on = cluster_on self._percentiles = percentiles self._add_counts = add_counts + # Base class constructor + super().__init__(input_feature_names=input_feature_names) + + def _define_output_feature_names( + self, input_feature_names: List[str] + ) -> List[str]: ( cluster_idx, summ_idx, new_feature_names, ) = self._get_indices_and_feature_names( - feature_names, self._add_counts + input_feature_names, self._add_counts ) self._cluster_indices = cluster_idx self._summarization_indices = summ_idx - self._output_feature_names = new_feature_names - # Base class constructor - super().__init__() + return new_feature_names def _get_indices_and_feature_names( self, @@ -146,9 +187,7 @@ def _get_indices_and_feature_names( new_feature_names.append("counts") return cluster_idx, summ_idx, new_feature_names - def _construct_nodes( - self, x: torch.Tensor, feature_names: List[str] - ) -> Data: + def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: # Cast to Numpy x = x.numpy() # Construct clusters with percentile-summarized features From d41af7d0dc0e0078c1a5309b5d53f41e06f7a5c5 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Mon, 9 Oct 2023 16:20:42 +0200 Subject: [PATCH 133/156] copy-paste utils --- src/graphnet/models/graphs/utils.py | 158 ++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 src/graphnet/models/graphs/utils.py diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py new file mode 100644 index 000000000..72928befb --- /dev/null +++ b/src/graphnet/models/graphs/utils.py @@ -0,0 +1,158 @@ +"""Utility functions for construction of graphs.""" + +from typing import List, Tuple +import numpy as np + + +def lex_sort(x: np.array, cluster_columns: List[int]) -> np.ndarray: + """Sort numpy arrays according to columns on ´cluster_columns´. + + Note that `x` is sorted along the dimensions in `cluster_columns` + backwards. I.e. `cluster_columns = [0,1,2]` + means `x` is sorted along `[2,1,0]`. + + Args: + x: array to be sorted. + cluster_columns: Columns of `x` to be sorted along. + + Returns: + A sorted version of `x`. 
+ """ + tmp_list = [] + for cluster_column in cluster_columns: + tmp_list.append(x[:, cluster_column]) + return x[np.lexsort(tuple(tmp_list)), :] + + +def gather_cluster_sequence( + x: np.ndarray, feature_idx: int, cluster_columns: List[int] +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Turn `x` into rows of clusters with sequences along columns. + + Sequences along columns are added which correspond to + gathered sequences of the feature in `x` specified by column index + `feature_idx` associated with each column. Sequences are padded with NaN to + be of same length. Dimension of clustered array is `[n_clusters, l + + len(cluster_columns)]`,where l is the largest sequence length. + + **Example**: + Suppose `x` represents a neutrino event and we have chosen to cluster on + the PMT positions. Suppose also that `feature_idx` correspond to pulse time. + + The resulting array will have dimensions `[n_pmts, m + 3]` where `m` is the + maximum number of same-pmt pulses found in `x`, and `+3`for the three + spatial directions defining each cluster. + + Args: + x: Array for clustering + feature_idx: Index of the feature in `x` to be gathered for each cluster. + cluster_columns: Index in `x` from which to build clusters. + + Returns: + array: Array with dimensions `[n_clusters, l + len(cluster_columns)]` + column_offset: Indices of the columns in `array` that defines clusters. + """ + # sort pulses according to cluster columns + x = lex_sort(x=x, cluster_columns=cluster_columns) + + # Calculate clusters and counts + unique_sensors, counts = np.unique( + x[:, cluster_columns], return_counts=True, axis=0 + ) + # sort DOMs and pulse-counts + sort_this = np.concatenate([unique_sensors, counts.reshape(-1, 1)], axis=1) + sort_this = lex_sort(x=sort_this, cluster_columns=cluster_columns) + unique_sensors = sort_this[:, 0 : unique_sensors.shape[1]] + counts = sort_this[:, unique_sensors.shape[1] :].flatten().astype(int) + + # Pad unique sensor columns with NaN's up until the maximum number of + # Same pmt-pulses. Each of padded columns represents a pulse. + pad = np.empty((unique_sensors.shape[0], max(counts))) + pad[:] = np.nan + array = np.concatenate([unique_sensors, pad], axis=1) + column_offset = unique_sensors.shape[1] + + # Construct indices for loop + cumsum = np.zeros(len(np.cumsum(counts)) + 1) + cumsum[0] = 0 + cumsum[1:] = np.cumsum(counts) + cumsum = cumsum.astype(int) + + # Insert pulse attribute in place of NaN. + for k in range(len(counts)): + array[k, column_offset : (column_offset + counts[k])] = x[ + cumsum[k] : cumsum[k + 1], feature_idx + ] + return array, column_offset, counts + + +def identify_indices( + feature_names: List[str], cluster_on: List[str] +) -> Tuple[List[int], List[int], List[str]]: + """Identify indices for clustering and summarization.""" + features_for_summarization = [] + for feature in feature_names: + if feature not in cluster_on: + features_for_summarization.append(feature) + cluster_indices = [feature_names.index(column) for column in cluster_on] + summarization_indices = [ + feature_names.index(column) for column in features_for_summarization + ] + return cluster_indices, summarization_indices, features_for_summarization + + +def cluster_summarize_with_percentiles( + x: np.ndarray, + summarization_indices: List[int], + cluster_indices: List[int], + percentiles: List[int], + add_counts: bool, +) -> np.ndarray: + """Turn `x` into clusters with percentile summary. 
+ + From variables specified by column indices `cluster_indices`, `x` is turned + into clusters. Information in columns of `x` specified by indices + `summarization_indices` with each cluster is summarized using percentiles. + It is assumed `x` represents a single event. + + **Example use-case**: + Suppose `x` contains raw pulses from a neutrino event where some DOMs have + multiple measurements of Cherenkov radiation. If `cluster_indices` is set + to the columns corresponding to the xyz-position of the DOMs, and the + features specified in `summarization_indices` correspond to time, charge, + then each row in the returned array will correspond to a DOM, + and the time and charge for each DOM will be summarized by percentiles. + Returned output array has dimensions + `[n_clusters, len(percentiles)*len(summarization_indices) + len(cluster_indices)]` + + Args: + x: Array to be clustered + summarization_indices: List of column indices that defines features that + will be summarized with percentiles. + cluster_indices: List of column indices on which the clusters are constructed. + percentiles: percentiles used to summarize `x`. E.g. [10,50,90]. + + Returns: + Percentile-summarized array + """ + pct_dict = {} + for feature_idx in summarization_indices: + summarized_array, column_offset, counts = gather_cluster_sequence( + x, feature_idx, cluster_indices + ) + pct_dict[feature_idx] = np.nanpercentile( + summarized_array[:, column_offset:], percentiles, axis=1 + ).T + + for i, key in enumerate(pct_dict.keys()): + if i == 0: + array = summarized_array[:, 0:column_offset] + + array = np.concatenate([array, pct_dict[key]], axis=1) + + if add_counts: + array = np.concatenate( + [array, np.log10(counts).reshape(-1, 1)], axis=1 + ) + + return array From c666087fb7bc95cf334f0e9458e081fbdc2a6866 Mon Sep 17 00:00:00 2001 From: Frederik Hansen Date: Mon, 9 Oct 2023 17:01:14 +0200 Subject: [PATCH 134/156] copy graphdefinition in dataset --- src/graphnet/data/dataset/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/data/dataset/dataset.py b/src/graphnet/data/dataset/dataset.py index 4253788a8..88cdc4bd4 100644 --- a/src/graphnet/data/dataset/dataset.py +++ b/src/graphnet/data/dataset/dataset.py @@ -282,7 +282,7 @@ def __init__( self._index_column = index_column self._truth_table = truth_table self._loss_weight_default_value = loss_weight_default_value - self._graph_definition = graph_definition + self._graph_definition = deepcopy(graph_definition) if node_truth is not None: assert isinstance(node_truth_table, str) From d7e9b821762074a7d3865e085bb86352ad2e038a Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 09:00:25 +0200 Subject: [PATCH 135/156] add import statement --- src/graphnet/models/graphs/nodes/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/nodes/__init__.py b/src/graphnet/models/graphs/nodes/__init__.py index 05194b61a..0119d2b98 100644 --- a/src/graphnet/models/graphs/nodes/__init__.py +++ b/src/graphnet/models/graphs/nodes/__init__.py @@ -5,4 +5,4 @@ and their features. 
""" -from .nodes import NodeDefinition, NodesAsPulses +from .nodes import NodeDefinition, NodesAsPulses, PercentileClusters From 15caafc39089c32d915b09709c0d46224362fd2e Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 10:29:23 +0200 Subject: [PATCH 136/156] cov test --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 116c7a85b..8b0476987 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -66,7 +66,7 @@ jobs: - name: Run unit tests and generate coverage report run: | coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples - coverage run --source=graphnet -m pytest tests/examples/01_icetray + coverage run -a --source=graphnet -m pytest tests/examples/01_icetray coverage xml -o coverage.xml - name: Work around permission issue @@ -112,7 +112,7 @@ jobs: run: | set -o pipefail # To propagate exit code from pytest coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/01_icetray/ - coverage run --source=graphnet -m pytest tests/utilities + coverage run -a --source=graphnet -m pytest tests/utilities coverage report -m - name: Print available disk space after unit tests run: df -h From a6010331b2089e8aba7b6b02171c5fe3f26412b3 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 10:45:59 +0200 Subject: [PATCH 137/156] fix output of construct_nodes --- src/graphnet/models/graphs/nodes/nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 477751934..b31857d2f 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -199,7 +199,7 @@ def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: add_counts=self._add_counts, ) - return Data(x=torch.tensor(array)), self._output_feature_names + return Data(x=torch.tensor(array)) def nb_outputs(self) -> int: """Return number of output features. 
From 4c7e121c82f9e65a14a865dee6b1b65be4c20806 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 10:46:34 +0200 Subject: [PATCH 138/156] type hint --- src/graphnet/models/graphs/nodes/nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index b31857d2f..7131788e5 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -187,7 +187,7 @@ def _get_indices_and_feature_names( new_feature_names.append("counts") return cluster_idx, summ_idx, new_feature_names - def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]: + def _construct_nodes(self, x: torch.Tensor) -> Data: # Cast to Numpy x = x.numpy() # Construct clusters with percentile-summarized features From 57571f2976c78109bda5cb701522e8e0c582f8e4 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 11:14:52 +0200 Subject: [PATCH 139/156] nb_output property --- src/graphnet/models/graphs/nodes/nodes.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 7131788e5..7c1d0d21c 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -61,7 +61,7 @@ def nb_outputs(self) -> int: This the default, but may be overridden by specific inheriting classes. """ - return self.nb_inputs + return len(self._output_feature_names) @final def set_number_of_inputs(self, node_feature_names: List[str]) -> None: @@ -200,10 +200,3 @@ def _construct_nodes(self, x: torch.Tensor) -> Data: ) return Data(x=torch.tensor(array)) - - def nb_outputs(self) -> int: - """Return number of output features. - - This the default, but may be overridden by specific inheriting classes. - """ - return len(self._output_feature_names) From a1f6b7e00addd3d6de31c1f99fb9f4615f9d483b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Tue, 10 Oct 2023 12:15:28 +0200 Subject: [PATCH 140/156] add unit test of node definition --- src/graphnet/models/graphs/nodes/nodes.py | 20 ++++-- tests/models/test_node_definition.py | 80 +++++++++++++++++++++++ 2 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 tests/models/test_node_definition.py diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index 7c1d0d21c..2f5e0dde8 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -191,12 +191,18 @@ def _construct_nodes(self, x: torch.Tensor) -> Data: # Cast to Numpy x = x.numpy() # Construct clusters with percentile-summarized features - array = cluster_summarize_with_percentiles( - x=x, - summarization_indices=self._summarization_indices, - cluster_indices=self._cluster_indices, - percentiles=self._percentiles, - add_counts=self._add_counts, - ) + if hasattr(self, "_summarization_indices"): + array = cluster_summarize_with_percentiles( + x=x, + summarization_indices=self._summarization_indices, + cluster_indices=self._cluster_indices, + percentiles=self._percentiles, + add_counts=self._add_counts, + ) + else: + self.error( + f"""{self.__class__.__name__} was not instatiated with `input_feature_names` and has not been set later. 
Please instantiate this class with `input_feature_names` if you're using it outside `GraphDefinition`."""
+            )  # noqa
+            raise AttributeError
 
         return Data(x=torch.tensor(array))
diff --git a/tests/models/test_node_definition.py b/tests/models/test_node_definition.py
new file mode 100644
index 000000000..4c199abd6
--- /dev/null
+++ b/tests/models/test_node_definition.py
@@ -0,0 +1,80 @@
+"""Unit tests for node definitions."""
+import numpy as np
+import pandas as pd
+import sqlite3
+import torch
+from graphnet.models.graphs.nodes import PercentileClusters
+from graphnet.constants import EXAMPLE_DATA_DIR
+
+
+def test_percentile_cluster() -> None:
+    """Test the percentiles outputted by PercentileClusters.
+
+    Here we check that they match percentiles obtained in "traditional" ways.
+    """
+    # definitions
+    percentiles = [0, 10, 50, 90, 100]
+    database = f"{EXAMPLE_DATA_DIR}/sqlite/prometheus/prometheus-events.db"
+    # Grab first event in database
+    with sqlite3.connect(database) as con:
+        query = "select event_no from mc_truth limit 1"
+        event_no = pd.read_sql(query, con)
+        query = f'select sensor_pos_x, sensor_pos_y, sensor_pos_z, t from total where event_no = {str(event_no["event_no"][0])}'
+        df = pd.read_sql(query, con)
+
+    # Save original feature names, create variables.
+    original_features = list(df.columns)
+    x = np.array(df)
+    tensor = torch.tensor(x)
+
+    # Construct node definition
+    # This defines each DOM as a cluster, and will summarize pulses seen by
+    # DOMs using percentiles.
+    node_definition = PercentileClusters(
+        cluster_on=["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"],
+        percentiles=percentiles,
+        input_feature_names=original_features,
+    )
+
+    # Apply node definition to torch tensor with raw pulses
+    graph, new_features = node_definition(tensor)
+    x_tilde = graph.x.numpy()
+
+    # Calculate percentiles "the normal way" and check that the output of
+    # the node definition matches.
+
+    unique_doms = (
+        df.groupby(["sensor_pos_x", "sensor_pos_y", "sensor_pos_z"])
+        .size()
+        .reset_index()
+    )
+    for i in range(len(unique_doms)):
+        idx_original = (
+            (df["sensor_pos_x"] == unique_doms["sensor_pos_x"][i])
+            & ((df["sensor_pos_y"] == unique_doms["sensor_pos_y"][i]))
+            & (df["sensor_pos_z"] == unique_doms["sensor_pos_z"][i])
+        )
+        idx_tilde = (
+            (
+                x_tilde[:, new_features.index("sensor_pos_x")]
+                == unique_doms["sensor_pos_x"][i]
+            )
+            & (
+                x_tilde[:, new_features.index("sensor_pos_y")]
+                == unique_doms["sensor_pos_y"][i]
+            )
+            & (
+                x_tilde[:, new_features.index("sensor_pos_z")]
+                == unique_doms["sensor_pos_z"][i]
+            )
+        )
+        for percentile in percentiles:
+            pct_idx = new_features.index(f"t_pct{percentile}")
+            try:
+                assert np.isclose(
+                    x_tilde[idx_tilde, pct_idx],
+                    np.percentile(df.loc[idx_original, "t"], percentile),
+                )
+            except AssertionError as e:
+                print(f"Percentile {percentile} does not match.")
+                raise e

From 1e1ffc850a6c7d372a4570ac54a502441cb7b40d Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Tue, 10 Oct 2023 13:13:52 +0200
Subject: [PATCH 141/156] code-climate

---
 src/graphnet/models/graphs/nodes/nodes.py | 11 ++++++++---
 src/graphnet/models/graphs/utils.py       | 12 +++++++-----
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py
index 2f5e0dde8..abca07588 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -50,7 +50,8 @@ def forward(self, x: torch.tensor) -> Tuple[Data, List[str]]:
                 f"""{self.__class__.__name__} was instantiated without
                 `input_feature_names` and it was not set prior to this
                 forward call. If you are using this class outside a
-                `GraphDefinition`, please instantiate with `input_feature_names`."""
+                `GraphDefinition`, please instantiate
+                with `input_feature_names`."""
             )  # noqa
             raise e
         return graph, self._output_feature_names
@@ -95,7 +96,8 @@ def _define_output_feature_names(
             input_feature_names: List of column names for the input data.
 
         Returns:
-            A list of column names for each column in the node definition output.
+            A list of column names for each column in
+            the node definition output.
         """
@@ -201,7 +203,10 @@ def _construct_nodes(self, x: torch.Tensor) -> Data:
         )
         else:
             self.error(
-                f"""{self.__class__.__name__} was not instantiated with `input_feature_names` and has not been set later. Please instantiate this class with `input_feature_names` if you're using it outside `GraphDefinition`."""
+                f"""{self.__class__.__name__} was not instantiated with
+                `input_feature_names` and has not been set later.
+                Please instantiate this class with `input_feature_names`
+                if you're using it outside `GraphDefinition`."""
             )  # noqa
             raise AttributeError

diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index 72928befb..ccd861783 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -37,7 +37,7 @@ def gather_cluster_sequence(
 
     **Example**:
     Suppose `x` represents a neutrino event and we have chosen to cluster on
-    the PMT positions. Suppose also that `feature_idx` corresponds to pulse time.
+    the PMT positions and that `feature_idx` corresponds to pulse time.
The resulting array will have dimensions `[n_pmts, m + 3]` where `m` is the
     maximum number of same-pmt pulses found in `x`, and `+3` for the three
     spatial directions defining each cluster.
 
     Args:
         x: Array for clustering
-        feature_idx: Index of the feature in `x` to be gathered for each cluster.
+        feature_idx: Index of the feature in `x` to
+            be gathered for each cluster.
         cluster_columns: Index in `x` from which to build clusters.
 
     Returns:
@@ -128,9 +129,10 @@ def cluster_summarize_with_percentiles(
 
     Args:
         x: Array to be clustered
-        summarization_indices: List of column indices that defines features that
-            will be summarized with percentiles.
-        cluster_indices: List of column indices on which the clusters are constructed.
+        summarization_indices: List of column indices that defines features
+            that will be summarized with percentiles.
+        cluster_indices: List of column indices on which the clusters
+            are constructed.
         percentiles: percentiles used to summarize `x`. E.g. [10,50,90].
 
     Returns:

From 6c2caeba26d284bee762e1be15878b77ad11d6df Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Tue, 10 Oct 2023 13:22:50 +0200
Subject: [PATCH 142/156] coverage combine

---
 .github/workflows/build.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8b0476987..2f44c682e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -67,6 +67,7 @@ jobs:
       run: |
         coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples
         coverage run -a --source=graphnet -m pytest tests/examples/01_icetray
+        coverage combine
         coverage xml -o coverage.xml
 
     - name: Work around permission issue
@@ -113,6 +114,7 @@
         set -o pipefail  # To propagate exit code from pytest
         coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/01_icetray/
        coverage run -a --source=graphnet -m pytest tests/utilities
+        coverage combine
         coverage report -m
     - name: Print available disk space after unit tests
       run: df -h

From 67373bf5df9236a60cf39331e19ef889371276f0 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Tue, 10 Oct 2023 13:37:07 +0200
Subject: [PATCH 143/156] run all in icetray

---
 .github/workflows/build.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 2f44c682e..5c7c8d0d1 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -65,9 +65,7 @@
         editable: true
     - name: Run unit tests and generate coverage report
       run: |
-        coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples
-        coverage run -a --source=graphnet -m pytest tests/examples/01_icetray
+        coverage run --source=graphnet -m pytest tests/
         coverage xml -o coverage.xml
 
     - name: Work around permission issue
@@ -114,7 +112,6 @@
         set -o pipefail  # To propagate exit code from pytest
         coverage run --source=graphnet -m pytest tests/ --ignore=tests/utilities --ignore=tests/data/ --ignore=tests/deployment/ --ignore=tests/examples/01_icetray/
         coverage run -a --source=graphnet -m pytest tests/utilities
         coverage report -m
     - name: Print available disk space after unit tests
       run: df -h

From ee14ec849b936f23d7aa7d5560e2beb392f527c5 Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Tue, 10 Oct 2023 14:18:21 +0200
Subject: [PATCH 144/156] two separate runs

---
 .github/workflows/build.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git
a/.github/workflows/build.yml b/.github/workflows/build.yml
index 5c7c8d0d1..7ae7881a6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -65,7 +65,8 @@
         editable: true
     - name: Run unit tests and generate coverage report
       run: |
-        coverage run --source=graphnet -m pytest tests/
+        coverage run --source=graphnet -m pytest tests/ --ignore=tests/examples/04_training
+        coverage run -a --source=graphnet -m pytest tests/examples/04_training
         coverage xml -o coverage.xml
 
     - name: Work around permission issue

From 8272bf3ce425cd8e7f306779c7cebaca7e2ac617 Mon Sep 17 00:00:00 2001
From: ArturoLlorente
Date: Tue, 10 Oct 2023 18:49:56 +0200
Subject: [PATCH 145/156] solve codeclimate issues

---
 src/graphnet/training/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/graphnet/training/utils.py b/src/graphnet/training/utils.py
index 1cded30d2..df7c92e15 100644
--- a/src/graphnet/training/utils.py
+++ b/src/graphnet/training/utils.py
@@ -47,8 +47,8 @@ def __call__(self, graphs: List[Data]) -> Batch:
             graphs: A list of Data objects representing the input graphs.
 
         Returns:
-            A list of Batch objects, each containing a mini-batch of the input graphs
-            sorted by their number of pulses.
+            A list of Batch objects, each containing a mini-batch of the input
+            graphs sorted by their number of pulses.
         """
         graphs = [g for g in graphs if g.n_pulses > 1]
         graphs.sort(key=lambda x: x.n_pulses)

From 2f0f21ad018e2cec92ee10faf8a90b4e90ff6afc Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Wed, 18 Oct 2023 11:26:35 +0200
Subject: [PATCH 146/156] rename variables

---
 .../models/graphs/graph_definition.py     | 70 ++++++++++---------
 src/graphnet/models/graphs/nodes/nodes.py |  8 +--
 2 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/src/graphnet/models/graphs/graph_definition.py b/src/graphnet/models/graphs/graph_definition.py
index 6e8d74715..f75f65b98 100644
--- a/src/graphnet/models/graphs/graph_definition.py
+++ b/src/graphnet/models/graphs/graph_definition.py
@@ -26,7 +26,7 @@ def __init__(
         detector: Detector,
         node_definition: NodeDefinition = NodesAsPulses(),
         edge_definition: Optional[EdgeDefinition] = None,
-        node_feature_names: Optional[List[str]] = None,
        input_feature_names: Optional[List[str]] = None,
         dtype: Optional[torch.dtype] = torch.float,
         perturbation_dict: Optional[Dict[str, float]] = None,
         seed: Optional[Union[int, Generator]] = None,
@@ -44,7 +44,10 @@ def __init__(
         detector: The corresponding ´Detector´ representing the data.
         node_definition: Definition of nodes. Defaults to NodesAsPulses.
         edge_definition: Definition of edges. Defaults to None.
-        node_feature_names: Names of node feature columns. Defaults to None
+        input_feature_names: Names of each column in expected input data
+            that will be built into a graph. If not provided,
+            it is automatically assumed that all features in `Detector` are
+            used.
         dtype: data type used for node features. e.g. ´torch.float´
         perturbation_dict: Dictionary mapping a feature name to a standard
             deviation according to which the values for this
@@ -62,14 +65,14 @@ def __init__(
         self._node_definition = node_definition
         self._perturbation_dict = perturbation_dict
 
-        if node_feature_names is None:
+        if input_feature_names is None:
             # Assume all features in Detector is used.
- node_feature_names = list(self._detector.feature_map().keys()) # type: ignore - self._node_feature_names = node_feature_names + input_feature_names = list(self._detector.feature_map().keys()) # type: ignore + self._input_feature_names = input_feature_names # Set input data column names for node definition self._node_definition.set_output_feature_names( - self._node_feature_names + self._input_feature_names ) # Set data type @@ -77,15 +80,15 @@ def __init__( # Set Input / Output dimensions self._node_definition.set_number_of_inputs( - node_feature_names=node_feature_names + input_feature_names=input_feature_names ) - self.nb_inputs = len(self._node_feature_names) + self.nb_inputs = len(self._input_feature_names) self.nb_outputs = self._node_definition.nb_outputs # Set perturbation_cols if needed if isinstance(self._perturbation_dict, dict): self._perturbation_cols = [ - self._node_feature_names.index(key) + self._input_feature_names.index(key) for key in self._perturbation_dict.keys() ] if seed is not None: @@ -102,8 +105,8 @@ def __init__( def forward( # type: ignore self, - node_features: np.ndarray, - node_feature_names: List[str], + input_features: np.ndarray, + input_feature_names: List[str], truth_dicts: Optional[List[Dict[str, Any]]] = None, custom_label_functions: Optional[Dict[str, Callable[..., Any]]] = None, loss_weight_column: Optional[str] = None, @@ -114,8 +117,8 @@ def forward( # type: ignore """Construct graph as ´Data´ object. Args: - node_features: node features for graph. Shape ´[num_nodes, d]´ - node_feature_names: name of each column. Shape ´[,d]´. + input_features: Input features for graph construction. Shape ´[num_rows, d]´ + input_feature_names: name of each column. Shape ´[,d]´. truth_dicts: Dictionary containing truth labels. custom_label_functions: Custom label functions. See https://github.com/graphnet-team/graphnet/blob/main/GETTING_STARTED.md#adding-custom-truth-labels. loss_weight_column: Name of column that holds loss weight. @@ -131,26 +134,27 @@ def forward( # type: ignore """ # Checks self._validate_input( - node_features=node_features, node_feature_names=node_feature_names + input_features=input_features, + input_feature_names=input_feature_names, ) # Gaussian perturbation of each column if perturbation dict is given - node_features = self._perturb_input(node_features) + input_features = self._perturb_input(input_features) # Transform to pytorch tensor - node_features = torch.tensor(node_features, dtype=self.dtype) + input_features = torch.tensor(input_features, dtype=self.dtype) # Standardize / Scale node features - node_features = self._detector(node_features, node_feature_names) + input_features = self._detector(input_features, input_feature_names) # Create graph & get new node feature names - graph, node_feature_names = self._node_definition(node_features) + graph, node_feature_names = self._node_definition(input_features) # Enforce dtype graph.x = graph.x.type(self.dtype) # Attach number of pulses as static attribute. 
- graph.n_pulses = torch.tensor(len(node_features), dtype=torch.int32) + graph.n_pulses = torch.tensor(len(input_features), dtype=torch.int32) # Assign edges if self._edge_definition is not None: @@ -194,26 +198,26 @@ def forward( # type: ignore return graph def _validate_input( - self, node_features: np.array, node_feature_names: List[str] + self, input_features: np.array, input_feature_names: List[str] ) -> None: # node feature matrix dimension check - assert node_features.shape[1] == len(node_feature_names) + assert input_features.shape[1] == len(input_feature_names) # check that provided features for input is the same that the ´Graph´ # was instantiated with. - assert len(node_feature_names) == len( - self._node_feature_names - ), f"""Input features ({node_feature_names}) is not what + assert len(input_feature_names) == len( + self._input_feature_names + ), f"""Input features ({input_feature_names}) is not what {self.__class__.__name__} was instatiated - with ({self._node_feature_names})""" # noqa - for idx in range(len(node_feature_names)): + with ({self._input_feature_names})""" # noqa + for idx in range(len(input_feature_names)): assert ( - node_feature_names[idx] == self._node_feature_names[idx] + input_feature_names[idx] == self._input_feature_names[idx] ), f""" Order of node features in data - are not the same as expected. Got {node_feature_names} - vs. {self._node_feature_names}""" # noqa + are not the same as expected. Got {input_feature_names} + vs. {self._input_feature_names}""" # noqa - def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: + def _perturb_input(self, input_features: np.ndarray) -> np.ndarray: if isinstance(self._perturbation_dict, dict): self.warning_once( f"""Will randomly perturb @@ -221,13 +225,13 @@ def _perturb_input(self, node_features: np.ndarray) -> np.ndarray: using stds {self._perturbation_dict.values()}""" # noqa ) perturbed_features = self.rng.normal( - loc=node_features[:, self._perturbation_cols], + loc=input_features[:, self._perturbation_cols], scale=np.array( list(self._perturbation_dict.values()), dtype=float ), ) - node_features[:, self._perturbation_cols] = perturbed_features - return node_features + input_features[:, self._perturbation_cols] = perturbed_features + return input_features def _add_loss_weights( self, diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py index abca07588..fa0400b97 100644 --- a/src/graphnet/models/graphs/nodes/nodes.py +++ b/src/graphnet/models/graphs/nodes/nodes.py @@ -65,14 +65,14 @@ def nb_outputs(self) -> int: return len(self._output_feature_names) @final - def set_number_of_inputs(self, node_feature_names: List[str]) -> None: + def set_number_of_inputs(self, input_feature_names: List[str]) -> None: """Return number of inputs expected by node definition. Args: - node_feature_names: name of each node feature column. + input_feature_names: name of each input feature column. 
""" - assert isinstance(node_feature_names, list) - self.nb_inputs = len(node_feature_names) + assert isinstance(input_feature_names, list) + self.nb_inputs = len(input_feature_names) @final def set_output_feature_names(self, input_feature_names: List[str]) -> None: From d07115cdd1a78ff85673131ebed06fb4351378e6 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 11:36:21 +0200 Subject: [PATCH 147/156] rename --- .../dynedge_PID_classification_example.yml | 2 +- ...ynedge_position_custom_scaling_example.yml | 2 +- configs/models/dynedge_position_example.yml | 44 ------------------- ...example_direction_reconstruction_model.yml | 2 +- .../example_energy_reconstruction_model.yml | 2 +- ...e_vertex_position_reconstruction_model.yml | 2 +- tests/models/test_graph_definition.py | 6 +-- 7 files changed, 8 insertions(+), 52 deletions(-) delete mode 100644 configs/models/dynedge_position_example.yml diff --git a/configs/models/dynedge_PID_classification_example.yml b/configs/models/dynedge_PID_classification_example.yml index 57fec3e88..f9b1509c4 100644 --- a/configs/models/dynedge_PID_classification_example.yml +++ b/configs/models/dynedge_PID_classification_example.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: {eps: 0.001, lr: 0.001} diff --git a/configs/models/dynedge_position_custom_scaling_example.yml b/configs/models/dynedge_position_custom_scaling_example.yml index 195695a8d..013dab592 100644 --- a/configs/models/dynedge_position_custom_scaling_example.yml +++ b/configs/models/dynedge_position_custom_scaling_example.yml @@ -17,7 +17,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph gnn: ModelConfig: diff --git a/configs/models/dynedge_position_example.yml b/configs/models/dynedge_position_example.yml deleted file mode 100644 index c82223825..000000000 --- a/configs/models/dynedge_position_example.yml +++ /dev/null @@ -1,44 +0,0 @@ -arguments: - coarsening: null - detector: - ModelConfig: - arguments: - graph_builder: - ModelConfig: - arguments: {columns: null, nb_nearest_neighbours: 8} - class_name: KNNGraphBuilder - scalers: null - class_name: IceCubeDeepCore - gnn: - ModelConfig: - arguments: - add_global_variables_after_pooling: false - dynedge_layer_sizes: null - features_subset: null - global_pooling_schemes: [min, max, mean, sum] - nb_inputs: 7 - nb_neighbours: 8 - post_processing_layer_sizes: null - readout_layer_sizes: null - class_name: DynEdge - optimizer_class: '!class torch.optim.adam Adam' - optimizer_kwargs: {eps: 0.001, lr: 1e-05} - scheduler_class: '!class torch.optim.lr_scheduler ReduceLROnPlateau' - scheduler_config: {frequency: 1, monitor: val_loss} - scheduler_kwargs: {patience: 5} - tasks: - - ModelConfig: - arguments: - hidden_size: 128 - loss_function: - ModelConfig: - arguments: {} - class_name: MSELoss - loss_weight: null - target_labels: ["position_x", "position_y", "position_z"] - transform_inference: null - transform_prediction_and_target: null - transform_support: null - transform_target: null - class_name: PositionReconstruction -class_name: StandardModel diff --git a/configs/models/example_direction_reconstruction_model.yml b/configs/models/example_direction_reconstruction_model.yml index 
cb1c4d841..faf168ed5 100644 --- a/configs/models/example_direction_reconstruction_model.yml +++ b/configs/models/example_direction_reconstruction_model.yml @@ -13,7 +13,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph gnn: ModelConfig: diff --git a/configs/models/example_energy_reconstruction_model.yml b/configs/models/example_energy_reconstruction_model.yml index 827c84748..5983ef799 100644 --- a/configs/models/example_energy_reconstruction_model.yml +++ b/configs/models/example_energy_reconstruction_model.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: {eps: 0.001, lr: 0.001} diff --git a/configs/models/example_vertex_position_reconstruction_model.yml b/configs/models/example_vertex_position_reconstruction_model.yml index 0522a1f2d..ce0a993c4 100644 --- a/configs/models/example_vertex_position_reconstruction_model.yml +++ b/configs/models/example_vertex_position_reconstruction_model.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: {eps: 0.001, lr: 0.001} diff --git a/tests/models/test_graph_definition.py b/tests/models/test_graph_definition.py index bf16d7853..ec6c75e24 100644 --- a/tests/models/test_graph_definition.py +++ b/tests/models/test_graph_definition.py @@ -27,7 +27,7 @@ def test_graph_definition() -> None: detector=Prometheus(), perturbation_dict=perturbation_dict, seed=seed ) original_output = graph_definition( - node_features=deepcopy(mock_data), node_feature_names=features + input_features=deepcopy(mock_data), input_feature_names=features ) for _ in range(n_reps): @@ -42,11 +42,11 @@ def test_graph_definition() -> None: ) data = graph_definition( - node_features=deepcopy(mock_data), node_feature_names=features + input_features=deepcopy(mock_data), input_feature_names=features ) perturbed_data = graph_definition_perturbed( - node_features=deepcopy(mock_data), node_feature_names=features + input_features=deepcopy(mock_data), input_feature_names=features ) assert ~torch.equal(data.x, perturbed_data.x) # should not be equal. 
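The net effect of the renaming in patches 146-147 on user code is confined to keyword arguments. A sketch of the updated call pattern, mirroring the tests touched above (import paths assumed from the repository layout):

    from graphnet.data.constants import FEATURES
    from graphnet.models.detector.icecube import IceCubeDeepCore
    from graphnet.models.graphs import KNNGraph
    from graphnet.models.graphs.nodes import NodesAsPulses

    graph_definition = KNNGraph(
        detector=IceCubeDeepCore(),
        node_definition=NodesAsPulses(),
        nb_nearest_neighbours=8,
        # previously: node_feature_names=FEATURES.DEEPCORE
        input_feature_names=FEATURES.DEEPCORE,
    )

    # The forward call follows the same rename:
    # graph = graph_definition(
    #     input_features=features_array,  # numpy array of raw pulses
    #     input_feature_names=FEATURES.DEEPCORE,
    # )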
From b67ba0865b70ab8f320bedecbe6141dc1d98a42d Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 11:41:51 +0200 Subject: [PATCH 148/156] rename --- tests/training/test_dataloader_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/training/test_dataloader_utilities.py b/tests/training/test_dataloader_utilities.py index 0fdaccf60..423b2f34b 100644 --- a/tests/training/test_dataloader_utilities.py +++ b/tests/training/test_dataloader_utilities.py @@ -22,7 +22,7 @@ detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) From 1049765f5ca5bc638208a84406702bb6d9e989c4 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 11:43:45 +0200 Subject: [PATCH 149/156] update pretrained configs --- .../SplitInIcePulses_cleaner_config.yml | 2 +- .../QUESO/neutrino_direction/neutrino_direction_config.yml | 2 +- .../neutrino_vs_muon_classifier_config.yml | 2 +- .../upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml | 2 +- .../total_neutrino_energy/total_neutrino_energy_config.yml | 2 +- .../track_vs_cascade_classifier_config.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml index 281bda2f4..a13f11aa2 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml @@ -19,7 +19,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml index 6cabc6985..b42e1fef8 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml index 3c0c7510a..326617c00 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git 
a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml index fee57a531..c54f6ec5b 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml index 16d9ddde5..a35c0203a 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml index a49c60a22..5e88b510a 100644 --- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml +++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml @@ -25,7 +25,7 @@ arguments: ModelConfig: arguments: {} class_name: NodesAsPulses - node_feature_names: null + input_feature_names: null class_name: KNNGraph optimizer_class: '!class torch.optim.adam Adam' optimizer_kwargs: null From 1d190267975408121fbda9eb62c2a39ad1b83e97 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 11:49:22 +0200 Subject: [PATCH 150/156] rename arg in KNNGraph --- src/graphnet/models/graphs/graphs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/graphnet/models/graphs/graphs.py b/src/graphnet/models/graphs/graphs.py index 4ae53037a..bd52eaeae 100644 --- a/src/graphnet/models/graphs/graphs.py +++ b/src/graphnet/models/graphs/graphs.py @@ -17,7 +17,7 @@ def __init__( self, detector: Detector, node_definition: NodeDefinition = NodesAsPulses(), - node_feature_names: Optional[List[str]] = None, + input_feature_names: Optional[List[str]] = None, dtype: Optional[torch.dtype] = torch.float, perturbation_dict: Optional[Dict[str, float]] = None, seed: Optional[Union[int, Generator]] = None, @@ -29,7 +29,7 @@ def __init__( Args: detector: Detector that represents your data. node_definition: Definition of nodes in the graph. - node_feature_names: Name of node features. + input_feature_names: Name of input feature columns. dtype: data type for node features. 
perturbation_dict: Dictionary mapping a feature name to a standard deviation according to which the values for this @@ -50,7 +50,7 @@ def __init__( columns=columns, ), dtype=dtype, - node_feature_names=node_feature_names, + input_feature_names=input_feature_names, perturbation_dict=perturbation_dict, seed=seed, ) From 170c2b31171ed40001dd8d59ef96259252f948c7 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 12:12:23 +0200 Subject: [PATCH 151/156] arg update in dataset --- src/graphnet/data/dataset/dataset.py | 4 ++-- tests/models/test_task.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/graphnet/data/dataset/dataset.py b/src/graphnet/data/dataset/dataset.py index 4253788a8..0988c5793 100644 --- a/src/graphnet/data/dataset/dataset.py +++ b/src/graphnet/data/dataset/dataset.py @@ -629,8 +629,8 @@ def _create_graph( # Construct graph data object assert self._graph_definition is not None graph = self._graph_definition( - node_features=node_features, - node_feature_names=self._features[ + input_features=node_features, + input_feature_names=self._features[ 1: ], # first entry is index column truth_dicts=truth_dicts, diff --git a/tests/models/test_task.py b/tests/models/test_task.py index 68e014f33..bfadb6263 100644 --- a/tests/models/test_task.py +++ b/tests/models/test_task.py @@ -18,7 +18,7 @@ def test_transform_prediction_and_target() -> None: detector=IceCube86(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) gnn = DynEdge( nb_inputs=graph_definition.nb_outputs, From fd279974622748057506b34821e8170742c00876 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 12:24:03 +0200 Subject: [PATCH 152/156] update args --- examples/02_data/04_ensemble_dataset.py | 2 +- examples/05_pisa/02_make_pipeline_database.py | 2 +- tests/data/test_dataconverters_and_datasets.py | 6 +++--- tests/utilities/test_model_config.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/02_data/04_ensemble_dataset.py b/examples/02_data/04_ensemble_dataset.py index f1cc9de68..4ade95de6 100644 --- a/examples/02_data/04_ensemble_dataset.py +++ b/examples/02_data/04_ensemble_dataset.py @@ -24,7 +24,7 @@ detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=features, + input_feature_names=features, ) diff --git a/examples/05_pisa/02_make_pipeline_database.py b/examples/05_pisa/02_make_pipeline_database.py index 17e86646d..722b997f3 100644 --- a/examples/05_pisa/02_make_pipeline_database.py +++ b/examples/05_pisa/02_make_pipeline_database.py @@ -65,7 +65,7 @@ def main() -> None: detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) # Remove `interaction_time` if it exists diff --git a/tests/data/test_dataconverters_and_datasets.py b/tests/data/test_dataconverters_and_datasets.py index 64fcd85c6..480f11d4d 100644 --- a/tests/data/test_dataconverters_and_datasets.py +++ b/tests/data/test_dataconverters_and_datasets.py @@ -115,7 +115,7 @@ def test_dataset(backend: str) -> None: detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) # Constructor DataConverter instance @@ -168,7 +168,7 @@ def test_datasetquery_table(backend: str) -> None: detector=IceCubeDeepCore(), 
node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) # Constructor DataConverter instance pulsemap = "SRTInIcePulses" @@ -220,7 +220,7 @@ def test_parquet_to_sqlite_converter() -> None: detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) # Perform conversion from I3 to `backend` database_name = FILE_NAME + "_from_parquet" diff --git a/tests/utilities/test_model_config.py b/tests/utilities/test_model_config.py index 8979f0255..59eb6343a 100644 --- a/tests/utilities/test_model_config.py +++ b/tests/utilities/test_model_config.py @@ -49,7 +49,7 @@ def test_complete_model_config(path: str = "/tmp/complete_model.yml") -> None: detector=IceCubeDeepCore(), node_definition=NodesAsPulses(), nb_nearest_neighbours=8, - node_feature_names=FEATURES.DEEPCORE, + input_feature_names=FEATURES.DEEPCORE, ) gnn = DynEdge( nb_inputs=graph_definition.nb_outputs, From 7c921c3cb8ea25c0de3a93de44e7e5bbcc0316c3 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Wed, 18 Oct 2023 12:40:39 +0200 Subject: [PATCH 153/156] update configs --- configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml | 2 +- configs/datasets/test_data_sqlite.yml | 2 +- .../datasets/training_classification_example_data_sqlite.yml | 2 +- configs/datasets/training_example_data_parquet.yml | 2 +- configs/datasets/training_example_data_sqlite.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml b/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml index d70de5294..523f4fa90 100644 --- a/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml +++ b/configs/datasets/dev_lvl7_robustness_muon_neutrino_0000.yml @@ -10,7 +10,7 @@ graph_definition: node_definition: arguments: {} class_name: NodesAsPulses - node_feature_names: [dom_x, dom_y, dom_z, dom_time, charge, rde, pmt_area] + input_feature_names: [dom_x, dom_y, dom_z, dom_time, charge, rde, pmt_area] class_name: KNNGraph pulsemaps: - SRTTWOfflinePulsesDC diff --git a/configs/datasets/test_data_sqlite.yml b/configs/datasets/test_data_sqlite.yml index 689e8af31..11ea4496d 100644 --- a/configs/datasets/test_data_sqlite.yml +++ b/configs/datasets/test_data_sqlite.yml @@ -10,7 +10,7 @@ graph_definition: node_definition: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph index_column: event_no loss_weight_column: null diff --git a/configs/datasets/training_classification_example_data_sqlite.yml b/configs/datasets/training_classification_example_data_sqlite.yml index ae94420ee..3a13f8749 100644 --- a/configs/datasets/training_classification_example_data_sqlite.yml +++ b/configs/datasets/training_classification_example_data_sqlite.yml @@ -10,7 +10,7 @@ graph_definition: node_definition: arguments: {} class_name: NodesAsPulses - node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] + input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t] class_name: KNNGraph pulsemaps: - total diff --git a/configs/datasets/training_example_data_parquet.yml b/configs/datasets/training_example_data_parquet.yml index d8bde7e30..67abca0c4 100644 --- a/configs/datasets/training_example_data_parquet.yml +++ b/configs/datasets/training_example_data_parquet.yml @@ 
-10,7 +10,7 @@ graph_definition:
   node_definition:
     arguments: {}
     class_name: NodesAsPulses
-  node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+  input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
   class_name: KNNGraph
 pulsemaps:
 - total
diff --git a/configs/datasets/training_example_data_sqlite.yml b/configs/datasets/training_example_data_sqlite.yml
index b33a0ee0c..20c4aa8c0 100644
--- a/configs/datasets/training_example_data_sqlite.yml
+++ b/configs/datasets/training_example_data_sqlite.yml
@@ -10,7 +10,7 @@ graph_definition:
   node_definition:
     arguments: {}
     class_name: NodesAsPulses
-  node_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
+  input_feature_names: [sensor_pos_x, sensor_pos_y, sensor_pos_z, t]
   class_name: KNNGraph
 pulsemaps:
 - total

From aaa8dc62d32ae9e32edb447b8e12c9e6f4adfb0e Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Wed, 18 Oct 2023 13:03:48 +0200
Subject: [PATCH 154/156] update args in i3modules

---
 src/graphnet/deployment/i3modules/graphnet_module.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/graphnet/deployment/i3modules/graphnet_module.py b/src/graphnet/deployment/i3modules/graphnet_module.py
index 2c85600a3..dee0973b8 100644
--- a/src/graphnet/deployment/i3modules/graphnet_module.py
+++ b/src/graphnet/deployment/i3modules/graphnet_module.py
@@ -84,12 +84,12 @@ def _make_graph(
     ) -> Data:  # pylint: disable=invalid-name
         """Process Physics I3Frame into graph."""
         # Extract features
-        node_features = self._extract_feature_array_from_frame(frame)
+        input_features = self._extract_feature_array_from_frame(frame)
         # Prepare graph data
-        if len(node_features) > 0:
+        if len(input_features) > 0:
             data = self._graph_definition(
-                node_features=node_features,
-                node_feature_names=self._features,
+                input_features=input_features,
+                input_feature_names=self._features,
             )
             return Batch.from_data_list([data])
         else:

From 928c221a0060e4798e38f5783dca357f3be7d39d Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe
Date: Wed, 18 Oct 2023 13:13:40 +0200
Subject: [PATCH 155/156] update args dataset config test

---
 tests/utilities/test_dataset_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/utilities/test_dataset_config.py b/tests/utilities/test_dataset_config.py
index 5f7de5b6a..ca906d659 100644
--- a/tests/utilities/test_dataset_config.py
+++ b/tests/utilities/test_dataset_config.py
@@ -30,7 +30,7 @@
     detector=IceCubeDeepCore(),
     node_definition=NodesAsPulses(),
     nb_nearest_neighbours=8,
-    node_feature_names=FEATURES.DEEPCORE,
+    input_feature_names=FEATURES.DEEPCORE,
 )

From d1b97d559a4a3b381f7378bf7d13811969f883cb Mon Sep 17 00:00:00 2001
From: ArturoLlorente <85907219+ArturoLlorente@users.noreply.github.com>
Date: Tue, 24 Oct 2023 16:37:02 +0200
Subject: [PATCH 156/156] Redefinition of Tito model (#611)

* changes to have a more general GNN definition. All models used in the Tito
solution can be replicated with this class. Small changes in the training
example to fit new input variables

* changes from PR #607 to solve config summarizing issue

* added features_subset definition in gnn definition.
target set to str

---
 examples/04_training/02_train_tito_model.py   |  6 +-
 .../models/gnn/dynedge_kaggle_tito.py         | 85 ++++++++++++-------
 2 files changed, 57 insertions(+), 34 deletions(-)

diff --git a/examples/04_training/02_train_tito_model.py b/examples/04_training/02_train_tito_model.py
index 735dea055..eeffacfed 100644
--- a/examples/04_training/02_train_tito_model.py
+++ b/examples/04_training/02_train_tito_model.py
@@ -107,7 +107,11 @@ def main(
     # Building model
     gnn = DynEdgeTITO(
         nb_inputs=graph_definition.nb_outputs,
+        features_subset=[0, 1, 2, 3],
+        dyntrans_layer_sizes=[(256, 256), (256, 256), (256, 256), (256, 256)],
         global_pooling_schemes=["max"],
+        use_global_features=True,
+        use_post_processing_layers=True,
     )
     task = DirectionReconstructionWithKappa(
         hidden_size=gnn.nb_outputs,
@@ -212,7 +216,7 @@ def main(
             "Name of feature to use as regression target (default: "
             "%(default)s)"
         ),
-        default=["direction"],
+        default="direction",
     )

     parser.add_argument(
diff --git a/src/graphnet/models/gnn/dynedge_kaggle_tito.py b/src/graphnet/models/gnn/dynedge_kaggle_tito.py
index 4a4662256..266739c8b 100644
--- a/src/graphnet/models/gnn/dynedge_kaggle_tito.py
+++ b/src/graphnet/models/gnn/dynedge_kaggle_tito.py
@@ -8,16 +8,16 @@
 Solution by TITO.
 """

-from typing import List, Tuple, Optional
+from typing import List, Tuple, Optional, Union

 import torch
 from torch import Tensor, LongTensor

 from torch_geometric.data import Data
-from torch_geometric.utils import to_dense_batch
 from torch_scatter import scatter_max, scatter_mean, scatter_min, scatter_sum

 from graphnet.models.components.layers import DynTrans
+from graphnet.utilities.config import save_model_config
 from graphnet.models.gnn.gnn import GNN
 from graphnet.models.utils import calculate_xyzt_homophily

@@ -30,16 +30,19 @@
 class DynEdgeTITO(GNN):
-    """DynEdge (dynamical edge convolutional) model."""
+    """DynEdgeTITO (dynamical edge convolutional with Transformer) model."""

+    @save_model_config
     def __init__(
         self,
         nb_inputs: int,
         features_subset: Optional[List[int]] = None,
         dyntrans_layer_sizes: Optional[List[Tuple[int, ...]]] = None,
         global_pooling_schemes: List[str] = ["max"],
+        use_global_features: bool = True,
+        use_post_processing_layers: bool = True,
     ):
-        """Construct `DynEdge`.
+        """Construct `DynEdgeTITO`.

         Args:
             nb_inputs: Number of input features on each node.
@@ -48,10 +51,14 @@ def __init__(
             features_subset: The subset of latent features on each node that
                 are used as metric dimensions when performing the k-nearest
                 neighbours clustering. Defaults to [0,1,2,3].
             dyntrans_layer_sizes: The layer sizes, or latent feature dimensions,
                 used in the `DynTrans` layer.
+                Defaults to [(256, 256), (256, 256), (256, 256), (256, 256)].
             global_pooling_schemes: The list of global pooling schemes to use.
                 Options are: "min", "max", "mean", and "sum".
+            use_global_features: Whether to use global features after pooling.
+            use_post_processing_layers: Whether to use post-processing layers
+                after the `DynTrans` layers.
""" - # DynEdge layer sizes + # DynTrans layer sizes if dyntrans_layer_sizes is None: dyntrans_layer_sizes = [ ( @@ -66,6 +73,10 @@ def __init__( 256, 256, ), + ( + 256, + 256, + ), ] assert isinstance(dyntrans_layer_sizes, list) @@ -120,7 +131,10 @@ def __init__( self._activation = torch.nn.LeakyReLU() self._nb_inputs = nb_inputs self._nb_global_variables = 5 + nb_inputs + self._nb_neighbours = 8 self._features_subset = features_subset or [0, 1, 2, 3] + self._use_global_features = use_global_features + self._use_post_processing_layers = use_post_processing_layers self._construct_layers() def _construct_layers(self) -> None: @@ -140,16 +154,21 @@ def _construct_layers(self) -> None: self._conv_layers.append(conv_layer) nb_latent_features = sizes[-1] - post_processing_layers = [] - layer_sizes = [nb_latent_features] + list( - self._post_processing_layer_sizes - ) - for nb_in, nb_out in zip(layer_sizes[:-1], layer_sizes[1:]): - post_processing_layers.append(torch.nn.Linear(nb_in, nb_out)) - post_processing_layers.append(self._activation) - last_posting_layer_output_dim = nb_out + if self._use_post_processing_layers: + post_processing_layers = [] + layer_sizes = [nb_latent_features] + list( + self._post_processing_layer_sizes + ) + for nb_in, nb_out in zip(layer_sizes[:-1], layer_sizes[1:]): + post_processing_layers.append(torch.nn.Linear(nb_in, nb_out)) + post_processing_layers.append(self._activation) + last_posting_layer_output_dim = nb_out - self._post_processing = torch.nn.Sequential(*post_processing_layers) + self._post_processing = torch.nn.Sequential( + *post_processing_layers + ) + else: + last_posting_layer_output_dim = nb_latent_features # Read-out operations nb_poolings = ( @@ -158,7 +177,8 @@ def _construct_layers(self) -> None: else 1 ) nb_latent_features = last_posting_layer_output_dim * nb_poolings - nb_latent_features += self._nb_global_variables + if self._use_global_features: + nb_latent_features += self._nb_global_variables readout_layers = [] layer_sizes = [nb_latent_features] + list(self._readout_layer_sizes) @@ -217,32 +237,31 @@ def forward(self, data: Data) -> Tensor: # Convenience variables x, edge_index, batch = data.x, data.edge_index, data.batch - global_variables = self._calculate_global_variables( - x, - edge_index, - batch, - torch.log10(data.n_pulses), - ) + if self._use_global_features: + global_variables = self._calculate_global_variables( + x, + edge_index, + batch, + torch.log10(data.n_pulses), + ) # DynEdge-convolutions for conv_layer in self._conv_layers: x = conv_layer(x, edge_index, batch) - x, mask = to_dense_batch(x, batch) - x = x[mask] - # Post-processing - x = self._post_processing(x) + if self._use_post_processing_layers: + x = self._post_processing(x) - # (Optional) Global pooling x = self._global_pooling(x, batch=batch) - x = torch.cat( - [ - x, - global_variables, - ], - dim=1, - ) + if self._use_global_features: + x = torch.cat( + [ + x, + global_variables, + ], + dim=1, + ) # Read-out x = self._readout(x)