diff --git a/data/geometry_tables/icecube/icecube86.parquet b/data/geometry_tables/icecube/icecube86.parquet index 776949d61..94beceb94 100644 Binary files a/data/geometry_tables/icecube/icecube86.parquet and b/data/geometry_tables/icecube/icecube86.parquet differ diff --git a/data/geometry_tables/icecube/icecube_upgrade.parquet b/data/geometry_tables/icecube/icecube_upgrade.parquet index cadbd967a..18aa7e8ad 100644 Binary files a/data/geometry_tables/icecube/icecube_upgrade.parquet and b/data/geometry_tables/icecube/icecube_upgrade.parquet differ diff --git a/data/ice_properties/ice_transparency.parquet b/data/ice_properties/ice_transparency.parquet new file mode 100644 index 000000000..2a4068a58 Binary files /dev/null and b/data/ice_properties/ice_transparency.parquet differ diff --git a/data/tests/parquet/oscNext_genie_level7_v02/oscNext_genie_level7_v02_first_5_frames.parquet b/data/tests/parquet/oscNext_genie_level7_v02/oscNext_genie_level7_v02_first_5_frames.parquet index d9dfb6e98..f633a10c3 100644 Binary files a/data/tests/parquet/oscNext_genie_level7_v02/oscNext_genie_level7_v02_first_5_frames.parquet and b/data/tests/parquet/oscNext_genie_level7_v02/oscNext_genie_level7_v02_first_5_frames.parquet differ diff --git a/data/tests/sqlite/oscNext_genie_level7_v02/oscNext_genie_level7_v02_first_5_frames.db b/data/tests/sqlite/oscNext_genie_level7_v02/oscNext_genie_level7_v02_first_5_frames.db index befb894e7..67d190132 100644 Binary files a/data/tests/sqlite/oscNext_genie_level7_v02/oscNext_genie_level7_v02_first_5_frames.db and b/data/tests/sqlite/oscNext_genie_level7_v02/oscNext_genie_level7_v02_first_5_frames.db differ diff --git a/data/tests/sqlite/upgrade_genie_step4_140028_000998_first_5_frames/upgrade_genie_step4_140028_000998_first_5_frames.db b/data/tests/sqlite/upgrade_genie_step4_140028_000998_first_5_frames/upgrade_genie_step4_140028_000998_first_5_frames.db index 84369a46c..d05a12ecc 100644 Binary files a/data/tests/sqlite/upgrade_genie_step4_140028_000998_first_5_frames/upgrade_genie_step4_140028_000998_first_5_frames.db and b/data/tests/sqlite/upgrade_genie_step4_140028_000998_first_5_frames/upgrade_genie_step4_140028_000998_first_5_frames.db differ diff --git a/src/graphnet/models/components/embedding.py b/src/graphnet/models/components/embedding.py index ee03a2193..40145ad1a 100644 --- a/src/graphnet/models/components/embedding.py +++ b/src/graphnet/models/components/embedding.py @@ -1,9 +1,13 @@ """Classes for performing embedding of input data.""" import torch +import torch.nn as nn +from torch.functional import Tensor +from pytorch_lightning import LightningModule -class SinusoidalPosEmb(torch.nn.Module): - """Sinusoidal positional embedding layer. + +class SinusoidalPosEmb(LightningModule): + """Sinusoidal positional embeddings module. This module is from the kaggle competition 2nd place solution (see arXiv:2310.15674): It performs what is called Fourier encoding or it's used @@ -11,23 +15,129 @@ class SinusoidalPosEmb(torch.nn.Module): digitization of the input data """ - def __init__(self, dim: int = 16, n_freq: int = 10000) -> None: + def __init__( + self, + dim: int = 16, + n_freq: int = 10000, + scaled: bool = False, + ): """Construct `SinusoidalPosEmb`. Args: dim: Embedding dimension. n_freq: Number of frequencies. + scaled: Whether or not to scale the output. """ super().__init__() + if dim % 2 != 0: + raise ValueError(f"dim has to be even. 
Got: {dim}") + self.scale = ( + nn.Parameter(torch.ones(1) * dim**-0.5) if scaled else 1.0 + ) self.dim = dim - self.n_freq = n_freq + self.n_freq = torch.Tensor([n_freq]) - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Apply learnable forward pass to the layer.""" + def forward(self, x: Tensor) -> Tensor: + """Forward pass.""" device = x.device - half_dim = self.dim // 2 - emb = torch.log(torch.tensor(self.n_freq, device=device)) / half_dim + half_dim = self.dim / 2 + emb = torch.log(self.n_freq.to(device=device)) / half_dim emb = torch.exp(torch.arange(half_dim, device=device) * (-emb)) - emb = x[..., None] * emb[None, ...] - emb = torch.cat((emb.sin(), emb.cos()), dim=-1) - return emb + emb = x.unsqueeze(-1) * emb.unsqueeze(0) + emb = torch.cat((torch.sin(emb), torch.cos(emb)), dim=-1) + return emb * self.scale + + +class FourierEncoder(LightningModule): + """Fourier encoder module. + + This module incorporates sinusoidal positional embeddings and auxiliary + embeddings to process input sequences and produce meaningful + representations. + """ + + def __init__( + self, + seq_length: int = 128, + output_dim: int = 384, + scaled: bool = False, + ): + """Construct `FourierEncoder`. + + Args: + seq_length: Dimensionality of the base sinusoidal positional + embeddings. + output_dim: Output dimensionality of the final projection. + scaled: Whether or not to scale the embeddings. + """ + super().__init__() + self.sin_emb = SinusoidalPosEmb(dim=seq_length, scaled=scaled) + self.aux_emb = nn.Embedding(2, seq_length // 2) + self.sin_emb2 = SinusoidalPosEmb(dim=seq_length // 2, scaled=scaled) + self.projection = nn.Sequential( + nn.Linear(6 * seq_length, 6 * seq_length), + nn.LayerNorm(6 * seq_length), + nn.GELU(), + nn.Linear(6 * seq_length, output_dim), + ) + + def forward( + self, + x: Tensor, + seq_length: Tensor, + ) -> Tensor: + """Forward pass.""" + length = torch.log10(seq_length.to(dtype=x.dtype)) + x = torch.cat( + [ + self.sin_emb(4096 * x[:, :, :3]).flatten(-2), # pos + self.sin_emb(1024 * x[:, :, 4]), # charge + self.sin_emb(4096 * x[:, :, 3]), # time + self.aux_emb(x[:, :, 5].long()), # auxiliary + self.sin_emb2(length) + .unsqueeze(1) + .expand(-1, max(seq_length), -1), + ], + -1, + ) + x = self.projection(x) + return x + + +class SpacetimeEncoder(LightningModule): + """Spacetime encoder module.""" + + def __init__( + self, + seq_length: int = 32, + ): + """Construct `SpacetimeEncoder`. + + This module calculates space-time interval between each pair of events + and generates sinusoidal positional embeddings to be added to input + sequences. + + Args: + seq_length: Dimensionality of the sinusoidal positional embeddings. 
+ """ + super().__init__() + self.sin_emb = SinusoidalPosEmb(dim=seq_length) + self.projection = nn.Linear(seq_length, seq_length) + + def forward( + self, + x: Tensor, + # Lmax: Optional[int] = None, + ) -> Tensor: + """Forward pass.""" + pos = x[:, :, :3] + time = x[:, :, 3] + spacetime_interval = (pos[:, :, None] - pos[:, None, :]).pow(2).sum( + -1 + ) - ((time[:, :, None] - time[:, None, :]) * (3e4 / 500 * 3e-1)).pow(2) + four_distance = torch.sign(spacetime_interval) * torch.sqrt( + torch.abs(spacetime_interval) + ) + sin_emb = self.sin_emb(1024 * four_distance.clip(-4, 4)) + rel_attn = self.projection(sin_emb) + return rel_attn diff --git a/src/graphnet/models/components/layers.py b/src/graphnet/models/components/layers.py index 53f970286..bd2ed8daf 100644 --- a/src/graphnet/models/components/layers.py +++ b/src/graphnet/models/components/layers.py @@ -1,6 +1,6 @@ """Class(es) implementing layers to be used in `graphnet` models.""" -from typing import Any, Callable, Optional, Sequence, Union, List, Tuple +from typing import Any, Callable, Optional, Sequence, Union, List import torch from torch.functional import Tensor @@ -9,8 +9,10 @@ from torch_geometric.typing import Adj, PairTensor from torch_geometric.nn.conv import MessagePassing from torch_geometric.nn.inits import reset +from torch_geometric.data import Data +import torch.nn as nn +from torch.nn.functional import linear from torch.nn.modules import TransformerEncoder, TransformerEncoderLayer -from torch.nn.modules.normalization import LayerNorm from torch_geometric.utils import to_dense_batch from pytorch_lightning import LightningModule @@ -129,13 +131,13 @@ def __init__( """Construct `DynTrans`. Args: - nn: The MLP/torch.Module to be used within the `DynTrans`. layer_sizes: List of layer sizes to be used in `DynTrans`. aggr: Aggregation method to be used with `DynTrans`. features_subset: Subset of features in `Data.x` that should be used when dynamically performing the new graph clustering after the `EdgeConv` operation. Defaults to all features. - n_head: Number of heads to be used in the multiheadattention models. + n_head: Number of heads to be used in the multiheadattention + models. **kwargs: Additional features to be passed to `DynTrans`. """ # Check(s) @@ -151,17 +153,17 @@ def __init__( ): if ix == 0: nb_in *= 3 # edgeConv1 - layers.append(torch.nn.Linear(nb_in, nb_out)) - layers.append(torch.nn.LeakyReLU()) + layers.append(nn.Linear(nb_in, nb_out)) + layers.append(nn.LeakyReLU()) d_model = nb_out # Base class constructor - super().__init__(nn=torch.nn.Sequential(*layers), aggr=aggr, **kwargs) + super().__init__(nn=nn.Sequential(*layers), aggr=aggr, **kwargs) # Additional member variables self.features_subset = features_subset - self.norm1 = LayerNorm(d_model, eps=1e-5) # lNorm + self.norm1 = nn.LayerNorm(d_model, eps=1e-5) # lNorm # Transformer layer(s) encoder_layer = TransformerEncoderLayer( @@ -193,3 +195,402 @@ def forward( x = x[mask] return x + + +class DropPath(LightningModule): + """Drop paths (Stochastic Depth) per sample.""" + + def __init__( + self, + drop_prob: float = 0.0, + ): + """Construct `DropPath`. + + Args: + drop_prob: Probability of dropping a path during training. + If 0.0, no paths are dropped. Defaults to None. 
+ """ + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x: Tensor) -> Tensor: + """Forward pass.""" + if self.drop_prob == 0.0 or not self.training: + return x + keep_prob = 1 - self.drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0: + random_tensor.div_(keep_prob) + return x * random_tensor + + def extra_repr(self) -> str: + """Return extra representation of the module.""" + return "p={}".format(self.drop_prob) + + +class Mlp(LightningModule): + """Multi-Layer Perceptron (MLP) module.""" + + def __init__( + self, + in_features: int, + hidden_features: Optional[int] = None, + out_features: Optional[int] = None, + activation: nn.Module = nn.GELU, + dropout_prob: float = 0.0, + ): + """Construct `Mlp`. + + Args: + in_features: Number of input features. + hidden_features: Number of hidden features. Defaults to None. + If None, it is set to the value of `in_features`. + out_features: Number of output features. Defaults to None. + If None, it is set to the value of `in_features`. + activation: Activation layer. Defaults to `nn.GELU`. + dropout_prob: Dropout probability. Defaults to 0.0. + """ + super().__init__() + if in_features <= 0: + raise ValueError( + f"in_features must be greater than 0, got in_features " + f"{in_features} instead" + ) + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.input_projection = nn.Linear(in_features, hidden_features) + self.activation = activation() + self.output_projection = nn.Linear(hidden_features, out_features) + self.dropout = nn.Dropout(dropout_prob) + + def forward(self, x: Tensor) -> Tensor: + """Forward pass.""" + x = self.input_projection(x) + x = self.activation(x) + x = self.output_projection(x) + x = self.dropout(x) + return x + + +class Block_rel(LightningModule): + """Implementation of BEiTv2 Block.""" + + def __init__( + self, + input_dim: int, + num_heads: int, + mlp_ratio: float = 4.0, + qkv_bias: bool = False, + qk_scale: Optional[float] = None, + dropout: float = 0.0, + attn_drop: float = 0.0, + drop_path: float = 0.0, + init_values: Optional[float] = None, + activation: nn.Module = nn.GELU, + norm_layer: nn.Module = nn.LayerNorm, + attn_head_dim: Optional[int] = None, + ): + """Construct 'Block_rel'. + + Args: + input_dim: Dimension of the input tensor. + num_heads: Number of attention heads to use in the `Attention_rel` + layer. + mlp_ratio: Ratio of the hidden size of the feedforward network to + the input size in the `Mlp` layer. + qkv_bias: Whether or not to include bias terms in the query, key, + and value matrices in the `Attention_rel` layer. + qk_scale: Scaling factor for the dot product of the query and key + matrices in the `Attention_rel` layer. + dropout: Dropout probability to use in the `Mlp` layer. + attn_drop: Dropout probability to use in the `Attention_rel` layer. + drop_path: Probability of applying drop path regularization to the + output of the layer. + init_values: Initial value to use for the `gamma_1` and `gamma_2` + parameters if not `None`. + activation: Activation function to use in the `Mlp` layer. + norm_layer: Normalization layer to use. + attn_head_dim: Dimension of the attention head outputs in the + `Attention_rel` layer. 
+ """ + super().__init__() + self.norm1 = norm_layer(input_dim) + self.attn = Attention_rel( + input_dim, + num_heads, + attn_drop=attn_drop, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_head_dim=attn_head_dim, + ) + self.drop_path = ( + DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + ) + self.norm2 = norm_layer(input_dim) + mlp_hidden_dim = int(input_dim * mlp_ratio) + self.mlp = Mlp( + in_features=input_dim, + hidden_features=mlp_hidden_dim, + activation=activation, + dropout_prob=dropout, + ) + + if init_values is not None: + self.gamma_1 = nn.Parameter( + init_values * torch.ones(input_dim), requires_grad=True + ) + self.gamma_2 = nn.Parameter( + init_values * torch.ones(input_dim), requires_grad=True + ) + else: + self.gamma_1, self.gamma_2 = None, None + + def forward( + self, + x: Tensor, + key_padding_mask: Optional[Tensor] = None, + rel_pos_bias: Optional[Tensor] = None, + kv: Optional[Tensor] = None, + ) -> Tensor: + """Forward pass.""" + if self.gamma_1 is None: + xn = self.norm1(x) + kv = xn if kv is None else self.norm1(kv) + x = x + self.drop_path( + self.attn( + xn, + kv, + kv, + rel_pos_bias=rel_pos_bias, + key_padding_mask=key_padding_mask, + ) + ) + x = x + self.drop_path(self.mlp(self.norm2(x))) + else: + xn = self.norm1(x) + kv = xn if kv is None else self.norm1(kv) + x = x + self.drop_path( + self.gamma_1 + * self.drop_path( + self.attn( + xn, + kv, + kv, + rel_pos_bias=rel_pos_bias, + key_padding_mask=key_padding_mask, + ) + ) + ) + x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x))) + return x + + +class Attention_rel(LightningModule): + """Attention mechanism with relative position bias.""" + + def __init__( + self, + input_dim: int, + num_heads: int = 8, + qkv_bias: bool = False, + qk_scale: Optional[float] = None, + attn_drop: float = 0.0, + proj_drop: float = 0.0, + attn_head_dim: Optional[int] = None, + ): + """Construct 'Attention_rel'. + + Args: + input_dim: Dimension of the input tensor. + num_heads: the number of attention heads to use (default: 8) + qkv_bias: whether to add bias to the query, key, and value + projections. Defaults to False. + qk_scale: a scaling factor that multiplies the dot product of query + and key vectors. Defaults to None. If None, computed as + :math: `head_dim^(-1/2)`. + attn_drop: the dropout probability for the attention weights. + Defaults to 0.0. + proj_drop: the dropout probability for the output of the attention + module. Defaults to 0.0. + attn_head_dim: the feature dimensionality of each attention head. + Defaults to None. If None, computed as `dim // num_heads`. 
+ """ + if input_dim <= 0 or num_heads <= 0: + raise ValueError( + f"dim and num_heads must be greater than 0," + f" got input_dim={input_dim} and num_heads={num_heads} instead" + ) + + super().__init__() + self.num_heads = num_heads + head_dim = attn_head_dim or input_dim // num_heads + all_head_dim = head_dim * self.num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.proj_q = nn.Linear(input_dim, all_head_dim, bias=False) + self.proj_k = nn.Linear(input_dim, all_head_dim, bias=False) + self.proj_v = nn.Linear(input_dim, all_head_dim, bias=False) + if qkv_bias: + self.q_bias = nn.Parameter(torch.zeros(all_head_dim)) + self.v_bias = nn.Parameter(torch.zeros(all_head_dim)) + else: + self.q_bias = None + self.v_bias = None + + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(all_head_dim, input_dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward( + self, + q: Tensor, + k: Tensor, + v: Tensor, + rel_pos_bias: Optional[Tensor] = None, + key_padding_mask: Optional[Tensor] = None, + ) -> Tensor: + """Forward pass.""" + batch_size, event_length, _ = q.shape + + q = linear(input=q, weight=self.proj_q.weight, bias=self.q_bias) + q = q.reshape(batch_size, event_length, self.num_heads, -1).permute( + 0, 2, 1, 3 + ) + k = linear(input=k, weight=self.proj_k.weight, bias=None) + k = k.reshape(batch_size, k.shape[1], self.num_heads, -1).permute( + 0, 2, 1, 3 + ) + v = linear(input=v, weight=self.proj_v.weight, bias=self.v_bias) + v = v.reshape(batch_size, v.shape[1], self.num_heads, -1).permute( + 0, 2, 1, 3 + ) + + q = q * self.scale + attn = q @ k.transpose(-2, -1) + if rel_pos_bias is not None: + bias = torch.einsum("bhic,bijc->bhij", q, rel_pos_bias) + attn = attn + bias + if key_padding_mask is not None: + assert ( + key_padding_mask.dtype == torch.float32 + or key_padding_mask.dtype == torch.float16 + ), "incorrect mask dtype" + bias = torch.min( + key_padding_mask[:, None, :], key_padding_mask[:, :, None] + ) + bias[ + torch.max( + key_padding_mask[:, None, :], key_padding_mask[:, :, None] + ) + < 0 + ] = 0 + attn = attn + bias.unsqueeze(1) + + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2) + if rel_pos_bias is not None: + x = x + torch.einsum("bhij,bijc->bihc", attn, rel_pos_bias) + x = x.reshape(batch_size, event_length, -1) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(LightningModule): + """Transformer block.""" + + def __init__( + self, + input_dim: int, + num_heads: int, + mlp_ratio: float = 4.0, + dropout: float = 0.0, + attn_drop: float = 0.0, + drop_path: float = 0.0, + init_values: Optional[float] = None, + activation: nn.Module = nn.GELU, + norm_layer: nn.Module = nn.LayerNorm, + ): + """Construct 'Block'. + + Args: + input_dim: Dimension of the input tensor. + num_heads: Number of attention heads to use in the + `MultiheadAttention` layer. + mlp_ratio: Ratio of the hidden size of the feedforward network to + the input size in the `Mlp` layer. + dropout: Dropout probability to use in the `Mlp` layer. + attn_drop: Dropout probability to use in the `MultiheadAttention` + layer. + drop_path: Probability of applying drop path regularization to the + output of the layer. + init_values: Initial value to use for the `gamma_1` and `gamma_2` + parameters if not `None`. + activation: Activation function to use in the `Mlp` layer. + norm_layer: Normalization layer to use. 
+ """ + super().__init__() + self.norm1 = norm_layer(input_dim) + self.attn = nn.MultiheadAttention( + input_dim, num_heads, dropout=attn_drop, batch_first=True + ) + self.drop_path = ( + DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + ) + self.norm2 = norm_layer(input_dim) + mlp_hidden_dim = int(input_dim * mlp_ratio) + self.mlp = Mlp( + in_features=input_dim, + hidden_features=mlp_hidden_dim, + activation=activation, + dropout_prob=dropout, + ) + + if init_values is not None: + self.gamma_1 = nn.Parameter( + init_values * torch.ones((input_dim)), requires_grad=True + ) + self.gamma_2 = nn.Parameter( + init_values * torch.ones((input_dim)), requires_grad=True + ) + else: + self.gamma_1, self.gamma_2 = None, None + + def forward( + self, + x: Tensor, + attn_mask: Optional[Tensor] = None, + key_padding_mask: Optional[Tensor] = None, + ) -> Tensor: + """Forward pass.""" + if self.gamma_1 is None: + xn = self.norm1(x) + x = x + self.drop_path( + self.attn( + xn, + xn, + xn, + attn_mask=attn_mask, + key_padding_mask=key_padding_mask, + need_weights=False, + )[0] + ) + x = x + self.drop_path(self.mlp(self.norm2(x))) + else: + xn = self.norm1(x) + x = x + self.drop_path( + self.gamma_1 + * self.attn( + xn, + xn, + xn, + attn_mask=attn_mask, + key_padding_mask=key_padding_mask, + need_weights=False, + )[0] + ) + x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x))) + return x diff --git a/src/graphnet/models/detector/icecube.py b/src/graphnet/models/detector/icecube.py index 691b94fc7..c39240d7f 100644 --- a/src/graphnet/models/detector/icecube.py +++ b/src/graphnet/models/detector/icecube.py @@ -28,6 +28,7 @@ def feature_map(self) -> Dict[str, Callable]: "charge": self._charge, "rde": self._rde, "pmt_area": self._pmt_area, + "hlc": self._identity, } return feature_map @@ -85,6 +86,7 @@ def feature_map(self) -> Dict[str, Callable]: "charge": self._identity, "rde": self._rde, "pmt_area": self._pmt_area, + "hlc": self._identity, } return feature_map @@ -131,6 +133,7 @@ def feature_map(self) -> Dict[str, Callable]: "pmt_dir_y": self._identity, "pmt_dir_z": self._identity, "dom_type": self._dom_type, + "hlc": self._identity, } return feature_map diff --git a/src/graphnet/models/gnn/__init__.py b/src/graphnet/models/gnn/__init__.py index 2d3ff7910..9b4de8238 100644 --- a/src/graphnet/models/gnn/__init__.py +++ b/src/graphnet/models/gnn/__init__.py @@ -5,3 +5,4 @@ from .dynedge_jinst import DynEdgeJINST from .dynedge_kaggle_tito import DynEdgeTITO from .RNN_tito import RNN_TITO +from .icemix import DeepIce diff --git a/src/graphnet/models/gnn/dynedge.py b/src/graphnet/models/gnn/dynedge.py index 9ea93f9ce..e4d0160e4 100644 --- a/src/graphnet/models/gnn/dynedge.py +++ b/src/graphnet/models/gnn/dynedge.py @@ -1,5 +1,5 @@ """Implementation of the DynEdge GNN model architecture.""" -from typing import List, Optional, Sequence, Tuple, Union +from typing import List, Optional, Callable, Tuple, Union import torch from torch import Tensor, LongTensor @@ -32,6 +32,9 @@ def __init__( readout_layer_sizes: Optional[List[int]] = None, global_pooling_schemes: Optional[Union[str, List[str]]] = None, add_global_variables_after_pooling: bool = False, + activation_layer: Callable = None, + add_norm_layer: bool = False, + skip_readout: bool = False, ): """Construct `DynEdge`. @@ -65,6 +68,11 @@ def __init__( after global pooling. The alternative is to added (distribute) them to the individual nodes before any convolutional operations. + activation_layer: The activation function to use in the model. 
+ add_norm_layer: Whether to add a normalization layer after each + linear layer. + skip_readout: Whether to skip the readout layer(s). If `True`, the + output of the last post-processing layer is returned directly. """ # Latent feature subset for computing nearest neighbours in DynEdge. if features_subset is None: @@ -149,15 +157,20 @@ def __init__( add_global_variables_after_pooling ) + if activation_layer is None: + activation_layer = torch.nn.ReLU() + # Base class constructor super().__init__(nb_inputs, self._readout_layer_sizes[-1]) # Remaining member variables() - self._activation = torch.nn.LeakyReLU() + self._activation = activation_layer self._nb_inputs = nb_inputs self._nb_global_variables = 5 + nb_inputs self._nb_neighbours = nb_neighbours self._features_subset = features_subset + self._add_norm_layer = add_norm_layer + self._skip_readout = skip_readout self._construct_layers() @@ -179,6 +192,8 @@ def _construct_layers(self) -> None: if ix == 0: nb_in *= 2 layers.append(torch.nn.Linear(nb_in, nb_out)) + if self._add_norm_layer: + layers.append(torch.nn.LayerNorm(nb_out)) layers.append(self._activation) conv_layer = DynEdgeConv( @@ -203,6 +218,8 @@ def _construct_layers(self) -> None: ) for nb_in, nb_out in zip(layer_sizes[:-1], layer_sizes[1:]): post_processing_layers.append(torch.nn.Linear(nb_in, nb_out)) + if self._add_norm_layer: + post_processing_layers.append(torch.nn.LayerNorm(nb_out)) post_processing_layers.append(self._activation) self._post_processing = torch.nn.Sequential(*post_processing_layers) @@ -307,19 +324,20 @@ def forward(self, data: Data) -> Tensor: # Post-processing x = self._post_processing(x) - # (Optional) Global pooling - if self._global_pooling_schemes: - x = self._global_pooling(x, batch=batch) - if self._add_global_variables_after_pooling: - x = torch.cat( - [ - x, - global_variables, - ], - dim=1, - ) - - # Read-out - x = self._readout(x) + if not self._skip_readout: + # (Optional) Global pooling + if self._global_pooling_schemes: + x = self._global_pooling(x, batch=batch) + if self._add_global_variables_after_pooling: + x = torch.cat( + [ + x, + global_variables, + ], + dim=1, + ) + + # Read-out + x = self._readout(x) return x diff --git a/src/graphnet/models/gnn/icemix.py b/src/graphnet/models/gnn/icemix.py new file mode 100644 index 000000000..8ecf0bf62 --- /dev/null +++ b/src/graphnet/models/gnn/icemix.py @@ -0,0 +1,159 @@ +"""Implementation of IceMix architecture used in. + + IceCube - Neutrinos in Deep Ice +Reconstruct the direction of neutrinos from the Universe to the South Pole + +Kaggle competition. + +Solution by DrHB: https://github.com/DrHB/icecube-2nd-place +""" +import torch +import torch.nn as nn +from typing import Set, Dict, Any, List + +from graphnet.models.components.layers import ( + Block_rel, + Block, +) +from graphnet.models.components.embedding import ( + FourierEncoder, + SpacetimeEncoder, +) +from graphnet.models.gnn.dynedge import DynEdge +from graphnet.models.gnn.gnn import GNN +from graphnet.models.utils import array_to_sequence + +from torch_geometric.utils import to_dense_batch +from torch_geometric.data import Data +from torch import Tensor + + +class DeepIce(GNN): + """DeepIce model.""" + + def __init__( + self, + hidden_dim: int = 384, + seq_length: int = 128, + depth: int = 12, + head_size: int = 32, + depth_rel: int = 4, + n_rel: int = 1, + scaled_emb: bool = False, + include_dynedge: bool = False, + dynedge_args: Dict[str, Any] = None, + ): + """Construct `DeepIce`. 
+ + Args: + hidden_dim: The latent feature dimension. + seq_length: The base feature dimension. + depth: The depth of the transformer. + head_size: The size of the attention heads. + depth_rel: The depth of the relative transformer. + n_rel: The number of relative transformer layers to use. + scaled_emb: Whether to scale the sinusoidal positional embeddings. + include_dynedge: If True, pulse-level predictions from `DynEdge` + will be added as features to the model. + dynedge_args: Initialization arguments for DynEdge. If not + provided, DynEdge will be initialized with the original Kaggle + Competition settings. If `include_dynedge` is False, this + argument have no impact. + """ + super().__init__(seq_length, hidden_dim) + fourier_out_dim = hidden_dim // 2 if include_dynedge else hidden_dim + self.fourier_ext = FourierEncoder( + seq_length, fourier_out_dim, scaled=scaled_emb + ) + self.rel_pos = SpacetimeEncoder(head_size) + self.sandwich = nn.ModuleList( + [ + Block_rel( + input_dim=hidden_dim, num_heads=hidden_dim // head_size + ) + for _ in range(depth_rel) + ] + ) + self.cls_token = nn.Linear(hidden_dim, 1, bias=False) + self.blocks = nn.ModuleList( + [ + Block( + input_dim=hidden_dim, + num_heads=hidden_dim // head_size, + mlp_ratio=4, + drop_path=0.0 * (i / (depth - 1)), + init_values=1, + ) + for i in range(depth) + ] + ) + self.n_rel = n_rel + + if include_dynedge and dynedge_args is None: + self.warning_once("Running with default DynEdge settings") + self.dyn_edge = DynEdge( + nb_inputs=9, + nb_neighbours=9, + post_processing_layer_sizes=[336, hidden_dim // 2], + dynedge_layer_sizes=[ + (128, 256), + (336, 256), + (336, 256), + (336, 256), + ], + global_pooling_schemes=None, + activation_layer=nn.GELU(), + add_norm_layer=True, + skip_readout=True, + ) + elif include_dynedge and not (dynedge_args is None): + self.dyn_edge = DynEdge(**dynedge_args) + + self.include_dynedge = include_dynedge + + @torch.jit.ignore + def no_weight_decay(self) -> Set: + """cls_tocken should not be subject to weight decay during training.""" + return {"cls_token"} + + def forward(self, data: Data) -> Tensor: + """Apply learnable forward pass.""" + x0, mask, seq_length = array_to_sequence( + data.x, data.batch, padding_value=0 + ) + x = self.fourier_ext(x0, seq_length) + rel_pos_bias = self.rel_pos(x0) + batch_size = mask.shape[0] + if self.include_dynedge: + graph = self.dyn_edge(data) + graph, _ = to_dense_batch(graph, data.batch) + x = torch.cat([x, graph], 2) + + attn_mask = torch.zeros(mask.shape, device=mask.device) + attn_mask[~mask] = -torch.inf + + for i, blk in enumerate(self.sandwich): + x = blk(x, attn_mask, rel_pos_bias) + if i + 1 == self.n_rel: + rel_pos_bias = None + + mask = torch.cat( + [ + torch.ones( + batch_size, 1, dtype=mask.dtype, device=mask.device + ), + mask, + ], + 1, + ) + attn_mask = torch.zeros(mask.shape, device=mask.device) + attn_mask[~mask] = -torch.inf + cls_token = self.cls_token.weight.unsqueeze(0).expand( + batch_size, -1, -1 + ) + x = torch.cat([cls_token, x], 1) + + for blk in self.blocks: + x = blk(x, None, attn_mask) + + return x[:, 0] diff --git a/src/graphnet/models/graphs/nodes/__init__.py b/src/graphnet/models/graphs/nodes/__init__.py index 1fbafd43f..64bcf70ba 100644 --- a/src/graphnet/models/graphs/nodes/__init__.py +++ b/src/graphnet/models/graphs/nodes/__init__.py @@ -10,4 +10,5 @@ NodesAsPulses, PercentileClusters, NodeAsDOMTimeSeries, + IceMixNodes, ) diff --git a/src/graphnet/models/graphs/nodes/nodes.py b/src/graphnet/models/graphs/nodes/nodes.py 
index a71ef1c59..5d47a8487 100644
--- a/src/graphnet/models/graphs/nodes/nodes.py
+++ b/src/graphnet/models/graphs/nodes/nodes.py
@@ -12,6 +12,7 @@
     cluster_summarize_with_percentiles,
     identify_indices,
     lex_sort,
+    ice_transparency,
 )
 
 from copy import deepcopy
@@ -301,3 +302,125 @@ def _construct_nodes(self, x: torch.Tensor) -> Data:
         x = np.column_stack([x, new_node_col])
 
         return Data(x=torch.tensor(x))
+
+
+class IceMixNodes(NodeDefinition):
+    """Calculate ice properties and perform random sampling.
+
+    Ice properties are calculated based on the z-coordinate of the pulse. For
+    each event, a random sampling is performed to keep the number of pulses
+    below a maximum number of pulses if n_pulses is over the limit.
+    """
+
+    def __init__(
+        self,
+        input_feature_names: Optional[List[str]] = None,
+        max_pulses: int = 768,
+        z_name: str = "dom_z",
+        hlc_name: str = "hlc",
+    ) -> None:
+        """Construct `IceMixNodes`.
+
+        Args:
+            input_feature_names: Column names for input features. At minimum,
+                the z-coordinate and hlc columns are required.
+            max_pulses: Maximum number of pulses to keep in the event.
+            z_name: Name of the z-coordinate column.
+            hlc_name: Name of the `Hard Local Coincidence Check` column.
+        """
+        super().__init__(input_feature_names=input_feature_names)
+
+        if input_feature_names is None:
+            input_feature_names = [
+                "dom_x",
+                "dom_y",
+                "dom_z",
+                "dom_time",
+                "charge",
+                "hlc",
+                "rde",
+            ]
+
+        if z_name not in input_feature_names:
+            raise ValueError(
+                f"z name {z_name} not found in "
+                f"input_feature_names {input_feature_names}"
+            )
+        if hlc_name not in input_feature_names:
+            raise ValueError(
+                f"hlc name {hlc_name} not found in "
+                f"input_feature_names {input_feature_names}"
+            )
+
+        self.all_features = input_feature_names + [
+            "scatt_lenght",
+            "abs_lenght",
+        ]
+
+        self.feature_indexes = {
+            feat: self.all_features.index(feat) for feat in input_feature_names
+        }
+
+        self.f_scattering, self.f_absoprtion = ice_transparency()
+
+        self.input_feature_names = input_feature_names
+        self.n_features = len(self.all_features)
+        self.max_length = max_pulses
+        self.z_name = z_name
+        self.hlc_name = hlc_name
+
+    def _define_output_feature_names(
+        self, input_feature_names: List[str]
+    ) -> List[str]:
+        return self.all_features
+
+    def _add_ice_properties(
+        self, graph: torch.Tensor, x: torch.Tensor, ids: List[int]
+    ) -> torch.Tensor:
+
+        graph[: len(ids), -2] = torch.tensor(
+            self.f_scattering(x[ids, self.feature_indexes[self.z_name]])
+        )
+        graph[: len(ids), -1] = torch.tensor(
+            self.f_absoprtion(x[ids, self.feature_indexes[self.z_name]])
+        )
+        return graph
+
+    def _pulse_sampler(
+        self, x: torch.Tensor, event_length: int
+    ) -> torch.Tensor:
+
+        if event_length < self.max_length:
+            ids = torch.arange(event_length)
+        else:
+            ids = torch.randperm(event_length)
+            auxiliary_n = torch.nonzero(
+                x[:, self.feature_indexes[self.hlc_name]] == 0
+            ).squeeze(1)
+            auxiliary_p = torch.nonzero(
+                x[:, self.feature_indexes[self.hlc_name]] == 1
+            ).squeeze(1)
+            ids_n = ids[auxiliary_n][: min(self.max_length, len(auxiliary_n))]
+            ids_p = ids[auxiliary_p][
+                : min(self.max_length - len(ids_n), len(auxiliary_p))
+            ]
+            ids = torch.cat([ids_n, ids_p]).sort().values
+        return ids
+
+    def _construct_nodes(self, x: torch.Tensor) -> Tuple[Data, List[str]]:
+
+        event_length = x.shape[0]
+        x[:, self.feature_indexes[self.hlc_name]] = torch.logical_not(
+            x[:, self.feature_indexes[self.hlc_name]]
+        )  # hlc in Kaggle was flipped
+        ids = self._pulse_sampler(x, event_length)
+        event_length = min(self.max_length, event_length)
+
+        graph = torch.zeros([event_length, self.n_features])
+        for idx, feature in enumerate(
+            self.all_features[: self.n_features - 2]
+        ):
+            graph[:event_length, idx] = x[ids, self.feature_indexes[feature]]
+
+        graph = self._add_ice_properties(graph, x, ids)  # ice properties
+        return Data(x=graph)
diff --git a/src/graphnet/models/graphs/utils.py b/src/graphnet/models/graphs/utils.py
index ccd861783..63868d7a6 100644
--- a/src/graphnet/models/graphs/utils.py
+++ b/src/graphnet/models/graphs/utils.py
@@ -1,7 +1,12 @@
 """Utility functions for construction of graphs."""
 
 from typing import List, Tuple
+import os
 import numpy as np
+import pandas as pd
+from scipy.interpolate import interp1d
+from sklearn.preprocessing import RobustScaler
+from graphnet.constants import DATA_DIR
 
 
 def lex_sort(x: np.array, cluster_columns: List[int]) -> np.ndarray:
@@ -158,3 +163,31 @@ def cluster_summarize_with_percentiles(
     )
 
     return array
+
+
+def ice_transparency() -> Tuple[interp1d, interp1d]:
+    """Return interpolation functions for optical properties of IceCube.
+
+    NOTE: The resulting interpolation functions assume that the
+    z-coordinate of the pulses is scaled as `z = z/500`.
+    Any deviation from this scaling results in inaccurate results.
+
+    Returns:
+        f_scattering: Function that takes a normalized depth and returns the
+            corresponding normalized scattering length.
+        f_absorption: Function that takes a normalized depth and returns the
+            corresponding normalized absorption length.
+    """
+    # Data from page 31 of https://arxiv.org/pdf/1301.5361.pdf
+    df = pd.read_parquet(
+        os.path.join(DATA_DIR, "ice_properties/ice_transparency.parquet"),
+    )
+    df["z"] = df["depth"] - 1950
+    df["z_norm"] = df["z"] / 500
+    df[
+        ["scattering_len_norm", "absorption_len_norm"]
+    ] = RobustScaler().fit_transform(df[["scattering_len", "absorption_len"]])
+
+    f_scattering = interp1d(df["z_norm"], df["scattering_len_norm"])
+    f_absorption = interp1d(df["z_norm"], df["absorption_len_norm"])
+    return f_scattering, f_absorption
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml
index d9ca5b4f1..b3adc498b 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/SplitInIcePulses_cleaner/SplitInIcePulses_cleaner_config.yml
@@ -19,7 +19,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        input_feature_names: null
+        input_feature_names: ['dom_x', 'dom_y', 'dom_z', 'dom_time', 'charge', 'rde', 'pmt_area', 'string', 'pmt_number', 'dom_number', 'pmt_dir_x', 'pmt_dir_y', 'pmt_dir_z', 'dom_type']
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml
index 70c82f90d..fdb414b37 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_direction/neutrino_direction_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        input_feature_names: null
+        input_feature_names: ['dom_x', 'dom_y', 'dom_z', 'dom_time', 'charge', 'rde', 'pmt_area', 'string', 'pmt_number', 'dom_number', 'pmt_dir_x', 'pmt_dir_y', 'pmt_dir_z', 'dom_type']
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml
index b75cbf87d..9379e16fd 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_vs_muon_classifier/neutrino_vs_muon_classifier_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        input_feature_names: null
+        input_feature_names: ['dom_x', 'dom_y', 'dom_z', 'dom_time', 'charge', 'rde', 'pmt_area', 'string', 'pmt_number', 'dom_number', 'pmt_dir_x', 'pmt_dir_y', 'pmt_dir_z', 'dom_type']
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml
index 29f8e3e63..937152aa2 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/neutrino_zenith/neutrino_zenith_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        input_feature_names: null
+        input_feature_names: ['dom_x', 'dom_y', 'dom_z', 'dom_time', 'charge', 'rde', 'pmt_area', 'string', 'pmt_number', 'dom_number', 'pmt_dir_x', 'pmt_dir_y', 'pmt_dir_z', 'dom_type']
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml
index 916c7db62..8091df6e3 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/total_neutrino_energy/total_neutrino_energy_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        input_feature_names: null
+        input_feature_names: ['dom_x', 'dom_y', 'dom_z', 'dom_time', 'charge', 'rde', 'pmt_area', 'string', 'pmt_number', 'dom_number', 'pmt_dir_x', 'pmt_dir_y', 'pmt_dir_z', 'dom_type']
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml
index d233ed7e9..3a5c52631 100644
--- a/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml
+++ b/src/graphnet/models/pretrained/icecube/upgrade/QUESO/track_vs_cascade_classifier/track_vs_cascade_classifier_config.yml
@@ -25,7 +25,7 @@ arguments:
           ModelConfig:
             arguments: {}
             class_name: NodesAsPulses
-        input_feature_names: null
+        input_feature_names: ['dom_x', 'dom_y', 'dom_z', 'dom_time', 'charge', 'rde', 'pmt_area', 'string', 'pmt_number', 'dom_number', 'pmt_dir_x', 'pmt_dir_y', 'pmt_dir_z', 'dom_type']
       class_name: KNNGraph
   optimizer_class: '!class torch.optim.adam Adam'
   optimizer_kwargs: null
diff --git a/src/graphnet/models/utils.py b/src/graphnet/models/utils.py
index 1d77903a5..d05e8223f 100644
--- a/src/graphnet/models/utils.py
+++ b/src/graphnet/models/utils.py
@@ -1,6 +1,6 @@
 """Utility functions for `graphnet.models`."""
 
-from typing import List, Tuple, Union
+from typing import List, Tuple, Any
 from torch_geometric.nn import knn_graph
 from torch_geometric.data import Batch
 import torch
@@ -59,3 +59,47 @@ def knn_graph_batch(batch: Batch, k: List[int], columns: List[int]) -> Batch:
             x=data_list[i].x[:, columns], k=k[i]
         )
     return Batch.from_data_list(data_list)
+
+
+def array_to_sequence(
+    x: Tensor,
+    batch_idx: LongTensor,
+    padding_value: Any = 0,
+    excluding_value: Any = torch.inf,
+) -> Tuple[Tensor, Tensor, Tensor]:
+    """Convert `x` of shape [n,d] into a padded sequence of shape [B, L, D].
+
+    Here B is the batch size, L is the maximum sequence length and D is the
+    number of features per time step.
+
+    Args:
+        x: array-like tensor with shape `[n,d]` where `n` is the total number
+            of pulses in the batch and `d` is the number of node features.
+        batch_idx: a LongTensor identifying which row in `x` belongs to
+            which training example.
+            E.g. `torch_geometric.data.Batch.batch`.
+        padding_value: The value to use for padding.
+        excluding_value: A sentinel value that must not be present in the
+            input tensor `x`.
+    Returns:
+        x: Padded sequence with dimensions [B, L, D].
+        mask: A tensor that identifies the valid (non-padded) entries in `x`,
+            e.g. `valid_entries = x[mask]`.
+        seq_length: A tensor containing the number of pulses in each event.
+    """
+    if torch.any(torch.eq(x, excluding_value)):
+        raise ValueError(
+            f"Transformation cannot be made because input tensor "
+            f"`x` contains at least one element equal to "
+            f"excluding value {excluding_value}."
+        )
+
+    _, seq_length = torch.unique(batch_idx, return_counts=True)
+    x_list = torch.split(x, seq_length.tolist())
+
+    x = torch.nn.utils.rnn.pad_sequence(
+        x_list, batch_first=True, padding_value=excluding_value
+    )
+    mask = torch.ne(x[:, :, 1], excluding_value)
+    x[~mask] = padding_value
+    return x, mask, seq_length
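
Usage sketch (not part of the patch): the snippet below shows how the `DeepIce` model added in `src/graphnet/models/gnn/icemix.py` could be run on a toy `torch_geometric` batch. It assumes the patch above is applied and that `torch`, `torch_geometric` and `graphnet` are installed; the dummy tensors and event sizes are illustrative only, and the node-feature ordering `[x, y, z, time, charge, auxiliary]` is the one indexed by `FourierEncoder.forward`.

import torch
from torch_geometric.data import Batch, Data

from graphnet.models.gnn import DeepIce  # exported via gnn/__init__.py above


def toy_event(n_pulses: int) -> Data:
    # Six node features per pulse, ordered as [x, y, z, time, charge, aux];
    # the auxiliary flag must be 0 or 1 since it feeds an nn.Embedding(2, ...).
    x = torch.rand(n_pulses, 6)
    x[:, 5] = torch.randint(0, 2, (n_pulses,)).float()
    return Data(x=x)


batch = Batch.from_data_list([toy_event(7), toy_event(11)])

# Constructor defaults shown explicitly; include_dynedge=False keeps the
# pure transformer branch (no DynEdge features concatenated).
model = DeepIce(hidden_dim=384, seq_length=128, depth=12, head_size=32)
out = model(batch)  # CLS-token summary per event, shape [2, 384]

In a full pipeline the `IceMixNodes` node definition added above would supply these features (dom_x, dom_y, dom_z, dom_time, charge, hlc, rde plus the two ice-transparency columns) through a graph definition such as `KNNGraph`, and the resulting per-event embedding would be passed on to a `graphnet` task head.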