Skip to content

Commit

Permalink
automatic_name_generation
Browse files Browse the repository at this point in the history
  • Loading branch information
Aske-Rosted committed Dec 6, 2024
1 parent eaa12e5 commit 165fedb
Showing 1 changed file with 82 additions and 12 deletions.
94 changes: 82 additions & 12 deletions src/graphnet/models/graphs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,13 +202,20 @@ class cluster_and_pad:
# Gets the clustered matrix with all the aggregate statistics.
"""

def __init__(self, x: np.ndarray, cluster_columns: List[int]) -> None:
def __init__(
self,
x: np.ndarray,
cluster_columns: List[int],
input_names: Optional[List[str]] = None,
) -> None:
"""Initialize the class with the data and cluster columns.
Args:
x: Array to be clustered
cluster_columns: List of column indices on which the clusters
are constructed.
input_names: Names of the columns in the input data for automatic
generation of names.
Adds:
clustered_x: Added to the class
_counts: Added to the class
Expand Down Expand Up @@ -244,6 +251,14 @@ def __init__(self, x: np.ndarray, cluster_columns: List[int]) -> None:
self._padded_x[i, : self._counts[i]] = x[: self._counts[i]]
x = x[self._counts[i] :]

self._input_names = input_names
if self._input_names is not None:
assert (
len(self._input_names) == x.shape[1]
), "The input names must have the same length as the input data"

self._cluster_names = np.array(input_names)[cluster_columns]

def _add_column(
self, column: np.ndarray, location: Optional[int] = None
) -> None:
Expand All @@ -263,6 +278,25 @@ def _add_column(
self.clustered_x, location, column, axis=1
)

def _add_column_names(
    self, names: List[str], location: Optional[int] = None
) -> None:
    """Add names to the columns of the clustered tensor.

    Args:
        names: Names to be added to the columns of the tensor
        location: Location to insert the names in the clustered tensor.
            When `None` the names are appended at the end.

    Altered:
        _cluster_names: The names are added at the end of the name array
            or inserted at the specified location
    """
    new_names = np.asarray(names, dtype=str)

    if location is None:
        # np.append concatenates, which already promotes to the wider
        # string dtype, so no name is shortened on this path.
        self._cluster_names = np.append(self._cluster_names, new_names)
        return

    # np.insert casts the inserted values to the dtype of the target
    # array. For fixed-width unicode arrays this silently truncates every
    # new name that is longer than the longest existing one, so the
    # target is widened before inserting.
    current = np.asarray(self._cluster_names)
    widened = current.astype(
        np.result_type(current.dtype, new_names.dtype)
    )
    self._cluster_names = np.insert(widened, location, new_names)

def _calculate_charge_sum(self, charge_index: int) -> np.ndarray:
"""Calculate the sum of the charge."""
assert not hasattr(
Expand Down Expand Up @@ -310,6 +344,8 @@ def add_charge_threshold_summary(
of the charge divided by the total charge
clustered_x: The summarization indices are added at the end
of the tensor or inserted at the specified location.
_cluster_names: The names are added at the end of the tensor
or inserted at the specified location
"""
# convert the charge to the cumulative sum of the charge divided
# by the total charge
Expand Down Expand Up @@ -340,6 +376,15 @@ def add_charge_threshold_summary(
)
self._add_column(selections, location)

# update the cluster names
if self._input_names is not None:
new_names = [
self._input_names[i] + "_charge_threshold_" + str(p)
for i in summarization_indices
for p in percentiles
]
self._add_column_names(new_names, location)

def add_percentile_summary(
self,
summarization_indices: List[int],
Expand All @@ -359,6 +404,8 @@ def add_percentile_summary(
Altered:
clustered_x: The summarization indices are added at the end of
the tensor or inserted at the specified location
_cluster_names: The names are added at the end of the tensor
or inserted at the specified location
"""
percentiles_x = np.nanpercentile(
self._padded_x[:, :, summarization_indices],
Expand All @@ -372,48 +419,71 @@ def add_percentile_summary(
)
self._add_column(percentiles_x, location)

# update the cluster names
if self._input_names is not None:
new_names = [
self._input_names[i] + "_percentile_" + str(p)
for i in summarization_indices
for p in percentiles
]
self._add_column_names(new_names, location)

def add_counts(self, location: Optional[int] = None) -> None:
    """Add the log10 of the cluster counts to the summarization features.

    Args:
        location: Location to insert the counts column in the clustered
            tensor, defaults to adding at the end.

    Altered:
        clustered_x: `log10(_counts)` is added as a new column.
        _cluster_names: The name "counts" is added at the matching
            position, but only when input names were provided.
    """
    self._add_column(np.log10(self._counts), location)
    # Names are only tracked when input names were given; the other
    # summary methods apply the same guard before touching the names.
    if self._input_names is not None:
        self._add_column_names(["counts"], location)

def add_sum_charge(
    self, charge_index: int, location: Optional[int] = None
) -> None:
    """Add the sum of the charge to the summarization features.

    Args:
        charge_index: Index of the charge column in the input data. It is
            passed to `_calculate_charge_sum` when no charge sum is
            cached yet and is used to build the column name.
        location: Location to insert the charge sum in the clustered
            tensor, defaults to adding at the end.

    Altered:
        clustered_x: The charge sum is added as a new column.
        _cluster_names: "<charge name>_sum" is added at the matching
            position when input names were provided.
    """
    # Compute the charge sum lazily instead of requiring the caller to
    # have triggered the calculation beforehand.
    # NOTE(review): presumably `_calculate_charge_sum` stores its result
    # in `self._charge_sum` - confirm against its definition.
    if not hasattr(self, "_charge_sum"):
        self._calculate_charge_sum(charge_index)
    self._add_column(self._charge_sum, location)

    # update the cluster names
    if self._input_names is not None:
        new_name = [self._input_names[charge_index] + "_sum"]
        self._add_column_names(new_name, location)

def add_std(
    self,
    columns: List[int],
    location: Optional[int] = None,
    weights: Union[np.ndarray, int] = 1,
) -> None:
    """Add the standard deviation of the columns.

    Args:
        columns: Index of the columns from which to calculate the standard
            deviation. A single integer index is also accepted.
        location: Location to insert the standard deviation in the
            clustered tensor, defaults to adding at the end
        weights: Optional weights multiplied onto the selected values
            before the standard deviation is taken

    Altered:
        clustered_x: The standard deviations are added as new column(s).
        _cluster_names: "<name>_std" is added for every selected column
            when input names were provided.
    """
    # NaN entries are ignored by np.nanstd, taken along axis 1 of the
    # padded tensor.
    self._add_column(
        np.nanstd(self._padded_x[:, :, columns] * weights, axis=1),
        location,
    )
    # update the cluster names
    if self._input_names is not None:
        # A bare integer index cannot be iterated; treat it as a single
        # column so callers passing one index still get a name.
        if isinstance(columns, (int, np.integer)):
            indices = [columns]
        else:
            indices = columns
        new_names = [self._input_names[i] + "_std" for i in indices]
        self._add_column_names(new_names, location)

def add_mean(
    self,
    columns: List[int],
    location: Optional[int] = None,
    weights: Union[np.ndarray, int] = 1,
) -> None:
    """Add the mean of the columns.

    Args:
        columns: Index of the columns from which to calculate the mean.
            A single integer index is also accepted.
        location: Location to insert the mean in the clustered tensor,
            defaults to adding at the end
        weights: Optional weights multiplied onto the selected values
            before the mean is taken

    Altered:
        clustered_x: The means are added as new column(s).
        _cluster_names: "<name>_mean" is added for every selected column
            when input names were provided.
    """
    # NaN entries are ignored by np.nanmean, taken along axis 1 of the
    # padded tensor.
    self._add_column(
        np.nanmean(self._padded_x[:, :, columns] * weights, axis=1),
        location,
    )
    # update the cluster names
    if self._input_names is not None:
        # A bare integer index cannot be iterated; treat it as a single
        # column so callers passing one index still get a name.
        if isinstance(columns, (int, np.integer)):
            indices = [columns]
        else:
            indices = columns
        new_names = [self._input_names[i] + "_mean" for i in indices]
        self._add_column_names(new_names, location)


def ice_transparency(
Expand Down

0 comments on commit 165fedb

Please sign in to comment.