From ab28e87f260109e66d2b7f1e2674bf1d5b8cae16 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Wed, 23 Mar 2022 11:03:54 -0400 Subject: [PATCH] Fixup: normalize node feature by feature (not by component) --- hydragnn/preprocess/raw_dataset_loader.py | 56 +++++++++++------------ tests/inputs/ci_vectoroutput.json | 2 +- tests/test_graphs.py | 2 +- 3 files changed, 28 insertions(+), 32 deletions(-) diff --git a/hydragnn/preprocess/raw_dataset_loader.py b/hydragnn/preprocess/raw_dataset_loader.py index 38ed87c5b..2792a2f5c 100644 --- a/hydragnn/preprocess/raw_dataset_loader.py +++ b/hydragnn/preprocess/raw_dataset_loader.py @@ -330,50 +330,40 @@ def __normalize_dataset(self): for dataset in self.dataset_list: for data in dataset: # find maximum and minimum values for graph level features - index_counter_y = 0 + g_index_start = 0 for ifeat in range(num_graph_features): + g_index_end = g_index_start + self.graph_feature_dim[ifeat] self.minmax_graph_feature[0, ifeat] = min( - min( - data.y[ - index_counter_y : index_counter_y - + self.graph_feature_dim[ifeat] - ] - ), + torch.min(data.y[g_index_start:g_index_end]), self.minmax_graph_feature[0, ifeat], ) self.minmax_graph_feature[1, ifeat] = max( - max( - data.y[ - index_counter_y : index_counter_y - + self.graph_feature_dim[ifeat] - ] - ), + torch.max(data.y[g_index_start:g_index_end]), self.minmax_graph_feature[1, ifeat], ) - index_counter_y = index_counter_y + self.graph_feature_dim[ifeat] + g_index_start = g_index_end + # find maximum and minimum values for node level features + n_index_start = 0 for ifeat in range(num_node_features): - self.minmax_node_feature[0, ifeat] = np.minimum( - np.amin(data.x[:, ifeat].numpy()), + n_index_end = n_index_start + self.node_feature_dim[ifeat] + self.minmax_node_feature[0, ifeat] = min( + torch.min(data.x[:, n_index_start:n_index_end]), self.minmax_node_feature[0, ifeat], ) - self.minmax_node_feature[1, ifeat] = np.maximum( - np.amax(data.x[:, ifeat].numpy()), + self.minmax_node_feature[1, ifeat] = max( + torch.max(data.x[:, n_index_start:n_index_end]), self.minmax_node_feature[1, ifeat], ) + n_index_start = n_index_end for dataset in self.dataset_list: for data in dataset: - index_counter_y = 0 + g_index_start = 0 for ifeat in range(num_graph_features): - data.y[ - index_counter_y : index_counter_y - + self.graph_feature_dim[ifeat] - ] = tensor_divide( + g_index_end = g_index_start + self.graph_feature_dim[ifeat] + data.y[g_index_start:g_index_end] = tensor_divide( ( - data.y[ - index_counter_y : index_counter_y - + self.graph_feature_dim[ifeat] - ] + data.y[g_index_start:g_index_end] - self.minmax_graph_feature[0, ifeat] ), ( @@ -381,12 +371,18 @@ def __normalize_dataset(self): - self.minmax_graph_feature[0, ifeat] ), ) - index_counter_y = index_counter_y + self.graph_feature_dim[ifeat] + g_index_start = g_index_end + n_index_start = 0 for ifeat in range(num_node_features): - data.x[:, ifeat] = tensor_divide( - (data.x[:, ifeat] - self.minmax_node_feature[0, ifeat]), + n_index_end = n_index_start + self.node_feature_dim[ifeat] + data.x[:, n_index_start:n_index_end] = tensor_divide( + ( + data.x[:, n_index_start:n_index_end] + - self.minmax_node_feature[0, ifeat] + ), ( self.minmax_node_feature[1, ifeat] - self.minmax_node_feature[0, ifeat] ), ) + n_index_start = n_index_end diff --git a/tests/inputs/ci_vectoroutput.json b/tests/inputs/ci_vectoroutput.json index e1b3c0bcf..5dc5e462e 100644 --- a/tests/inputs/ci_vectoroutput.json +++ b/tests/inputs/ci_vectoroutput.json @@ -38,7 +38,7 @@ }, "node": { "num_headlayers": 2, - "dim_headlayers": [10, 10], + "dim_headlayers": [40, 10], "type": "mlp" } }, diff --git a/tests/test_graphs.py b/tests/test_graphs.py index aea83b555..818a10c94 100755 --- a/tests/test_graphs.py +++ b/tests/test_graphs.py @@ -132,7 +132,7 @@ def unittest_train_model(model_type, ci_input, use_lengths, overwrite_data=False thresholds["CGCNN"] = [0.15, 0.15, 0.40] thresholds["PNA"] = [0.10, 0.10, 0.40] if use_lengths and "vector" in ci_input: - thresholds["PNA"] = [0.15, 0.10, 0.60] + thresholds["PNA"] = [0.15, 0.10, 0.75] verbosity = 2 for ihead in range(len(true_values)):