Skip to content

Commit

Permalink
Fixup: normalize node features feature by feature (not component by component)
Browse files (browse the repository at this point in the history)
  • Loading branch information
streeve committed Mar 24, 2022
1 parent 5e83738 commit ab28e87
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 32 deletions.
56 changes: 26 additions & 30 deletions hydragnn/preprocess/raw_dataset_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,63 +330,59 @@ def __normalize_dataset(self):
for dataset in self.dataset_list:
for data in dataset:
# find maximum and minimum values for graph level features
index_counter_y = 0
g_index_start = 0
for ifeat in range(num_graph_features):
g_index_end = g_index_start + self.graph_feature_dim[ifeat]
self.minmax_graph_feature[0, ifeat] = min(
min(
data.y[
index_counter_y : index_counter_y
+ self.graph_feature_dim[ifeat]
]
),
torch.min(data.y[g_index_start:g_index_end]),
self.minmax_graph_feature[0, ifeat],
)
self.minmax_graph_feature[1, ifeat] = max(
max(
data.y[
index_counter_y : index_counter_y
+ self.graph_feature_dim[ifeat]
]
),
torch.max(data.y[g_index_start:g_index_end]),
self.minmax_graph_feature[1, ifeat],
)
index_counter_y = index_counter_y + self.graph_feature_dim[ifeat]
g_index_start = g_index_end

# find maximum and minimum values for node level features
n_index_start = 0
for ifeat in range(num_node_features):
self.minmax_node_feature[0, ifeat] = np.minimum(
np.amin(data.x[:, ifeat].numpy()),
n_index_end = n_index_start + self.node_feature_dim[ifeat]
self.minmax_node_feature[0, ifeat] = min(
torch.min(data.x[:, n_index_start:n_index_end]),
self.minmax_node_feature[0, ifeat],
)
self.minmax_node_feature[1, ifeat] = np.maximum(
np.amax(data.x[:, ifeat].numpy()),
self.minmax_node_feature[1, ifeat] = max(
torch.max(data.x[:, n_index_start:n_index_end]),
self.minmax_node_feature[1, ifeat],
)
n_index_start = n_index_end
for dataset in self.dataset_list:
for data in dataset:
index_counter_y = 0
g_index_start = 0
for ifeat in range(num_graph_features):
data.y[
index_counter_y : index_counter_y
+ self.graph_feature_dim[ifeat]
] = tensor_divide(
g_index_end = g_index_start + self.graph_feature_dim[ifeat]
data.y[g_index_start:g_index_end] = tensor_divide(
(
data.y[
index_counter_y : index_counter_y
+ self.graph_feature_dim[ifeat]
]
data.y[g_index_start:g_index_end]
- self.minmax_graph_feature[0, ifeat]
),
(
self.minmax_graph_feature[1, ifeat]
- self.minmax_graph_feature[0, ifeat]
),
)
index_counter_y = index_counter_y + self.graph_feature_dim[ifeat]
g_index_start = g_index_end
n_index_start = 0
for ifeat in range(num_node_features):
data.x[:, ifeat] = tensor_divide(
(data.x[:, ifeat] - self.minmax_node_feature[0, ifeat]),
n_index_end = n_index_start + self.node_feature_dim[ifeat]
data.x[:, n_index_start:n_index_end] = tensor_divide(
(
data.x[:, n_index_start:n_index_end]
- self.minmax_node_feature[0, ifeat]
),
(
self.minmax_node_feature[1, ifeat]
- self.minmax_node_feature[0, ifeat]
),
)
n_index_start = n_index_end
2 changes: 1 addition & 1 deletion tests/inputs/ci_vectoroutput.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
},
"node": {
"num_headlayers": 2,
"dim_headlayers": [10, 10],
"dim_headlayers": [40, 10],
"type": "mlp"
}
},
Expand Down
2 changes: 1 addition & 1 deletion tests/test_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def unittest_train_model(model_type, ci_input, use_lengths, overwrite_data=False
thresholds["CGCNN"] = [0.15, 0.15, 0.40]
thresholds["PNA"] = [0.10, 0.10, 0.40]
if use_lengths and "vector" in ci_input:
thresholds["PNA"] = [0.15, 0.10, 0.60]
thresholds["PNA"] = [0.15, 0.10, 0.75]
verbosity = 2

for ihead in range(len(true_values)):
Expand Down

0 comments on commit ab28e87

Please sign in to comment.