Skip to content

Commit

Permalink
[GSProcessing] Name reverse edges as dst:relation-rev:src (#490)
Browse files Browse the repository at this point in the history
*Issue #, if available:*

*Description of changes:*

* We use `dst:relation-rev:src` for reverse edge names instead of
`dst:rev-relation:src` to match GConstruct.

*Testing*:

* pytest and test job (with re-partition follow-up) succeed


By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.
  • Loading branch information
thvasilo committed Sep 27, 2023
1 parent 4a31c29 commit 0fc856c
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def _initialize_metadata_dict(
# Add original and reverse edge types
edge_types.append(f"{src_type}:{rel_type}:{dst_type}")
if self.add_reverse_edges:
edge_types.append(f"{dst_type}:rev-{rel_type}:{src_type}")
edge_types.append(f"{dst_type}:{rel_type}-rev:{src_type}")

metadata_dict["edge_type"] = edge_types
metadata_dict["node_type"] = sorted(node_type_set)
Expand Down Expand Up @@ -1072,7 +1072,7 @@ def write_edge_structure(
f"{edge_config.src_ntype}:{edge_config.get_relation_name()}:{edge_config.dst_ntype}"
)
rev_edge_type = (
f"{edge_config.dst_ntype}:rev-{edge_config.get_relation_name()}:{edge_config.src_ntype}"
f"{edge_config.dst_ntype}:{edge_config.get_relation_name()}-rev:{edge_config.src_ntype}"
)

src_node_id_mapping = (
Expand Down Expand Up @@ -1223,7 +1223,7 @@ def process_edge_data(self, edge_configs: Sequence[EdgeConfig]) -> Tuple[Dict, D
)
reverse_edge_type = (
f"{edge_config.dst_ntype}"
f":rev-{edge_config.get_relation_name()}"
f":{edge_config.get_relation_name()}-rev"
f":{edge_config.src_ntype}"
)
logging.info("Processing edge type '%s'...", edge_type)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,7 @@ def main():
for type_idx, (type_name, type_data_dict) in enumerate(edge_data_meta.items()):
src, relation, dst = type_name.split(":")

if relation.startswith("rev-"):
if relation.endswith("-rev"):
# Reverse edge types do not have their own data,
# and if needed we re-partition their structure while
# handling the "regular" edge type.
Expand All @@ -845,7 +845,7 @@ def main():
type_name,
)
continue
reverse_edge_type_name = f"{dst}:rev-{relation}:{src}"
reverse_edge_type_name = f"{dst}:{relation}-rev:{src}"
most_frequent_counts = list(edge_row_counts_frequencies[type_name].most_common(1)[0][0])
repartitioner = ParquetRepartitioner(
input_prefix, filesystem_type, region, verify_outputs=True
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"edge_type": [
"src:dummy_type:dst",
"dst:rev-dummy_type:src"
"dst:dummy_type-rev:src"
],
"edges": {
"src:dummy_type:dst": {
Expand All @@ -18,7 +18,7 @@
"delimiter": ""
}
},
"dst:rev-dummy_type:src": {
"dst:dummy_type-rev:src": {
"data": [
"edges/dummy_type/parquet/part-00000.parquet",
"edges/dummy_type/parquet/part-00001.parquet",
Expand Down Expand Up @@ -78,7 +78,7 @@
}
}
},
"dst:rev-dummy_type:src": {
"dst:dummy_type-rev:src": {
"label": {
"data":[
"edge_data/dummy_type-label/parquet/part-00000.parquet",
Expand Down
28 changes: 14 additions & 14 deletions graphstorm-processing/tests/test_dist_heterogenous_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,11 @@ def verify_integ_test_output(
assert metadata["node_type"] == ["director", "genre", "movie", "user"]
assert metadata["edge_type"] == [
"movie:included_in:genre",
"genre:rev-included_in:movie",
"genre:included_in-rev:movie",
"user:rated:movie",
"movie:rev-rated:user",
"movie:rated-rev:user",
"director:directed:movie",
"movie:rev-directed:director",
"movie:directed-rev:director",
]

expected_node_counts = {"director": 3, "genre": 2, "movie": 4, "user": 5}
Expand All @@ -182,11 +182,11 @@ def verify_integ_test_output(

expected_edge_counts = {
"movie:included_in:genre": 4,
"genre:rev-included_in:movie": 4,
"genre:included_in-rev:movie": 4,
"user:rated:movie": 6,
"movie:rev-rated:user": 6,
"movie:rated-rev:user": 6,
"director:directed:movie": 4,
"movie:rev-directed:director": 4,
"movie:directed-rev:director": 4,
}

for edge_type in metadata["edge_type"]:
Expand Down Expand Up @@ -266,11 +266,11 @@ def test_load_dist_hgl_without_labels(dghl_loader_no_label: DistHeterogeneousGra
"task_type": "link_predict",
"etype_label": [
"movie:included_in:genre",
"genre:rev-included_in:movie",
"genre:included_in-rev:movie",
"user:rated:movie",
"movie:rev-rated:user",
"movie:rated-rev:user",
"director:directed:movie",
"movie:rev-directed:director",
"movie:directed-rev:director",
],
"etype_label_property": [],
"ntype_label": [],
Expand All @@ -283,18 +283,18 @@ def test_load_dist_hgl_without_labels(dghl_loader_no_label: DistHeterogeneousGra

expected_edge_data = {
"user:rated:movie": {"train_mask", "val_mask", "test_mask"},
"movie:rev-rated:user": {"train_mask", "val_mask", "test_mask"},
"movie:rated-rev:user": {"train_mask", "val_mask", "test_mask"},
"movie:included_in:genre": {"train_mask", "val_mask", "test_mask"},
"genre:rev-included_in:movie": {"train_mask", "val_mask", "test_mask"},
"genre:included_in-rev:movie": {"train_mask", "val_mask", "test_mask"},
"director:directed:movie": {"train_mask", "val_mask", "test_mask"},
"movie:rev-directed:director": {"train_mask", "val_mask", "test_mask"},
"movie:directed-rev:director": {"train_mask", "val_mask", "test_mask"},
}

for edge_type in metadata["edge_data"]:
assert metadata["edge_data"][edge_type].keys() == expected_edge_data[edge_type]
if not "rev-" in edge_type:
if not "-rev" in edge_type:
src_type, relation, dst_type = edge_type.split(":")
rev_type = f"{dst_type}:rev-{relation}:{src_type}"
rev_type = f"{dst_type}:{relation}-rev:{src_type}"
assert (
metadata["edge_data"][rev_type]["train_mask"]
== metadata["edge_data"][edge_type]["train_mask"]
Expand Down
4 changes: 2 additions & 2 deletions graphstorm-processing/tests/test_repartition_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ def test_verify_metadata_only_edge_data():
row_counts = [10, 10, 10, 10, 10]
original_metadata_dict["edge_data"]["src:dummy_type:dst"]["label"]["row_counts"] = row_counts
original_metadata_dict["edges"]["src:dummy_type:dst"]["row_counts"] = row_counts
original_metadata_dict["edges"].pop("dst:rev-dummy_type:src")
original_metadata_dict["edge_data"].pop("dst:rev-dummy_type:src")
original_metadata_dict["edges"].pop("dst:dummy_type-rev:src")
original_metadata_dict["edge_data"].pop("dst:dummy_type-rev:src")

# Ensure success when counts match
repartition_files.verify_metadata(
Expand Down

0 comments on commit 0fc856c

Please sign in to comment.