From ea67d08cde4060ba53eb31281a943b23029e83dd Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Mon, 11 Dec 2023 20:16:29 +0000
Subject: [PATCH 01/21] add optimization for gconstruct

---
 python/graphstorm/gconstruct/construct_graph.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 0065da5403..bf3cb6bcb6 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -582,7 +582,7 @@ def process_edge_data(process_confs, node_id_map, arr_merger,
 
     return (edges, edge_data, label_stats)
 
-def verify_confs(confs):
+def verify_confs(confs, args=None):
     """ Verify the configuration of the input data.
     """
     if "version" not in confs:
@@ -591,6 +591,16 @@ def verify_confs(confs):
             "The config file does not have a 'version' entry. Assuming gconstruct-v0.1")
     ntypes = {conf['node_type'] for conf in confs["nodes"]}
     etypes = [conf['relation'] for conf in confs["edges"]]
+    # Adjust input to DGL requirement if it is a honogeneous graph
+    if len(ntypes) == 1 and len(etypes) == 1 and not args.add_reverse_edges:
+        assert etypes[0][0] in ntypes, \
+            f"source node type {etypes[0][0]} does not exist. Please check your input data."
+        assert etypes[0][2] in ntypes, \
+            f"dest node type {etypes[0][2]} does not exist. Please check your input data."
+        logging.warning("Generated Graph is a homogeneous graph, so the node type will be "
+                        "changed to _N and edge type should be changed to [_N, _E, _N]")
+        confs['nodes'][0]['node_type'] = "_N"
+        confs['edges'][0]['relation'] = ["_N", "_E", "_N"]
     for etype in etypes:
         assert len(etype) == 3, \
                 "The edge type must be (source node type, relation type, dest node type)."
@@ -668,7 +678,7 @@ def process_graph(args):
             if args.num_processes_for_nodes is not None else args.num_processes
     num_processes_for_edges = args.num_processes_for_edges \
             if args.num_processes_for_edges is not None else args.num_processes
-    verify_confs(process_confs)
+    verify_confs(process_confs, args)
     output_format = args.output_format
     for out_format in output_format:
         assert out_format in ["DGL", "DistDGL"], \

From 3de2100eb7ad302f8b7b83c66933ca9cbf62a2be Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Mon, 11 Dec 2023 20:49:24 +0000
Subject: [PATCH 02/21] update unit test

---
 .../gconstruct/test_construct_graph.py        | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index a03a7cbec7..22321a1b14 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -21,12 +21,13 @@
 import pyarrow.parquet as pq
 import numpy as np
 import dgl
+import argparse
 import torch as th
 
 from functools import partial
 from numpy.testing import assert_equal, assert_almost_equal
 
-from graphstorm.gconstruct.construct_graph import parse_edge_data
+from graphstorm.gconstruct.construct_graph import parse_edge_data, verify_confs
 from graphstorm.gconstruct.file_io import write_data_parquet, read_data_parquet
 from graphstorm.gconstruct.file_io import write_data_json, read_data_json
 from graphstorm.gconstruct.file_io import write_data_csv, read_data_csv
@@ -1705,6 +1706,26 @@ def test_gc():
     assert not os.path.isdir("/tmp_featurewrapper2"), \
         "Directory /tmp_featurewrapper2 should not exist after gc"
 
+
+def test_homo():
+    conf = {'version': 'gconstruct-v0.1', 'nodes': [{'node_id_col': 'id', 'node_type': 'movie', 'format': {'name': 'parquet'},
+                                              'files': '/data/ml-100k/movie.parquet', 'features': [
+            {'feature_col': 'title',
+             'transform': {'name': 'bert_hf', 'bert_model': 'bert-base-uncased', 'max_seq_length': 16}}], 'labels': [
+            {'label_col': 'label', 'task_type': 'classification', 'split_pct': [0.8, 0.1, 0.1]}]}], 'edges': [
+        {'source_id_col': 'src_id', 'dest_id_col': 'dst_id', 'relation': ['movie', 'rating', 'movie'],
+         'format': {'name': 'parquet'}, 'files': '/data/ml-100k/edges.parquet',
+         'labels': [{'label_col': 'rate', 'task_type': 'classification', 'split_pct': [0.1, 0.1, 0.1]}]}]}
+    verify_confs(conf, rev_edges=False)
+    assert conf['nodes'][0]["node_type"] == "_N"
+    assert conf['edges'][0]['relation'] == ["_N", "_E", "_N"]
+
+    conf['nodes'][0]["node_type"] = "movie"
+    conf['edges'][0]['relation'] = ['movie', 'rating', 'movie']
+    verify_confs(conf, rev_edges=True)
+    assert conf['nodes'][0]["node_type"] == "movie"
+    assert conf['edges'][0]['relation'] == ["movie", "rating", "movie"]
+
 if __name__ == '__main__':
     test_parse_edge_data()
     test_multiprocessing_checks()

From 98038fd53ce7ecee698ff4a406c9926a6834a245 Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Mon, 11 Dec 2023 21:11:18 +0000
Subject: [PATCH 03/21] add e2e test

---
 .../graphstorm/gconstruct/construct_graph.py  |  6 +--
 .../data_gen/movielens_homo.json              | 44 +++++++++++++++++++
 .../data_gen/process_movielens.py             |  5 +++
 .../data_process/movielens_test.sh            | 10 ++++-
 4 files changed, 61 insertions(+), 4 deletions(-)
 create mode 100644 tests/end2end-tests/data_gen/movielens_homo.json

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index bf3cb6bcb6..42a63a8fac 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -582,7 +582,7 @@ def process_edge_data(process_confs, node_id_map, arr_merger,
 
     return (edges, edge_data, label_stats)
 
-def verify_confs(confs, args=None):
+def verify_confs(confs, rev_edges):
     """ Verify the configuration of the input data.
     """
     if "version" not in confs:
@@ -592,7 +592,7 @@ def verify_confs(confs, args=None):
     ntypes = {conf['node_type'] for conf in confs["nodes"]}
     etypes = [conf['relation'] for conf in confs["edges"]]
     # Adjust input to DGL requirement if it is a honogeneous graph
-    if len(ntypes) == 1 and len(etypes) == 1 and not args.add_reverse_edges:
+    if len(ntypes) == 1 and len(etypes) == 1 and not rev_edges:
         assert etypes[0][0] in ntypes, \
             f"source node type {etypes[0][0]} does not exist. Please check your input data."
         assert etypes[0][2] in ntypes, \
@@ -678,7 +678,7 @@ def process_graph(args):
             if args.num_processes_for_nodes is not None else args.num_processes
     num_processes_for_edges = args.num_processes_for_edges \
             if args.num_processes_for_edges is not None else args.num_processes
-    verify_confs(process_confs, args)
+    verify_confs(process_confs, args.add_reverse_edges)
     output_format = args.output_format
     for out_format in output_format:
         assert out_format in ["DGL", "DistDGL"], \
diff --git a/tests/end2end-tests/data_gen/movielens_homo.json b/tests/end2end-tests/data_gen/movielens_homo.json
new file mode 100644
index 0000000000..f64d4a0c2f
--- /dev/null
+++ b/tests/end2end-tests/data_gen/movielens_homo.json
@@ -0,0 +1,44 @@
+{
+    "version": "gconstruct-v0.1",
+    "nodes": [
+            {
+                    "node_id_col":  "id",
+                    "node_type":    "movie",
+                    "format":       {"name": "parquet"},
+                    "files":        "/data/ml-100k/movie.parquet",
+                    "features":     [
+                        {
+                                "feature_col":  "title",
+                                "transform":    {
+                                        "name": "bert_hf",
+                                        "bert_model": "bert-base-uncased",
+                                        "max_seq_length": 16
+                                }
+                        }
+                 ],
+                    "labels":	[
+                        {
+                            "label_col":	"label",
+                            "task_type":	"classification",
+                            "split_pct":	[0.8, 0.1, 0.1]
+                        }
+                    ]
+            }
+    ],
+    "edges": [
+            {
+                    "source_id_col":    "src_id",
+                    "dest_id_col":      "dst_id",
+                    "relation":         ["movie", "rating", "movie"],
+                    "format":           {"name": "parquet"},
+                    "files":        "/data/ml-100k/edges_homo.parquet",
+                    "labels":	[
+                        {
+                            "label_col":	"rate",
+                            "task_type":	"classification",
+                            "split_pct":	[0.1, 0.1, 0.1]
+                        }
+                    ]
+            }
+    ]
+}
\ No newline at end of file
diff --git a/tests/end2end-tests/data_gen/process_movielens.py b/tests/end2end-tests/data_gen/process_movielens.py
index 90fdcd1702..9ecc34de35 100644
--- a/tests/end2end-tests/data_gen/process_movielens.py
+++ b/tests/end2end-tests/data_gen/process_movielens.py
@@ -90,6 +90,11 @@ def write_data_parquet(data, data_file):
 edge_data = {'src_id': edges[0], 'dst_id': edges[1], 'rate': edges[2]}
 write_data_parquet(edge_data, '/data/ml-100k/edges.parquet')
 
+# generate data for homogeneous optimization test
+edges = pandas.read_csv('/data/ml-100k/u.data', delimiter='\t', header=None)
+edge_data = {'src_id': edges[1], 'dst_id': edges[1], 'rate': edges[2]}
+write_data_parquet(edge_data, '/data/ml-100k/edges_homo.parquet')
+
 # generate synthetic user data with label
 user_labels = np.random.randint(11, size=feat.shape[0])
 user_data = {'id': user['id'].values, 'feat': feat, 'occupation': user['occupation'], 'label': user_labels}
diff --git a/tests/end2end-tests/data_process/movielens_test.sh b/tests/end2end-tests/data_process/movielens_test.sh
index 455330bc38..e47a7aaac9 100644
--- a/tests/end2end-tests/data_process/movielens_test.sh
+++ b/tests/end2end-tests/data_process/movielens_test.sh
@@ -5,8 +5,9 @@ service ssh restart
 GS_HOME=$(pwd)
 NUM_TRAINERS=4
 export PYTHONPATH=$GS_HOME/python/
+cd $GS_HOME/training_scripts/gsgnn_np
+echo "127.0.0.1" > ip_list.txt
 cd $GS_HOME/training_scripts/gsgnn_ep
-
 echo "127.0.0.1" > ip_list.txt
 
 error_and_exit () {
@@ -27,6 +28,13 @@ python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2
 
 error_and_exit $?
 
+echo "********* Test Homogeneous Graph Optimization ********"
+python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens_homo.json --num-processes 1 --output-dir /tmp/movielen_100k_train_val_1p_4t_homo --graph-name movie-lens-100k
+error_and_exit $?
+
+python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homo/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --target-ntype _N
+error_and_exit $?
+
 echo "********* Test the DistDGL graph format with BERT embeddings ********"
 python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens.json --num-processes 1 --output-dir /tmp/movielens_bert_emb --graph-name ml --add-reverse-edges
 

From 0917eaf8d90ad8d8b660fc91759d600a053f8e61 Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Mon, 11 Dec 2023 21:54:10 +0000
Subject: [PATCH 04/21] update

---
 tests/unit-tests/gconstruct/test_construct_graph.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index 22321a1b14..c5fd13a5f0 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -1744,4 +1744,5 @@ def test_homo():
     test_label()
     test_multicolumn(None)
     test_multicolumn("/")
-    test_feature_wrapper()
\ No newline at end of file
+    test_feature_wrapper()
+    test_homo()
\ No newline at end of file

From b68d01b1a49e8ddcf1f9b8a893e18552c53db060 Mon Sep 17 00:00:00 2001
From: jalencato <jalencato23pistons@gmail.com>
Date: Wed, 13 Dec 2023 10:17:12 -0800
Subject: [PATCH 05/21] Apply suggestions from code review

Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com>
---
 python/graphstorm/gconstruct/construct_graph.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 42a63a8fac..05d388f54c 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -591,14 +591,14 @@ def verify_confs(confs, rev_edges):
             "The config file does not have a 'version' entry. Assuming gconstruct-v0.1")
     ntypes = {conf['node_type'] for conf in confs["nodes"]}
     etypes = [conf['relation'] for conf in confs["edges"]]
-    # Adjust input to DGL requirement if it is a honogeneous graph
+    # Adjust input to DGL homogeneous graph format if it is a homogeneous graph
     if len(ntypes) == 1 and len(etypes) == 1 and not rev_edges:
         assert etypes[0][0] in ntypes, \
             f"source node type {etypes[0][0]} does not exist. Please check your input data."
         assert etypes[0][2] in ntypes, \
             f"dest node type {etypes[0][2]} does not exist. Please check your input data."
         logging.warning("Generated Graph is a homogeneous graph, so the node type will be "
-                        "changed to _N and edge type should be changed to [_N, _E, _N]")
+                        "changed to _N and edge type will be changed to [_N, _E, _N]")
         confs['nodes'][0]['node_type'] = "_N"
         confs['edges'][0]['relation'] = ["_N", "_E", "_N"]
     for etype in etypes:

From 5240748a87ef44a3258f1279693b88bdac275973 Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Wed, 13 Dec 2023 19:16:25 +0000
Subject: [PATCH 06/21] fix bug

---
 .../graphstorm/gconstruct/construct_graph.py  | 11 ++++++--
 .../data_gen/movielens_homo.json              | 15 ++++++++++
 .../gconstruct/test_construct_graph.py        | 28 +++++++++++++++++++
 3 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 05d388f54c..1a47782dc6 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -584,6 +584,10 @@ def process_edge_data(process_confs, node_id_map, arr_merger,
 
 def verify_confs(confs, rev_edges):
     """ Verify the configuration of the input data.
+    Parameters
+    ----------
+    rev_edges: bool
+        Whether to add reverse edges
     """
     if "version" not in confs:
         # TODO: Make a requirement with v1.0 launch
@@ -592,15 +596,16 @@ def verify_confs(confs, rev_edges):
     ntypes = {conf['node_type'] for conf in confs["nodes"]}
     etypes = [conf['relation'] for conf in confs["edges"]]
     # Adjust input to DGL homogeneous graph format if it is a homogeneous graph
-    if len(ntypes) == 1 and len(etypes) == 1 and not rev_edges:
+    etype_set = set(tuple(relation) for relation in etypes)
+    if len(ntypes) == 1 and len(etype_set) == 1 and not rev_edges:
         assert etypes[0][0] in ntypes, \
             f"source node type {etypes[0][0]} does not exist. Please check your input data."
         assert etypes[0][2] in ntypes, \
             f"dest node type {etypes[0][2]} does not exist. Please check your input data."
         logging.warning("Generated Graph is a homogeneous graph, so the node type will be "
                         "changed to _N and edge type will be changed to [_N, _E, _N]")
-        confs['nodes'][0]['node_type'] = "_N"
-        confs['edges'][0]['relation'] = ["_N", "_E", "_N"]
+        [node.update({'node_type': "_N"}) for node in confs['nodes']]
+        [edge.update({'relation': ["_N", "_E", "_N"]}) for edge in confs['edges']]
     for etype in etypes:
         assert len(etype) == 3, \
                 "The edge type must be (source node type, relation type, dest node type)."
diff --git a/tests/end2end-tests/data_gen/movielens_homo.json b/tests/end2end-tests/data_gen/movielens_homo.json
index f64d4a0c2f..c0f7457859 100644
--- a/tests/end2end-tests/data_gen/movielens_homo.json
+++ b/tests/end2end-tests/data_gen/movielens_homo.json
@@ -23,6 +23,16 @@
                             "split_pct":	[0.8, 0.1, 0.1]
                         }
                     ]
+            },
+            {
+                    "node_type":    "movie",
+                    "format":       {"name": "parquet"},
+                    "files":        "/data/ml-100k/movie.parquet",
+                    "features":     [
+                           {
+                                   "feature_col":  "id"
+                           }
+                    ]
             }
     ],
     "edges": [
@@ -39,6 +49,11 @@
                             "split_pct":	[0.1, 0.1, 0.1]
                         }
                     ]
+            },
+            {
+                    "relation":         ["movie", "rating", "movie"],
+                    "format":           {"name": "parquet"},
+                    "files":        "/data/ml-100k/edges_homo.parquet"
             }
     ]
 }
\ No newline at end of file
diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index c5fd13a5f0..93d8bd38c0 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -1708,6 +1708,7 @@ def test_gc():
 
 
 def test_homo():
+    # single node type and edge type input
     conf = {'version': 'gconstruct-v0.1', 'nodes': [{'node_id_col': 'id', 'node_type': 'movie', 'format': {'name': 'parquet'},
                                               'files': '/data/ml-100k/movie.parquet', 'features': [
             {'feature_col': 'title',
@@ -1726,6 +1727,33 @@ def test_homo():
     assert conf['nodes'][0]["node_type"] == "movie"
     assert conf['edges'][0]['relation'] == ["movie", "rating", "movie"]
 
+    # multiple node types and edge types input
+    conf = {
+        "version": "gconstruct-v0.1", "nodes": [
+            {"node_id_col": "id", "node_type": "movie", "format": {"name": "parquet"},
+             "files": "/data/ml-100k/movie.parquet", "features": [
+                {"feature_col": "title", "transform": {
+                    "name": "bert_hf", "bert_model": "bert-base-uncased", "max_seq_length": 16}}],
+             "labels": [{"label_col": "label", "task_type": "classification", "split_pct": [0.8, 0.1, 0.1]}]},
+            {"node_type": "movie", "format": {"name": "parquet"}, "files": "/data/ml-100k/movie.parquet",
+             "features": [{"feature_col": "id"}]}],
+        "edges": [
+            {"source_id_col": "src_id", "dest_id_col": "dst_id", "relation": ["movie", "rating", "movie"],
+             "format": {"name": "parquet"}, "files": "/data/ml-100k/edges_homo.parquet", "labels": [
+                {"label_col": "rate", "task_type": "classification", "split_pct": [0.1, 0.1, 0.1]}]},
+            {"relation": ["movie", "rating", "movie"], "format": {"name": "parquet"},
+             "files": "/data/ml-100k/edges_homo.parquet"}]
+    }
+    verify_confs(conf, rev_edges=False)
+    assert conf['nodes'][0]["node_type"] == "_N"
+    assert conf['edges'][0]['relation'] == ["_N", "_E", "_N"]
+
+    conf['nodes'][0]["node_type"] = "movie"
+    conf['edges'][0]['relation'] = ['movie', 'rating', 'movie']
+    verify_confs(conf, rev_edges=True)
+    assert conf['nodes'][0]["node_type"] == "movie"
+    assert conf['edges'][0]['relation'] == ["movie", "rating", "movie"]
+
 if __name__ == '__main__':
     test_parse_edge_data()
     test_multiprocessing_checks()

From 1d63921898df588e7e43b40668c57cdb8d615a2d Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Wed, 13 Dec 2023 19:20:25 +0000
Subject: [PATCH 07/21] reformat

---
 .../gconstruct/test_construct_graph.py        | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index 93d8bd38c0..35fa176313 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -1709,14 +1709,18 @@ def test_gc():
 
 def test_homo():
     # single node type and edge type input
-    conf = {'version': 'gconstruct-v0.1', 'nodes': [{'node_id_col': 'id', 'node_type': 'movie', 'format': {'name': 'parquet'},
-                                              'files': '/data/ml-100k/movie.parquet', 'features': [
-            {'feature_col': 'title',
-             'transform': {'name': 'bert_hf', 'bert_model': 'bert-base-uncased', 'max_seq_length': 16}}], 'labels': [
-            {'label_col': 'label', 'task_type': 'classification', 'split_pct': [0.8, 0.1, 0.1]}]}], 'edges': [
-        {'source_id_col': 'src_id', 'dest_id_col': 'dst_id', 'relation': ['movie', 'rating', 'movie'],
-         'format': {'name': 'parquet'}, 'files': '/data/ml-100k/edges.parquet',
-         'labels': [{'label_col': 'rate', 'task_type': 'classification', 'split_pct': [0.1, 0.1, 0.1]}]}]}
+    conf = {
+        "version": "gconstruct-v0.1", "nodes": [
+            {"node_id_col": "id", "node_type": "movie", "format": {"name": "parquet"},
+             "files": "/data/ml-100k/movie.parquet", "features": [
+                {"feature_col": "title", "transform": {
+                    "name": "bert_hf", "bert_model": "bert-base-uncased", "max_seq_length": 16}}],
+             "labels": [{"label_col": "label", "task_type": "classification", "split_pct": [0.8, 0.1, 0.1]}]}],
+        "edges": [
+            {"source_id_col": "src_id", "dest_id_col": "dst_id", "relation": ["movie", "rating", "movie"],
+             "format": {"name": "parquet"}, "files": "/data/ml-100k/edges_homo.parquet", "labels": [
+                {"label_col": "rate", "task_type": "classification", "split_pct": [0.1, 0.1, 0.1]}]}]
+    }
     verify_confs(conf, rev_edges=False)
     assert conf['nodes'][0]["node_type"] == "_N"
     assert conf['edges'][0]['relation'] == ["_N", "_E", "_N"]

From d856178e44a3a4668f775b3847826ab3246c6030 Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Wed, 13 Dec 2023 19:56:32 +0000
Subject: [PATCH 08/21] add test

---
 python/graphstorm/gconstruct/construct_graph.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 1a47782dc6..6eb1b65678 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -604,8 +604,10 @@ def verify_confs(confs, rev_edges):
             f"dest node type {etypes[0][2]} does not exist. Please check your input data."
         logging.warning("Generated Graph is a homogeneous graph, so the node type will be "
                         "changed to _N and edge type will be changed to [_N, _E, _N]")
-        [node.update({'node_type': "_N"}) for node in confs['nodes']]
-        [edge.update({'relation': ["_N", "_E", "_N"]}) for edge in confs['edges']]
+        for node in confs['nodes']:
+            node['node_type'] = "_N"
+        for edge in confs['edges']:
+            edge['relation'] = ["_N", "_E", "_N"]
     for etype in etypes:
         assert len(etype) == 3, \
                 "The edge type must be (source node type, relation type, dest node type)."

From 53b8c57fde9ed2e3a999e7a3996cdabba65ffa67 Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Wed, 13 Dec 2023 20:04:17 +0000
Subject: [PATCH 09/21] update

---
 tests/unit-tests/gconstruct/test_construct_graph.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index 35fa176313..3025dfc031 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -21,7 +21,6 @@
 import pyarrow.parquet as pq
 import numpy as np
 import dgl
-import argparse
 import torch as th
 
 from functools import partial

From 2c9ec4e8d56080900309df989e8a7277df86f32f Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Thu, 14 Dec 2023 21:06:56 +0000
Subject: [PATCH 10/21] apply comments about reverse edges

---
 .github/workflow_scripts/e2e_check.sh         |  1 +
 .../graphstorm/gconstruct/construct_graph.py  | 58 ++++++++++++++-----
 ...s_homo.json => movielens_homogeneous.json} |  4 ++
 .../data_process/homogeneous_test.sh          | 35 +++++++++++
 .../data_process/movielens_test.sh            |  7 ---
 .../gconstruct/test_construct_graph.py        |  4 +-
 6 files changed, 85 insertions(+), 24 deletions(-)
 rename tests/end2end-tests/data_gen/{movielens_homo.json => movielens_homogeneous.json} (93%)
 create mode 100644 tests/end2end-tests/data_process/homogeneous_test.sh

diff --git a/.github/workflow_scripts/e2e_check.sh b/.github/workflow_scripts/e2e_check.sh
index 9851a35529..8c122c9f9d 100644
--- a/.github/workflow_scripts/e2e_check.sh
+++ b/.github/workflow_scripts/e2e_check.sh
@@ -8,6 +8,7 @@ sh ./tests/end2end-tests/create_data.sh
 sh ./tests/end2end-tests/tools/test_mem_est.sh
 sh ./tests/end2end-tests/data_process/test.sh
 sh ./tests/end2end-tests/data_process/movielens_test.sh
+sh ./tests/end2end-tests/data_process/homogeneous_test.sh
 sh ./tests/end2end-tests/custom-gnn/run_test.sh
 bash ./tests/end2end-tests/graphstorm-nc/test.sh
 bash ./tests/end2end-tests/graphstorm-lp/test.sh
diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 6eb1b65678..8939d07605 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -28,6 +28,7 @@
 import numpy as np
 import torch as th
 import dgl
+from dgl.distributed.constants import DEFAULT_NTYPE, DEFAULT_ETYPE
 
 from ..utils import sys_tracker, get_log_level
 from .file_io import parse_node_file_format, parse_edge_file_format
@@ -582,12 +583,23 @@ def process_edge_data(process_confs, node_id_map, arr_merger,
 
     return (edges, edge_data, label_stats)
 
-def verify_confs(confs, rev_edges):
+def is_homogeneous(confs):
+    """ Return True if the config has one distinct node type and one distinct edge type.
+    Parameters
+    ----------
+    confs: dict
+        Config dict; reads 'node_type' of each node entry and 'relation' of each edge entry.
+    """
+    ntypes = {conf['node_type'] for conf in confs["nodes"]}
+    etypes = set(tuple(conf['relation']) for conf in confs["edges"])
+    return len(ntypes) == 1 and len(etypes) == 1
+
+def verify_confs(confs):
     """ Verify the configuration of the input data.
-    Parameters
-    ----------
-    rev_edges: bool
-        Whether to add reverse edges
+    Parameter
+    ---------
+    confs: dict
+        A dict containing all user input config
     """
     if "version" not in confs:
         # TODO: Make a requirement with v1.0 launch
@@ -596,8 +608,7 @@ def verify_confs(confs, rev_edges):
     ntypes = {conf['node_type'] for conf in confs["nodes"]}
     etypes = [conf['relation'] for conf in confs["edges"]]
     # Adjust input to DGL homogeneous graph format if it is a homogeneous graph
-    etype_set = set(tuple(relation) for relation in etypes)
-    if len(ntypes) == 1 and len(etype_set) == 1 and not rev_edges:
+    if is_homogeneous(confs):
         assert etypes[0][0] in ntypes, \
             f"source node type {etypes[0][0]} does not exist. Please check your input data."
         assert etypes[0][2] in ntypes, \
@@ -605,9 +616,9 @@ def verify_confs(confs, rev_edges):
         logging.warning("Generated Graph is a homogeneous graph, so the node type will be "
                         "changed to _N and edge type will be changed to [_N, _E, _N]")
         for node in confs['nodes']:
-            node['node_type'] = "_N"
+            node['node_type'] = DEFAULT_NTYPE
         for edge in confs['edges']:
-            edge['relation'] = ["_N", "_E", "_N"]
+            edge['relation'] = DEFAULT_ETYPE
     for etype in etypes:
         assert len(etype) == 3, \
                 "The edge type must be (source node type, relation type, dest node type)."
@@ -685,7 +696,7 @@ def process_graph(args):
             if args.num_processes_for_nodes is not None else args.num_processes
     num_processes_for_edges = args.num_processes_for_edges \
             if args.num_processes_for_edges is not None else args.num_processes
-    verify_confs(process_confs, args.add_reverse_edges)
+    verify_confs(process_confs)
     output_format = args.output_format
     for out_format in output_format:
         assert out_format in ["DGL", "DistDGL"], \
@@ -715,12 +726,29 @@ def process_graph(args):
 
     if args.add_reverse_edges:
         edges1 = {}
-        for etype in edges:
-            e = edges[etype]
+        if is_homogeneous(process_confs):
+            logging.warning("For homogeneous graph, the generated reverse edge will "
+                            "be the same edge type as the original graph. Instead for "
+                            "heterogeneous graph, the generated reverse edge type will "
+                            "add -rev as a suffix")
+            e = edges[DEFAULT_ETYPE]
             assert isinstance(e, tuple) and len(e) == 2
-            assert isinstance(etype, tuple) and len(etype) == 3
-            edges1[etype] = e
-            edges1[etype[2], etype[1] + "-rev", etype[0]] = (e[1], e[0])
+            edges1[DEFAULT_ETYPE] = e
+            edges1[DEFAULT_ETYPE] = (np.concatenate([e[0], e[1]]),
+                                     np.concatenate([e[1], e[0]]))
+            if DEFAULT_ETYPE in edge_data:
+                data = edge_data[DEFAULT_ETYPE]
+                logging.warning("Reverse edge for homogeneous graph will have same feature as "
+                                "what we have in the original edges")
+                for key, value in data.items():
+                    data[key] = np.concatenate([value, value])
+        else:
+            for etype in edges:
+                e = edges[etype]
+                assert isinstance(e, tuple) and len(e) == 2
+                assert isinstance(etype, tuple) and len(etype) == 3
+                edges1[etype] = e
+                edges1[etype[2], etype[1] + "-rev", etype[0]] = (e[1], e[0])
         edges = edges1
         sys_tracker.check('Add reverse edges')
     g = dgl.heterograph(edges, num_nodes_dict=num_nodes)
diff --git a/tests/end2end-tests/data_gen/movielens_homo.json b/tests/end2end-tests/data_gen/movielens_homogeneous.json
similarity index 93%
rename from tests/end2end-tests/data_gen/movielens_homo.json
rename to tests/end2end-tests/data_gen/movielens_homogeneous.json
index c0f7457859..79e3e27ceb 100644
--- a/tests/end2end-tests/data_gen/movielens_homo.json
+++ b/tests/end2end-tests/data_gen/movielens_homogeneous.json
@@ -42,6 +42,10 @@
                     "relation":         ["movie", "rating", "movie"],
                     "format":           {"name": "parquet"},
                     "files":        "/data/ml-100k/edges_homo.parquet",
+                    "features": [
+                                  {
+                                   "feature_col":  "rate"
+                           }],
                     "labels":	[
                         {
                             "label_col":	"rate",
diff --git a/tests/end2end-tests/data_process/homogeneous_test.sh b/tests/end2end-tests/data_process/homogeneous_test.sh
new file mode 100644
index 0000000000..e76c4ef741
--- /dev/null
+++ b/tests/end2end-tests/data_process/homogeneous_test.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+service ssh restart
+
+GS_HOME=$(pwd)
+NUM_TRAINERS=4
+export PYTHONPATH=$GS_HOME/python/
+cd $GS_HOME/training_scripts/gsgnn_np
+echo "127.0.0.1" > ip_list.txt
+cd $GS_HOME/training_scripts/gsgnn_ep
+echo "127.0.0.1" > ip_list.txt
+
+error_and_exit () {
+	# check exec status of launch.py
+	status=$1
+	echo $status
+
+	if test $status -ne 0
+	then
+		exit -1
+	fi
+}
+
+
+echo "********* Test Homogeneous Graph Optimization ********"
+python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens_homogenous.json --num-processes 1 --output-dir /tmp/movielen_100k_train_val_1p_4t_homogeneous --graph-name movie-lens-100k
+error_and_exit $?
+
+echo "********* Test Node Classification on GConstruct Homogeneous Graph ********"
+python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --target-ntype _N
+error_and_exit $?
+
+echo "********* Test Edge Classification on GConstruct Homogeneous Graph ********"
+python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --target-etype _N,_E,_N
+error_and_exit $?
\ No newline at end of file
diff --git a/tests/end2end-tests/data_process/movielens_test.sh b/tests/end2end-tests/data_process/movielens_test.sh
index e47a7aaac9..200d1f8764 100644
--- a/tests/end2end-tests/data_process/movielens_test.sh
+++ b/tests/end2end-tests/data_process/movielens_test.sh
@@ -28,13 +28,6 @@ python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2
 
 error_and_exit $?
 
-echo "********* Test Homogeneous Graph Optimization ********"
-python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens_homo.json --num-processes 1 --output-dir /tmp/movielen_100k_train_val_1p_4t_homo --graph-name movie-lens-100k
-error_and_exit $?
-
-python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homo/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --target-ntype _N
-error_and_exit $?
-
 echo "********* Test the DistDGL graph format with BERT embeddings ********"
 python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens.json --num-processes 1 --output-dir /tmp/movielens_bert_emb --graph-name ml --add-reverse-edges
 
diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index 3025dfc031..873e68d1b7 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -1706,7 +1706,7 @@ def test_gc():
         "Directory /tmp_featurewrapper2 should not exist after gc"
 
 
-def test_homo():
+def test_homogeneous():
     # single node type and edge type input
     conf = {
         "version": "gconstruct-v0.1", "nodes": [
@@ -1776,4 +1776,4 @@ def test_homo():
     test_multicolumn(None)
     test_multicolumn("/")
     test_feature_wrapper()
-    test_homo()
\ No newline at end of file
+    test_homogeneous()
\ No newline at end of file

From 27a7c9df492fb9863305839c1e8b58c090aa6eab Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Thu, 14 Dec 2023 21:11:58 +0000
Subject: [PATCH 11/21] fix unit test

---
 python/graphstorm/gconstruct/construct_graph.py  |  3 ++-
 .../gconstruct/test_construct_graph.py           | 16 ++--------------
 2 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 8939d07605..56d28e87f2 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -618,7 +618,7 @@ def verify_confs(confs):
         for node in confs['nodes']:
             node['node_type'] = DEFAULT_NTYPE
         for edge in confs['edges']:
-            edge['relation'] = DEFAULT_ETYPE
+            edge['relation'] = list(DEFAULT_ETYPE)
     for etype in etypes:
         assert len(etype) == 3, \
                 "The edge type must be (source node type, relation type, dest node type)."
@@ -736,6 +736,7 @@ def process_graph(args):
             edges1[DEFAULT_ETYPE] = e
             edges1[DEFAULT_ETYPE] = (np.concatenate([e[0], e[1]]),
                                      np.concatenate([e[1], e[0]]))
+            # Double edge feature as it is necessary to match tensor size in generated graph
             if DEFAULT_ETYPE in edge_data:
                 data = edge_data[DEFAULT_ETYPE]
                 logging.warning("Reverse edge for homogeneous graph will have same feature as "
diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index 873e68d1b7..672105d435 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -1720,16 +1720,10 @@ def test_homogeneous():
              "format": {"name": "parquet"}, "files": "/data/ml-100k/edges_homo.parquet", "labels": [
                 {"label_col": "rate", "task_type": "classification", "split_pct": [0.1, 0.1, 0.1]}]}]
     }
-    verify_confs(conf, rev_edges=False)
+    verify_confs(conf)
     assert conf['nodes'][0]["node_type"] == "_N"
     assert conf['edges'][0]['relation'] == ["_N", "_E", "_N"]
 
-    conf['nodes'][0]["node_type"] = "movie"
-    conf['edges'][0]['relation'] = ['movie', 'rating', 'movie']
-    verify_confs(conf, rev_edges=True)
-    assert conf['nodes'][0]["node_type"] == "movie"
-    assert conf['edges'][0]['relation'] == ["movie", "rating", "movie"]
-
     # multiple node types and edge types input
     conf = {
         "version": "gconstruct-v0.1", "nodes": [
@@ -1747,16 +1741,10 @@ def test_homogeneous():
             {"relation": ["movie", "rating", "movie"], "format": {"name": "parquet"},
              "files": "/data/ml-100k/edges_homo.parquet"}]
     }
-    verify_confs(conf, rev_edges=False)
+    verify_confs(conf)
     assert conf['nodes'][0]["node_type"] == "_N"
     assert conf['edges'][0]['relation'] == ["_N", "_E", "_N"]
 
-    conf['nodes'][0]["node_type"] = "movie"
-    conf['edges'][0]['relation'] = ['movie', 'rating', 'movie']
-    verify_confs(conf, rev_edges=True)
-    assert conf['nodes'][0]["node_type"] == "movie"
-    assert conf['edges'][0]['relation'] == ["movie", "rating", "movie"]
-
 if __name__ == '__main__':
     test_parse_edge_data()
     test_multiprocessing_checks()

From f71d8969e815a4f34f818a9ee008299676b3b795 Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Thu, 14 Dec 2023 21:16:19 +0000
Subject: [PATCH 12/21] test

---
 tests/end2end-tests/data_process/homogeneous_test.sh | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/end2end-tests/data_process/homogeneous_test.sh b/tests/end2end-tests/data_process/homogeneous_test.sh
index e76c4ef741..4f8db47e65 100644
--- a/tests/end2end-tests/data_process/homogeneous_test.sh
+++ b/tests/end2end-tests/data_process/homogeneous_test.sh
@@ -32,4 +32,16 @@ error_and_exit $?
 
 echo "********* Test Edge Classification on GConstruct Homogeneous Graph ********"
 python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --target-etype _N,_E,_N
+error_and_exit $?
+
+echo "********* Test Homogeneous Graph Optimization on reverse edge********"
+python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens_homogenous.json --num-processes 1 --output-dir /tmp/movielen_100k_train_val_1p_4t_homogeneous --graph-name movie-lens-100k --add-reverse-edges
+error_and_exit $?
+
+echo "********* Test Node Classification on GConstruct Homogeneous Graph on reverse edge********"
+python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --target-ntype _N
+error_and_exit $?
+
+echo "********* Test Edge Classification on GConstruct Homogeneous Graph on reverse edge ********"
+python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --target-etype _N,_E,_N
 error_and_exit $?
\ No newline at end of file

From 9fd4a7ab4e1360e54fe3c0a7b0f59cd1bf4135ee Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Thu, 14 Dec 2023 22:42:17 +0000
Subject: [PATCH 13/21] add reverse edge check

---
 .../graphstorm/gconstruct/construct_graph.py  |  7 ++-
 .../data_process/check_homogeneous.py         | 60 +++++++++++++++++++
 .../data_process/homogeneous_test.sh          | 14 +++--
 3 files changed, 76 insertions(+), 5 deletions(-)
 create mode 100644 tests/end2end-tests/data_process/check_homogeneous.py

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 56d28e87f2..096f9666b7 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -737,12 +737,17 @@ def process_graph(args):
             edges1[DEFAULT_ETYPE] = (np.concatenate([e[0], e[1]]),
                                      np.concatenate([e[1], e[0]]))
             # Double edge feature as it is necessary to match tensor size in generated graph
+            # Only generate mask on original graph
             if DEFAULT_ETYPE in edge_data:
                 data = edge_data[DEFAULT_ETYPE]
                 logging.warning("Reverse edge for homogeneous graph will have same feature as "
                                 "what we have in the original edges")
                 for key, value in data.items():
-                    data[key] = np.concatenate([value, value])
+                    if key not in ["train_mask", "test_mask", "val_mask"]:
+                        data[key] = np.concatenate([value, value])
+                    else:
+                        data[key] = np.concatenate([value, [0]*len(value)])
+
         else:
             for etype in edges:
                 e = edges[etype]
diff --git a/tests/end2end-tests/data_process/check_homogeneous.py b/tests/end2end-tests/data_process/check_homogeneous.py
new file mode 100644
index 0000000000..daeb6f0ada
--- /dev/null
+++ b/tests/end2end-tests/data_process/check_homogeneous.py
@@ -0,0 +1,60 @@
+"""
+    Copyright 2023 Contributors
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+"""
+import os
+import argparse
+import dgl
+from dgl.distributed.constants import DEFAULT_NTYPE, DEFAULT_ETYPE
+from numpy.testing import assert_almost_equal
+
+
+def check_reverse_edge(args):
+    """Assert the graph at rev_graph_path is the orig_graph_path graph with its edges doubled."""
+    g_orig = dgl.load_graphs(os.path.join(args.orig_graph_path, "graph.dgl"))[0][0]
+    g_rev = dgl.load_graphs(os.path.join(args.rev_graph_path, "graph.dgl"))[0][0]
+    assert g_orig.ntypes == g_rev.ntypes
+    assert g_orig.etypes == g_rev.etypes
+    assert g_orig.number_of_nodes(DEFAULT_NTYPE) == g_rev.number_of_nodes(DEFAULT_NTYPE)
+    assert 2 * g_orig.number_of_edges(DEFAULT_ETYPE) == g_rev.number_of_edges(DEFAULT_ETYPE)
+    for ntype in g_orig.ntypes:
+        assert g_orig.number_of_nodes(ntype) == g_rev.number_of_nodes(ntype)
+        for name in g_orig.nodes[ntype].data:
+            # Skip '*_mask' entries: the data split is random, so masks may differ between graphs.
+            if 'mask' not in name:
+                assert_almost_equal(g_orig.nodes[ntype].data[name].numpy(),
+                                    g_rev.nodes[ntype].data[name].numpy())
+
+    # Edge features: the first half must equal the original; masks must be 0 on the second half.
+    g_orig_feat = dgl.data.load_tensors(os.path.join(args.orig_graph_path, "edge_feat.dgl"))
+    g_rev_feat = dgl.data.load_tensors(os.path.join(args.rev_graph_path, "edge_feat.dgl"))
+    for feat_type in g_orig_feat.keys():
+        if "mask" not in feat_type:
+            assert_almost_equal(g_orig_feat[feat_type].numpy(),
+                                g_rev_feat[feat_type].numpy()[:g_orig.number_of_edges(DEFAULT_ETYPE)])
+        else:
+            assert_almost_equal(g_rev_feat[feat_type].numpy()[g_orig.number_of_edges(DEFAULT_ETYPE):],
+                                [0] * g_orig.number_of_edges(DEFAULT_ETYPE))
+
+if __name__ == '__main__':
+    argparser = argparse.ArgumentParser(description="Check reverse edges of a homogeneous graph")
+    argparser.add_argument("--orig-graph-path", type=str, default="/tmp/movielen_100k_train_val_1p_4t_homogeneous/part0/",
+                           help="Partition directory of the graph built without --add-reverse-edges")
+    argparser.add_argument("--rev-graph-path", type=str, default="/tmp/movielen_100k_train_val_1p_4t_homogeneous_rev/part0/",
+                           help="Partition directory of the graph built with --add-reverse-edges")
+    # Both directories are only read; each must contain graph.dgl and edge_feat.dgl.
+    args = argparser.parse_args()
+
+    check_reverse_edge(args)
\ No newline at end of file
diff --git a/tests/end2end-tests/data_process/homogeneous_test.sh b/tests/end2end-tests/data_process/homogeneous_test.sh
index 4f8db47e65..7f96d004fe 100644
--- a/tests/end2end-tests/data_process/homogeneous_test.sh
+++ b/tests/end2end-tests/data_process/homogeneous_test.sh
@@ -23,7 +23,10 @@ error_and_exit () {
 
 
 echo "********* Test Homogeneous Graph Optimization ********"
-python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens_homogenous.json --num-processes 1 --output-dir /tmp/movielen_100k_train_val_1p_4t_homogeneous --graph-name movie-lens-100k
+python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens_homogeneous.json --num-processes 1 --output-dir /tmp/movielen_100k_train_val_1p_4t_homogeneous --graph-name movie-lens-100k
+error_and_exit $?
+
+python3 $GS_HOME/tests/end2end-tests/data_process/check_homogeneous.py
 error_and_exit $?
 
 echo "********* Test Node Classification on GConstruct Homogeneous Graph ********"
@@ -35,13 +38,16 @@ python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_s
 error_and_exit $?
 
 echo "********* Test Homogeneous Graph Optimization on reverse edge********"
-python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens_homogenous.json --num-processes 1 --output-dir /tmp/movielen_100k_train_val_1p_4t_homogeneous --graph-name movie-lens-100k --add-reverse-edges
+python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens_homogeneous.json --num-processes 1 --output-dir /tmp/movielen_100k_train_val_1p_4t_homogeneous_rev --graph-name movie-lens-100k --add-reverse-edges
+error_and_exit $?
+
+python3 $GS_HOME/tests/end2end-tests/data_process/check_homogeneous.py
 error_and_exit $?
 
 echo "********* Test Node Classification on GConstruct Homogeneous Graph on reverse edge********"
-python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --target-ntype _N
+python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous_rev/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --target-ntype _N
 error_and_exit $?
 
 echo "********* Test Edge Classification on GConstruct Homogeneous Graph on reverse edge ********"
-python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --target-etype _N,_E,_N
+python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous_rev/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --target-etype _N,_E,_N
 error_and_exit $?
\ No newline at end of file

From 7ca956390d82e72d0487b0fa723a91cae7edb392 Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Thu, 14 Dec 2023 23:02:19 +0000
Subject: [PATCH 14/21] change name

---
 tests/end2end-tests/data_gen/movielens_homogeneous.json | 4 ++--
 tests/end2end-tests/data_gen/process_movielens.py       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/end2end-tests/data_gen/movielens_homogeneous.json b/tests/end2end-tests/data_gen/movielens_homogeneous.json
index 79e3e27ceb..018776e82e 100644
--- a/tests/end2end-tests/data_gen/movielens_homogeneous.json
+++ b/tests/end2end-tests/data_gen/movielens_homogeneous.json
@@ -41,7 +41,7 @@
                     "dest_id_col":      "dst_id",
                     "relation":         ["movie", "rating", "movie"],
                     "format":           {"name": "parquet"},
-                    "files":        "/data/ml-100k/edges_homo.parquet",
+                    "files":        "/data/ml-100k/edges_homogeneous.parquet",
                     "features": [
                                   {
                                    "feature_col":  "rate"
@@ -57,7 +57,7 @@
             {
                     "relation":         ["movie", "rating", "movie"],
                     "format":           {"name": "parquet"},
-                    "files":        "/data/ml-100k/edges_homo.parquet"
+                    "files":        "/data/ml-100k/edges_homogeneous.parquet"
             }
     ]
 }
\ No newline at end of file
diff --git a/tests/end2end-tests/data_gen/process_movielens.py b/tests/end2end-tests/data_gen/process_movielens.py
index 9ecc34de35..a9ca90873e 100644
--- a/tests/end2end-tests/data_gen/process_movielens.py
+++ b/tests/end2end-tests/data_gen/process_movielens.py
@@ -93,7 +93,7 @@ def write_data_parquet(data, data_file):
 # generate data for homogeneous optimization test
 edges = pandas.read_csv('/data/ml-100k/u.data', delimiter='\t', header=None)
 edge_data = {'src_id': edges[1], 'dst_id': edges[1], 'rate': edges[2]}
-write_data_parquet(edge_data, '/data/ml-100k/edges_homo.parquet')
+write_data_parquet(edge_data, '/data/ml-100k/edges_homogeneous.parquet')
 
 # generate synthetic user data with label
 user_labels = np.random.randint(11, size=feat.shape[0])

From ee2762e6c1af3f259183780093862b2c86f9240d Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Thu, 14 Dec 2023 23:04:02 +0000
Subject: [PATCH 15/21] remove redundant

---
 python/graphstorm/gconstruct/construct_graph.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 096f9666b7..5b32a72686 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -733,7 +733,6 @@ def process_graph(args):
                             "add -rev as a suffix")
             e = edges[DEFAULT_ETYPE]
             assert isinstance(e, tuple) and len(e) == 2
-            edges1[DEFAULT_ETYPE] = e
             edges1[DEFAULT_ETYPE] = (np.concatenate([e[0], e[1]]),
                                      np.concatenate([e[1], e[0]]))
             # Double edge feature as it is necessary to match tensor size in generated graph

From 7a0824f484ec03e45fe9943cc6e41f668236aabd Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Thu, 14 Dec 2023 23:52:31 +0000
Subject: [PATCH 16/21] change order

---
 tests/end2end-tests/data_process/homogeneous_test.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/end2end-tests/data_process/homogeneous_test.sh b/tests/end2end-tests/data_process/homogeneous_test.sh
index 7f96d004fe..fe42732ebf 100644
--- a/tests/end2end-tests/data_process/homogeneous_test.sh
+++ b/tests/end2end-tests/data_process/homogeneous_test.sh
@@ -26,9 +26,6 @@ echo "********* Test Homogeneous Graph Optimization ********"
 python3 -m graphstorm.gconstruct.construct_graph --conf-file $GS_HOME/tests/end2end-tests/data_gen/movielens_homogeneous.json --num-processes 1 --output-dir /tmp/movielen_100k_train_val_1p_4t_homogeneous --graph-name movie-lens-100k
 error_and_exit $?
 
-python3 $GS_HOME/tests/end2end-tests/data_process/check_homogeneous.py
-error_and_exit $?
-
 echo "********* Test Node Classification on GConstruct Homogeneous Graph ********"
 python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --target-ntype _N
 error_and_exit $?

From debc2ff042c07dc29c34c0ff19011f5b334444b8 Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Fri, 15 Dec 2023 00:40:46 +0000
Subject: [PATCH 17/21] add

---
 python/graphstorm/gconstruct/construct_graph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 5b32a72686..6f63c3ee6e 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -737,7 +737,7 @@ def process_graph(args):
                                      np.concatenate([e[1], e[0]]))
             # Double edge feature as it is necessary to match tensor size in generated graph
             # Only generate mask on original graph
-            if DEFAULT_ETYPE in edge_data:
+            if edge_data:
                 data = edge_data[DEFAULT_ETYPE]
                 logging.warning("Reverse edge for homogeneous graph will have same feature as "
                                 "what we have in the original edges")

From 77b4b5f8292845108232d06e14426860cf4f54a8 Mon Sep 17 00:00:00 2001
From: jalencato <jalencato23pistons@gmail.com>
Date: Fri, 15 Dec 2023 11:03:22 -0800
Subject: [PATCH 18/21] Apply suggestions from code review

Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com>
---
 python/graphstorm/gconstruct/construct_graph.py      | 2 +-
 tests/end2end-tests/data_process/homogeneous_test.sh | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index 6f63c3ee6e..b16b4ef936 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -745,7 +745,7 @@ def process_graph(args):
                     if key not in ["train_mask", "test_mask", "val_mask"]:
                         data[key] = np.concatenate([value, value])
                     else:
-                        data[key] = np.concatenate([value, [0]*len(value)])
+                        data[key] = np.concatenate([value, numpy.zeros(value.shape, dtype=value.dtype)])
 
         else:
             for etype in edges:
diff --git a/tests/end2end-tests/data_process/homogeneous_test.sh b/tests/end2end-tests/data_process/homogeneous_test.sh
index fe42732ebf..dd3f55907d 100644
--- a/tests/end2end-tests/data_process/homogeneous_test.sh
+++ b/tests/end2end-tests/data_process/homogeneous_test.sh
@@ -41,10 +41,10 @@ error_and_exit $?
 python3 $GS_HOME/tests/end2end-tests/data_process/check_homogeneous.py
 error_and_exit $?
 
-echo "********* Test Node Classification on GConstruct Homogeneous Graph on reverse edge********"
+echo "********* Test Node Classification on GConstruct Homogeneous Graph with reverse edge********"
 python3 -m graphstorm.run.gs_node_classification --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous_rev/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --target-ntype _N
 error_and_exit $?
 
-echo "********* Test Edge Classification on GConstruct Homogeneous Graph on reverse edge ********"
+echo "********* Test Edge Classification on GConstruct Homogeneous Graph with reverse edge ********"
 python3 -m graphstorm.run.gs_edge_classification --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /tmp/movielen_100k_train_val_1p_4t_homogeneous_rev/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --target-etype _N,_E,_N
 error_and_exit $?
\ No newline at end of file

From f02c0d61cd057b1fc9ba0c6fbf2488336112028d Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Fri, 15 Dec 2023 19:46:35 +0000
Subject: [PATCH 19/21] apply comments

---
 .../graphstorm/gconstruct/construct_graph.py  | 16 ++++++------
 .../data_process/movielens_test.sh            |  3 +--
 .../gconstruct/test_construct_graph.py        | 25 ++++++++++++++++++-
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index b16b4ef936..ebd8143593 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -591,8 +591,13 @@ def is_homogeneous(confs):
         A dict containing all user input config
     """
     ntypes = {conf['node_type'] for conf in confs["nodes"]}
-    etypes = set(tuple(conf['relation']) for conf in confs["edges"])
-    return len(ntypes) == 1 and len(etypes) == 1
+    etypes = [conf['relation'] for conf in confs["edges"]]
+    etypes_set = set(tuple(conf['relation']) for conf in confs["edges"])
+    assert etypes[0][0] in ntypes, \
+        f"source node type {etypes[0][0]} does not exist. Please check your input data."
+    assert etypes[0][2] in ntypes, \
+        f"dest node type {etypes[0][2]} does not exist. Please check your input data."
+    return len(ntypes) == 1 and len(etypes_set) == 1
 
 def verify_confs(confs):
     """ Verify the configuration of the input data.
@@ -609,10 +614,6 @@ def verify_confs(confs):
     etypes = [conf['relation'] for conf in confs["edges"]]
     # Adjust input to DGL homogeneous graph format if it is a homogeneous graph
     if is_homogeneous(confs):
-        assert etypes[0][0] in ntypes, \
-            f"source node type {etypes[0][0]} does not exist. Please check your input data."
-        assert etypes[0][2] in ntypes, \
-            f"dest node type {etypes[0][2]} does not exist. Please check your input data."
         logging.warning("Generated Graph is a homogeneous graph, so the node type will be "
                         "changed to _N and edge type will be changed to [_N, _E, _N]")
         for node in confs['nodes']:
@@ -745,7 +746,8 @@ def process_graph(args):
                     if key not in ["train_mask", "test_mask", "val_mask"]:
                         data[key] = np.concatenate([value, value])
                     else:
-                        data[key] = np.concatenate([value, numpy.zeros(value.shape, dtype=value.dtype)])
+                        data[key] = np.concatenate([value, np.zeros(value.shape,
+                                                                       dtype=value.dtype)])
 
         else:
             for etype in edges:
diff --git a/tests/end2end-tests/data_process/movielens_test.sh b/tests/end2end-tests/data_process/movielens_test.sh
index 200d1f8764..455330bc38 100644
--- a/tests/end2end-tests/data_process/movielens_test.sh
+++ b/tests/end2end-tests/data_process/movielens_test.sh
@@ -5,9 +5,8 @@ service ssh restart
 GS_HOME=$(pwd)
 NUM_TRAINERS=4
 export PYTHONPATH=$GS_HOME/python/
-cd $GS_HOME/training_scripts/gsgnn_np
-echo "127.0.0.1" > ip_list.txt
 cd $GS_HOME/training_scripts/gsgnn_ep
+
 echo "127.0.0.1" > ip_list.txt
 
 error_and_exit () {
diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index 672105d435..9913f620d8 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -13,6 +13,7 @@
     See the License for the specific language governing permissions and
     limitations under the License.
 """
+import copy
 import random
 import os
 import tempfile
@@ -22,11 +23,12 @@
 import numpy as np
 import dgl
 import torch as th
+import copy
 
 from functools import partial
 from numpy.testing import assert_equal, assert_almost_equal
 
-from graphstorm.gconstruct.construct_graph import parse_edge_data, verify_confs
+from graphstorm.gconstruct.construct_graph import parse_edge_data, verify_confs, is_homogeneous
 from graphstorm.gconstruct.file_io import write_data_parquet, read_data_parquet
 from graphstorm.gconstruct.file_io import write_data_json, read_data_json
 from graphstorm.gconstruct.file_io import write_data_csv, read_data_csv
@@ -1720,9 +1722,20 @@ def test_homogeneous():
              "format": {"name": "parquet"}, "files": "/data/ml-100k/edges_homo.parquet", "labels": [
                 {"label_col": "rate", "task_type": "classification", "split_pct": [0.1, 0.1, 0.1]}]}]
     }
+    assert is_homogeneous(conf)
     verify_confs(conf)
     assert conf['nodes'][0]["node_type"] == "_N"
     assert conf['edges'][0]['relation'] == ["_N", "_E", "_N"]
+    conf["edges"][0]["relation"] = ["movie_fake", "rating", "movie"]
+    try:
+        is_homogeneous(conf)
+    except AssertionError as e:
+        assert str(e) == "source node type movie_fake does not exist. Please check your input data."
+    conf["nodes"].append(copy.deepcopy(conf["nodes"][0]))
+    conf["nodes"][0]["node_type"] = "movie"
+    conf["nodes"][1]["node_type"] = "movie_fake"
+    assert not is_homogeneous(conf)
+
 
     # multiple node types and edge types input
     conf = {
@@ -1741,9 +1754,19 @@ def test_homogeneous():
             {"relation": ["movie", "rating", "movie"], "format": {"name": "parquet"},
              "files": "/data/ml-100k/edges_homo.parquet"}]
     }
+    assert is_homogeneous(conf)
     verify_confs(conf)
     assert conf['nodes'][0]["node_type"] == "_N"
     assert conf['edges'][0]['relation'] == ["_N", "_E", "_N"]
+    conf["edges"][0]["relation"] = ["movie_fake", "rating", "movie"]
+    try:
+        is_homogeneous(conf)
+    except AssertionError as e:
+        assert str(e) == "source node type movie_fake does not exist. Please check your input data."
+    conf["nodes"].append(copy.deepcopy(conf["nodes"][0]))
+    conf["nodes"][0]["node_type"] = "movie"
+    conf["nodes"][1]["node_type"] = "movie_fake"
+    assert not is_homogeneous(conf)
 
 if __name__ == '__main__':
     test_parse_edge_data()

From 5dca4ad081c5773c73d7c58f476d60c8146ddd62 Mon Sep 17 00:00:00 2001
From: JalenCato <jalencato23pistons@gmail.com>
Date: Fri, 15 Dec 2023 19:54:25 +0000
Subject: [PATCH 20/21] refactor

---
 .../graphstorm/gconstruct/construct_graph.py  | 25 ++++++++-----------
 .../gconstruct/test_construct_graph.py        |  8 ------
 2 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py
index ebd8143593..259399328b 100644
--- a/python/graphstorm/gconstruct/construct_graph.py
+++ b/python/graphstorm/gconstruct/construct_graph.py
@@ -591,13 +591,8 @@ def is_homogeneous(confs):
         A dict containing all user input config
     """
     ntypes = {conf['node_type'] for conf in confs["nodes"]}
-    etypes = [conf['relation'] for conf in confs["edges"]]
-    etypes_set = set(tuple(conf['relation']) for conf in confs["edges"])
-    assert etypes[0][0] in ntypes, \
-        f"source node type {etypes[0][0]} does not exist. Please check your input data."
-    assert etypes[0][2] in ntypes, \
-        f"dest node type {etypes[0][2]} does not exist. Please check your input data."
-    return len(ntypes) == 1 and len(etypes_set) == 1
+    etypes = set(tuple(conf['relation']) for conf in confs["edges"])
+    return len(ntypes) == 1 and len(etypes) == 1
 
 def verify_confs(confs):
     """ Verify the configuration of the input data.
@@ -612,14 +607,6 @@ def verify_confs(confs):
             "The config file does not have a 'version' entry. Assuming gconstruct-v0.1")
     ntypes = {conf['node_type'] for conf in confs["nodes"]}
     etypes = [conf['relation'] for conf in confs["edges"]]
-    # Adjust input to DGL homogeneous graph format if it is a homogeneous graph
-    if is_homogeneous(confs):
-        logging.warning("Generated Graph is a homogeneous graph, so the node type will be "
-                        "changed to _N and edge type will be changed to [_N, _E, _N]")
-        for node in confs['nodes']:
-            node['node_type'] = DEFAULT_NTYPE
-        for edge in confs['edges']:
-            edge['relation'] = list(DEFAULT_ETYPE)
     for etype in etypes:
         assert len(etype) == 3, \
                 "The edge type must be (source node type, relation type, dest node type)."
@@ -628,6 +615,14 @@ def verify_confs(confs):
                 f"source node type {src_type} does not exist. Please check your input data."
         assert dst_type in ntypes, \
                 f"dest node type {dst_type} does not exist. Please check your input data."
+    # Adjust input to DGL homogeneous graph format if it is a homogeneous graph
+    if is_homogeneous(confs):
+        logging.warning("Generated Graph is a homogeneous graph, so the node type will be "
+                        "changed to _N and edge type will be changed to [_N, _E, _N]")
+        for node in confs['nodes']:
+            node['node_type'] = DEFAULT_NTYPE
+        for edge in confs['edges']:
+            edge['relation'] = list(DEFAULT_ETYPE)
 
 def print_graph_info(g, node_data, edge_data, node_label_stats, edge_label_stats):
     """ Print graph information.
diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index 9913f620d8..384d36fdd3 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -1727,10 +1727,6 @@ def test_homogeneous():
     assert conf['nodes'][0]["node_type"] == "_N"
     assert conf['edges'][0]['relation'] == ["_N", "_E", "_N"]
     conf["edges"][0]["relation"] = ["movie_fake", "rating", "movie"]
-    try:
-        is_homogeneous(conf)
-    except AssertionError as e:
-        assert str(e) == "source node type movie_fake does not exist. Please check your input data."
     conf["nodes"].append(copy.deepcopy(conf["nodes"][0]))
     conf["nodes"][0]["node_type"] = "movie"
     conf["nodes"][1]["node_type"] = "movie_fake"
@@ -1759,10 +1755,6 @@ def test_homogeneous():
     assert conf['nodes'][0]["node_type"] == "_N"
     assert conf['edges'][0]['relation'] == ["_N", "_E", "_N"]
     conf["edges"][0]["relation"] = ["movie_fake", "rating", "movie"]
-    try:
-        is_homogeneous(conf)
-    except AssertionError as e:
-        assert str(e) == "source node type movie_fake does not exist. Please check your input data."
     conf["nodes"].append(copy.deepcopy(conf["nodes"][0]))
     conf["nodes"][0]["node_type"] = "movie"
     conf["nodes"][1]["node_type"] = "movie_fake"

From 4c0b9965377e286e5b13f2a7f714f85357e3888e Mon Sep 17 00:00:00 2001
From: jalencato <jalencato23pistons@gmail.com>
Date: Fri, 15 Dec 2023 13:39:01 -0800
Subject: [PATCH 21/21] Update test_construct_graph.py

---
 tests/unit-tests/gconstruct/test_construct_graph.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/unit-tests/gconstruct/test_construct_graph.py b/tests/unit-tests/gconstruct/test_construct_graph.py
index 384d36fdd3..d7c9ae6650 100644
--- a/tests/unit-tests/gconstruct/test_construct_graph.py
+++ b/tests/unit-tests/gconstruct/test_construct_graph.py
@@ -13,7 +13,6 @@
     See the License for the specific language governing permissions and
     limitations under the License.
 """
-import copy
 import random
 import os
 import tempfile
@@ -1779,4 +1778,4 @@ def test_homogeneous():
     test_multicolumn(None)
     test_multicolumn("/")
     test_feature_wrapper()
-    test_homogeneous()
\ No newline at end of file
+    test_homogeneous()