From 1d6432f438dbdbdbcf72d25a1380cf3c420ca1cc Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 21:08:09 +0000 Subject: [PATCH 01/42] initial commit --- python/graphstorm/run/gs_gen_embedding.py | 0 python/graphstorm/run/gsgnn_emb/__init__.py | 0 python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/graphstorm/run/gs_gen_embedding.py create mode 100644 python/graphstorm/run/gsgnn_emb/__init__.py create mode 100644 python/graphstorm/run/gsgnn_emb/gsgnn_emb.py diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_embedding.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/graphstorm/run/gsgnn_emb/__init__.py b/python/graphstorm/run/gsgnn_emb/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py new file mode 100644 index 0000000000..e69de29bb2 From a841911e50d16c1530aee6e079a9bb30a529690b Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 21:54:47 +0000 Subject: [PATCH 02/42] first commit - no test --- python/graphstorm/run/gs_gen_embedding.py | 49 +++++++++ python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 105 +++++++++++++++++++ 2 files changed, 154 insertions(+) diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_embedding.py index e69de29bb2..a12f5a54bf 100644 --- a/python/graphstorm/run/gs_gen_embedding.py +++ b/python/graphstorm/run/gs_gen_embedding.py @@ -0,0 +1,49 @@ +""" + Copyright 2023 Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Entry point for running link prediction tasks. + + Run as: + python3 -m graphstorm.run.gs_gen_embedding +""" +import os +import logging + +from .launch import get_argument_parser +from .launch import check_input_arguments +from .launch import submit_jobs + +def main(): + """ Main function + """ + parser = get_argument_parser() + args, exec_script_args = parser.parse_known_args() + check_input_arguments(args) + + lib_dir = os.path.abspath(os.path.dirname(__file__)) + cmd = "gsgnn_emb/gsgnn_emb.py" + cmd_path = os.path.join(lib_dir, cmd) + exec_script_args = [cmd_path] + exec_script_args + + if "coo" not in args.graph_format: + args.graph_format = f"{args.graph_format},coo" + logging.debug("Automatically add COO format to graph formats for link prediction. 
" + \ + "New graph_format is %s", args.graph_format) + submit_jobs(args, exec_script_args) + +if __name__ == "__main__": + FMT = "%(asctime)s %(levelname)s %(message)s" + logging.basicConfig(format=FMT, level=logging.INFO) + main() \ No newline at end of file diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index e69de29bb2..af672e98ad 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -0,0 +1,105 @@ +""" + Copyright 2023 Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + GSgnn pure gpu link prediction. +""" + +import os + +import graphstorm as gs +from graphstorm.config import get_argument_parser +from graphstorm.config import GSConfig +from graphstorm.trainer import GSgnnLinkPredictionTrainer +from graphstorm.dataloading import GSgnnLPTrainData +from graphstorm.dataloading import GSgnnLinkPredictionDataLoader +from graphstorm.dataloading import GSgnnLPJointNegDataLoader +from graphstorm.dataloading import GSgnnLPLocalUniformNegDataLoader +from graphstorm.dataloading import GSgnnLPLocalJointNegDataLoader +from graphstorm.dataloading import GSgnnAllEtypeLPJointNegDataLoader +from graphstorm.dataloading import GSgnnAllEtypeLinkPredictionDataLoader +from graphstorm.dataloading import GSgnnLinkPredictionTestDataLoader +from graphstorm.dataloading import GSgnnLinkPredictionJointTestDataLoader +from graphstorm.dataloading import BUILTIN_LP_UNIFORM_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_JOINT_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_LOCALUNIFORM_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_LOCALJOINT_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_ALL_ETYPE_UNIFORM_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_ALL_ETYPE_JOINT_NEG_SAMPLER +from graphstorm.dataloading import (BUILTIN_FAST_LP_UNIFORM_NEG_SAMPLER, + BUILTIN_FAST_LP_JOINT_NEG_SAMPLER, + BUILTIN_FAST_LP_LOCALUNIFORM_NEG_SAMPLER, + BUILTIN_FAST_LP_LOCALJOINT_NEG_SAMPLER) +from graphstorm.dataloading import (FastGSgnnLinkPredictionDataLoader, + FastGSgnnLPJointNegDataLoader, + FastGSgnnLPLocalUniformNegDataLoader, + FastGSgnnLPLocalJointNegDataLoader) +from graphstorm.model.utils import save_embeddings +from graphstorm.model import do_full_graph_inference +from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph + + +def main(config_args): + """ main function + """ + config = GSConfig(config_args) + config.verify_arguments(True) + + gs.initialize(ip_config=config.ip_config, backend=config.backend, + use_wholegraph=use_wholegraph(config.part_config)) + rt_profiler.init(config.profile_path, rank=gs.get_rank()) + sys_tracker.init(config.verbose, rank=gs.get_rank()) + device = setup_device(config.local_rank) + train_data = GSgnnLPTrainData(config.graph_name, + config.part_config, + train_etypes=config.train_etype, + eval_etypes=config.eval_etype, + node_feat_field=config.node_feat_name, + 
pos_graph_feat_field=config.lp_edge_weight_for_loss) + + # Preparing input layer for training or inference. + # The input layer can pre-compute node features in the preparing step if needed. + # For example pre-compute all BERT embeddings + assert (config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings") + assert (config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings") + + model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) + model_path = config.restore_model_path + # TODO(zhengda) the model path has to be in a shared filesystem. + model.restore_model(model_path) + # Preparing input layer for training or inference. + # The input layer can pre-compute node features in the preparing step if needed. + # For example pre-compute all BERT embeddings + model.prepare_input_encoder(train_data) + # TODO(zhengda) we may not want to only use training edges to generate GNN embeddings. + embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, + edge_mask="train_mask", task_tracker=tracker) + save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), + gs.get_world_size(), + device=device, + node_id_mapping_file=config.node_id_mapping_file) + + +def generate_parser(): + """ Generate an argument parser + """ + parser = get_argument_parser() + return parser + + +if __name__ == '__main__': + arg_parser = generate_parser() + + args = arg_parser.parse_args() + main(args) From 7607069f165378404ef26285999a0daf129d7195 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 22:13:22 +0000 Subject: [PATCH 03/42] remove unnecessary dependency --- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 22 -------------------- 1 file changed, 22 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index af672e98ad..016891ab53 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -23,28 +23,6 @@ from graphstorm.config import GSConfig from graphstorm.trainer import GSgnnLinkPredictionTrainer from graphstorm.dataloading import GSgnnLPTrainData -from graphstorm.dataloading import GSgnnLinkPredictionDataLoader -from graphstorm.dataloading import GSgnnLPJointNegDataLoader -from graphstorm.dataloading import GSgnnLPLocalUniformNegDataLoader -from graphstorm.dataloading import GSgnnLPLocalJointNegDataLoader -from graphstorm.dataloading import GSgnnAllEtypeLPJointNegDataLoader -from graphstorm.dataloading import GSgnnAllEtypeLinkPredictionDataLoader -from graphstorm.dataloading import GSgnnLinkPredictionTestDataLoader -from graphstorm.dataloading import GSgnnLinkPredictionJointTestDataLoader -from graphstorm.dataloading import BUILTIN_LP_UNIFORM_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_JOINT_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_LOCALUNIFORM_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_LOCALJOINT_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_ALL_ETYPE_UNIFORM_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_ALL_ETYPE_JOINT_NEG_SAMPLER -from graphstorm.dataloading import (BUILTIN_FAST_LP_UNIFORM_NEG_SAMPLER, - BUILTIN_FAST_LP_JOINT_NEG_SAMPLER, - BUILTIN_FAST_LP_LOCALUNIFORM_NEG_SAMPLER, - BUILTIN_FAST_LP_LOCALJOINT_NEG_SAMPLER) -from graphstorm.dataloading import (FastGSgnnLinkPredictionDataLoader, - FastGSgnnLPJointNegDataLoader, - FastGSgnnLPLocalUniformNegDataLoader, - 
FastGSgnnLPLocalJointNegDataLoader) from graphstorm.model.utils import save_embeddings from graphstorm.model import do_full_graph_inference from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph From 4621c6e6b3396bb75520adac1eddfcfc61319516 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 22:17:50 +0000 Subject: [PATCH 04/42] change config --- python/graphstorm/run/gs_gen_embedding.py | 2 +- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_embedding.py index a12f5a54bf..4c95164b62 100644 --- a/python/graphstorm/run/gs_gen_embedding.py +++ b/python/graphstorm/run/gs_gen_embedding.py @@ -16,7 +16,7 @@ Entry point for running link prediction tasks. Run as: - python3 -m graphstorm.run.gs_gen_embedding + python3 -m graphstorm.run.gs_gen_embedding """ import os import logging diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index 016891ab53..08e483cf33 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. - GSgnn pure gpu link prediction. + GSgnn pure gpu generate embeddings. """ import os From d8c0309a8f42e6449bbd48394777d95b5d38cf23 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 23:32:34 +0000 Subject: [PATCH 05/42] fix lint --- python/graphstorm/run/launch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/graphstorm/run/launch.py b/python/graphstorm/run/launch.py index b23d19a263..6d7e1b2b70 100644 --- a/python/graphstorm/run/launch.py +++ b/python/graphstorm/run/launch.py @@ -908,7 +908,7 @@ def check_input_arguments(args): ), "--num-servers must be a positive number." assert ( args.part_config is not None - ), "A user has to specify a partition configuration file with --part-onfig." + ), "A user has to specify a partition configuration file with --part-config." assert ( args.ip_config is not None ), "A user has to specify an IP configuration file with --ip-config." 
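
A note on the two assert statements PATCH 02 adds to gsgnn_emb.py above: wrapping the condition and the message in one pair of parentheses, as in assert (config.save_embed_path is not None, "..."), makes Python assert a two-element tuple, and a non-empty tuple is always truthy, so the check can never fail. PATCH 09 below rewrites both statements without the parentheses. A minimal runnable sketch of the difference (illustrative only, not part of the patch series):

    # PATCH 02 form: asserts a 2-tuple. Non-empty tuples are truthy, so
    # this "check" silently passes even though the path is missing.
    save_embed_path = None
    assert (save_embed_path is not None, "save embed path cannot be none")

    # PATCH 09 form: condition and message are the two operands of the
    # assert statement itself, so the check raises as intended.
    try:
        assert save_embed_path is not None, \
            "save embed path cannot be none"
    except AssertionError as err:
        print("corrected assert raised:", err)
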
From 323dbb0cfb198b19fa06abc784da03b003189b12 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 19:51:45 +0000 Subject: [PATCH 06/42] test --- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 43 ++++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index 08e483cf33..2426326e5e 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -21,12 +21,15 @@ import graphstorm as gs from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig -from graphstorm.trainer import GSgnnLinkPredictionTrainer -from graphstorm.dataloading import GSgnnLPTrainData +from graphstorm.dataloading import GSgnnLPTrainData, GSgnnNodeTrainData, GSgnnEdgeTrainData from graphstorm.model.utils import save_embeddings from graphstorm.model import do_full_graph_inference from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph - +from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, + BUILTIN_TASK_NODE_REGRESSION, + BUILTIN_TASK_EDGE_CLASSIFICATION, + BUILTIN_TASK_EDGE_REGRESSION, + BUILTIN_TASK_LINK_PREDICTION) def main(config_args): """ main function @@ -39,16 +42,32 @@ def main(config_args): rt_profiler.init(config.profile_path, rank=gs.get_rank()) sys_tracker.init(config.verbose, rank=gs.get_rank()) device = setup_device(config.local_rank) - train_data = GSgnnLPTrainData(config.graph_name, - config.part_config, - train_etypes=config.train_etype, - eval_etypes=config.eval_etype, - node_feat_field=config.node_feat_name, - pos_graph_feat_field=config.lp_edge_weight_for_loss) - # Preparing input layer for training or inference. - # The input layer can pre-compute node features in the preparing step if needed. - # For example pre-compute all BERT embeddings + if config.task_type == BUILTIN_TASK_LINK_PREDICTION: + train_data = GSgnnLPTrainData(config.graph_name, + config.part_config, + train_etypes=config.train_etype, + eval_etypes=config.eval_etype, + node_feat_field=config.node_feat_name, + pos_graph_feat_field=config.lp_edge_weight_for_loss) + elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION: + train_data = GSgnnNodeTrainData(config.graph_name, + config.part_config, + train_ntypes=config.target_ntype, + eval_ntypes=config.eval_target_ntype, + node_feat_field=config.node_feat_name, + label_field=config.label_field) + elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION: + train_data = GSgnnEdgeTrainData(config.graph_name, + config.part_config, + train_etypes=config.target_etype, + node_feat_field=config.node_feat_name, + label_field=config.label_field, + decoder_edge_feat=config.decoder_edge_feat) + else: + raise TypeError("Not supported for task type: ", config.task_type) + + # assert the setting for the graphstorm embedding generation. 
assert (config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings") assert (config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings") From 05807e36577de1fd8b3860c9d5ac03cab98b7213 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 21:03:37 +0000 Subject: [PATCH 07/42] fix save_embed path --- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 22 ++++++++++++++------ python/graphstorm/run/gsgnn_ep/gsgnn_ep.py | 3 ++- python/graphstorm/run/gsgnn_lp/gsgnn_lp.py | 3 ++- python/graphstorm/run/gsgnn_np/gsgnn_np.py | 3 ++- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index 2426326e5e..bcbb5436d9 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -42,8 +42,11 @@ def main(config_args): rt_profiler.init(config.profile_path, rank=gs.get_rank()) sys_tracker.init(config.verbose, rank=gs.get_rank()) device = setup_device(config.local_rank) + tracker = gs.create_builtin_task_tracker(config) + if gs.get_rank() == 0: + tracker.log_params(config.__dict__) - if config.task_type == BUILTIN_TASK_LINK_PREDICTION: + if config.task_type == BUILTIN_TASK_LINK_PREDICTION or not config.task_type: train_data = GSgnnLPTrainData(config.graph_name, config.part_config, train_etypes=config.train_etype, @@ -68,10 +71,16 @@ def main(config_args): raise TypeError("Not supported for task type: ", config.task_type) # assert the setting for the graphstorm embedding generation. - assert (config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings") - assert (config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings") + assert config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings" + assert config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings" + + if config.task_type == BUILTIN_TASK_LINK_PREDICTION or not config.task_type: + model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) + elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION: + model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False) + elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION: + model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False) - model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) model_path = config.restore_model_path # TODO(zhengda) the model path has to be in a shared filesystem. model.restore_model(model_path) @@ -81,11 +90,12 @@ def main(config_args): model.prepare_input_encoder(train_data) # TODO(zhengda) we may not want to only use training edges to generate GNN embeddings. 
embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, - edge_mask="train_mask", task_tracker=tracker) + task_tracker=tracker) save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), device=device, - node_id_mapping_file=config.node_id_mapping_file) + node_id_mapping_file=config.node_id_mapping_file, + save_embed_format=config.save_embed_format) def generate_parser(): diff --git a/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py b/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py index 7acb4cd134..dae6ea727d 100644 --- a/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py +++ b/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py @@ -156,7 +156,8 @@ def main(config_args): save_embeddings(config.save_embed_path, embs, gs.get_rank(), gs.get_world_size(), device=device, - node_id_mapping_file=config.node_id_mapping_file) + node_id_mapping_file=config.node_id_mapping_file, + save_embed_format=save_embed_format) def generate_parser(): """ Generate an argument parser diff --git a/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py b/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py index b89e568424..043e13c40f 100644 --- a/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py +++ b/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py @@ -202,7 +202,8 @@ def main(config_args): save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), device=device, - node_id_mapping_file=config.node_id_mapping_file) + node_id_mapping_file=config.node_id_mapping_file, + save_embed_format=save_embed_format) def generate_parser(): """ Generate an argument parser diff --git a/python/graphstorm/run/gsgnn_np/gsgnn_np.py b/python/graphstorm/run/gsgnn_np/gsgnn_np.py index 42f78fed99..2d7e8e240e 100644 --- a/python/graphstorm/run/gsgnn_np/gsgnn_np.py +++ b/python/graphstorm/run/gsgnn_np/gsgnn_np.py @@ -156,7 +156,8 @@ def main(config_args): save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), device=device, - node_id_mapping_file=config.node_id_mapping_file) + node_id_mapping_file=config.node_id_mapping_file, + save_embed_format=config.save_embed_format) def generate_parser(): """ Generate an argument parser From 6514afe650ba77c1783148ec77a6a618e2f8b57d Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 21:17:55 +0000 Subject: [PATCH 08/42] add test --- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 2 +- tests/end2end-tests/graphstorm-ec/mgpu_test.sh | 9 +++++++++ tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 9 +++++++++ tests/end2end-tests/graphstorm-nc/mgpu_test.sh | 9 +++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index bcbb5436d9..d99b38bc71 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -46,7 +46,7 @@ def main(config_args): if gs.get_rank() == 0: tracker.log_params(config.__dict__) - if config.task_type == BUILTIN_TASK_LINK_PREDICTION or not config.task_type: + if config.task_type == BUILTIN_TASK_LINK_PREDICTION: train_data = GSgnnLPTrainData(config.graph_name, config.part_config, train_etypes=config.train_etype, diff --git a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh index 71928c453a..332c622d0d 100644 --- a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh @@ -135,6 +135,15 @@ python3 check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/g 
error_and_exit $? +echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-embed-path /data/gsgnn_ec/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug + +error_and_exit $? + +python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/gsgnn_ec/save-emb/ + +error_and_exit $? + echo "**************dataset: Generated multilabel MovieLens EC, do inference on saved model without test_mask" python3 -m graphstorm.run.gs_edge_classification --inference --workspace $GS_HOME/inference_scripts/ep_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_ec_no_test_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec_infer.yaml --multilabel true --num-classes 6 --node-feat-name movie:title user:feat --use-mini-batch-infer false --save-embed-path /data/gsgnn_ec/infer-emb/ --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_ec/prediction/ --no-validation true diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 9ed2129c04..bf647348a0 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -192,6 +192,15 @@ then fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ +echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug + +error_and_exit $? + +python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_lp_ml_dot/emb/ --infer_embout /data/gsgnn_lp_ml_dot/save-emb/ + +error_and_exit $? + echo "**************dataset: Movielens, do mini-batch inference on saved model, decoder: dot" python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --use-mini-batch-infer true --logging-file /tmp/log.txt diff --git a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh index c77b4e0af6..393413f1cf 100644 --- a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh @@ -137,6 +137,15 @@ python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_n error_and_exit $? 
+echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on node classification" +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb --logging-file /tmp/train_log.txt --logging-level debug + +error_and_exit $? + +python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_nc_ml/emb/ --infer_embout /data/gsgnn_nc_ml/save-emb/ + +error_and_exit $? + echo "**************dataset: Movielens, do inference on saved model with mini-batch-infer without test mask" python3 -m graphstorm.run.gs_node_classification --inference --workspace $GS_HOME/inference_scripts/np_infer/ --num-trainers $NUM_INFERs --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_notest_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc_infer.yaml --use-mini-batch-infer true --save-embed-path /data/gsgnn_nc_ml/mini-infer-emb/ --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_nc_ml/prediction/ --no-validation true From 249df54361afefde28ce0bcf1c47a25784a45630 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 21:44:22 +0000 Subject: [PATCH 09/42] fix lint --- python/graphstorm/run/gs_gen_embedding.py | 3 ++- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 9 ++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_embedding.py index 4c95164b62..4666e85470 100644 --- a/python/graphstorm/run/gs_gen_embedding.py +++ b/python/graphstorm/run/gs_gen_embedding.py @@ -46,4 +46,5 @@ def main(): if __name__ == "__main__": FMT = "%(asctime)s %(levelname)s %(message)s" logging.basicConfig(format=FMT, level=logging.INFO) - main() \ No newline at end of file + main() + \ No newline at end of file diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index d99b38bc71..b3f3268cd9 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -15,9 +15,6 @@ GSgnn pure gpu generate embeddings. """ - -import os - import graphstorm as gs from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig @@ -71,8 +68,10 @@ def main(config_args): raise TypeError("Not supported for task type: ", config.task_type) # assert the setting for the graphstorm embedding generation. 
- assert config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings" - assert config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings" + assert config.save_embed_path is not None, \ + "save embeded path cannot be none for gs_gen_embeddings" + assert config.restore_model_path is not None, \ + "restore model path cannot be none for gs_gen_embeddings" if config.task_type == BUILTIN_TASK_LINK_PREDICTION or not config.task_type: model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) From c9de5e7660470dc3d33b7283915d57b640929174 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 22:07:52 +0000 Subject: [PATCH 10/42] temp fix --- .github/workflow_scripts/lint_check.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflow_scripts/lint_check.sh b/.github/workflow_scripts/lint_check.sh index 3eecaf93c3..556038a408 100644 --- a/.github/workflow_scripts/lint_check.sh +++ b/.github/workflow_scripts/lint_check.sh @@ -4,6 +4,8 @@ cd ../../ set -ex python3 -m pip install --upgrade prospector pip +pip3 uninstall -y pylint +yes | pip3 install pylint==2.8.3 FORCE_CUDA=1 python3 -m pip install -e '.[test]' --no-build-isolation pylint --rcfile=./tests/lint/pylintrc ./python/graphstorm/data/*.py pylint --rcfile=./tests/lint/pylintrc ./python/graphstorm/dataloading/ From 2b25576ff6cdf31b0795d1bca8a62d3c8346f539 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 22:21:21 +0000 Subject: [PATCH 11/42] fix --- .github/workflow_scripts/lint_check.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflow_scripts/lint_check.sh b/.github/workflow_scripts/lint_check.sh index 556038a408..3eecaf93c3 100644 --- a/.github/workflow_scripts/lint_check.sh +++ b/.github/workflow_scripts/lint_check.sh @@ -4,8 +4,6 @@ cd ../../ set -ex python3 -m pip install --upgrade prospector pip -pip3 uninstall -y pylint -yes | pip3 install pylint==2.8.3 FORCE_CUDA=1 python3 -m pip install -e '.[test]' --no-build-isolation pylint --rcfile=./tests/lint/pylintrc ./python/graphstorm/data/*.py pylint --rcfile=./tests/lint/pylintrc ./python/graphstorm/dataloading/ From 63fbb6fe9698c83c3b542bbe8f54cd65f526d5fb Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 23:13:38 +0000 Subject: [PATCH 12/42] fix typo --- python/graphstorm/run/gsgnn_ep/gsgnn_ep.py | 2 +- python/graphstorm/run/gsgnn_lp/gsgnn_lp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py b/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py index dae6ea727d..20e3bbb745 100644 --- a/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py +++ b/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py @@ -157,7 +157,7 @@ def main(config_args): gs.get_world_size(), device=device, node_id_mapping_file=config.node_id_mapping_file, - save_embed_format=save_embed_format) + save_embed_format=config.save_embed_format) def generate_parser(): """ Generate an argument parser diff --git a/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py b/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py index 043e13c40f..6ce7c76e52 100644 --- a/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py +++ b/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py @@ -203,7 +203,7 @@ def main(config_args): gs.get_world_size(), device=device, node_id_mapping_file=config.node_id_mapping_file, - save_embed_format=save_embed_format) + save_embed_format=config.save_embed_format) def generate_parser(): """ Generate an argument parser From b2ac45b2718ce2eb9f42b98233fc9a6236ffd14e 
Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 01:00:16 +0000 Subject: [PATCH 13/42] fix test --- tests/end2end-tests/graphstorm-ec/mgpu_test.sh | 4 ++-- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh index 332c622d0d..11aac6e0ee 100644 --- a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh @@ -135,8 +135,8 @@ python3 check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/g error_and_exit $? -echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-embed-path /data/gsgnn_ec/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on edge classification" +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-embed-path /data/gsgnn_ec/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index bf647348a0..48814ae29b 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? 
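
A similar truthiness pitfall sits in the task dispatch PATCH 06 added to gsgnn_emb.py: a condition written as config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION parses as (a == b) or c, and because the task-type constants are non-empty strings the right-hand operand is always truthy, so the elif matches every task type. PATCH 20 below replaces these tests with set membership. A short sketch of the two forms (the constant values here are stand-ins, not necessarily the real GraphStorm values):

    NODE_CLS = "node_classification"
    NODE_REG = "node_regression"
    task_type = "link_prediction"

    # PATCH 06 form: parsed as (task_type == NODE_REG) or NODE_CLS.
    # NODE_CLS is a non-empty string, hence truthy, so the branch is
    # taken for every task type, including link prediction.
    if task_type == NODE_REG or NODE_CLS:
        print("always taken")

    # PATCH 20 form: membership test against a set of task types,
    # true only for the two node tasks.
    if task_type in {NODE_REG, NODE_CLS}:
        print("taken only for the node tasks")
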
From 699dafad8cb2b05fbd04f6b576b3708def4dcb31 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 04:09:27 +0000 Subject: [PATCH 14/42] fix test --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 48814ae29b..44bcd7428a 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --save-model-path /data/gsgnn_lp_ml_distmult/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_distmult/save-emb/ --lp-decoder-type distmult --train-etype user,rating,movie movie,rating-rev,user --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? From c8991b00aec62c9a8e84813f7a7d09bc809ed12d Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 05:49:54 +0000 Subject: [PATCH 15/42] fix --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 44bcd7428a..7aa38c11d5 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --save-model-path /data/gsgnn_lp_ml_distmult/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_distmult/save-emb/ --lp-decoder-type distmult --train-etype user,rating,movie movie,rating-rev,user --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --restore-model-path /data/gsgnn_lp_ml_distmult/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_distmult/save-emb/ --lp-decoder-type distmult --train-etype user,rating,movie 
movie,rating-rev,user --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? From 3442970b4b9b19694e21456d95574d3b3e4ce4d4 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 17:27:54 +0000 Subject: [PATCH 16/42] change test --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 7aa38c11d5..cf4f061d93 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --restore-model-path /data/gsgnn_lp_ml_distmult/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_distmult/save-emb/ --lp-decoder-type distmult --train-etype user,rating,movie movie,rating-rev,user --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? 
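
The commands being adjusted in these test patches all go through the launcher added in PATCH 02: graphstorm.run.gs_gen_embedding parses only the launcher-level flags, resolves the worker script, and forwards every flag it does not recognize (--cf, --restore-model-path, --save-embed-path, and so on) to that script. A condensed sketch of the pattern (simplified and illustrative; the real flag set and job submission live in graphstorm.run.launch):

    import argparse
    import os

    def build_worker_command(argv):
        parser = argparse.ArgumentParser()
        parser.add_argument("--workspace")  # one of many launcher-side flags
        # parse_known_args splits launcher flags from worker flags; in the
        # real launcher the parsed flags go on to drive submit_jobs.
        launcher_args, exec_script_args = parser.parse_known_args(argv)

        lib_dir = os.path.abspath(os.path.dirname(__file__))
        worker = os.path.join(lib_dir, "gsgnn_emb", "gsgnn_emb.py")
        # The worker script becomes the submitted command; the unparsed
        # flags ride along and are parsed again by the worker itself.
        return [worker] + exec_script_args
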
From 273846cd14bdb288bf6c5f69032d17aa92a49075 Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Thu, 5 Oct 2023 17:48:59 +0000
Subject: [PATCH 17/42] rename the gs_gen_embedding to gs_gen_node_embedding

---
 .../run/{gs_gen_embedding.py => gs_gen_node_embedding.py}    | 2 +-
 .../run/gsgnn_emb/{gsgnn_emb.py => gsgnn_node_emb.py}        | 6 +++---
 tests/end2end-tests/graphstorm-ec/mgpu_test.sh               | 4 ++--
 tests/end2end-tests/graphstorm-lp/mgpu_test.sh               | 2 +-
 tests/end2end-tests/graphstorm-nc/mgpu_test.sh               | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)
 rename python/graphstorm/run/{gs_gen_embedding.py => gs_gen_node_embedding.py} (97%)
 rename python/graphstorm/run/gsgnn_emb/{gsgnn_emb.py => gsgnn_node_emb.py} (96%)

diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_node_embedding.py
similarity index 97%
rename from python/graphstorm/run/gs_gen_embedding.py
rename to python/graphstorm/run/gs_gen_node_embedding.py
index 4666e85470..abe21ca1bf 100644
--- a/python/graphstorm/run/gs_gen_embedding.py
+++ b/python/graphstorm/run/gs_gen_node_embedding.py
@@ -33,7 +33,7 @@ def main():
     check_input_arguments(args)

     lib_dir = os.path.abspath(os.path.dirname(__file__))
-    cmd = "gsgnn_emb/gsgnn_emb.py"
+    cmd = "gsgnn_emb/gsgnn_node_emb.py"
     cmd_path = os.path.join(lib_dir, cmd)
     exec_script_args = [cmd_path] + exec_script_args

diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py
similarity index 96%
rename from python/graphstorm/run/gsgnn_emb/gsgnn_emb.py
rename to python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py
index b3f3268cd9..12aff58d0f 100644
--- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py
+++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py
@@ -69,11 +69,11 @@ def main(config_args):

     # assert the setting for the graphstorm embedding generation.
     assert config.save_embed_path is not None, \
-        "save embeded path cannot be none for gs_gen_embeddings"
+        "save embeded path cannot be none for gs_gen_node_embeddings"
     assert config.restore_model_path is not None, \
-        "restore model path cannot be none for gs_gen_embeddings"
+        "restore model path cannot be none for gs_gen_node_embeddings"

     if config.task_type == BUILTIN_TASK_LINK_PREDICTION:
         model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False)
     elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION:
         model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False)
     elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION:
         model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False)

diff --git a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh
index 11aac6e0ee..9690636877 100644
--- a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh
+++ b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh
@@ -135,8 +135,8 @@ python3 check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/g

 error_and_exit $?
-echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on edge classification" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-embed-path /data/gsgnn_ec/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +echo "**************dataset: Movielens, use gen_node_embeddings to generate embeddings on edge classification" +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --multilabel true --num-classes 6 --node-feat-name movie:title user:feat --save-embed-path /data/gsgnn_ec/save-emb/ --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index cf4f061d93..a0f67489b8 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? diff --git a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh index 393413f1cf..eeed50444e 100644 --- a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh @@ -138,7 +138,7 @@ python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_n error_and_exit $? 
echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on node classification" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? From ce05d942e152e98713d0265a5d25263e4f6d3603 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 19:10:45 +0000 Subject: [PATCH 18/42] fix test --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index a0f67489b8..f97f003c4e 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? From 6196d194031aac9bb27c8dd33c1d7bf73dc67e56 Mon Sep 17 00:00:00 2001 From: jalencato Date: Thu, 5 Oct 2023 13:15:13 -0700 Subject: [PATCH 19/42] Update mgpu_test.sh --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index f97f003c4e..4f9b55fd36 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -197,10 +197,6 @@ python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_sc error_and_exit $? 
-python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_lp_ml_dot/emb/ --infer_embout /data/gsgnn_lp_ml_dot/save-emb/ - -error_and_exit $? - echo "**************dataset: Movielens, do mini-batch inference on saved model, decoder: dot" python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --use-mini-batch-infer true --logging-file /tmp/log.txt From 264c80e0546b574844e6e7a1cfb81eda21ac0e3d Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 22:09:52 +0000 Subject: [PATCH 20/42] fix bug --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 8 ++++---- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 12aff58d0f..71b49a6f2a 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -50,14 +50,14 @@ def main(config_args): eval_etypes=config.eval_etype, node_feat_field=config.node_feat_name, pos_graph_feat_field=config.lp_edge_weight_for_loss) - elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION: + elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: train_data = GSgnnNodeTrainData(config.graph_name, config.part_config, train_ntypes=config.target_ntype, eval_ntypes=config.eval_target_ntype, node_feat_field=config.node_feat_name, label_field=config.label_field) - elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION: + elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: train_data = GSgnnEdgeTrainData(config.graph_name, config.part_config, train_etypes=config.target_etype, @@ -75,9 +75,9 @@ def main(config_args): if config.task_type == BUILTIN_TASK_LINK_PREDICTION: model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) - elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION: + elif config.task_type == {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False) - elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION: + elif config.task_type == {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False) model_path = config.restore_model_path diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 4f9b55fd36..f8b141dbf8 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,11 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_node_embedding --workspace 
$GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --eval-batch-size 1024 --use-node-embeddings true --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug + +error_and_exit $? + +python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_lp_ml_dot/emb/ --infer_embout /data/gsgnn_lp_ml_dot/save-emb/ --link_prediction error_and_exit $? From 96bdaf849d229e3596348e4a4fab00768f449c27 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 22:15:17 +0000 Subject: [PATCH 21/42] fix --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 71b49a6f2a..88808caa90 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -75,9 +75,9 @@ def main(config_args): if config.task_type == BUILTIN_TASK_LINK_PREDICTION: model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) - elif config.task_type == {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False) - elif config.task_type == {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False) model_path = config.restore_model_path From ebe0d4b8f954722c3de52a8a222bb99db908dead Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 23:16:36 +0000 Subject: [PATCH 22/42] fix embedding bug on link prediction --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 88808caa90..33af8776a3 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -88,8 +88,12 @@ def main(config_args): # For example pre-compute all BERT embeddings model.prepare_input_encoder(train_data) # TODO(zhengda) we may not want to only use training edges to generate GNN embeddings. 
- embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, - task_tracker=tracker) + if config.task_type == BUILTIN_TASK_LINK_PREDICTION: + embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, + edge_mask="train_mask", task_tracker=tracker) + else: + embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, + task_tracker=tracker) save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), device=device, From 70feebdc045627da5d86794269faac9ddc185f61 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 6 Oct 2023 17:29:56 +0000 Subject: [PATCH 23/42] use entire graph for embedding generation --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 10 ++++------ tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 4 ---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 33af8776a3..e09b3a4f44 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -87,12 +87,10 @@ def main(config_args): # The input layer can pre-compute node features in the preparing step if needed. # For example pre-compute all BERT embeddings model.prepare_input_encoder(train_data) - # TODO(zhengda) we may not want to only use training edges to generate GNN embeddings. - if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, - edge_mask="train_mask", task_tracker=tracker) - else: - embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, + # Runjie: To generate embeddings, it might be preferable to utilize the + # entire graph instead of just the training graph. + # Additionally, generating embeddings will not result in any edge leakage issues. + embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, task_tracker=tracker) save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index f8b141dbf8..5484541e0e 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -197,10 +197,6 @@ python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_sc error_and_exit $? -python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_lp_ml_dot/emb/ --infer_embout /data/gsgnn_lp_ml_dot/save-emb/ --link_prediction - -error_and_exit $? 
- echo "**************dataset: Movielens, do mini-batch inference on saved model, decoder: dot" python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --use-mini-batch-infer true --logging-file /tmp/log.txt From a38df650213b78116faa75b71b4e1b4a4551a7e9 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 19:01:13 +0000 Subject: [PATCH 24/42] fix whole code structure --- python/graphstorm/inference/__init__.py | 1 + python/graphstorm/inference/emb_infer.py | 159 ++++++++++++++++++ .../graphstorm/run/gs_gen_node_embedding.py | 8 +- .../run/gsgnn_emb/gsgnn_node_emb.py | 105 +++++++----- 4 files changed, 226 insertions(+), 47 deletions(-) create mode 100644 python/graphstorm/inference/emb_infer.py diff --git a/python/graphstorm/inference/__init__.py b/python/graphstorm/inference/__init__.py index aad67b6d78..92b4f7f965 100644 --- a/python/graphstorm/inference/__init__.py +++ b/python/graphstorm/inference/__init__.py @@ -19,3 +19,4 @@ from .lp_infer import GSgnnLinkPredictionInferrer from .np_infer import GSgnnNodePredictionInferrer from .ep_infer import GSgnnEdgePredictionInferrer +from .emb_infer import GSgnnEmbGenInferer diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py new file mode 100644 index 0000000000..f389b77419 --- /dev/null +++ b/python/graphstorm/inference/emb_infer.py @@ -0,0 +1,159 @@ +""" + Copyright 2023 Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Inferrer wrapper for embedding generation. +""" +import time + +import logging +from .graphstorm_infer import GSInferrer +from ..model.utils import save_embeddings as save_gsgnn_embeddings +from ..model.utils import save_relation_embeddings +from ..model.edge_decoder import LinkPredictDistMultDecoder +from ..model.gnn import do_full_graph_inference, do_mini_batch_inference +from ..model.node_gnn import node_mini_batch_gnn_predict + +from ..utils import sys_tracker, get_rank, get_world_size, barrier, create_dist_tensor +from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, + BUILTIN_TASK_NODE_REGRESSION, + BUILTIN_TASK_EDGE_CLASSIFICATION, + BUILTIN_TASK_EDGE_REGRESSION, + BUILTIN_TASK_LINK_PREDICTION) + +class GSgnnEmbGenInferer(GSInferrer): + """ Embedding Generation inffer inferrer. + + This is a high-level inferrer wrapper that can be used directly + to generate embedding for inferer. + + Parameters + ---------- + model : GSgnnNodeModel + The GNN model with different task. + """ + + # TODO(zhengda) We only support full-graph inference for now. 
+ def infer(self, data, task_type, save_embed_path, loader, + use_mini_batch_infer=False, + node_id_mapping_file=None, + return_proba=True, + save_embed_format="pytorch"): + """ Do Embedding Generating + + Generate node embeddings and save. + + Parameters + ---------- + data: GSgnnData + The GraphStorm dataset + task_type : str + task_type must be one of graphstorm builtin task types + save_embed_path : str + The path where the GNN embeddings will be saved. + loader : GSEdgeDataLoader/GSNodeDataLoader + The mini-batch sampler for built-in graphstorm task. + edge_mask_for_gnn_embeddings : str + The mask that indicates the edges used for computing GNN embeddings. By default, + the dataloader uses the edges in the training graphs to compute GNN embeddings to + avoid information leak for link prediction. + use_mini_batch_infer : bool + Whether or not to use mini-batch inference when computing node embedings. + node_id_mapping_file: str + Path to the file storing node id mapping generated by the + graph partition algorithm. + edge_id_mapping_file: str + Path to the file storing edge id mapping generated by the + graph partition algorithm. + save_embed_format : str + Specify the format of saved embeddings. + """ + + device = self.device + # deal with uninitialized case first + if use_mini_batch_infer and \ + task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + assert save_embed_path is None, \ + "Unable to save the node embeddings when using mini batch inference " \ + "when doing edge task." \ + "It is not guaranteed that mini-batch prediction will cover all the nodes." + + if task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + assert len(loader.data.eval_ntypes) == 1, \ + "GraphStorm only support single target node type for training and inference" + + assert save_embed_path is not None + + sys_tracker.check('start embedding generation') + self._model.eval() + + if task_type == BUILTIN_TASK_LINK_PREDICTION: + # for embedding generation, it is preferred to use whole graph + if use_mini_batch_infer: + embs = do_mini_batch_inference(self._model, data, fanout=loader.fanout, + edge_mask=None, + task_tracker=self.task_tracker) + else: + embs = do_full_graph_inference(self._model, data, fanout=loader.fanout, + edge_mask=None, + task_tracker=self.task_tracker) + elif task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + # only generate embeddings on the target node type + ntype = loader.data.eval_ntypes[0] + if use_mini_batch_infer: + inter_embs = node_mini_batch_gnn_predict(self._model, loader, return_proba, + return_label=False)[1] + inter_embs = {ntype: inter_embs[ntype]} if isinstance(inter_embs, dict) \ + else {ntype: inter_embs} + g = loader.data.g + ntype_emb = create_dist_tensor((g.num_nodes(ntype), inter_embs[ntype].shape[1]), + dtype=inter_embs[ntype].dtype, name=f'gen-emb-{ntype}', + part_policy=g.get_node_partition_policy(ntype), + persistent=True) + ntype_emb[loader.target_nidx[ntype]] = inter_embs[ntype] + embs = {ntype: ntype_emb} + else: + embs = do_full_graph_inference(self._model, data, fanout=loader.fanout, + task_tracker=self.task_tracker) + ntype_emb = embs[ntype] + embs = {ntype: ntype_emb} + elif task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + embs = do_full_graph_inference(self._model, loader.data, fanout=loader.fanout, + task_tracker=self.task_tracker) + target_ntypes = set() + for etype in loader.data.eval_etypes: + target_ntypes.add(etype[0]) + 
target_ntypes.add(etype[2]) + + embs = {ntype: embs[ntype] for ntype in sorted(target_ntypes)} + else: + raise TypeError("Not supported for task type: ", task_type) + + if get_rank() == 0: + logging.info("save embeddings to %s", save_embed_path) + + save_gsgnn_embeddings(save_embed_path, embs, get_rank(), + get_world_size(), + device=device, + node_id_mapping_file=node_id_mapping_file, + save_embed_format=save_embed_format) + barrier() + sys_tracker.check('save embeddings') + + # save relation embedding if any + if get_rank() == 0: + decoder = self._model.decoder + if isinstance(decoder, LinkPredictDistMultDecoder): + if save_embed_path is not None: + save_relation_embeddings(save_embed_path, decoder) diff --git a/python/graphstorm/run/gs_gen_node_embedding.py b/python/graphstorm/run/gs_gen_node_embedding.py index abe21ca1bf..f879cfabf3 100644 --- a/python/graphstorm/run/gs_gen_node_embedding.py +++ b/python/graphstorm/run/gs_gen_node_embedding.py @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. - Entry point for running link prediction tasks. + Entry point for running embedding generating tasks. Run as: - python3 -m graphstorm.run.gs_gen_embedding + python3 -m graphstorm.run.gs_gen_node_embedding """ import os import logging @@ -37,10 +37,6 @@ def main(): cmd_path = os.path.join(lib_dir, cmd) exec_script_args = [cmd_path] + exec_script_args - if "coo" not in args.graph_format: - args.graph_format = f"{args.graph_format},coo" - logging.debug("Automatically add COO format to graph formats for link prediction. " + \ - "New graph_format is %s", args.graph_format) submit_jobs(args, exec_script_args) if __name__ == "__main__": diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index e09b3a4f44..f00678e099 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -18,15 +18,15 @@ import graphstorm as gs from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig -from graphstorm.dataloading import GSgnnLPTrainData, GSgnnNodeTrainData, GSgnnEdgeTrainData -from graphstorm.model.utils import save_embeddings -from graphstorm.model import do_full_graph_inference +from graphstorm.dataloading import (GSgnnEdgeInferData, GSgnnNodeInferData, + GSgnnEdgeDataLoader, GSgnnNodeDataLoader) from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION, BUILTIN_TASK_LINK_PREDICTION) +from graphstorm.inference import GSgnnEmbGenInferer def main(config_args): """ main function @@ -44,26 +44,24 @@ def main(config_args): tracker.log_params(config.__dict__) if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - train_data = GSgnnLPTrainData(config.graph_name, - config.part_config, - train_etypes=config.train_etype, - eval_etypes=config.eval_etype, - node_feat_field=config.node_feat_name, - pos_graph_feat_field=config.lp_edge_weight_for_loss) - elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - train_data = GSgnnNodeTrainData(config.graph_name, - config.part_config, - train_ntypes=config.target_ntype, - eval_ntypes=config.eval_target_ntype, - node_feat_field=config.node_feat_name, - label_field=config.label_field) - elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, 
BUILTIN_TASK_EDGE_REGRESSION}: - train_data = GSgnnEdgeTrainData(config.graph_name, + input_graph = GSgnnEdgeInferData(config.graph_name, config.part_config, - train_etypes=config.target_etype, + eval_etypes=config.eval_etype, node_feat_field=config.node_feat_name, - label_field=config.label_field, decoder_edge_feat=config.decoder_edge_feat) + elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + input_graph = GSgnnNodeInferData(config.graph_name, + config.part_config, + eval_ntypes=config.target_ntype, + node_feat_field=config.node_feat_name, + label_field=config.label_field) + elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + input_graph = GSgnnEdgeInferData(config.graph_name, + config.part_config, + eval_etypes=config.target_etype, + node_feat_field=config.node_feat_name, + label_field=config.label_field, + decoder_edge_feat=config.decoder_edge_feat) else: raise TypeError("Not supported for task type: ", config.task_type) @@ -74,30 +72,55 @@ def main(config_args): "restore model path cannot be none for gs_gen_node_embeddings" if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) + model = gs.create_builtin_lp_gnn_model(input_graph.g, config, train_task=False) + elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + model = gs.create_builtin_node_gnn_model(input_graph.g, config, train_task=False) + elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + model = gs.create_builtin_edge_gnn_model(input_graph.g, config, train_task=False) + else: + raise TypeError("Not supported for task type: ", config.task_type) + + if config.task_type == BUILTIN_TASK_LINK_PREDICTION: + if config.eval_negative_sampler == BUILTIN_LP_UNIFORM_NEG_SAMPLER: + link_prediction_loader = GSgnnLinkPredictionTestDataLoader + elif config.eval_negative_sampler == BUILTIN_LP_JOINT_NEG_SAMPLER: + link_prediction_loader = GSgnnLinkPredictionJointTestDataLoader + else: + raise ValueError('Unknown test negative sampler.' + 'Supported test negative samplers include ' + f'[{BUILTIN_LP_UNIFORM_NEG_SAMPLER}, {BUILTIN_LP_JOINT_NEG_SAMPLER}]') + + dataloader = link_prediction_loader(input_graph, input_graph.test_idxs, + batch_size=config.eval_batch_size, + num_negative_edges=config.num_negative_edges_eval, + fanout=config.eval_fanout) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False) + dataloader = GSgnnNodeDataLoader(input_graph, input_graph.infer_idxs, fanout=config.eval_fanout, + batch_size=config.eval_batch_size, device=device, + train_task=False, + construct_feat_ntype=config.construct_feat_ntype, + construct_feat_fanout=config.construct_feat_fanout) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False) - - model_path = config.restore_model_path - # TODO(zhengda) the model path has to be in a shared filesystem. - model.restore_model(model_path) - # Preparing input layer for training or inference. - # The input layer can pre-compute node features in the preparing step if needed. 
- # For example pre-compute all BERT embeddings - model.prepare_input_encoder(train_data) - # Runjie: To generate embeddings, it might be preferable to utilize the - # entire graph instead of just the training graph. - # Additionally, generating embeddings will not result in any edge leakage issues. - embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, - task_tracker=tracker) - save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), - gs.get_world_size(), - device=device, - node_id_mapping_file=config.node_id_mapping_file, - save_embed_format=config.save_embed_format) + dataloader = GSgnnEdgeDataLoader(input_graph, input_graph.infer_idxs, fanout=config.eval_fanout, + batch_size=config.eval_batch_size, + device=device, train_task=False, + reverse_edge_types_map=config.reverse_edge_types_map, + remove_target_edge_type=config.remove_target_edge_type, + construct_feat_ntype=config.construct_feat_ntype, + construct_feat_fanout=config.construct_feat_fanout) + else: + raise TypeError("Not supported for task type: ", config.task_type) + + emb_generator = GSgnnEmbGenInferer(model) + emb_generator.setup_device(device=device) + emb_generator.infer(input_graph, config.task_type, + save_embed_path=config.save_embed_path, + loader=dataloader, + use_mini_batch_infer=config.use_mini_batch_infer, + node_id_mapping_file=config.node_id_mapping_file, + return_proba=config.return_proba, + save_embed_format=config.save_embed_format) def generate_parser(): """ Generate an argument parser From f642cf042b8253d9f39e4b1e8627de2e4f0797b4 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 19:21:37 +0000 Subject: [PATCH 25/42] fix import bug --- python/graphstorm/inference/emb_infer.py | 15 +++++++-------- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 6 +++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index f389b77419..93f466b9ac 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -15,9 +15,12 @@ Inferrer wrapper for embedding generation. """ -import time - import logging +from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, + BUILTIN_TASK_NODE_REGRESSION, + BUILTIN_TASK_EDGE_CLASSIFICATION, + BUILTIN_TASK_EDGE_REGRESSION, + BUILTIN_TASK_LINK_PREDICTION) from .graphstorm_infer import GSInferrer from ..model.utils import save_embeddings as save_gsgnn_embeddings from ..model.utils import save_relation_embeddings @@ -26,11 +29,7 @@ from ..model.node_gnn import node_mini_batch_gnn_predict from ..utils import sys_tracker, get_rank, get_world_size, barrier, create_dist_tensor -from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, - BUILTIN_TASK_NODE_REGRESSION, - BUILTIN_TASK_EDGE_CLASSIFICATION, - BUILTIN_TASK_EDGE_REGRESSION, - BUILTIN_TASK_LINK_PREDICTION) + class GSgnnEmbGenInferer(GSInferrer): """ Embedding Generation inffer inferrer. 
@@ -99,7 +98,7 @@ def infer(self, data, task_type, save_embed_path, loader, self._model.eval() if task_type == BUILTIN_TASK_LINK_PREDICTION: - # for embedding generation, it is preferred to use whole graph + # for embedding generation, it is preferred to use full graph if use_mini_batch_infer: embs = do_mini_batch_inference(self._model, data, fanout=loader.fanout, edge_mask=None, diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index f00678e099..847ba15728 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -19,13 +19,17 @@ from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig from graphstorm.dataloading import (GSgnnEdgeInferData, GSgnnNodeInferData, - GSgnnEdgeDataLoader, GSgnnNodeDataLoader) + GSgnnEdgeDataLoader, GSgnnNodeDataLoader, + GSgnnLinkPredictionTestDataLoader, + GSgnnLinkPredictionJointTestDataLoader) from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION, BUILTIN_TASK_LINK_PREDICTION) +from graphstorm.dataloading import (BUILTIN_LP_UNIFORM_NEG_SAMPLER, + BUILTIN_LP_JOINT_NEG_SAMPLER) from graphstorm.inference import GSgnnEmbGenInferer def main(config_args): From 34e22e7532c92865533db0dac646addfc448f9b2 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 19:53:03 +0000 Subject: [PATCH 26/42] fix lint --- python/graphstorm/inference/emb_infer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 93f466b9ac..ff869b9f55 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -117,7 +117,8 @@ def infer(self, data, task_type, save_embed_path, loader, else {ntype: inter_embs} g = loader.data.g ntype_emb = create_dist_tensor((g.num_nodes(ntype), inter_embs[ntype].shape[1]), - dtype=inter_embs[ntype].dtype, name=f'gen-emb-{ntype}', + dtype=inter_embs[ntype].dtype, + name=f'gen-emb-{ntype}', part_policy=g.get_node_partition_policy(ntype), persistent=True) ntype_emb[loader.target_nidx[ntype]] = inter_embs[ntype] From 0d9347f6db18b6db3e99bd0e6b72328e0183044c Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 20:04:12 +0000 Subject: [PATCH 27/42] fix lint --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 847ba15728..84ff3762f9 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -99,13 +99,15 @@ def main(config_args): num_negative_edges=config.num_negative_edges_eval, fanout=config.eval_fanout) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - dataloader = GSgnnNodeDataLoader(input_graph, input_graph.infer_idxs, fanout=config.eval_fanout, + dataloader = GSgnnNodeDataLoader(input_graph, input_graph.infer_idxs, + fanout=config.eval_fanout, batch_size=config.eval_batch_size, device=device, train_task=False, construct_feat_ntype=config.construct_feat_ntype, construct_feat_fanout=config.construct_feat_fanout) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, 
BUILTIN_TASK_EDGE_REGRESSION}: - dataloader = GSgnnEdgeDataLoader(input_graph, input_graph.infer_idxs, fanout=config.eval_fanout, + dataloader = GSgnnEdgeDataLoader(input_graph, input_graph.infer_idxs, + fanout=config.eval_fanout, batch_size=config.eval_batch_size, device=device, train_task=False, reverse_edge_types_map=config.reverse_edge_types_map, From 3c30b1b23f1c1ff9b7b869cb35e1e1d6ac2c4bc7 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 21:33:29 +0000 Subject: [PATCH 28/42] fix bug for not restoring model --- python/graphstorm/inference/emb_infer.py | 10 ++-------- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 6 ++++++ tests/end2end-tests/graphstorm-ec/mgpu_test.sh | 2 +- tests/end2end-tests/graphstorm-nc/mgpu_test.sh | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index ff869b9f55..e1a50c2274 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -43,7 +43,6 @@ class GSgnnEmbGenInferer(GSInferrer): The GNN model with different task. """ - # TODO(zhengda) We only support full-graph inference for now. def infer(self, data, task_type, save_embed_path, loader, use_mini_batch_infer=False, node_id_mapping_file=None, @@ -63,18 +62,11 @@ def infer(self, data, task_type, save_embed_path, loader, The path where the GNN embeddings will be saved. loader : GSEdgeDataLoader/GSNodeDataLoader The mini-batch sampler for built-in graphstorm task. - edge_mask_for_gnn_embeddings : str - The mask that indicates the edges used for computing GNN embeddings. By default, - the dataloader uses the edges in the training graphs to compute GNN embeddings to - avoid information leak for link prediction. use_mini_batch_infer : bool Whether or not to use mini-batch inference when computing node embedings. node_id_mapping_file: str Path to the file storing node id mapping generated by the graph partition algorithm. - edge_id_mapping_file: str - Path to the file storing edge id mapping generated by the - graph partition algorithm. save_embed_format : str Specify the format of saved embeddings. 
""" @@ -129,6 +121,8 @@ def infer(self, data, task_type, save_embed_path, loader, ntype_emb = embs[ntype] embs = {ntype: ntype_emb} elif task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + # Currently it is not allowed to do mini-batch inference + # and save embedding on edge tasks embs = do_full_graph_inference(self._model, loader.data, fanout=loader.fanout, task_tracker=self.task_tracker) target_ntypes = set() diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 84ff3762f9..0b5a1bf3ed 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -32,6 +32,7 @@ BUILTIN_LP_JOINT_NEG_SAMPLER) from graphstorm.inference import GSgnnEmbGenInferer + def main(config_args): """ main function """ @@ -75,6 +76,7 @@ def main(config_args): assert config.restore_model_path is not None, \ "restore model path cannot be none for gs_gen_node_embeddings" + # load the model if config.task_type == BUILTIN_TASK_LINK_PREDICTION: model = gs.create_builtin_lp_gnn_model(input_graph.g, config, train_task=False) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: @@ -83,7 +85,10 @@ def main(config_args): model = gs.create_builtin_edge_gnn_model(input_graph.g, config, train_task=False) else: raise TypeError("Not supported for task type: ", config.task_type) + model.restore_model(config.restore_model_path, + model_layer_to_load=config.restore_model_layers) + # define the dataloader if config.task_type == BUILTIN_TASK_LINK_PREDICTION: if config.eval_negative_sampler == BUILTIN_LP_UNIFORM_NEG_SAMPLER: link_prediction_loader = GSgnnLinkPredictionTestDataLoader @@ -117,6 +122,7 @@ def main(config_args): else: raise TypeError("Not supported for task type: ", config.task_type) + # start the infer emb_generator = GSgnnEmbGenInferer(model) emb_generator.setup_device(device=device) diff --git a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh index 9690636877..562b4e3817 100644 --- a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh @@ -136,7 +136,7 @@ python3 check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/g error_and_exit $? 
echo "**************dataset: Movielens, use gen_node_embeddings to generate embeddings on edge classification" -python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --multilabel true --num-classes 6 --node-feat-name movie:title user:feat --save-embed-path /data/gsgnn_ec/save-emb/ --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --use-mini-batch-infer false --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --multilabel true --num-classes 6 --node-feat-name movie:title user:feat --save-embed-path /data/gsgnn_ec/save-emb/ --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? diff --git a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh index eeed50444e..9ee9d2dbfa 100644 --- a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh @@ -138,7 +138,7 @@ python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_n error_and_exit $? echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on node classification" -python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb/ --use-mini-batch-infer false --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? 
From 9c0be9830041865a498563290867e0c5e899b85b Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Wed, 11 Oct 2023 00:11:30 +0000
Subject: [PATCH 29/42] remove relation embedding

---
 python/graphstorm/inference/emb_infer.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py
index e1a50c2274..9e9b3575a7 100644
--- a/python/graphstorm/inference/emb_infer.py
+++ b/python/graphstorm/inference/emb_infer.py
@@ -144,10 +144,3 @@ def infer(self, data, task_type, save_embed_path, loader,
                                save_embed_format=save_embed_format)
         barrier()
         sys_tracker.check('save embeddings')
-
-        # save relation embedding if any
-        if get_rank() == 0:
-            decoder = self._model.decoder
-            if isinstance(decoder, LinkPredictDistMultDecoder):
-                if save_embed_path is not None:
-                    save_relation_embeddings(save_embed_path, decoder)

From 6822ef381f843ed1fb4280783a60e78cd8fded31 Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Wed, 11 Oct 2023 00:12:56 +0000
Subject: [PATCH 30/42] remove redundant dependency

---
 python/graphstorm/inference/emb_infer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py
index 9e9b3575a7..50e718de30 100644
--- a/python/graphstorm/inference/emb_infer.py
+++ b/python/graphstorm/inference/emb_infer.py
@@ -23,7 +23,6 @@
                                BUILTIN_TASK_LINK_PREDICTION)
 from .graphstorm_infer import GSInferrer
 from ..model.utils import save_embeddings as save_gsgnn_embeddings
-from ..model.utils import save_relation_embeddings
 from ..model.edge_decoder import LinkPredictDistMultDecoder
 from ..model.gnn import do_full_graph_inference, do_mini_batch_inference
 from ..model.node_gnn import node_mini_batch_gnn_predict

From 2b79a470356a202d142ef9ab2432890bb335423e Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Wed, 11 Oct 2023 00:50:46 +0000
Subject: [PATCH 31/42] fix lint

---
 python/graphstorm/inference/emb_infer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py
index 50e718de30..405b1e7bd2 100644
--- a/python/graphstorm/inference/emb_infer.py
+++ b/python/graphstorm/inference/emb_infer.py
@@ -23,7 +23,6 @@
                                BUILTIN_TASK_LINK_PREDICTION)
 from .graphstorm_infer import GSInferrer
 from ..model.utils import save_embeddings as save_gsgnn_embeddings
-from ..model.edge_decoder import LinkPredictDistMultDecoder
 from ..model.gnn import do_full_graph_inference, do_mini_batch_inference
 from ..model.node_gnn import node_mini_batch_gnn_predict

From 57044726a9b7d97041e23c983b5c9747f06b6139 Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Thu, 12 Oct 2023 20:20:29 +0000
Subject: [PATCH 32/42] change to trivial version

---
 python/graphstorm/inference/emb_infer.py      | 108 ++++++++----------
 .../run/gsgnn_emb/gsgnn_node_emb.py           |  46 +-------
 2 files changed, 53 insertions(+), 101 deletions(-)

diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py
index 405b1e7bd2..de33eff7e7 100644
--- a/python/graphstorm/inference/emb_infer.py
+++ b/python/graphstorm/inference/emb_infer.py
@@ -30,7 +30,7 @@

 class GSgnnEmbGenInferer(GSInferrer):
-    """ Embedding Generation inffer inferrer.
+    """ Embedding Generation inferrer.

     This is a high-level inferrer wrapper that can be used directly
     to generate embedding for inferer.
@@ -41,14 +41,54 @@ class GSgnnEmbGenInferer(GSInferrer):
         model : GSgnnNodeModel
         The GNN model with different task.
""" - def infer(self, data, task_type, save_embed_path, loader, + def nc_emb(self, g, model, use_mini_batch_infer, fanout): + # only generate embeddings on the target node type + if use_mini_batch_infer: + embs = do_mini_batch_inference(model, g, fanout=fanout, + edge_mask=None, + task_tracker=self.task_tracker, + infer_ntypes=g.infer_idxs) + else: + embs = do_full_graph_inference(model, g, fanout=fanout, + task_tracker=self.task_tracker) + return embs + + def ec_emb(self, g, model, use_mini_batch_infer, fanout): + # only generate embeddings on the target node type within definition + # target edge type + infer_ntypes = set() + for etype in g.infer_idxs: + infer_ntypes.add(etype[0]) + infer_ntypes.add(etype[2]) + + if use_mini_batch_infer: + embs = do_mini_batch_inference(model, g, fanout=fanout, + edge_mask=None, + task_tracker=self.task_tracker, + infer_ntypes=infer_ntypes) + else: + embs = do_full_graph_inference(model, g, fanout=fanout, + task_tracker=self.task_tracker) + return embs + + def lp_emb(self, g, model, use_mini_batch_infer, fanout): + if use_mini_batch_infer: + embs = do_mini_batch_inference(model, g, fanout=fanout, + edge_mask=None, + task_tracker=self.task_tracker) + else: + embs = do_full_graph_inference(model, g, fanout=fanout, + edge_mask=None, + task_tracker=self.task_tracker) + return embs + + def infer(self, data, task_type, save_embed_path, eval_fanout, use_mini_batch_infer=False, node_id_mapping_file=None, - return_proba=True, save_embed_format="pytorch"): """ Do Embedding Generating - Generate node embeddings and save. + Generate node embeddings and save into disk. Parameters ---------- @@ -58,10 +98,8 @@ def infer(self, data, task_type, save_embed_path, loader, task_type must be one of graphstorm builtin task types save_embed_path : str The path where the GNN embeddings will be saved. - loader : GSEdgeDataLoader/GSNodeDataLoader - The mini-batch sampler for built-in graphstorm task. use_mini_batch_infer : bool - Whether or not to use mini-batch inference when computing node embedings. + Whether to use mini-batch inference when computing node embeddings. node_id_mapping_file: str Path to the file storing node id mapping generated by the graph partition algorithm. @@ -70,65 +108,19 @@ def infer(self, data, task_type, save_embed_path, loader, """ device = self.device - # deal with uninitialized case first - if use_mini_batch_infer and \ - task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - assert save_embed_path is None, \ - "Unable to save the node embeddings when using mini batch inference " \ - "when doing edge task." \ - "It is not guaranteed that mini-batch prediction will cover all the nodes." 
- if task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - assert len(loader.data.eval_ntypes) == 1, \ - "GraphStorm only support single target node type for training and inference" - - assert save_embed_path is not None + assert save_embed_path is not None, \ + "It requires save embed path for gs_gen_node_embedding" sys_tracker.check('start embedding generation') self._model.eval() if task_type == BUILTIN_TASK_LINK_PREDICTION: - # for embedding generation, it is preferred to use full graph - if use_mini_batch_infer: - embs = do_mini_batch_inference(self._model, data, fanout=loader.fanout, - edge_mask=None, - task_tracker=self.task_tracker) - else: - embs = do_full_graph_inference(self._model, data, fanout=loader.fanout, - edge_mask=None, - task_tracker=self.task_tracker) + embs = self.lp_emb(data, self._model, use_mini_batch_infer, eval_fanout) elif task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - # only generate embeddings on the target node type - ntype = loader.data.eval_ntypes[0] - if use_mini_batch_infer: - inter_embs = node_mini_batch_gnn_predict(self._model, loader, return_proba, - return_label=False)[1] - inter_embs = {ntype: inter_embs[ntype]} if isinstance(inter_embs, dict) \ - else {ntype: inter_embs} - g = loader.data.g - ntype_emb = create_dist_tensor((g.num_nodes(ntype), inter_embs[ntype].shape[1]), - dtype=inter_embs[ntype].dtype, - name=f'gen-emb-{ntype}', - part_policy=g.get_node_partition_policy(ntype), - persistent=True) - ntype_emb[loader.target_nidx[ntype]] = inter_embs[ntype] - embs = {ntype: ntype_emb} - else: - embs = do_full_graph_inference(self._model, data, fanout=loader.fanout, - task_tracker=self.task_tracker) - ntype_emb = embs[ntype] - embs = {ntype: ntype_emb} + embs = self.nc_emb(data, self._model, use_mini_batch_infer, eval_fanout) elif task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - # Currently it is not allowed to do mini-batch inference - # and save embedding on edge tasks - embs = do_full_graph_inference(self._model, loader.data, fanout=loader.fanout, - task_tracker=self.task_tracker) - target_ntypes = set() - for etype in loader.data.eval_etypes: - target_ntypes.add(etype[0]) - target_ntypes.add(etype[2]) - - embs = {ntype: embs[ntype] for ntype in sorted(target_ntypes)} + embs = self.ec_emb(data, self._model, use_mini_batch_infer, eval_fanout) else: raise TypeError("Not supported for task type: ", task_type) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 0b5a1bf3ed..80580bf21d 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -18,18 +18,13 @@ import graphstorm as gs from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig -from graphstorm.dataloading import (GSgnnEdgeInferData, GSgnnNodeInferData, - GSgnnEdgeDataLoader, GSgnnNodeDataLoader, - GSgnnLinkPredictionTestDataLoader, - GSgnnLinkPredictionJointTestDataLoader) from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph +from graphstorm.dataloading import (GSgnnEdgeInferData, GSgnnNodeInferData) from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION, BUILTIN_TASK_LINK_PREDICTION) -from graphstorm.dataloading import (BUILTIN_LP_UNIFORM_NEG_SAMPLER, - BUILTIN_LP_JOINT_NEG_SAMPLER) from 
graphstorm.inference import GSgnnEmbGenInferer @@ -88,50 +83,15 @@ def main(config_args): model.restore_model(config.restore_model_path, model_layer_to_load=config.restore_model_layers) - # define the dataloader - if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - if config.eval_negative_sampler == BUILTIN_LP_UNIFORM_NEG_SAMPLER: - link_prediction_loader = GSgnnLinkPredictionTestDataLoader - elif config.eval_negative_sampler == BUILTIN_LP_JOINT_NEG_SAMPLER: - link_prediction_loader = GSgnnLinkPredictionJointTestDataLoader - else: - raise ValueError('Unknown test negative sampler.' - 'Supported test negative samplers include ' - f'[{BUILTIN_LP_UNIFORM_NEG_SAMPLER}, {BUILTIN_LP_JOINT_NEG_SAMPLER}]') - - dataloader = link_prediction_loader(input_graph, input_graph.test_idxs, - batch_size=config.eval_batch_size, - num_negative_edges=config.num_negative_edges_eval, - fanout=config.eval_fanout) - elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - dataloader = GSgnnNodeDataLoader(input_graph, input_graph.infer_idxs, - fanout=config.eval_fanout, - batch_size=config.eval_batch_size, device=device, - train_task=False, - construct_feat_ntype=config.construct_feat_ntype, - construct_feat_fanout=config.construct_feat_fanout) - elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - dataloader = GSgnnEdgeDataLoader(input_graph, input_graph.infer_idxs, - fanout=config.eval_fanout, - batch_size=config.eval_batch_size, - device=device, train_task=False, - reverse_edge_types_map=config.reverse_edge_types_map, - remove_target_edge_type=config.remove_target_edge_type, - construct_feat_ntype=config.construct_feat_ntype, - construct_feat_fanout=config.construct_feat_fanout) - else: - raise TypeError("Not supported for task type: ", config.task_type) - - # start the infer + # start to infer emb_generator = GSgnnEmbGenInferer(model) emb_generator.setup_device(device=device) emb_generator.infer(input_graph, config.task_type, save_embed_path=config.save_embed_path, - loader=dataloader, + eval_fanout=config.eval_fanout, use_mini_batch_infer=config.use_mini_batch_infer, node_id_mapping_file=config.node_id_mapping_file, - return_proba=config.return_proba, save_embed_format=config.save_embed_format) def generate_parser(): From 45038f9a57e5642819933dd18a39bd2ac7fb5af6 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 12 Oct 2023 20:53:01 +0000 Subject: [PATCH 33/42] add doc string --- python/graphstorm/inference/emb_infer.py | 49 +++++++++++++++++++++--- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index de33eff7e7..286d1d2dab 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -24,9 +24,8 @@ from .graphstorm_infer import GSInferrer from ..model.utils import save_embeddings as save_gsgnn_embeddings from ..model.gnn import do_full_graph_inference, do_mini_batch_inference -from ..model.node_gnn import node_mini_batch_gnn_predict -from ..utils import sys_tracker, get_rank, get_world_size, barrier, create_dist_tensor +from ..utils import sys_tracker, get_rank, get_world_size, barrier class GSgnnEmbGenInferer(GSInferrer): @@ -42,7 +41,20 @@ class GSgnnEmbGenInferer(GSInferrer): """ def nc_emb(self, g, model, use_mini_batch_infer, fanout): - # only generate embeddings on the target node type + """ Embedding Generation for node task. + It will only generate embeddings on the target node type. 
+ + Parameters + ---------- + g: GSgnnData + The GraphStorm dataset + model : GSgnnNodeModel + The GNN model on edge prediction/classification task + use_mini_batch_infer : bool + Whether to use mini-batch inference when computing node embeddings. + fanout: list of int + The fanout of each GNN layers used in inference. + """ if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, edge_mask=None, @@ -54,8 +66,21 @@ def nc_emb(self, g, model, use_mini_batch_infer, fanout): return embs def ec_emb(self, g, model, use_mini_batch_infer, fanout): - # only generate embeddings on the target node type within definition - # target edge type + """ Embedding Generation for edge task. + It will only generate embeddings on the target node type + defined in the target edge type. + + Parameters + ---------- + g: GSgnnData + The GraphStorm dataset + model : GSgnnNodeModel + The GNN model on edge prediction/classification task + use_mini_batch_infer : bool + Whether to use mini-batch inference when computing node embeddings. + fanout: list of int + The fanout of each GNN layers used in inference. + """ infer_ntypes = set() for etype in g.infer_idxs: infer_ntypes.add(etype[0]) @@ -72,6 +97,20 @@ def ec_emb(self, g, model, use_mini_batch_infer, fanout): return embs def lp_emb(self, g, model, use_mini_batch_infer, fanout): + """ Embedding Generation for link prediction task. + It will only generate embeddings on whole graph. + + Parameters + ---------- + g: GSgnnData + The GraphStorm dataset + model : GSgnnNodeModel + The GNN model on edge prediction/classification task + use_mini_batch_infer : bool + Whether to use mini-batch inference when computing node embeddings. + fanout: list of int + The fanout of each GNN layers used in inference. + """ if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, edge_mask=None, From 6ceb0d07c01a97c8b2024584402ed7c7db42d3b9 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 12 Oct 2023 22:08:31 +0000 Subject: [PATCH 34/42] fix edge task mini batch --- python/graphstorm/inference/emb_infer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 286d1d2dab..40c78871d4 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -83,8 +83,10 @@ def ec_emb(self, g, model, use_mini_batch_infer, fanout): """ infer_ntypes = set() for etype in g.infer_idxs: - infer_ntypes.add(etype[0]) - infer_ntypes.add(etype[2]) + if etype[0] not in infer_ntypes: + infer_ntypes.append(etype[0]) + if etype[2] not in infer_ntypes: + infer_ntypes.append(etype[2]) if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, From 5e39786bb54a3486ec109d4ad636ca93d08be71a Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 12 Oct 2023 22:19:35 +0000 Subject: [PATCH 35/42] add --- python/graphstorm/inference/emb_infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 40c78871d4..8e50f379a7 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -81,7 +81,7 @@ def ec_emb(self, g, model, use_mini_batch_infer, fanout): fanout: list of int The fanout of each GNN layers used in inference. 
""" - infer_ntypes = set() + infer_ntypes = [] for etype in g.infer_idxs: if etype[0] not in infer_ntypes: infer_ntypes.append(etype[0]) From 57044726a9b7d97041e23c983b5c9747f06b6139 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 12 Oct 2023 23:06:32 +0000 Subject: [PATCH 36/42] fix sorted bug --- python/graphstorm/inference/emb_infer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 8e50f379a7..c671db9fc8 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -55,11 +55,12 @@ def nc_emb(self, g, model, use_mini_batch_infer, fanout): fanout: list of int The fanout of each GNN layers used in inference. """ + infer_ntypes = sorted(g.infer_idxs) if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, edge_mask=None, task_tracker=self.task_tracker, - infer_ntypes=g.infer_idxs) + infer_ntypes=infer_ntypes) else: embs = do_full_graph_inference(model, g, fanout=fanout, task_tracker=self.task_tracker) @@ -81,12 +82,11 @@ def ec_emb(self, g, model, use_mini_batch_infer, fanout): fanout: list of int The fanout of each GNN layers used in inference. """ - infer_ntypes = [] + infer_ntypes = set() for etype in g.infer_idxs: - if etype[0] not in infer_ntypes: - infer_ntypes.append(etype[0]) - if etype[2] not in infer_ntypes: - infer_ntypes.append(etype[2]) + infer_ntypes.add(etype[0]) + infer_ntypes.add(etype[2]) + infer_ntypes = sorted(infer_ntypes) if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, From 6de76ff34a86d6cd8771614f9ec976a04c7bdf05 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 01:07:27 +0000 Subject: [PATCH 37/42] finish pruning --- python/graphstorm/inference/emb_infer.py | 105 +++--------------- .../run/gsgnn_emb/gsgnn_node_emb.py | 18 ++- 2 files changed, 24 insertions(+), 99 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index c671db9fc8..e9a8e0ddc0 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -24,7 +24,6 @@ from .graphstorm_infer import GSInferrer from ..model.utils import save_embeddings as save_gsgnn_embeddings from ..model.gnn import do_full_graph_inference, do_mini_batch_inference - from ..utils import sys_tracker, get_rank, get_world_size, barrier @@ -39,90 +38,6 @@ class GSgnnEmbGenInferer(GSInferrer): model : GSgnnNodeModel The GNN model with different task. """ - - def nc_emb(self, g, model, use_mini_batch_infer, fanout): - """ Embedding Generation for node task. - It will only generate embeddings on the target node type. - - Parameters - ---------- - g: GSgnnData - The GraphStorm dataset - model : GSgnnNodeModel - The GNN model on edge prediction/classification task - use_mini_batch_infer : bool - Whether to use mini-batch inference when computing node embeddings. - fanout: list of int - The fanout of each GNN layers used in inference. - """ - infer_ntypes = sorted(g.infer_idxs) - if use_mini_batch_infer: - embs = do_mini_batch_inference(model, g, fanout=fanout, - edge_mask=None, - task_tracker=self.task_tracker, - infer_ntypes=infer_ntypes) - else: - embs = do_full_graph_inference(model, g, fanout=fanout, - task_tracker=self.task_tracker) - return embs - - def ec_emb(self, g, model, use_mini_batch_infer, fanout): - """ Embedding Generation for edge task. 
- It will only generate embeddings on the target node type - defined in the target edge type. - - Parameters - ---------- - g: GSgnnData - The GraphStorm dataset - model : GSgnnNodeModel - The GNN model on edge prediction/classification task - use_mini_batch_infer : bool - Whether to use mini-batch inference when computing node embeddings. - fanout: list of int - The fanout of each GNN layers used in inference. - """ - infer_ntypes = set() - for etype in g.infer_idxs: - infer_ntypes.add(etype[0]) - infer_ntypes.add(etype[2]) - infer_ntypes = sorted(infer_ntypes) - - if use_mini_batch_infer: - embs = do_mini_batch_inference(model, g, fanout=fanout, - edge_mask=None, - task_tracker=self.task_tracker, - infer_ntypes=infer_ntypes) - else: - embs = do_full_graph_inference(model, g, fanout=fanout, - task_tracker=self.task_tracker) - return embs - - def lp_emb(self, g, model, use_mini_batch_infer, fanout): - """ Embedding Generation for link prediction task. - It will only generate embeddings on whole graph. - - Parameters - ---------- - g: GSgnnData - The GraphStorm dataset - model : GSgnnNodeModel - The GNN model on edge prediction/classification task - use_mini_batch_infer : bool - Whether to use mini-batch inference when computing node embeddings. - fanout: list of int - The fanout of each GNN layers used in inference. - """ - if use_mini_batch_infer: - embs = do_mini_batch_inference(model, g, fanout=fanout, - edge_mask=None, - task_tracker=self.task_tracker) - else: - embs = do_full_graph_inference(model, g, fanout=fanout, - edge_mask=None, - task_tracker=self.task_tracker) - return embs - def infer(self, data, task_type, save_embed_path, eval_fanout, use_mini_batch_infer=False, node_id_mapping_file=None, @@ -157,14 +72,28 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, self._model.eval() if task_type == BUILTIN_TASK_LINK_PREDICTION: - embs = self.lp_emb(data, self._model, use_mini_batch_infer, eval_fanout) + infer_ntypes = None elif task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - embs = self.nc_emb(data, self._model, use_mini_batch_infer, eval_fanout) + infer_ntypes = sorted(data.infer_idxs) elif task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - embs = self.ec_emb(data, self._model, use_mini_batch_infer, eval_fanout) + infer_ntypes = set() + for etype in data.infer_idxs: + infer_ntypes.add(etype[0]) + infer_ntypes.add(etype[2]) + infer_ntypes = sorted(infer_ntypes) else: raise TypeError("Not supported for task type: ", task_type) + if use_mini_batch_infer: + embs = do_mini_batch_inference(self._model, data, fanout=eval_fanout, + edge_mask=None, + task_tracker=self.task_tracker) + else: + embs = do_full_graph_inference(self._model, data, fanout=eval_fanout, + edge_mask=None, + task_tracker=self.task_tracker, + infer_ntypes=infer_ntypes) + if get_rank() == 0: logging.info("save embeddings to %s", save_embed_path) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 80580bf21d..fb35db24f0 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -47,21 +47,17 @@ def main(config_args): input_graph = GSgnnEdgeInferData(config.graph_name, config.part_config, eval_etypes=config.eval_etype, - node_feat_field=config.node_feat_name, - decoder_edge_feat=config.decoder_edge_feat) + node_feat_field=config.node_feat_name) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, 
BUILTIN_TASK_NODE_CLASSIFICATION}: input_graph = GSgnnNodeInferData(config.graph_name, - config.part_config, - eval_ntypes=config.target_ntype, - node_feat_field=config.node_feat_name, - label_field=config.label_field) + config.part_config, + eval_ntypes=config.target_ntype, + node_feat_field=config.node_feat_name) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: input_graph = GSgnnEdgeInferData(config.graph_name, - config.part_config, - eval_etypes=config.target_etype, - node_feat_field=config.node_feat_name, - label_field=config.label_field, - decoder_edge_feat=config.decoder_edge_feat) + config.part_config, + eval_etypes=config.target_etype, + node_feat_field=config.node_feat_name) else: raise TypeError("Not supported for task type: ", config.task_type) From 788297a8f09d48b841ca33cf3aeb6fdc305f6ffc Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 01:13:59 +0000 Subject: [PATCH 38/42] fix typo --- python/graphstorm/inference/emb_infer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index e9a8e0ddc0..5550af018b 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -87,12 +87,12 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, if use_mini_batch_infer: embs = do_mini_batch_inference(self._model, data, fanout=eval_fanout, edge_mask=None, - task_tracker=self.task_tracker) + task_tracker=self.task_tracker, + infer_ntypes=infer_ntypes) else: embs = do_full_graph_inference(self._model, data, fanout=eval_fanout, edge_mask=None, - task_tracker=self.task_tracker, - infer_ntypes=infer_ntypes) + task_tracker=self.task_tracker) if get_rank() == 0: logging.info("save embeddings to %s", save_embed_path) From 0cd315f9582f585baaf7154c4a972c710de1ace3 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 02:10:27 +0000 Subject: [PATCH 39/42] apply comment --- python/graphstorm/inference/emb_infer.py | 2 ++ python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 5550af018b..095abed516 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -54,6 +54,8 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, task_type must be one of graphstorm builtin task types save_embed_path : str The path where the GNN embeddings will be saved. + eval_fanout: list of int + The fanout of each GNN layers used in evaluation and inference. use_mini_batch_infer : bool Whether to use mini-batch inference when computing node embeddings. 
node_id_mapping_file: str diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index fb35db24f0..191471d9f6 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -44,17 +44,17 @@ def main(config_args): tracker.log_params(config.__dict__) if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - input_graph = GSgnnEdgeInferData(config.graph_name, + input_data = GSgnnEdgeInferData(config.graph_name, config.part_config, eval_etypes=config.eval_etype, node_feat_field=config.node_feat_name) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - input_graph = GSgnnNodeInferData(config.graph_name, + input_data = GSgnnNodeInferData(config.graph_name, config.part_config, eval_ntypes=config.target_ntype, node_feat_field=config.node_feat_name) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - input_graph = GSgnnEdgeInferData(config.graph_name, + input_data = GSgnnEdgeInferData(config.graph_name, config.part_config, eval_etypes=config.target_etype, node_feat_field=config.node_feat_name) @@ -69,11 +69,11 @@ def main(config_args): # load the model if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - model = gs.create_builtin_lp_gnn_model(input_graph.g, config, train_task=False) + model = gs.create_builtin_lp_gnn_model(input_data.g, config, train_task=False) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - model = gs.create_builtin_node_gnn_model(input_graph.g, config, train_task=False) + model = gs.create_builtin_node_gnn_model(input_data.g, config, train_task=False) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - model = gs.create_builtin_edge_gnn_model(input_graph.g, config, train_task=False) + model = gs.create_builtin_edge_gnn_model(input_data.g, config, train_task=False) else: raise TypeError("Not supported for task type: ", config.task_type) model.restore_model(config.restore_model_path, @@ -83,7 +83,7 @@ def main(config_args): emb_generator = GSgnnEmbGenInferer(model) emb_generator.setup_device(device=device) - emb_generator.infer(input_graph, config.task_type, + emb_generator.infer(input_data, config.task_type, save_embed_path=config.save_embed_path, eval_fanout=config.eval_fanout, use_mini_batch_infer=config.use_mini_batch_infer, From ada55aec85afb60c6df170d865d06da84525973b Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 04:52:04 +0000 Subject: [PATCH 40/42] test --- python/graphstorm/inference/emb_infer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 095abed516..ec67105507 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. - Inferrer wrapper for embedding generation. + Inferer wrapper for embedding generation. 
""" import logging from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, @@ -70,7 +70,7 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, assert save_embed_path is not None, \ "It requires save embed path for gs_gen_node_embedding" - sys_tracker.check('start embedding generation') + sys_tracker.check('start generating embedding') self._model.eval() if task_type == BUILTIN_TASK_LINK_PREDICTION: From 774187c5a3b71df4eb8e4d07919d884250df2724 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 17:28:43 +0000 Subject: [PATCH 41/42] add embs --- python/graphstorm/inference/emb_infer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index ec67105507..f9ac04c75f 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -73,6 +73,7 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, sys_tracker.check('start generating embedding') self._model.eval() + # infer ntypes must be sorted for node embedding saving if task_type == BUILTIN_TASK_LINK_PREDICTION: infer_ntypes = None elif task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: @@ -95,6 +96,8 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, embs = do_full_graph_inference(self._model, data, fanout=eval_fanout, edge_mask=None, task_tracker=self.task_tracker) + if not infer_ntypes: + embs = {ntype: embs[ntype] for ntype in infer_ntypes} if get_rank() == 0: logging.info("save embeddings to %s", save_embed_path) From 6906543ad3a42fd10513d16be0175809a4151455 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 18:52:46 +0000 Subject: [PATCH 42/42] fix typo --- python/graphstorm/inference/emb_infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index f9ac04c75f..413c43eebe 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -96,7 +96,7 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, embs = do_full_graph_inference(self._model, data, fanout=eval_fanout, edge_mask=None, task_tracker=self.task_tracker) - if not infer_ntypes: + if infer_ntypes: embs = {ntype: embs[ntype] for ntype in infer_ntypes} if get_rank() == 0: