From 1d6432f438dbdbdbcf72d25a1380cf3c420ca1cc Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 21:08:09 +0000 Subject: [PATCH 01/42] initial commit --- python/graphstorm/run/gs_gen_embedding.py | 0 python/graphstorm/run/gsgnn_emb/__init__.py | 0 python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/graphstorm/run/gs_gen_embedding.py create mode 100644 python/graphstorm/run/gsgnn_emb/__init__.py create mode 100644 python/graphstorm/run/gsgnn_emb/gsgnn_emb.py diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_embedding.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/graphstorm/run/gsgnn_emb/__init__.py b/python/graphstorm/run/gsgnn_emb/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py new file mode 100644 index 0000000000..e69de29bb2 From a841911e50d16c1530aee6e079a9bb30a529690b Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 21:54:47 +0000 Subject: [PATCH 02/42] first commit - no test --- python/graphstorm/run/gs_gen_embedding.py | 49 +++++++++ python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 105 +++++++++++++++++++ 2 files changed, 154 insertions(+) diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_embedding.py index e69de29bb2..a12f5a54bf 100644 --- a/python/graphstorm/run/gs_gen_embedding.py +++ b/python/graphstorm/run/gs_gen_embedding.py @@ -0,0 +1,49 @@ +""" + Copyright 2023 Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Entry point for running link prediction tasks. + + Run as: + python3 -m graphstorm.run.gs_gen_embedding +""" +import os +import logging + +from .launch import get_argument_parser +from .launch import check_input_arguments +from .launch import submit_jobs + +def main(): + """ Main function + """ + parser = get_argument_parser() + args, exec_script_args = parser.parse_known_args() + check_input_arguments(args) + + lib_dir = os.path.abspath(os.path.dirname(__file__)) + cmd = "gsgnn_emb/gsgnn_emb.py" + cmd_path = os.path.join(lib_dir, cmd) + exec_script_args = [cmd_path] + exec_script_args + + if "coo" not in args.graph_format: + args.graph_format = f"{args.graph_format},coo" + logging.debug("Automatically add COO format to graph formats for link prediction. 
" + \ + "New graph_format is %s", args.graph_format) + submit_jobs(args, exec_script_args) + +if __name__ == "__main__": + FMT = "%(asctime)s %(levelname)s %(message)s" + logging.basicConfig(format=FMT, level=logging.INFO) + main() \ No newline at end of file diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index e69de29bb2..af672e98ad 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -0,0 +1,105 @@ +""" + Copyright 2023 Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + GSgnn pure gpu link prediction. +""" + +import os + +import graphstorm as gs +from graphstorm.config import get_argument_parser +from graphstorm.config import GSConfig +from graphstorm.trainer import GSgnnLinkPredictionTrainer +from graphstorm.dataloading import GSgnnLPTrainData +from graphstorm.dataloading import GSgnnLinkPredictionDataLoader +from graphstorm.dataloading import GSgnnLPJointNegDataLoader +from graphstorm.dataloading import GSgnnLPLocalUniformNegDataLoader +from graphstorm.dataloading import GSgnnLPLocalJointNegDataLoader +from graphstorm.dataloading import GSgnnAllEtypeLPJointNegDataLoader +from graphstorm.dataloading import GSgnnAllEtypeLinkPredictionDataLoader +from graphstorm.dataloading import GSgnnLinkPredictionTestDataLoader +from graphstorm.dataloading import GSgnnLinkPredictionJointTestDataLoader +from graphstorm.dataloading import BUILTIN_LP_UNIFORM_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_JOINT_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_LOCALUNIFORM_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_LOCALJOINT_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_ALL_ETYPE_UNIFORM_NEG_SAMPLER +from graphstorm.dataloading import BUILTIN_LP_ALL_ETYPE_JOINT_NEG_SAMPLER +from graphstorm.dataloading import (BUILTIN_FAST_LP_UNIFORM_NEG_SAMPLER, + BUILTIN_FAST_LP_JOINT_NEG_SAMPLER, + BUILTIN_FAST_LP_LOCALUNIFORM_NEG_SAMPLER, + BUILTIN_FAST_LP_LOCALJOINT_NEG_SAMPLER) +from graphstorm.dataloading import (FastGSgnnLinkPredictionDataLoader, + FastGSgnnLPJointNegDataLoader, + FastGSgnnLPLocalUniformNegDataLoader, + FastGSgnnLPLocalJointNegDataLoader) +from graphstorm.model.utils import save_embeddings +from graphstorm.model import do_full_graph_inference +from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph + + +def main(config_args): + """ main function + """ + config = GSConfig(config_args) + config.verify_arguments(True) + + gs.initialize(ip_config=config.ip_config, backend=config.backend, + use_wholegraph=use_wholegraph(config.part_config)) + rt_profiler.init(config.profile_path, rank=gs.get_rank()) + sys_tracker.init(config.verbose, rank=gs.get_rank()) + device = setup_device(config.local_rank) + train_data = GSgnnLPTrainData(config.graph_name, + config.part_config, + train_etypes=config.train_etype, + eval_etypes=config.eval_etype, + node_feat_field=config.node_feat_name, + 
pos_graph_feat_field=config.lp_edge_weight_for_loss) + + # Preparing input layer for training or inference. + # The input layer can pre-compute node features in the preparing step if needed. + # For example pre-compute all BERT embeddings + assert (config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings") + assert (config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings") + + model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) + model_path = config.restore_model_path + # TODO(zhengda) the model path has to be in a shared filesystem. + model.restore_model(model_path) + # Preparing input layer for training or inference. + # The input layer can pre-compute node features in the preparing step if needed. + # For example pre-compute all BERT embeddings + model.prepare_input_encoder(train_data) + # TODO(zhengda) we may not want to only use training edges to generate GNN embeddings. + embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, + edge_mask="train_mask", task_tracker=tracker) + save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), + gs.get_world_size(), + device=device, + node_id_mapping_file=config.node_id_mapping_file) + + +def generate_parser(): + """ Generate an argument parser + """ + parser = get_argument_parser() + return parser + + +if __name__ == '__main__': + arg_parser = generate_parser() + + args = arg_parser.parse_args() + main(args) From 7607069f165378404ef26285999a0daf129d7195 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 22:13:22 +0000 Subject: [PATCH 03/42] remove unnecessary dependency --- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 22 -------------------- 1 file changed, 22 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index af672e98ad..016891ab53 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -23,28 +23,6 @@ from graphstorm.config import GSConfig from graphstorm.trainer import GSgnnLinkPredictionTrainer from graphstorm.dataloading import GSgnnLPTrainData -from graphstorm.dataloading import GSgnnLinkPredictionDataLoader -from graphstorm.dataloading import GSgnnLPJointNegDataLoader -from graphstorm.dataloading import GSgnnLPLocalUniformNegDataLoader -from graphstorm.dataloading import GSgnnLPLocalJointNegDataLoader -from graphstorm.dataloading import GSgnnAllEtypeLPJointNegDataLoader -from graphstorm.dataloading import GSgnnAllEtypeLinkPredictionDataLoader -from graphstorm.dataloading import GSgnnLinkPredictionTestDataLoader -from graphstorm.dataloading import GSgnnLinkPredictionJointTestDataLoader -from graphstorm.dataloading import BUILTIN_LP_UNIFORM_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_JOINT_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_LOCALUNIFORM_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_LOCALJOINT_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_ALL_ETYPE_UNIFORM_NEG_SAMPLER -from graphstorm.dataloading import BUILTIN_LP_ALL_ETYPE_JOINT_NEG_SAMPLER -from graphstorm.dataloading import (BUILTIN_FAST_LP_UNIFORM_NEG_SAMPLER, - BUILTIN_FAST_LP_JOINT_NEG_SAMPLER, - BUILTIN_FAST_LP_LOCALUNIFORM_NEG_SAMPLER, - BUILTIN_FAST_LP_LOCALJOINT_NEG_SAMPLER) -from graphstorm.dataloading import (FastGSgnnLinkPredictionDataLoader, - FastGSgnnLPJointNegDataLoader, - FastGSgnnLPLocalUniformNegDataLoader, - 
FastGSgnnLPLocalJointNegDataLoader) from graphstorm.model.utils import save_embeddings from graphstorm.model import do_full_graph_inference from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph From 4621c6e6b3396bb75520adac1eddfcfc61319516 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 22:17:50 +0000 Subject: [PATCH 04/42] change config --- python/graphstorm/run/gs_gen_embedding.py | 2 +- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_embedding.py index a12f5a54bf..4c95164b62 100644 --- a/python/graphstorm/run/gs_gen_embedding.py +++ b/python/graphstorm/run/gs_gen_embedding.py @@ -16,7 +16,7 @@ Entry point for running link prediction tasks. Run as: - python3 -m graphstorm.run.gs_gen_embedding + python3 -m graphstorm.run.gs_gen_embedding """ import os import logging diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index 016891ab53..08e483cf33 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. - GSgnn pure gpu link prediction. + GSgnn pure gpu generate embeddings. """ import os From d8c0309a8f42e6449bbd48394777d95b5d38cf23 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 3 Oct 2023 23:32:34 +0000 Subject: [PATCH 05/42] fix lint --- python/graphstorm/run/launch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/graphstorm/run/launch.py b/python/graphstorm/run/launch.py index b23d19a263..6d7e1b2b70 100644 --- a/python/graphstorm/run/launch.py +++ b/python/graphstorm/run/launch.py @@ -908,7 +908,7 @@ def check_input_arguments(args): ), "--num-servers must be a positive number." assert ( args.part_config is not None - ), "A user has to specify a partition configuration file with --part-onfig." + ), "A user has to specify a partition configuration file with --part-config." assert ( args.ip_config is not None ), "A user has to specify an IP configuration file with --ip-config." 
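
A note on the two assert statements PATCH 02 adds to gsgnn_emb.py above: wrapping the condition and the message in one pair of parentheses, as in assert (config.save_embed_path is not None, "..."), makes Python assert a two-element tuple, and a non-empty tuple is always truthy, so the check can never fail. PATCH 09 below rewrites both statements without the parentheses. A minimal runnable sketch of the difference (illustrative only, not part of the patch series):

    # PATCH 02 form: asserts a 2-tuple. Non-empty tuples are truthy, so
    # this "check" silently passes even though the path is missing.
    save_embed_path = None
    assert (save_embed_path is not None, "save embed path cannot be none")

    # PATCH 09 form: condition and message are the two operands of the
    # assert statement itself, so the check raises as intended.
    try:
        assert save_embed_path is not None, \
            "save embed path cannot be none"
    except AssertionError as err:
        print("corrected assert raised:", err)
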
From 323dbb0cfb198b19fa06abc784da03b003189b12 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 19:51:45 +0000 Subject: [PATCH 06/42] test --- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 43 ++++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index 08e483cf33..2426326e5e 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -21,12 +21,15 @@ import graphstorm as gs from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig -from graphstorm.trainer import GSgnnLinkPredictionTrainer -from graphstorm.dataloading import GSgnnLPTrainData +from graphstorm.dataloading import GSgnnLPTrainData, GSgnnNodeTrainData, GSgnnEdgeTrainData from graphstorm.model.utils import save_embeddings from graphstorm.model import do_full_graph_inference from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph - +from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, + BUILTIN_TASK_NODE_REGRESSION, + BUILTIN_TASK_EDGE_CLASSIFICATION, + BUILTIN_TASK_EDGE_REGRESSION, + BUILTIN_TASK_LINK_PREDICTION) def main(config_args): """ main function @@ -39,16 +42,32 @@ def main(config_args): rt_profiler.init(config.profile_path, rank=gs.get_rank()) sys_tracker.init(config.verbose, rank=gs.get_rank()) device = setup_device(config.local_rank) - train_data = GSgnnLPTrainData(config.graph_name, - config.part_config, - train_etypes=config.train_etype, - eval_etypes=config.eval_etype, - node_feat_field=config.node_feat_name, - pos_graph_feat_field=config.lp_edge_weight_for_loss) - # Preparing input layer for training or inference. - # The input layer can pre-compute node features in the preparing step if needed. - # For example pre-compute all BERT embeddings + if config.task_type == BUILTIN_TASK_LINK_PREDICTION: + train_data = GSgnnLPTrainData(config.graph_name, + config.part_config, + train_etypes=config.train_etype, + eval_etypes=config.eval_etype, + node_feat_field=config.node_feat_name, + pos_graph_feat_field=config.lp_edge_weight_for_loss) + elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION: + train_data = GSgnnNodeTrainData(config.graph_name, + config.part_config, + train_ntypes=config.target_ntype, + eval_ntypes=config.eval_target_ntype, + node_feat_field=config.node_feat_name, + label_field=config.label_field) + elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION: + train_data = GSgnnEdgeTrainData(config.graph_name, + config.part_config, + train_etypes=config.target_etype, + node_feat_field=config.node_feat_name, + label_field=config.label_field, + decoder_edge_feat=config.decoder_edge_feat) + else: + raise TypeError("Not supported for task type: ", config.task_type) + + # assert the setting for the graphstorm embedding generation. 
assert (config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings") assert (config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings") From 05807e36577de1fd8b3860c9d5ac03cab98b7213 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 21:03:37 +0000 Subject: [PATCH 07/42] fix save_embed path --- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 22 ++++++++++++++------ python/graphstorm/run/gsgnn_ep/gsgnn_ep.py | 3 ++- python/graphstorm/run/gsgnn_lp/gsgnn_lp.py | 3 ++- python/graphstorm/run/gsgnn_np/gsgnn_np.py | 3 ++- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index 2426326e5e..bcbb5436d9 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -42,8 +42,11 @@ def main(config_args): rt_profiler.init(config.profile_path, rank=gs.get_rank()) sys_tracker.init(config.verbose, rank=gs.get_rank()) device = setup_device(config.local_rank) + tracker = gs.create_builtin_task_tracker(config) + if gs.get_rank() == 0: + tracker.log_params(config.__dict__) - if config.task_type == BUILTIN_TASK_LINK_PREDICTION: + if config.task_type == BUILTIN_TASK_LINK_PREDICTION or not config.task_type: train_data = GSgnnLPTrainData(config.graph_name, config.part_config, train_etypes=config.train_etype, @@ -68,10 +71,16 @@ def main(config_args): raise TypeError("Not supported for task type: ", config.task_type) # assert the setting for the graphstorm embedding generation. - assert (config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings") - assert (config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings") + assert config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings" + assert config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings" + + if config.task_type == BUILTIN_TASK_LINK_PREDICTION or not config.task_type: + model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) + elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION: + model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False) + elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION: + model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False) - model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) model_path = config.restore_model_path # TODO(zhengda) the model path has to be in a shared filesystem. model.restore_model(model_path) @@ -81,11 +90,12 @@ def main(config_args): model.prepare_input_encoder(train_data) # TODO(zhengda) we may not want to only use training edges to generate GNN embeddings. 
embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, - edge_mask="train_mask", task_tracker=tracker) + task_tracker=tracker) save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), device=device, - node_id_mapping_file=config.node_id_mapping_file) + node_id_mapping_file=config.node_id_mapping_file, + save_embed_format=config.save_embed_format) def generate_parser(): diff --git a/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py b/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py index 7acb4cd134..dae6ea727d 100644 --- a/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py +++ b/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py @@ -156,7 +156,8 @@ def main(config_args): save_embeddings(config.save_embed_path, embs, gs.get_rank(), gs.get_world_size(), device=device, - node_id_mapping_file=config.node_id_mapping_file) + node_id_mapping_file=config.node_id_mapping_file, + save_embed_format=save_embed_format) def generate_parser(): """ Generate an argument parser diff --git a/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py b/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py index b89e568424..043e13c40f 100644 --- a/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py +++ b/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py @@ -202,7 +202,8 @@ def main(config_args): save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), device=device, - node_id_mapping_file=config.node_id_mapping_file) + node_id_mapping_file=config.node_id_mapping_file, + save_embed_format=save_embed_format) def generate_parser(): """ Generate an argument parser diff --git a/python/graphstorm/run/gsgnn_np/gsgnn_np.py b/python/graphstorm/run/gsgnn_np/gsgnn_np.py index 42f78fed99..2d7e8e240e 100644 --- a/python/graphstorm/run/gsgnn_np/gsgnn_np.py +++ b/python/graphstorm/run/gsgnn_np/gsgnn_np.py @@ -156,7 +156,8 @@ def main(config_args): save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), device=device, - node_id_mapping_file=config.node_id_mapping_file) + node_id_mapping_file=config.node_id_mapping_file, + save_embed_format=config.save_embed_format) def generate_parser(): """ Generate an argument parser From 6514afe650ba77c1783148ec77a6a618e2f8b57d Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 21:17:55 +0000 Subject: [PATCH 08/42] add test --- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 2 +- tests/end2end-tests/graphstorm-ec/mgpu_test.sh | 9 +++++++++ tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 9 +++++++++ tests/end2end-tests/graphstorm-nc/mgpu_test.sh | 9 +++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index bcbb5436d9..d99b38bc71 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -46,7 +46,7 @@ def main(config_args): if gs.get_rank() == 0: tracker.log_params(config.__dict__) - if config.task_type == BUILTIN_TASK_LINK_PREDICTION or not config.task_type: + if config.task_type == BUILTIN_TASK_LINK_PREDICTION: train_data = GSgnnLPTrainData(config.graph_name, config.part_config, train_etypes=config.train_etype, diff --git a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh index 71928c453a..332c622d0d 100644 --- a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh @@ -135,6 +135,15 @@ python3 check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/g 
error_and_exit $? +echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-embed-path /data/gsgnn_ec/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug + +error_and_exit $? + +python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/gsgnn_ec/save-emb/ + +error_and_exit $? + echo "**************dataset: Generated multilabel MovieLens EC, do inference on saved model without test_mask" python3 -m graphstorm.run.gs_edge_classification --inference --workspace $GS_HOME/inference_scripts/ep_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_ec_no_test_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec_infer.yaml --multilabel true --num-classes 6 --node-feat-name movie:title user:feat --use-mini-batch-infer false --save-embed-path /data/gsgnn_ec/infer-emb/ --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_ec/prediction/ --no-validation true diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 9ed2129c04..bf647348a0 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -192,6 +192,15 @@ then fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ +echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug + +error_and_exit $? + +python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_lp_ml_dot/emb/ --infer_embout /data/gsgnn_lp_ml_dot/save-emb/ + +error_and_exit $? + echo "**************dataset: Movielens, do mini-batch inference on saved model, decoder: dot" python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --use-mini-batch-infer true --logging-file /tmp/log.txt diff --git a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh index c77b4e0af6..393413f1cf 100644 --- a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh @@ -137,6 +137,15 @@ python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_n error_and_exit $? 
+echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on node classification" +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb --logging-file /tmp/train_log.txt --logging-level debug + +error_and_exit $? + +python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_nc_ml/emb/ --infer_embout /data/gsgnn_nc_ml/save-emb/ + +error_and_exit $? + echo "**************dataset: Movielens, do inference on saved model with mini-batch-infer without test mask" python3 -m graphstorm.run.gs_node_classification --inference --workspace $GS_HOME/inference_scripts/np_infer/ --num-trainers $NUM_INFERs --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_notest_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc_infer.yaml --use-mini-batch-infer true --save-embed-path /data/gsgnn_nc_ml/mini-infer-emb/ --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-prediction-path /data/gsgnn_nc_ml/prediction/ --no-validation true From 249df54361afefde28ce0bcf1c47a25784a45630 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 21:44:22 +0000 Subject: [PATCH 09/42] fix lint --- python/graphstorm/run/gs_gen_embedding.py | 3 ++- python/graphstorm/run/gsgnn_emb/gsgnn_emb.py | 9 ++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_embedding.py index 4c95164b62..4666e85470 100644 --- a/python/graphstorm/run/gs_gen_embedding.py +++ b/python/graphstorm/run/gs_gen_embedding.py @@ -46,4 +46,5 @@ def main(): if __name__ == "__main__": FMT = "%(asctime)s %(levelname)s %(message)s" logging.basicConfig(format=FMT, level=logging.INFO) - main() \ No newline at end of file + main() + \ No newline at end of file diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py index d99b38bc71..b3f3268cd9 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py @@ -15,9 +15,6 @@ GSgnn pure gpu generate embeddings. """ - -import os - import graphstorm as gs from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig @@ -71,8 +68,10 @@ def main(config_args): raise TypeError("Not supported for task type: ", config.task_type) # assert the setting for the graphstorm embedding generation. 
- assert config.save_embed_path is not None, "save embeded path cannot be none for gs_gen_embeddings" - assert config.restore_model_path is not None, "restore model path cannot be none for gs_gen_embeddings" + assert config.save_embed_path is not None, \ + "save embeded path cannot be none for gs_gen_embeddings" + assert config.restore_model_path is not None, \ + "restore model path cannot be none for gs_gen_embeddings" if config.task_type == BUILTIN_TASK_LINK_PREDICTION or not config.task_type: model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) From c9de5e7660470dc3d33b7283915d57b640929174 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 22:07:52 +0000 Subject: [PATCH 10/42] temp fix --- .github/workflow_scripts/lint_check.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflow_scripts/lint_check.sh b/.github/workflow_scripts/lint_check.sh index 3eecaf93c3..556038a408 100644 --- a/.github/workflow_scripts/lint_check.sh +++ b/.github/workflow_scripts/lint_check.sh @@ -4,6 +4,8 @@ cd ../../ set -ex python3 -m pip install --upgrade prospector pip +pip3 uninstall -y pylint +yes | pip3 install pylint==2.8.3 FORCE_CUDA=1 python3 -m pip install -e '.[test]' --no-build-isolation pylint --rcfile=./tests/lint/pylintrc ./python/graphstorm/data/*.py pylint --rcfile=./tests/lint/pylintrc ./python/graphstorm/dataloading/ From 2b25576ff6cdf31b0795d1bca8a62d3c8346f539 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 22:21:21 +0000 Subject: [PATCH 11/42] fix --- .github/workflow_scripts/lint_check.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflow_scripts/lint_check.sh b/.github/workflow_scripts/lint_check.sh index 556038a408..3eecaf93c3 100644 --- a/.github/workflow_scripts/lint_check.sh +++ b/.github/workflow_scripts/lint_check.sh @@ -4,8 +4,6 @@ cd ../../ set -ex python3 -m pip install --upgrade prospector pip -pip3 uninstall -y pylint -yes | pip3 install pylint==2.8.3 FORCE_CUDA=1 python3 -m pip install -e '.[test]' --no-build-isolation pylint --rcfile=./tests/lint/pylintrc ./python/graphstorm/data/*.py pylint --rcfile=./tests/lint/pylintrc ./python/graphstorm/dataloading/ From 63fbb6fe9698c83c3b542bbe8f54cd65f526d5fb Mon Sep 17 00:00:00 2001 From: JalenCato Date: Wed, 4 Oct 2023 23:13:38 +0000 Subject: [PATCH 12/42] fix typo --- python/graphstorm/run/gsgnn_ep/gsgnn_ep.py | 2 +- python/graphstorm/run/gsgnn_lp/gsgnn_lp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py b/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py index dae6ea727d..20e3bbb745 100644 --- a/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py +++ b/python/graphstorm/run/gsgnn_ep/gsgnn_ep.py @@ -157,7 +157,7 @@ def main(config_args): gs.get_world_size(), device=device, node_id_mapping_file=config.node_id_mapping_file, - save_embed_format=save_embed_format) + save_embed_format=config.save_embed_format) def generate_parser(): """ Generate an argument parser diff --git a/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py b/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py index 043e13c40f..6ce7c76e52 100644 --- a/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py +++ b/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py @@ -203,7 +203,7 @@ def main(config_args): gs.get_world_size(), device=device, node_id_mapping_file=config.node_id_mapping_file, - save_embed_format=save_embed_format) + save_embed_format=config.save_embed_format) def generate_parser(): """ Generate an argument parser From b2ac45b2718ce2eb9f42b98233fc9a6236ffd14e 
Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 01:00:16 +0000 Subject: [PATCH 13/42] fix test --- tests/end2end-tests/graphstorm-ec/mgpu_test.sh | 4 ++-- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh index 332c622d0d..11aac6e0ee 100644 --- a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh @@ -135,8 +135,8 @@ python3 check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/g error_and_exit $? -echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-embed-path /data/gsgnn_ec/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on edge classification" +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-embed-path /data/gsgnn_ec/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index bf647348a0..48814ae29b 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? 
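
A similar truthiness pitfall sits in the task dispatch PATCH 06 added to gsgnn_emb.py: a condition written as config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION parses as (a == b) or c, and because the task-type constants are non-empty strings the right-hand operand is always truthy, so the elif matches every task type. PATCH 20 below replaces these tests with set membership. A short sketch of the two forms (the constant values here are stand-ins, not necessarily the real GraphStorm values):

    NODE_CLS = "node_classification"
    NODE_REG = "node_regression"
    task_type = "link_prediction"

    # PATCH 06 form: parsed as (task_type == NODE_REG) or NODE_CLS.
    # NODE_CLS is a non-empty string, hence truthy, so the branch is
    # taken for every task type, including link prediction.
    if task_type == NODE_REG or NODE_CLS:
        print("always taken")

    # PATCH 20 form: membership test against a set of task types,
    # true only for the two node tasks.
    if task_type in {NODE_REG, NODE_CLS}:
        print("taken only for the node tasks")
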
From 699dafad8cb2b05fbd04f6b576b3708def4dcb31 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 04:09:27 +0000 Subject: [PATCH 14/42] fix test --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 48814ae29b..44bcd7428a 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --save-model-path /data/gsgnn_lp_ml_distmult/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_distmult/save-emb/ --lp-decoder-type distmult --train-etype user,rating,movie movie,rating-rev,user --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? From c8991b00aec62c9a8e84813f7a7d09bc809ed12d Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 05:49:54 +0000 Subject: [PATCH 15/42] fix --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 44bcd7428a..7aa38c11d5 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --save-model-path /data/gsgnn_lp_ml_distmult/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_distmult/save-emb/ --lp-decoder-type distmult --train-etype user,rating,movie movie,rating-rev,user --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --restore-model-path /data/gsgnn_lp_ml_distmult/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_distmult/save-emb/ --lp-decoder-type distmult --train-etype user,rating,movie 
movie,rating-rev,user --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? From 3442970b4b9b19694e21456d95574d3b3e4ce4d4 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 17:27:54 +0000 Subject: [PATCH 16/42] change test --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 7aa38c11d5..cf4f061d93 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --restore-model-path /data/gsgnn_lp_ml_distmult/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_distmult/save-emb/ --lp-decoder-type distmult --train-etype user,rating,movie movie,rating-rev,user --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? 
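
The commands being adjusted in these test patches all go through the launcher added in PATCH 02: graphstorm.run.gs_gen_embedding parses only the launcher-level flags, resolves the worker script, and forwards every flag it does not recognize (--cf, --restore-model-path, --save-embed-path, and so on) to that script. A condensed sketch of the pattern (simplified and illustrative; the real flag set and job submission live in graphstorm.run.launch):

    import argparse
    import os

    def build_worker_command(argv):
        parser = argparse.ArgumentParser()
        parser.add_argument("--workspace")  # one of many launcher-side flags
        # parse_known_args splits launcher flags from worker flags; in the
        # real launcher the parsed flags go on to drive submit_jobs.
        launcher_args, exec_script_args = parser.parse_known_args(argv)

        lib_dir = os.path.abspath(os.path.dirname(__file__))
        worker = os.path.join(lib_dir, "gsgnn_emb", "gsgnn_emb.py")
        # The worker script becomes the submitted command; the unparsed
        # flags ride along and are parsed again by the worker itself.
        return [worker] + exec_script_args
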
From 273846cd14bdb288bf6c5f69032d17aa92a49075 Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Thu, 5 Oct 2023 17:48:59 +0000
Subject: [PATCH 17/42] rename the gs_gen_embedding to gs_gen_node_embedding

---
 .../run/{gs_gen_embedding.py => gs_gen_node_embedding.py}    | 2 +-
 .../run/gsgnn_emb/{gsgnn_emb.py => gsgnn_node_emb.py}        | 6 +++---
 tests/end2end-tests/graphstorm-ec/mgpu_test.sh               | 4 ++--
 tests/end2end-tests/graphstorm-lp/mgpu_test.sh               | 2 +-
 tests/end2end-tests/graphstorm-nc/mgpu_test.sh               | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)
 rename python/graphstorm/run/{gs_gen_embedding.py => gs_gen_node_embedding.py} (97%)
 rename python/graphstorm/run/gsgnn_emb/{gsgnn_emb.py => gsgnn_node_emb.py} (96%)

diff --git a/python/graphstorm/run/gs_gen_embedding.py b/python/graphstorm/run/gs_gen_node_embedding.py
similarity index 97%
rename from python/graphstorm/run/gs_gen_embedding.py
rename to python/graphstorm/run/gs_gen_node_embedding.py
index 4666e85470..abe21ca1bf 100644
--- a/python/graphstorm/run/gs_gen_embedding.py
+++ b/python/graphstorm/run/gs_gen_node_embedding.py
@@ -33,7 +33,7 @@ def main():
     check_input_arguments(args)

     lib_dir = os.path.abspath(os.path.dirname(__file__))
-    cmd = "gsgnn_emb/gsgnn_emb.py"
+    cmd = "gsgnn_emb/gsgnn_node_emb.py"
     cmd_path = os.path.join(lib_dir, cmd)
     exec_script_args = [cmd_path] + exec_script_args

diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py
similarity index 96%
rename from python/graphstorm/run/gsgnn_emb/gsgnn_emb.py
rename to python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py
index b3f3268cd9..12aff58d0f 100644
--- a/python/graphstorm/run/gsgnn_emb/gsgnn_emb.py
+++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py
@@ -69,11 +69,11 @@ def main(config_args):

     # assert the setting for the graphstorm embedding generation.
     assert config.save_embed_path is not None, \
-        "save embeded path cannot be none for gs_gen_embeddings"
+        "save embeded path cannot be none for gs_gen_node_embeddings"
     assert config.restore_model_path is not None, \
-        "restore model path cannot be none for gs_gen_embeddings"
+        "restore model path cannot be none for gs_gen_node_embeddings"

     if config.task_type == BUILTIN_TASK_LINK_PREDICTION:
         model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False)
     elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION:
         model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False)
     elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION:
         model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False)

diff --git a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh
index 11aac6e0ee..9690636877 100644
--- a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh
+++ b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh
@@ -135,8 +135,8 @@ python3 check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/g

 error_and_exit $?
-echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on edge classification" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --save-embed-path /data/gsgnn_ec/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +echo "**************dataset: Movielens, use gen_node_embeddings to generate embeddings on edge classification" +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --multilabel true --num-classes 6 --node-feat-name movie:title user:feat --save-embed-path /data/gsgnn_ec/save-emb/ --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index cf4f061d93..a0f67489b8 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? diff --git a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh index 393413f1cf..eeed50444e 100644 --- a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh @@ -138,7 +138,7 @@ python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_n error_and_exit $? 
echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on node classification" -python3 -m graphstorm.run.gs_gen_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? From ce05d942e152e98713d0265a5d25263e4f6d3603 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 19:10:45 +0000 Subject: [PATCH 18/42] fix test --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index a0f67489b8..f97f003c4e 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,7 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? From 6196d194031aac9bb27c8dd33c1d7bf73dc67e56 Mon Sep 17 00:00:00 2001 From: jalencato Date: Thu, 5 Oct 2023 13:15:13 -0700 Subject: [PATCH 19/42] Update mgpu_test.sh --- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index f97f003c4e..4f9b55fd36 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -197,10 +197,6 @@ python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_sc error_and_exit $? 
-python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_lp_ml_dot/emb/ --infer_embout /data/gsgnn_lp_ml_dot/save-emb/ - -error_and_exit $? - echo "**************dataset: Movielens, do mini-batch inference on saved model, decoder: dot" python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --use-mini-batch-infer true --logging-file /tmp/log.txt From 264c80e0546b574844e6e7a1cfb81eda21ac0e3d Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 22:09:52 +0000 Subject: [PATCH 20/42] fix bug --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 8 ++++---- tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 12aff58d0f..71b49a6f2a 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -50,14 +50,14 @@ def main(config_args): eval_etypes=config.eval_etype, node_feat_field=config.node_feat_name, pos_graph_feat_field=config.lp_edge_weight_for_loss) - elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION: + elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: train_data = GSgnnNodeTrainData(config.graph_name, config.part_config, train_ntypes=config.target_ntype, eval_ntypes=config.eval_target_ntype, node_feat_field=config.node_feat_name, label_field=config.label_field) - elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION: + elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: train_data = GSgnnEdgeTrainData(config.graph_name, config.part_config, train_etypes=config.target_etype, @@ -75,9 +75,9 @@ def main(config_args): if config.task_type == BUILTIN_TASK_LINK_PREDICTION: model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) - elif config.task_type == BUILTIN_TASK_NODE_REGRESSION or BUILTIN_TASK_NODE_CLASSIFICATION: + elif config.task_type == {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False) - elif config.task_type == BUILTIN_TASK_EDGE_CLASSIFICATION or BUILTIN_TASK_EDGE_REGRESSION: + elif config.task_type == {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False) model_path = config.restore_model_path diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index 4f9b55fd36..f8b141dbf8 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -193,7 +193,11 @@ fi rm -fr /data/gsgnn_lp_ml_dot/infer-emb/ echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on link prediction" -python3 -m graphstorm.run.gs_gen_node_embedding --workspace 
$GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-node-embeddings true --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --eval-batch-size 1024 --use-node-embeddings true --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --save-embed-path /data/gsgnn_lp_ml_dot/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug + +error_and_exit $? + +python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_lp_ml_dot/emb/ --infer_embout /data/gsgnn_lp_ml_dot/save-emb/ --link_prediction error_and_exit $? From 96bdaf849d229e3596348e4a4fab00768f449c27 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 22:15:17 +0000 Subject: [PATCH 21/42] fix --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 71b49a6f2a..88808caa90 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -75,9 +75,9 @@ def main(config_args): if config.task_type == BUILTIN_TASK_LINK_PREDICTION: model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) - elif config.task_type == {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False) - elif config.task_type == {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False) model_path = config.restore_model_path From ebe0d4b8f954722c3de52a8a222bb99db908dead Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 5 Oct 2023 23:16:36 +0000 Subject: [PATCH 22/42] fix embedding bug on link prediction --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 88808caa90..33af8776a3 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -88,8 +88,12 @@ def main(config_args): # For example pre-compute all BERT embeddings model.prepare_input_encoder(train_data) # TODO(zhengda) we may not want to only use training edges to generate GNN embeddings. 
- embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, - task_tracker=tracker) + if config.task_type == BUILTIN_TASK_LINK_PREDICTION: + embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, + edge_mask="train_mask", task_tracker=tracker) + else: + embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, + task_tracker=tracker) save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), device=device, From 70feebdc045627da5d86794269faac9ddc185f61 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 6 Oct 2023 17:29:56 +0000 Subject: [PATCH 23/42] use entire graph for embedding generation --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 10 ++++------ tests/end2end-tests/graphstorm-lp/mgpu_test.sh | 4 ---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 33af8776a3..e09b3a4f44 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -87,12 +87,10 @@ def main(config_args): # The input layer can pre-compute node features in the preparing step if needed. # For example pre-compute all BERT embeddings model.prepare_input_encoder(train_data) - # TODO(zhengda) we may not want to only use training edges to generate GNN embeddings. - if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, - edge_mask="train_mask", task_tracker=tracker) - else: - embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, + # Runjie: To generate embeddings, it might be preferable to utilize the + # entire graph instead of just the training graph. + # Additionally, generating embeddings will not result in any edge leakage issues. + embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, task_tracker=tracker) save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), gs.get_world_size(), diff --git a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh index f8b141dbf8..5484541e0e 100644 --- a/tests/end2end-tests/graphstorm-lp/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-lp/mgpu_test.sh @@ -197,10 +197,6 @@ python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_sc error_and_exit $? -python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_lp_ml_dot/emb/ --infer_embout /data/gsgnn_lp_ml_dot/save-emb/ --link_prediction - -error_and_exit $? 
- echo "**************dataset: Movielens, do mini-batch inference on saved model, decoder: dot" python3 -m graphstorm.run.gs_link_prediction --inference --workspace $GS_HOME/inference_scripts/lp_infer --num-trainers $NUM_INFO_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp_infer.yaml --fanout '10,15' --num-layers 2 --use-mini-batch-infer false --use-node-embeddings true --eval-batch-size 1024 --save-embed-path /data/gsgnn_lp_ml_dot/infer-emb/ --restore-model-path /data/gsgnn_lp_ml_dot/epoch-$best_epoch_dot/ --use-mini-batch-infer true --logging-file /tmp/log.txt From a38df650213b78116faa75b71b4e1b4a4551a7e9 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 19:01:13 +0000 Subject: [PATCH 24/42] fix whole code structure --- python/graphstorm/inference/__init__.py | 1 + python/graphstorm/inference/emb_infer.py | 159 ++++++++++++++++++ .../graphstorm/run/gs_gen_node_embedding.py | 8 +- .../run/gsgnn_emb/gsgnn_node_emb.py | 105 +++++++----- 4 files changed, 226 insertions(+), 47 deletions(-) create mode 100644 python/graphstorm/inference/emb_infer.py diff --git a/python/graphstorm/inference/__init__.py b/python/graphstorm/inference/__init__.py index aad67b6d78..92b4f7f965 100644 --- a/python/graphstorm/inference/__init__.py +++ b/python/graphstorm/inference/__init__.py @@ -19,3 +19,4 @@ from .lp_infer import GSgnnLinkPredictionInferrer from .np_infer import GSgnnNodePredictionInferrer from .ep_infer import GSgnnEdgePredictionInferrer +from .emb_infer import GSgnnEmbGenInferer diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py new file mode 100644 index 0000000000..f389b77419 --- /dev/null +++ b/python/graphstorm/inference/emb_infer.py @@ -0,0 +1,159 @@ +""" + Copyright 2023 Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Inferrer wrapper for embedding generation. +""" +import time + +import logging +from .graphstorm_infer import GSInferrer +from ..model.utils import save_embeddings as save_gsgnn_embeddings +from ..model.utils import save_relation_embeddings +from ..model.edge_decoder import LinkPredictDistMultDecoder +from ..model.gnn import do_full_graph_inference, do_mini_batch_inference +from ..model.node_gnn import node_mini_batch_gnn_predict + +from ..utils import sys_tracker, get_rank, get_world_size, barrier, create_dist_tensor +from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, + BUILTIN_TASK_NODE_REGRESSION, + BUILTIN_TASK_EDGE_CLASSIFICATION, + BUILTIN_TASK_EDGE_REGRESSION, + BUILTIN_TASK_LINK_PREDICTION) + +class GSgnnEmbGenInferer(GSInferrer): + """ Embedding Generation inffer inferrer. + + This is a high-level inferrer wrapper that can be used directly + to generate embedding for inferer. + + Parameters + ---------- + model : GSgnnNodeModel + The GNN model with different task. + """ + + # TODO(zhengda) We only support full-graph inference for now. 
+ def infer(self, data, task_type, save_embed_path, loader, + use_mini_batch_infer=False, + node_id_mapping_file=None, + return_proba=True, + save_embed_format="pytorch"): + """ Do Embedding Generating + + Generate node embeddings and save. + + Parameters + ---------- + data: GSgnnData + The GraphStorm dataset + task_type : str + task_type must be one of graphstorm builtin task types + save_embed_path : str + The path where the GNN embeddings will be saved. + loader : GSEdgeDataLoader/GSNodeDataLoader + The mini-batch sampler for built-in graphstorm task. + edge_mask_for_gnn_embeddings : str + The mask that indicates the edges used for computing GNN embeddings. By default, + the dataloader uses the edges in the training graphs to compute GNN embeddings to + avoid information leak for link prediction. + use_mini_batch_infer : bool + Whether or not to use mini-batch inference when computing node embedings. + node_id_mapping_file: str + Path to the file storing node id mapping generated by the + graph partition algorithm. + edge_id_mapping_file: str + Path to the file storing edge id mapping generated by the + graph partition algorithm. + save_embed_format : str + Specify the format of saved embeddings. + """ + + device = self.device + # deal with uninitialized case first + if use_mini_batch_infer and \ + task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + assert save_embed_path is None, \ + "Unable to save the node embeddings when using mini batch inference " \ + "when doing edge task." \ + "It is not guaranteed that mini-batch prediction will cover all the nodes." + + if task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + assert len(loader.data.eval_ntypes) == 1, \ + "GraphStorm only support single target node type for training and inference" + + assert save_embed_path is not None + + sys_tracker.check('start embedding generation') + self._model.eval() + + if task_type == BUILTIN_TASK_LINK_PREDICTION: + # for embedding generation, it is preferred to use whole graph + if use_mini_batch_infer: + embs = do_mini_batch_inference(self._model, data, fanout=loader.fanout, + edge_mask=None, + task_tracker=self.task_tracker) + else: + embs = do_full_graph_inference(self._model, data, fanout=loader.fanout, + edge_mask=None, + task_tracker=self.task_tracker) + elif task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + # only generate embeddings on the target node type + ntype = loader.data.eval_ntypes[0] + if use_mini_batch_infer: + inter_embs = node_mini_batch_gnn_predict(self._model, loader, return_proba, + return_label=False)[1] + inter_embs = {ntype: inter_embs[ntype]} if isinstance(inter_embs, dict) \ + else {ntype: inter_embs} + g = loader.data.g + ntype_emb = create_dist_tensor((g.num_nodes(ntype), inter_embs[ntype].shape[1]), + dtype=inter_embs[ntype].dtype, name=f'gen-emb-{ntype}', + part_policy=g.get_node_partition_policy(ntype), + persistent=True) + ntype_emb[loader.target_nidx[ntype]] = inter_embs[ntype] + embs = {ntype: ntype_emb} + else: + embs = do_full_graph_inference(self._model, data, fanout=loader.fanout, + task_tracker=self.task_tracker) + ntype_emb = embs[ntype] + embs = {ntype: ntype_emb} + elif task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + embs = do_full_graph_inference(self._model, loader.data, fanout=loader.fanout, + task_tracker=self.task_tracker) + target_ntypes = set() + for etype in loader.data.eval_etypes: + target_ntypes.add(etype[0]) + 
target_ntypes.add(etype[2]) + + embs = {ntype: embs[ntype] for ntype in sorted(target_ntypes)} + else: + raise TypeError("Not supported for task type: ", task_type) + + if get_rank() == 0: + logging.info("save embeddings to %s", save_embed_path) + + save_gsgnn_embeddings(save_embed_path, embs, get_rank(), + get_world_size(), + device=device, + node_id_mapping_file=node_id_mapping_file, + save_embed_format=save_embed_format) + barrier() + sys_tracker.check('save embeddings') + + # save relation embedding if any + if get_rank() == 0: + decoder = self._model.decoder + if isinstance(decoder, LinkPredictDistMultDecoder): + if save_embed_path is not None: + save_relation_embeddings(save_embed_path, decoder) diff --git a/python/graphstorm/run/gs_gen_node_embedding.py b/python/graphstorm/run/gs_gen_node_embedding.py index abe21ca1bf..f879cfabf3 100644 --- a/python/graphstorm/run/gs_gen_node_embedding.py +++ b/python/graphstorm/run/gs_gen_node_embedding.py @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. - Entry point for running link prediction tasks. + Entry point for running embedding generating tasks. Run as: - python3 -m graphstorm.run.gs_gen_embedding + python3 -m graphstorm.run.gs_gen_node_embedding """ import os import logging @@ -37,10 +37,6 @@ def main(): cmd_path = os.path.join(lib_dir, cmd) exec_script_args = [cmd_path] + exec_script_args - if "coo" not in args.graph_format: - args.graph_format = f"{args.graph_format},coo" - logging.debug("Automatically add COO format to graph formats for link prediction. " + \ - "New graph_format is %s", args.graph_format) submit_jobs(args, exec_script_args) if __name__ == "__main__": diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index e09b3a4f44..f00678e099 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -18,15 +18,15 @@ import graphstorm as gs from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig -from graphstorm.dataloading import GSgnnLPTrainData, GSgnnNodeTrainData, GSgnnEdgeTrainData -from graphstorm.model.utils import save_embeddings -from graphstorm.model import do_full_graph_inference +from graphstorm.dataloading import (GSgnnEdgeInferData, GSgnnNodeInferData, + GSgnnEdgeDataLoader, GSgnnNodeDataLoader) from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION, BUILTIN_TASK_LINK_PREDICTION) +from graphstorm.inference import GSgnnEmbGenInferer def main(config_args): """ main function @@ -44,26 +44,24 @@ def main(config_args): tracker.log_params(config.__dict__) if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - train_data = GSgnnLPTrainData(config.graph_name, - config.part_config, - train_etypes=config.train_etype, - eval_etypes=config.eval_etype, - node_feat_field=config.node_feat_name, - pos_graph_feat_field=config.lp_edge_weight_for_loss) - elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - train_data = GSgnnNodeTrainData(config.graph_name, - config.part_config, - train_ntypes=config.target_ntype, - eval_ntypes=config.eval_target_ntype, - node_feat_field=config.node_feat_name, - label_field=config.label_field) - elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, 
BUILTIN_TASK_EDGE_REGRESSION}: - train_data = GSgnnEdgeTrainData(config.graph_name, + input_graph = GSgnnEdgeInferData(config.graph_name, config.part_config, - train_etypes=config.target_etype, + eval_etypes=config.eval_etype, node_feat_field=config.node_feat_name, - label_field=config.label_field, decoder_edge_feat=config.decoder_edge_feat) + elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + input_graph = GSgnnNodeInferData(config.graph_name, + config.part_config, + eval_ntypes=config.target_ntype, + node_feat_field=config.node_feat_name, + label_field=config.label_field) + elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + input_graph = GSgnnEdgeInferData(config.graph_name, + config.part_config, + eval_etypes=config.target_etype, + node_feat_field=config.node_feat_name, + label_field=config.label_field, + decoder_edge_feat=config.decoder_edge_feat) else: raise TypeError("Not supported for task type: ", config.task_type) @@ -74,30 +72,55 @@ def main(config_args): "restore model path cannot be none for gs_gen_node_embeddings" if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - model = gs.create_builtin_lp_gnn_model(train_data.g, config, train_task=False) + model = gs.create_builtin_lp_gnn_model(input_graph.g, config, train_task=False) + elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: + model = gs.create_builtin_node_gnn_model(input_graph.g, config, train_task=False) + elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + model = gs.create_builtin_edge_gnn_model(input_graph.g, config, train_task=False) + else: + raise TypeError("Not supported for task type: ", config.task_type) + + if config.task_type == BUILTIN_TASK_LINK_PREDICTION: + if config.eval_negative_sampler == BUILTIN_LP_UNIFORM_NEG_SAMPLER: + link_prediction_loader = GSgnnLinkPredictionTestDataLoader + elif config.eval_negative_sampler == BUILTIN_LP_JOINT_NEG_SAMPLER: + link_prediction_loader = GSgnnLinkPredictionJointTestDataLoader + else: + raise ValueError('Unknown test negative sampler.' + 'Supported test negative samplers include ' + f'[{BUILTIN_LP_UNIFORM_NEG_SAMPLER}, {BUILTIN_LP_JOINT_NEG_SAMPLER}]') + + dataloader = link_prediction_loader(input_graph, input_graph.test_idxs, + batch_size=config.eval_batch_size, + num_negative_edges=config.num_negative_edges_eval, + fanout=config.eval_fanout) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - model = gs.create_builtin_node_gnn_model(train_data.g, config, train_task=False) + dataloader = GSgnnNodeDataLoader(input_graph, input_graph.infer_idxs, fanout=config.eval_fanout, + batch_size=config.eval_batch_size, device=device, + train_task=False, + construct_feat_ntype=config.construct_feat_ntype, + construct_feat_fanout=config.construct_feat_fanout) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - model = gs.create_builtin_edge_gnn_model(train_data.g, config, train_task=False) - - model_path = config.restore_model_path - # TODO(zhengda) the model path has to be in a shared filesystem. - model.restore_model(model_path) - # Preparing input layer for training or inference. - # The input layer can pre-compute node features in the preparing step if needed. 
- # For example pre-compute all BERT embeddings - model.prepare_input_encoder(train_data) - # Runjie: To generate embeddings, it might be preferable to utilize the - # entire graph instead of just the training graph. - # Additionally, generating embeddings will not result in any edge leakage issues. - embeddings = do_full_graph_inference(model, train_data, fanout=config.eval_fanout, - task_tracker=tracker) - save_embeddings(config.save_embed_path, embeddings, gs.get_rank(), - gs.get_world_size(), - device=device, - node_id_mapping_file=config.node_id_mapping_file, - save_embed_format=config.save_embed_format) + dataloader = GSgnnEdgeDataLoader(input_graph, input_graph.infer_idxs, fanout=config.eval_fanout, + batch_size=config.eval_batch_size, + device=device, train_task=False, + reverse_edge_types_map=config.reverse_edge_types_map, + remove_target_edge_type=config.remove_target_edge_type, + construct_feat_ntype=config.construct_feat_ntype, + construct_feat_fanout=config.construct_feat_fanout) + else: + raise TypeError("Not supported for task type: ", config.task_type) + + emb_generator = GSgnnEmbGenInferer(model) + emb_generator.setup_device(device=device) + emb_generator.infer(input_graph, config.task_type, + save_embed_path=config.save_embed_path, + loader=dataloader, + use_mini_batch_infer=config.use_mini_batch_infer, + node_id_mapping_file=config.node_id_mapping_file, + return_proba=config.return_proba, + save_embed_format=config.save_embed_format) def generate_parser(): """ Generate an argument parser From f642cf042b8253d9f39e4b1e8627de2e4f0797b4 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 19:21:37 +0000 Subject: [PATCH 25/42] fix import bug --- python/graphstorm/inference/emb_infer.py | 15 +++++++-------- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 6 +++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index f389b77419..93f466b9ac 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -15,9 +15,12 @@ Inferrer wrapper for embedding generation. """ -import time - import logging +from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, + BUILTIN_TASK_NODE_REGRESSION, + BUILTIN_TASK_EDGE_CLASSIFICATION, + BUILTIN_TASK_EDGE_REGRESSION, + BUILTIN_TASK_LINK_PREDICTION) from .graphstorm_infer import GSInferrer from ..model.utils import save_embeddings as save_gsgnn_embeddings from ..model.utils import save_relation_embeddings @@ -26,11 +29,7 @@ from ..model.node_gnn import node_mini_batch_gnn_predict from ..utils import sys_tracker, get_rank, get_world_size, barrier, create_dist_tensor -from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, - BUILTIN_TASK_NODE_REGRESSION, - BUILTIN_TASK_EDGE_CLASSIFICATION, - BUILTIN_TASK_EDGE_REGRESSION, - BUILTIN_TASK_LINK_PREDICTION) + class GSgnnEmbGenInferer(GSInferrer): """ Embedding Generation inffer inferrer. 
@@ -99,7 +98,7 @@ def infer(self, data, task_type, save_embed_path, loader, self._model.eval() if task_type == BUILTIN_TASK_LINK_PREDICTION: - # for embedding generation, it is preferred to use whole graph + # for embedding generation, it is preferred to use full graph if use_mini_batch_infer: embs = do_mini_batch_inference(self._model, data, fanout=loader.fanout, edge_mask=None, diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index f00678e099..847ba15728 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -19,13 +19,17 @@ from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig from graphstorm.dataloading import (GSgnnEdgeInferData, GSgnnNodeInferData, - GSgnnEdgeDataLoader, GSgnnNodeDataLoader) + GSgnnEdgeDataLoader, GSgnnNodeDataLoader, + GSgnnLinkPredictionTestDataLoader, + GSgnnLinkPredictionJointTestDataLoader) from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION, BUILTIN_TASK_LINK_PREDICTION) +from graphstorm.dataloading import (BUILTIN_LP_UNIFORM_NEG_SAMPLER, + BUILTIN_LP_JOINT_NEG_SAMPLER) from graphstorm.inference import GSgnnEmbGenInferer def main(config_args): From 34e22e7532c92865533db0dac646addfc448f9b2 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 19:53:03 +0000 Subject: [PATCH 26/42] fix lint --- python/graphstorm/inference/emb_infer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 93f466b9ac..ff869b9f55 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -117,7 +117,8 @@ def infer(self, data, task_type, save_embed_path, loader, else {ntype: inter_embs} g = loader.data.g ntype_emb = create_dist_tensor((g.num_nodes(ntype), inter_embs[ntype].shape[1]), - dtype=inter_embs[ntype].dtype, name=f'gen-emb-{ntype}', + dtype=inter_embs[ntype].dtype, + name=f'gen-emb-{ntype}', part_policy=g.get_node_partition_policy(ntype), persistent=True) ntype_emb[loader.target_nidx[ntype]] = inter_embs[ntype] From 0d9347f6db18b6db3e99bd0e6b72328e0183044c Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 20:04:12 +0000 Subject: [PATCH 27/42] fix lint --- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 847ba15728..84ff3762f9 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -99,13 +99,15 @@ def main(config_args): num_negative_edges=config.num_negative_edges_eval, fanout=config.eval_fanout) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - dataloader = GSgnnNodeDataLoader(input_graph, input_graph.infer_idxs, fanout=config.eval_fanout, + dataloader = GSgnnNodeDataLoader(input_graph, input_graph.infer_idxs, + fanout=config.eval_fanout, batch_size=config.eval_batch_size, device=device, train_task=False, construct_feat_ntype=config.construct_feat_ntype, construct_feat_fanout=config.construct_feat_fanout) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, 
BUILTIN_TASK_EDGE_REGRESSION}: - dataloader = GSgnnEdgeDataLoader(input_graph, input_graph.infer_idxs, fanout=config.eval_fanout, + dataloader = GSgnnEdgeDataLoader(input_graph, input_graph.infer_idxs, + fanout=config.eval_fanout, batch_size=config.eval_batch_size, device=device, train_task=False, reverse_edge_types_map=config.reverse_edge_types_map, From 3c30b1b23f1c1ff9b7b869cb35e1e1d6ac2c4bc7 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Mon, 9 Oct 2023 21:33:29 +0000 Subject: [PATCH 28/42] fix bug for not restoring model --- python/graphstorm/inference/emb_infer.py | 10 ++-------- python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 6 ++++++ tests/end2end-tests/graphstorm-ec/mgpu_test.sh | 2 +- tests/end2end-tests/graphstorm-nc/mgpu_test.sh | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index ff869b9f55..e1a50c2274 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -43,7 +43,6 @@ class GSgnnEmbGenInferer(GSInferrer): The GNN model with different task. """ - # TODO(zhengda) We only support full-graph inference for now. def infer(self, data, task_type, save_embed_path, loader, use_mini_batch_infer=False, node_id_mapping_file=None, @@ -63,18 +62,11 @@ def infer(self, data, task_type, save_embed_path, loader, The path where the GNN embeddings will be saved. loader : GSEdgeDataLoader/GSNodeDataLoader The mini-batch sampler for built-in graphstorm task. - edge_mask_for_gnn_embeddings : str - The mask that indicates the edges used for computing GNN embeddings. By default, - the dataloader uses the edges in the training graphs to compute GNN embeddings to - avoid information leak for link prediction. use_mini_batch_infer : bool Whether or not to use mini-batch inference when computing node embedings. node_id_mapping_file: str Path to the file storing node id mapping generated by the graph partition algorithm. - edge_id_mapping_file: str - Path to the file storing edge id mapping generated by the - graph partition algorithm. save_embed_format : str Specify the format of saved embeddings. 
""" @@ -129,6 +121,8 @@ def infer(self, data, task_type, save_embed_path, loader, ntype_emb = embs[ntype] embs = {ntype: ntype_emb} elif task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: + # Currently it is not allowed to do mini-batch inference + # and save embedding on edge tasks embs = do_full_graph_inference(self._model, loader.data, fanout=loader.fanout, task_tracker=self.task_tracker) target_ntypes = set() diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 84ff3762f9..0b5a1bf3ed 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -32,6 +32,7 @@ BUILTIN_LP_JOINT_NEG_SAMPLER) from graphstorm.inference import GSgnnEmbGenInferer + def main(config_args): """ main function """ @@ -75,6 +76,7 @@ def main(config_args): assert config.restore_model_path is not None, \ "restore model path cannot be none for gs_gen_node_embeddings" + # load the model if config.task_type == BUILTIN_TASK_LINK_PREDICTION: model = gs.create_builtin_lp_gnn_model(input_graph.g, config, train_task=False) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: @@ -83,7 +85,10 @@ def main(config_args): model = gs.create_builtin_edge_gnn_model(input_graph.g, config, train_task=False) else: raise TypeError("Not supported for task type: ", config.task_type) + model.restore_model(config.restore_model_path, + model_layer_to_load=config.restore_model_layers) + # define the dataloader if config.task_type == BUILTIN_TASK_LINK_PREDICTION: if config.eval_negative_sampler == BUILTIN_LP_UNIFORM_NEG_SAMPLER: link_prediction_loader = GSgnnLinkPredictionTestDataLoader @@ -117,6 +122,7 @@ def main(config_args): else: raise TypeError("Not supported for task type: ", config.task_type) + # start the infer emb_generator = GSgnnEmbGenInferer(model) emb_generator.setup_device(device=device) diff --git a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh index 9690636877..562b4e3817 100644 --- a/tests/end2end-tests/graphstorm-ec/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-ec/mgpu_test.sh @@ -136,7 +136,7 @@ python3 check_infer.py --train_embout /data/gsgnn_ec/emb/ --infer_embout /data/g error_and_exit $? 
echo "**************dataset: Movielens, use gen_node_embeddings to generate embeddings on edge classification" -python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --multilabel true --num-classes 6 --node-feat-name movie:title user:feat --save-embed-path /data/gsgnn_ec/save-emb/ --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_ep/ --num-trainers $NUM_TRAINERS --use-mini-batch-infer false --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_multi_label_ec/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_ec.yaml --exclude-training-targets True --multilabel true --num-classes 6 --node-feat-name movie:title user:feat --save-embed-path /data/gsgnn_ec/save-emb/ --restore-model-path /data/gsgnn_ec/epoch-$best_epoch/ --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? diff --git a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh index eeed50444e..9ee9d2dbfa 100644 --- a/tests/end2end-tests/graphstorm-nc/mgpu_test.sh +++ b/tests/end2end-tests/graphstorm-nc/mgpu_test.sh @@ -138,7 +138,7 @@ python3 $GS_HOME/tests/end2end-tests/check_infer.py --train_embout /data/gsgnn_n error_and_exit $? echo "**************dataset: Movielens, use gen_embeddings to generate embeddings on node classification" -python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb/ --logging-file /tmp/train_log.txt --logging-level debug +python3 -m graphstorm.run.gs_gen_node_embedding --workspace $GS_HOME/training_scripts/gsgnn_np/ --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_nc.yaml --restore-model-path /data/gsgnn_nc_ml/epoch-$best_epoch/ --save-embed-path /data/gsgnn_nc_ml/save-emb/ --use-mini-batch-infer false --logging-file /tmp/train_log.txt --logging-level debug error_and_exit $? 
From 9c0be9830041865a498563290867e0c5e899b85b Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Wed, 11 Oct 2023 00:11:30 +0000
Subject: [PATCH 29/42] remove relation embedding

---
 python/graphstorm/inference/emb_infer.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py
index e1a50c2274..9e9b3575a7 100644
--- a/python/graphstorm/inference/emb_infer.py
+++ b/python/graphstorm/inference/emb_infer.py
@@ -144,10 +144,3 @@ def infer(self, data, task_type, save_embed_path, loader,
                                save_embed_format=save_embed_format)
         barrier()
         sys_tracker.check('save embeddings')
-
-        # save relation embedding if any
-        if get_rank() == 0:
-            decoder = self._model.decoder
-            if isinstance(decoder, LinkPredictDistMultDecoder):
-                if save_embed_path is not None:
-                    save_relation_embeddings(save_embed_path, decoder)

From 6822ef381f843ed1fb4280783a60e78cd8fded31 Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Wed, 11 Oct 2023 00:12:56 +0000
Subject: [PATCH 30/42] remove redundant dependency

---
 python/graphstorm/inference/emb_infer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py
index 9e9b3575a7..50e718de30 100644
--- a/python/graphstorm/inference/emb_infer.py
+++ b/python/graphstorm/inference/emb_infer.py
@@ -23,7 +23,6 @@
                                BUILTIN_TASK_LINK_PREDICTION)
 from .graphstorm_infer import GSInferrer
 from ..model.utils import save_embeddings as save_gsgnn_embeddings
-from ..model.utils import save_relation_embeddings
 from ..model.edge_decoder import LinkPredictDistMultDecoder
 from ..model.gnn import do_full_graph_inference, do_mini_batch_inference
 from ..model.node_gnn import node_mini_batch_gnn_predict

From 2b79a470356a202d142ef9ab2432890bb335423e Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Wed, 11 Oct 2023 00:50:46 +0000
Subject: [PATCH 31/42] fix lint

---
 python/graphstorm/inference/emb_infer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py
index 50e718de30..405b1e7bd2 100644
--- a/python/graphstorm/inference/emb_infer.py
+++ b/python/graphstorm/inference/emb_infer.py
@@ -23,7 +23,6 @@
                                BUILTIN_TASK_LINK_PREDICTION)
 from .graphstorm_infer import GSInferrer
 from ..model.utils import save_embeddings as save_gsgnn_embeddings
-from ..model.edge_decoder import LinkPredictDistMultDecoder
 from ..model.gnn import do_full_graph_inference, do_mini_batch_inference
 from ..model.node_gnn import node_mini_batch_gnn_predict

From 57044726a9b7d97041e23c983b5c9747f06b6139 Mon Sep 17 00:00:00 2001
From: JalenCato
Date: Thu, 12 Oct 2023 20:20:29 +0000
Subject: [PATCH 32/42] change to trivial version

---
 python/graphstorm/inference/emb_infer.py      | 108 ++++++++----------
 .../run/gsgnn_emb/gsgnn_node_emb.py           |  46 +-------
 2 files changed, 53 insertions(+), 101 deletions(-)

diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py
index 405b1e7bd2..de33eff7e7 100644
--- a/python/graphstorm/inference/emb_infer.py
+++ b/python/graphstorm/inference/emb_infer.py
@@ -30,7 +30,7 @@

 class GSgnnEmbGenInferer(GSInferrer):
-    """ Embedding Generation inffer inferrer.
+    """ Embedding Generation inferrer.

     This is a high-level inferrer wrapper that can be used directly
     to generate embedding for inferer.
@@ -41,14 +41,54 @@ class GSgnnEmbGenInferer(GSInferrer):
         model : GSgnnNodeModel
         The GNN model with different task.
""" - def infer(self, data, task_type, save_embed_path, loader, + def nc_emb(self, g, model, use_mini_batch_infer, fanout): + # only generate embeddings on the target node type + if use_mini_batch_infer: + embs = do_mini_batch_inference(model, g, fanout=fanout, + edge_mask=None, + task_tracker=self.task_tracker, + infer_ntypes=g.infer_idxs) + else: + embs = do_full_graph_inference(model, g, fanout=fanout, + task_tracker=self.task_tracker) + return embs + + def ec_emb(self, g, model, use_mini_batch_infer, fanout): + # only generate embeddings on the target node type within definition + # target edge type + infer_ntypes = set() + for etype in g.infer_idxs: + infer_ntypes.add(etype[0]) + infer_ntypes.add(etype[2]) + + if use_mini_batch_infer: + embs = do_mini_batch_inference(model, g, fanout=fanout, + edge_mask=None, + task_tracker=self.task_tracker, + infer_ntypes=infer_ntypes) + else: + embs = do_full_graph_inference(model, g, fanout=fanout, + task_tracker=self.task_tracker) + return embs + + def lp_emb(self, g, model, use_mini_batch_infer, fanout): + if use_mini_batch_infer: + embs = do_mini_batch_inference(model, g, fanout=fanout, + edge_mask=None, + task_tracker=self.task_tracker) + else: + embs = do_full_graph_inference(model, g, fanout=fanout, + edge_mask=None, + task_tracker=self.task_tracker) + return embs + + def infer(self, data, task_type, save_embed_path, eval_fanout, use_mini_batch_infer=False, node_id_mapping_file=None, - return_proba=True, save_embed_format="pytorch"): """ Do Embedding Generating - Generate node embeddings and save. + Generate node embeddings and save into disk. Parameters ---------- @@ -58,10 +98,8 @@ def infer(self, data, task_type, save_embed_path, loader, task_type must be one of graphstorm builtin task types save_embed_path : str The path where the GNN embeddings will be saved. - loader : GSEdgeDataLoader/GSNodeDataLoader - The mini-batch sampler for built-in graphstorm task. use_mini_batch_infer : bool - Whether or not to use mini-batch inference when computing node embedings. + Whether to use mini-batch inference when computing node embeddings. node_id_mapping_file: str Path to the file storing node id mapping generated by the graph partition algorithm. @@ -70,65 +108,19 @@ def infer(self, data, task_type, save_embed_path, loader, """ device = self.device - # deal with uninitialized case first - if use_mini_batch_infer and \ - task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - assert save_embed_path is None, \ - "Unable to save the node embeddings when using mini batch inference " \ - "when doing edge task." \ - "It is not guaranteed that mini-batch prediction will cover all the nodes." 
- if task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - assert len(loader.data.eval_ntypes) == 1, \ - "GraphStorm only support single target node type for training and inference" - - assert save_embed_path is not None + assert save_embed_path is not None, \ + "It requires save embed path for gs_gen_node_embedding" sys_tracker.check('start embedding generation') self._model.eval() if task_type == BUILTIN_TASK_LINK_PREDICTION: - # for embedding generation, it is preferred to use full graph - if use_mini_batch_infer: - embs = do_mini_batch_inference(self._model, data, fanout=loader.fanout, - edge_mask=None, - task_tracker=self.task_tracker) - else: - embs = do_full_graph_inference(self._model, data, fanout=loader.fanout, - edge_mask=None, - task_tracker=self.task_tracker) + embs = self.lp_emb(data, self._model, use_mini_batch_infer, eval_fanout) elif task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - # only generate embeddings on the target node type - ntype = loader.data.eval_ntypes[0] - if use_mini_batch_infer: - inter_embs = node_mini_batch_gnn_predict(self._model, loader, return_proba, - return_label=False)[1] - inter_embs = {ntype: inter_embs[ntype]} if isinstance(inter_embs, dict) \ - else {ntype: inter_embs} - g = loader.data.g - ntype_emb = create_dist_tensor((g.num_nodes(ntype), inter_embs[ntype].shape[1]), - dtype=inter_embs[ntype].dtype, - name=f'gen-emb-{ntype}', - part_policy=g.get_node_partition_policy(ntype), - persistent=True) - ntype_emb[loader.target_nidx[ntype]] = inter_embs[ntype] - embs = {ntype: ntype_emb} - else: - embs = do_full_graph_inference(self._model, data, fanout=loader.fanout, - task_tracker=self.task_tracker) - ntype_emb = embs[ntype] - embs = {ntype: ntype_emb} + embs = self.nc_emb(data, self._model, use_mini_batch_infer, eval_fanout) elif task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - # Currently it is not allowed to do mini-batch inference - # and save embedding on edge tasks - embs = do_full_graph_inference(self._model, loader.data, fanout=loader.fanout, - task_tracker=self.task_tracker) - target_ntypes = set() - for etype in loader.data.eval_etypes: - target_ntypes.add(etype[0]) - target_ntypes.add(etype[2]) - - embs = {ntype: embs[ntype] for ntype in sorted(target_ntypes)} + embs = self.ec_emb(data, self._model, use_mini_batch_infer, eval_fanout) else: raise TypeError("Not supported for task type: ", task_type) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 0b5a1bf3ed..80580bf21d 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -18,18 +18,13 @@ import graphstorm as gs from graphstorm.config import get_argument_parser from graphstorm.config import GSConfig -from graphstorm.dataloading import (GSgnnEdgeInferData, GSgnnNodeInferData, - GSgnnEdgeDataLoader, GSgnnNodeDataLoader, - GSgnnLinkPredictionTestDataLoader, - GSgnnLinkPredictionJointTestDataLoader) from graphstorm.utils import rt_profiler, sys_tracker, setup_device, use_wholegraph +from graphstorm.dataloading import (GSgnnEdgeInferData, GSgnnNodeInferData) from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION, BUILTIN_TASK_LINK_PREDICTION) -from graphstorm.dataloading import (BUILTIN_LP_UNIFORM_NEG_SAMPLER, - BUILTIN_LP_JOINT_NEG_SAMPLER) from 
graphstorm.inference import GSgnnEmbGenInferer @@ -88,50 +83,15 @@ def main(config_args): model.restore_model(config.restore_model_path, model_layer_to_load=config.restore_model_layers) - # define the dataloader - if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - if config.eval_negative_sampler == BUILTIN_LP_UNIFORM_NEG_SAMPLER: - link_prediction_loader = GSgnnLinkPredictionTestDataLoader - elif config.eval_negative_sampler == BUILTIN_LP_JOINT_NEG_SAMPLER: - link_prediction_loader = GSgnnLinkPredictionJointTestDataLoader - else: - raise ValueError('Unknown test negative sampler.' - 'Supported test negative samplers include ' - f'[{BUILTIN_LP_UNIFORM_NEG_SAMPLER}, {BUILTIN_LP_JOINT_NEG_SAMPLER}]') - - dataloader = link_prediction_loader(input_graph, input_graph.test_idxs, - batch_size=config.eval_batch_size, - num_negative_edges=config.num_negative_edges_eval, - fanout=config.eval_fanout) - elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - dataloader = GSgnnNodeDataLoader(input_graph, input_graph.infer_idxs, - fanout=config.eval_fanout, - batch_size=config.eval_batch_size, device=device, - train_task=False, - construct_feat_ntype=config.construct_feat_ntype, - construct_feat_fanout=config.construct_feat_fanout) - elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - dataloader = GSgnnEdgeDataLoader(input_graph, input_graph.infer_idxs, - fanout=config.eval_fanout, - batch_size=config.eval_batch_size, - device=device, train_task=False, - reverse_edge_types_map=config.reverse_edge_types_map, - remove_target_edge_type=config.remove_target_edge_type, - construct_feat_ntype=config.construct_feat_ntype, - construct_feat_fanout=config.construct_feat_fanout) - else: - raise TypeError("Not supported for task type: ", config.task_type) - - # start the infer + # start to infer emb_generator = GSgnnEmbGenInferer(model) emb_generator.setup_device(device=device) emb_generator.infer(input_graph, config.task_type, save_embed_path=config.save_embed_path, - loader=dataloader, + eval_fanout=config.eval_fanout, use_mini_batch_infer=config.use_mini_batch_infer, node_id_mapping_file=config.node_id_mapping_file, - return_proba=config.return_proba, save_embed_format=config.save_embed_format) def generate_parser(): From 45038f9a57e5642819933dd18a39bd2ac7fb5af6 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 12 Oct 2023 20:53:01 +0000 Subject: [PATCH 33/42] add doc string --- python/graphstorm/inference/emb_infer.py | 49 +++++++++++++++++++++--- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index de33eff7e7..286d1d2dab 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -24,9 +24,8 @@ from .graphstorm_infer import GSInferrer from ..model.utils import save_embeddings as save_gsgnn_embeddings from ..model.gnn import do_full_graph_inference, do_mini_batch_inference -from ..model.node_gnn import node_mini_batch_gnn_predict -from ..utils import sys_tracker, get_rank, get_world_size, barrier, create_dist_tensor +from ..utils import sys_tracker, get_rank, get_world_size, barrier class GSgnnEmbGenInferer(GSInferrer): @@ -42,7 +41,20 @@ class GSgnnEmbGenInferer(GSInferrer): """ def nc_emb(self, g, model, use_mini_batch_infer, fanout): - # only generate embeddings on the target node type + """ Embedding Generation for node task. + It will only generate embeddings on the target node type. 
+ + Parameters + ---------- + g: GSgnnData + The GraphStorm dataset + model : GSgnnNodeModel + The GNN model on edge prediction/classification task + use_mini_batch_infer : bool + Whether to use mini-batch inference when computing node embeddings. + fanout: list of int + The fanout of each GNN layers used in inference. + """ if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, edge_mask=None, @@ -54,8 +66,21 @@ def nc_emb(self, g, model, use_mini_batch_infer, fanout): return embs def ec_emb(self, g, model, use_mini_batch_infer, fanout): - # only generate embeddings on the target node type within definition - # target edge type + """ Embedding Generation for edge task. + It will only generate embeddings on the target node type + defined in the target edge type. + + Parameters + ---------- + g: GSgnnData + The GraphStorm dataset + model : GSgnnNodeModel + The GNN model on edge prediction/classification task + use_mini_batch_infer : bool + Whether to use mini-batch inference when computing node embeddings. + fanout: list of int + The fanout of each GNN layers used in inference. + """ infer_ntypes = set() for etype in g.infer_idxs: infer_ntypes.add(etype[0]) @@ -72,6 +97,20 @@ def ec_emb(self, g, model, use_mini_batch_infer, fanout): return embs def lp_emb(self, g, model, use_mini_batch_infer, fanout): + """ Embedding Generation for link prediction task. + It will only generate embeddings on whole graph. + + Parameters + ---------- + g: GSgnnData + The GraphStorm dataset + model : GSgnnNodeModel + The GNN model on edge prediction/classification task + use_mini_batch_infer : bool + Whether to use mini-batch inference when computing node embeddings. + fanout: list of int + The fanout of each GNN layers used in inference. + """ if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, edge_mask=None, From 6ceb0d07c01a97c8b2024584402ed7c7db42d3b9 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 12 Oct 2023 22:08:31 +0000 Subject: [PATCH 34/42] fix edge task mini batch --- python/graphstorm/inference/emb_infer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 286d1d2dab..40c78871d4 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -83,8 +83,10 @@ def ec_emb(self, g, model, use_mini_batch_infer, fanout): """ infer_ntypes = set() for etype in g.infer_idxs: - infer_ntypes.add(etype[0]) - infer_ntypes.add(etype[2]) + if etype[0] not in infer_ntypes: + infer_ntypes.append(etype[0]) + if etype[2] not in infer_ntypes: + infer_ntypes.append(etype[2]) if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, From 5e39786bb54a3486ec109d4ad636ca93d08be71a Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 12 Oct 2023 22:19:35 +0000 Subject: [PATCH 35/42] add --- python/graphstorm/inference/emb_infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 40c78871d4..8e50f379a7 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -81,7 +81,7 @@ def ec_emb(self, g, model, use_mini_batch_infer, fanout): fanout: list of int The fanout of each GNN layers used in inference. 
""" - infer_ntypes = set() + infer_ntypes = [] for etype in g.infer_idxs: if etype[0] not in infer_ntypes: infer_ntypes.append(etype[0]) From 57044726a9b7d97041e23c983b5c9747f06b6139 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Thu, 12 Oct 2023 23:06:32 +0000 Subject: [PATCH 36/42] fix sorted bug --- python/graphstorm/inference/emb_infer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 8e50f379a7..c671db9fc8 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -55,11 +55,12 @@ def nc_emb(self, g, model, use_mini_batch_infer, fanout): fanout: list of int The fanout of each GNN layers used in inference. """ + infer_ntypes = sorted(g.infer_idxs) if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, edge_mask=None, task_tracker=self.task_tracker, - infer_ntypes=g.infer_idxs) + infer_ntypes=infer_ntypes) else: embs = do_full_graph_inference(model, g, fanout=fanout, task_tracker=self.task_tracker) @@ -81,12 +82,11 @@ def ec_emb(self, g, model, use_mini_batch_infer, fanout): fanout: list of int The fanout of each GNN layers used in inference. """ - infer_ntypes = [] + infer_ntypes = set() for etype in g.infer_idxs: - if etype[0] not in infer_ntypes: - infer_ntypes.append(etype[0]) - if etype[2] not in infer_ntypes: - infer_ntypes.append(etype[2]) + infer_ntypes.add(etype[0]) + infer_ntypes.add(etype[2]) + infer_ntypes = sorted(infer_ntypes) if use_mini_batch_infer: embs = do_mini_batch_inference(model, g, fanout=fanout, From 6de76ff34a86d6cd8771614f9ec976a04c7bdf05 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 01:07:27 +0000 Subject: [PATCH 37/42] finish pruning --- python/graphstorm/inference/emb_infer.py | 105 +++--------------- .../run/gsgnn_emb/gsgnn_node_emb.py | 18 ++- 2 files changed, 24 insertions(+), 99 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index c671db9fc8..e9a8e0ddc0 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -24,7 +24,6 @@ from .graphstorm_infer import GSInferrer from ..model.utils import save_embeddings as save_gsgnn_embeddings from ..model.gnn import do_full_graph_inference, do_mini_batch_inference - from ..utils import sys_tracker, get_rank, get_world_size, barrier @@ -39,90 +38,6 @@ class GSgnnEmbGenInferer(GSInferrer): model : GSgnnNodeModel The GNN model with different task. """ - - def nc_emb(self, g, model, use_mini_batch_infer, fanout): - """ Embedding Generation for node task. - It will only generate embeddings on the target node type. - - Parameters - ---------- - g: GSgnnData - The GraphStorm dataset - model : GSgnnNodeModel - The GNN model on edge prediction/classification task - use_mini_batch_infer : bool - Whether to use mini-batch inference when computing node embeddings. - fanout: list of int - The fanout of each GNN layers used in inference. - """ - infer_ntypes = sorted(g.infer_idxs) - if use_mini_batch_infer: - embs = do_mini_batch_inference(model, g, fanout=fanout, - edge_mask=None, - task_tracker=self.task_tracker, - infer_ntypes=infer_ntypes) - else: - embs = do_full_graph_inference(model, g, fanout=fanout, - task_tracker=self.task_tracker) - return embs - - def ec_emb(self, g, model, use_mini_batch_infer, fanout): - """ Embedding Generation for edge task. 
- It will only generate embeddings on the target node type - defined in the target edge type. - - Parameters - ---------- - g: GSgnnData - The GraphStorm dataset - model : GSgnnNodeModel - The GNN model on edge prediction/classification task - use_mini_batch_infer : bool - Whether to use mini-batch inference when computing node embeddings. - fanout: list of int - The fanout of each GNN layers used in inference. - """ - infer_ntypes = set() - for etype in g.infer_idxs: - infer_ntypes.add(etype[0]) - infer_ntypes.add(etype[2]) - infer_ntypes = sorted(infer_ntypes) - - if use_mini_batch_infer: - embs = do_mini_batch_inference(model, g, fanout=fanout, - edge_mask=None, - task_tracker=self.task_tracker, - infer_ntypes=infer_ntypes) - else: - embs = do_full_graph_inference(model, g, fanout=fanout, - task_tracker=self.task_tracker) - return embs - - def lp_emb(self, g, model, use_mini_batch_infer, fanout): - """ Embedding Generation for link prediction task. - It will only generate embeddings on whole graph. - - Parameters - ---------- - g: GSgnnData - The GraphStorm dataset - model : GSgnnNodeModel - The GNN model on edge prediction/classification task - use_mini_batch_infer : bool - Whether to use mini-batch inference when computing node embeddings. - fanout: list of int - The fanout of each GNN layers used in inference. - """ - if use_mini_batch_infer: - embs = do_mini_batch_inference(model, g, fanout=fanout, - edge_mask=None, - task_tracker=self.task_tracker) - else: - embs = do_full_graph_inference(model, g, fanout=fanout, - edge_mask=None, - task_tracker=self.task_tracker) - return embs - def infer(self, data, task_type, save_embed_path, eval_fanout, use_mini_batch_infer=False, node_id_mapping_file=None, @@ -157,14 +72,28 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, self._model.eval() if task_type == BUILTIN_TASK_LINK_PREDICTION: - embs = self.lp_emb(data, self._model, use_mini_batch_infer, eval_fanout) + infer_ntypes = None elif task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - embs = self.nc_emb(data, self._model, use_mini_batch_infer, eval_fanout) + infer_ntypes = sorted(data.infer_idxs) elif task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - embs = self.ec_emb(data, self._model, use_mini_batch_infer, eval_fanout) + infer_ntypes = set() + for etype in data.infer_idxs: + infer_ntypes.add(etype[0]) + infer_ntypes.add(etype[2]) + infer_ntypes = sorted(infer_ntypes) else: raise TypeError("Not supported for task type: ", task_type) + if use_mini_batch_infer: + embs = do_mini_batch_inference(self._model, data, fanout=eval_fanout, + edge_mask=None, + task_tracker=self.task_tracker) + else: + embs = do_full_graph_inference(self._model, data, fanout=eval_fanout, + edge_mask=None, + task_tracker=self.task_tracker, + infer_ntypes=infer_ntypes) + if get_rank() == 0: logging.info("save embeddings to %s", save_embed_path) diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index 80580bf21d..fb35db24f0 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -47,21 +47,17 @@ def main(config_args): input_graph = GSgnnEdgeInferData(config.graph_name, config.part_config, eval_etypes=config.eval_etype, - node_feat_field=config.node_feat_name, - decoder_edge_feat=config.decoder_edge_feat) + node_feat_field=config.node_feat_name) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, 
BUILTIN_TASK_NODE_CLASSIFICATION}: input_graph = GSgnnNodeInferData(config.graph_name, - config.part_config, - eval_ntypes=config.target_ntype, - node_feat_field=config.node_feat_name, - label_field=config.label_field) + config.part_config, + eval_ntypes=config.target_ntype, + node_feat_field=config.node_feat_name) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: input_graph = GSgnnEdgeInferData(config.graph_name, - config.part_config, - eval_etypes=config.target_etype, - node_feat_field=config.node_feat_name, - label_field=config.label_field, - decoder_edge_feat=config.decoder_edge_feat) + config.part_config, + eval_etypes=config.target_etype, + node_feat_field=config.node_feat_name) else: raise TypeError("Not supported for task type: ", config.task_type) From 788297a8f09d48b841ca33cf3aeb6fdc305f6ffc Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 01:13:59 +0000 Subject: [PATCH 38/42] fix typo --- python/graphstorm/inference/emb_infer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index e9a8e0ddc0..5550af018b 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -87,12 +87,12 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, if use_mini_batch_infer: embs = do_mini_batch_inference(self._model, data, fanout=eval_fanout, edge_mask=None, - task_tracker=self.task_tracker) + task_tracker=self.task_tracker, + infer_ntypes=infer_ntypes) else: embs = do_full_graph_inference(self._model, data, fanout=eval_fanout, edge_mask=None, - task_tracker=self.task_tracker, - infer_ntypes=infer_ntypes) + task_tracker=self.task_tracker) if get_rank() == 0: logging.info("save embeddings to %s", save_embed_path) From 0cd315f9582f585baaf7154c4a972c710de1ace3 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 02:10:27 +0000 Subject: [PATCH 39/42] apply comment --- python/graphstorm/inference/emb_infer.py | 2 ++ python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 5550af018b..095abed516 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -54,6 +54,8 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, task_type must be one of graphstorm builtin task types save_embed_path : str The path where the GNN embeddings will be saved. + eval_fanout: list of int + The fanout of each GNN layers used in evaluation and inference. use_mini_batch_infer : bool Whether to use mini-batch inference when computing node embeddings. 
node_id_mapping_file: str diff --git a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py index fb35db24f0..191471d9f6 100644 --- a/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py +++ b/python/graphstorm/run/gsgnn_emb/gsgnn_node_emb.py @@ -44,17 +44,17 @@ def main(config_args): tracker.log_params(config.__dict__) if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - input_graph = GSgnnEdgeInferData(config.graph_name, + input_data = GSgnnEdgeInferData(config.graph_name, config.part_config, eval_etypes=config.eval_etype, node_feat_field=config.node_feat_name) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - input_graph = GSgnnNodeInferData(config.graph_name, + input_data = GSgnnNodeInferData(config.graph_name, config.part_config, eval_ntypes=config.target_ntype, node_feat_field=config.node_feat_name) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - input_graph = GSgnnEdgeInferData(config.graph_name, + input_data = GSgnnEdgeInferData(config.graph_name, config.part_config, eval_etypes=config.target_etype, node_feat_field=config.node_feat_name) @@ -69,11 +69,11 @@ def main(config_args): # load the model if config.task_type == BUILTIN_TASK_LINK_PREDICTION: - model = gs.create_builtin_lp_gnn_model(input_graph.g, config, train_task=False) + model = gs.create_builtin_lp_gnn_model(input_data.g, config, train_task=False) elif config.task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: - model = gs.create_builtin_node_gnn_model(input_graph.g, config, train_task=False) + model = gs.create_builtin_node_gnn_model(input_data.g, config, train_task=False) elif config.task_type in {BUILTIN_TASK_EDGE_CLASSIFICATION, BUILTIN_TASK_EDGE_REGRESSION}: - model = gs.create_builtin_edge_gnn_model(input_graph.g, config, train_task=False) + model = gs.create_builtin_edge_gnn_model(input_data.g, config, train_task=False) else: raise TypeError("Not supported for task type: ", config.task_type) model.restore_model(config.restore_model_path, @@ -83,7 +83,7 @@ def main(config_args): emb_generator = GSgnnEmbGenInferer(model) emb_generator.setup_device(device=device) - emb_generator.infer(input_graph, config.task_type, + emb_generator.infer(input_data, config.task_type, save_embed_path=config.save_embed_path, eval_fanout=config.eval_fanout, use_mini_batch_infer=config.use_mini_batch_infer, From ada55aec85afb60c6df170d865d06da84525973b Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 04:52:04 +0000 Subject: [PATCH 40/42] test --- python/graphstorm/inference/emb_infer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index 095abed516..ec67105507 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. - Inferrer wrapper for embedding generation. + Inferer wrapper for embedding generation. 
""" import logging from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, @@ -70,7 +70,7 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, assert save_embed_path is not None, \ "It requires save embed path for gs_gen_node_embedding" - sys_tracker.check('start embedding generation') + sys_tracker.check('start generating embedding') self._model.eval() if task_type == BUILTIN_TASK_LINK_PREDICTION: From 774187c5a3b71df4eb8e4d07919d884250df2724 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 17:28:43 +0000 Subject: [PATCH 41/42] add embs --- python/graphstorm/inference/emb_infer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index ec67105507..f9ac04c75f 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -73,6 +73,7 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, sys_tracker.check('start generating embedding') self._model.eval() + # infer ntypes must be sorted for node embedding saving if task_type == BUILTIN_TASK_LINK_PREDICTION: infer_ntypes = None elif task_type in {BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_NODE_CLASSIFICATION}: @@ -95,6 +96,8 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, embs = do_full_graph_inference(self._model, data, fanout=eval_fanout, edge_mask=None, task_tracker=self.task_tracker) + if not infer_ntypes: + embs = {ntype: embs[ntype] for ntype in infer_ntypes} if get_rank() == 0: logging.info("save embeddings to %s", save_embed_path) From 6906543ad3a42fd10513d16be0175809a4151455 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Fri, 13 Oct 2023 18:52:46 +0000 Subject: [PATCH 42/42] fix typo --- python/graphstorm/inference/emb_infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/graphstorm/inference/emb_infer.py b/python/graphstorm/inference/emb_infer.py index f9ac04c75f..413c43eebe 100644 --- a/python/graphstorm/inference/emb_infer.py +++ b/python/graphstorm/inference/emb_infer.py @@ -96,7 +96,7 @@ def infer(self, data, task_type, save_embed_path, eval_fanout, embs = do_full_graph_inference(self._model, data, fanout=eval_fanout, edge_mask=None, task_tracker=self.task_tracker) - if not infer_ntypes: + if infer_ntypes: embs = {ntype: embs[ntype] for ntype in infer_ntypes} if get_rank() == 0: