From 937d1d624d6cb267312c2c78433a1928c0cafb3f Mon Sep 17 00:00:00 2001 From: Xiang Song Date: Wed, 25 Sep 2024 21:40:19 -0700 Subject: [PATCH 01/10] Add a strict naming rule for graph name --- .../distributed_executor.py | 22 ++++++++++++ .../graphstorm/gconstruct/construct_graph.py | 3 +- python/graphstorm/utils.py | 24 ++++++++++++- tests/unit-tests/test_gsf.py | 35 +++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/graphstorm-processing/graphstorm_processing/distributed_executor.py b/graphstorm-processing/graphstorm_processing/distributed_executor.py index c374056f56..580e07c8ce 100644 --- a/graphstorm-processing/graphstorm_processing/distributed_executor.py +++ b/graphstorm-processing/graphstorm_processing/distributed_executor.py @@ -563,6 +563,27 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() +def check_graph_name(graph_name): + """ Check whether the graph name is a valid graph name. + + We enforce that the graph name adheres to the Python + identifier naming rules as in + https://docs.python.org/3/reference/lexical_analysis.html#identifiers, + with the exception that hyphens (-) are permitted. + This helps avoid the cases when an invalid graph name, + such as `/graph`, causes unexpected errors. + + Note: Same as graphstorm.utils.check_graph_name. + + Parameter + --------- + graph_name: str + Graph Name. + """ + assert graph_name.replace('-', '_').isidentifier(), \ + "GraphStorm expects the graph name adheres to the Python" \ + "identifier naming rules with the exception that hyphens " \ + f"(-) are permitted. But we get {graph_name}" def main(): """Main entry point for GSProcessing""" @@ -572,6 +593,7 @@ def main(): level=gsprocessing_args.log_level, format="[GSPROCESSING] %(asctime)s %(levelname)-8s %(message)s", ) + check_graph_name(gsprocessing_args.graph_name) # Determine execution environment if os.path.exists("/opt/ml/config/processingjobconfig.json"): diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py index 9308c2003e..bf21c3f7dd 100644 --- a/python/graphstorm/gconstruct/construct_graph.py +++ b/python/graphstorm/gconstruct/construct_graph.py @@ -30,7 +30,7 @@ import dgl from dgl.distributed.constants import DEFAULT_NTYPE, DEFAULT_ETYPE -from ..utils import sys_tracker, get_log_level +from ..utils import sys_tracker, get_log_level, check_graph_name from .file_io import parse_node_file_format, parse_edge_file_format from .file_io import get_in_files from .transform import parse_feat_ops, process_features, preprocess_features @@ -742,6 +742,7 @@ def print_graph_info(g, node_data, edge_data, node_label_stats, edge_label_stats def process_graph(args): """ Process the graph. """ + check_graph_name(args.graph_name) logging.basicConfig(level=get_log_level(args.logging_level)) with open(args.conf_file, 'r', encoding="utf8") as json_file: process_confs = json.load(json_file) diff --git a/python/graphstorm/utils.py b/python/graphstorm/utils.py index e54027178f..a58c9daafd 100644 --- a/python/graphstorm/utils.py +++ b/python/graphstorm/utils.py @@ -31,6 +31,26 @@ USE_WHOLEGRAPH = False GS_DEVICE = th.device('cpu') +def check_graph_name(graph_name): + """ Check whether the graph name is a valid graph name. + + We enforce that the graph name adheres to the Python + identifier naming rules as in + https://docs.python.org/3/reference/lexical_analysis.html#identifiers, + with the exception that hyphens (-) are permitted. + This helps avoid the cases when an invalid graph name, + such as `/graph`, causes unexpected errors. + + Parameter + --------- + graph_name: str + Graph Name. + """ + assert graph_name.replace('-', '_').isidentifier(), \ + "GraphStorm expects the graph name adheres to the Python" \ + "identifier naming rules with the exception that hyphens " \ + f"(-) are permitted. But we get {graph_name}" + def get_graph_name(part_config): """ Get graph name from graph partition config file @@ -45,7 +65,9 @@ def get_graph_name(part_config): """ with open(part_config, "r", encoding='utf-8') as f: config = json.load(f) - return config["graph_name"] + graph_name = config["graph_name"] + check_graph_name(graph_name) + return graph_name def setup_device(local_rank): r"""Setup computation device. diff --git a/tests/unit-tests/test_gsf.py b/tests/unit-tests/test_gsf.py index 338f53128d..6be5958788 100644 --- a/tests/unit-tests/test_gsf.py +++ b/tests/unit-tests/test_gsf.py @@ -16,6 +16,7 @@ from graphstorm.gsf import (create_builtin_node_decoder, create_builtin_edge_decoder, create_builtin_lp_decoder) +from graphstorm.utils import check_graph_name from graphstorm.config import (BUILTIN_TASK_NODE_CLASSIFICATION, BUILTIN_TASK_NODE_REGRESSION, BUILTIN_TASK_EDGE_CLASSIFICATION, @@ -449,7 +450,41 @@ def test_create_builtin_lp_decoder(): assert decoder.gamma == 6. +def test_check_graph_name(): + graph_name = "a" + check_graph_name(graph_name) + graph_name = "graph_name" + check_graph_name(graph_name) + graph_name = "graph-name" + check_graph_name(graph_name) + + # test with invalid graph name + graph_name = "/graph_name" + invalid_name = False + try: + check_graph_name(graph_name) + except: + invalid_name = True + assert invalid_name + + graph_name = "|graph_name" + invalid_name = False + try: + check_graph_name(graph_name) + except: + invalid_name = True + assert invalid_name + + graph_name = "\graph_name" + invalid_name = False + try: + check_graph_name(graph_name) + except: + invalid_name = True + assert invalid_name + if __name__ == '__main__': + test_check_graph_name() test_create_builtin_node_decoder() test_create_builtin_edge_decoder() test_create_builtin_lp_decoder() \ No newline at end of file From d53568752ee5d9b20b29e91a223e6a62479c8e1d Mon Sep 17 00:00:00 2001 From: Xiang Song Date: Wed, 25 Sep 2024 21:47:33 -0700 Subject: [PATCH 02/10] Update --- .../cli/graph-construction/single-machine-gconstruct.rst | 2 +- .../graphstorm_processing/distributed_executor.py | 5 ++++- python/graphstorm/gconstruct/construct_graph.py | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/source/cli/graph-construction/single-machine-gconstruct.rst b/docs/source/cli/graph-construction/single-machine-gconstruct.rst index 45026d1f4e..d0d75c2d32 100644 --- a/docs/source/cli/graph-construction/single-machine-gconstruct.rst +++ b/docs/source/cli/graph-construction/single-machine-gconstruct.rst @@ -33,7 +33,7 @@ Full argument list of the ``gconstruct.construct_graph`` command * **-\-num-processes-for-nodes**: the number of processes to process node data simultaneously. Increase this number can speed up node data processing. * **-\-num-processes-for-edges**: the number of processes to process edge data simultaneously. Increase this number can speed up edge data processing. * **-\-output-dir**: (**Required**) the path of the output data files. -* **-\-graph-name**: (**Required**) the name assigned for the graph. +* **-\-graph-name**: (**Required**) the name assigned for the graph. The graph name must adhere to the Python identifier naming rules with the exception that hyphens (``-``) are permitted. * **-\-remap-node-id**: boolean value to decide whether to rename node IDs or not. Adding this argument will set it to be true, otherwise false. * **-\-add-reverse-edges**: boolean value to decide whether to add reverse edges for the given graph. Adding this argument sets it to true; otherwise, it defaults to false. It is **strongly** suggested to include this argument for graph construction, as some nodes in the original data may not have in-degrees, and thus cannot update their presentations by aggregating messages from their neighbors. Adding this arugment helps prevent this issue. * **-\-output-format**: the format of constructed graph, options are ``DGL``, ``DistDGL``. Default is ``DistDGL``. It also accepts multiple graph formats at the same time separated by an space, for example ``--output-format "DGL DistDGL"``. The output format is explained in the :ref:`Output ` section above. diff --git a/graphstorm-processing/graphstorm_processing/distributed_executor.py b/graphstorm-processing/graphstorm_processing/distributed_executor.py index 580e07c8ce..697be1373c 100644 --- a/graphstorm-processing/graphstorm_processing/distributed_executor.py +++ b/graphstorm-processing/graphstorm_processing/distributed_executor.py @@ -540,7 +540,10 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--graph-name", type=str, - help="Name for the graph being processed.", + help="Name for the graph being processed." + "The graph name must adhere to the Python " + "identifier naming rules with the exception " + "that hyphens (-) are permitted.", required=False, default=None, ) diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py index bf21c3f7dd..56d9b4b64e 100644 --- a/python/graphstorm/gconstruct/construct_graph.py +++ b/python/graphstorm/gconstruct/construct_graph.py @@ -910,7 +910,10 @@ def process_graph(args): argparser.add_argument("--output-dir", type=str, required=True, help="The path of the output data folder.") argparser.add_argument("--graph-name", type=str, required=True, - help="The graph name") + help="Name for the graph being processed." + "The graph name must adhere to the Python " + "identifier naming rules with the exception " + "that hyphens (-) are permitted.",) argparser.add_argument("--remap-node-id", action='store_true', help="Whether or not to remap node IDs.") argparser.add_argument("--add-reverse-edges", action='store_true', From 0771150047c3d052898c29f8cbead2574f39943a Mon Sep 17 00:00:00 2001 From: Xiang Song Date: Wed, 25 Sep 2024 22:36:00 -0700 Subject: [PATCH 03/10] Update --- .../distributed_executor.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/graphstorm-processing/graphstorm_processing/distributed_executor.py b/graphstorm-processing/graphstorm_processing/distributed_executor.py index 697be1373c..ffe2bd8a1a 100644 --- a/graphstorm-processing/graphstorm_processing/distributed_executor.py +++ b/graphstorm-processing/graphstorm_processing/distributed_executor.py @@ -541,9 +541,9 @@ def parse_args() -> argparse.Namespace: "--graph-name", type=str, help="Name for the graph being processed." - "The graph name must adhere to the Python " - "identifier naming rules with the exception " - "that hyphens (-) are permitted.", + "The graph name must adhere to the Python " + "identifier naming rules with the exception " + "that hyphens (-) are permitted.", required=False, default=None, ) @@ -566,8 +566,9 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() + def check_graph_name(graph_name): - """ Check whether the graph name is a valid graph name. + """Check whether the graph name is a valid graph name. We enforce that the graph name adheres to the Python identifier naming rules as in @@ -583,10 +584,12 @@ def check_graph_name(graph_name): graph_name: str Graph Name. """ - assert graph_name.replace('-', '_').isidentifier(), \ - "GraphStorm expects the graph name adheres to the Python" \ - "identifier naming rules with the exception that hyphens " \ + assert graph_name.replace("-", "_").isidentifier(), ( + "GraphStorm expects the graph name adheres to the Python" + "identifier naming rules with the exception that hyphens " f"(-) are permitted. But we get {graph_name}" + ) + def main(): """Main entry point for GSProcessing""" From c2794c4745dab907c36785bb0a0d48145adf07af Mon Sep 17 00:00:00 2001 From: Xiang Song Date: Thu, 26 Sep 2024 11:13:20 -0700 Subject: [PATCH 04/10] Update --- .../graph-construction/single-machine-gconstruct.rst | 2 +- .../model-training-inference/distributed/sagemaker.rst | 2 +- .../graphstorm_processing/distributed_executor.py | 9 ++++++--- python/graphstorm/gconstruct/construct_graph.py | 3 ++- python/graphstorm/utils.py | 10 +++++++--- tests/unit-tests/test_gsf.py | 4 ++++ 6 files changed, 21 insertions(+), 9 deletions(-) diff --git a/docs/source/cli/graph-construction/single-machine-gconstruct.rst b/docs/source/cli/graph-construction/single-machine-gconstruct.rst index d0d75c2d32..f0d7849c61 100644 --- a/docs/source/cli/graph-construction/single-machine-gconstruct.rst +++ b/docs/source/cli/graph-construction/single-machine-gconstruct.rst @@ -33,7 +33,7 @@ Full argument list of the ``gconstruct.construct_graph`` command * **-\-num-processes-for-nodes**: the number of processes to process node data simultaneously. Increase this number can speed up node data processing. * **-\-num-processes-for-edges**: the number of processes to process edge data simultaneously. Increase this number can speed up edge data processing. * **-\-output-dir**: (**Required**) the path of the output data files. -* **-\-graph-name**: (**Required**) the name assigned for the graph. The graph name must adhere to the Python identifier naming rules with the exception that hyphens (``-``) are permitted. +* **-\-graph-name**: (**Required**) the name assigned for the graph. The graph name must adhere to the Python identifier naming rules with the exception that hyphens (``-``) are permitted and the name can start with numbers. * **-\-remap-node-id**: boolean value to decide whether to rename node IDs or not. Adding this argument will set it to be true, otherwise false. * **-\-add-reverse-edges**: boolean value to decide whether to add reverse edges for the given graph. Adding this argument sets it to true; otherwise, it defaults to false. It is **strongly** suggested to include this argument for graph construction, as some nodes in the original data may not have in-degrees, and thus cannot update their presentations by aggregating messages from their neighbors. Adding this arugment helps prevent this issue. * **-\-output-format**: the format of constructed graph, options are ``DGL``, ``DistDGL``. Default is ``DistDGL``. It also accepts multiple graph formats at the same time separated by an space, for example ``--output-format "DGL DistDGL"``. The output format is explained in the :ref:`Output ` section above. diff --git a/docs/source/cli/model-training-inference/distributed/sagemaker.rst b/docs/source/cli/model-training-inference/distributed/sagemaker.rst index 5b111726cb..ab1c892271 100644 --- a/docs/source/cli/model-training-inference/distributed/sagemaker.rst +++ b/docs/source/cli/model-training-inference/distributed/sagemaker.rst @@ -388,7 +388,7 @@ The rest of the arguments are passed on to ``sagemaker_train.py`` or ``sagemaker * **--task-type**: Task type. * **--graph-data-s3**: S3 location of the input graph. -* **--graph-name**: Name of the input graph. +* **--graph-name**: Name of the input graph. The graph name must adhere to the Python identifier naming rules with the exception that hyphens (``-``) are permitted and the name can start with numbers. * **--yaml-s3**: S3 location of yaml file for training and inference. * **--custom-script**: Custom training script provided by customers to run customer training logic. This should be a path to the Python script within the Docker image. * **--output-emb-s3**: S3 location to store GraphStorm generated node embeddings. This is an inference only argument. diff --git a/graphstorm-processing/graphstorm_processing/distributed_executor.py b/graphstorm-processing/graphstorm_processing/distributed_executor.py index ffe2bd8a1a..295ccad60b 100644 --- a/graphstorm-processing/graphstorm_processing/distributed_executor.py +++ b/graphstorm-processing/graphstorm_processing/distributed_executor.py @@ -543,7 +543,8 @@ def parse_args() -> argparse.Namespace: help="Name for the graph being processed." "The graph name must adhere to the Python " "identifier naming rules with the exception " - "that hyphens (-) are permitted.", + "that hyphens (-) are permitted and the name " + "can start with numbers", required=False, default=None, ) @@ -573,7 +574,8 @@ def check_graph_name(graph_name): We enforce that the graph name adheres to the Python identifier naming rules as in https://docs.python.org/3/reference/lexical_analysis.html#identifiers, - with the exception that hyphens (-) are permitted. + with the exception that hyphens (-) are permitted + and the name can start with numbers. This helps avoid the cases when an invalid graph name, such as `/graph`, causes unexpected errors. @@ -587,7 +589,8 @@ def check_graph_name(graph_name): assert graph_name.replace("-", "_").isidentifier(), ( "GraphStorm expects the graph name adheres to the Python" "identifier naming rules with the exception that hyphens " - f"(-) are permitted. But we get {graph_name}" + f"(-) are permitted and the name can start with numbers. " + "But we get {graph_name}" ) diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py index 56d9b4b64e..ceaee5adfd 100644 --- a/python/graphstorm/gconstruct/construct_graph.py +++ b/python/graphstorm/gconstruct/construct_graph.py @@ -913,7 +913,8 @@ def process_graph(args): help="Name for the graph being processed." "The graph name must adhere to the Python " "identifier naming rules with the exception " - "that hyphens (-) are permitted.",) + "that hyphens (-) are permitted " + "and the name can start with numbers",) argparser.add_argument("--remap-node-id", action='store_true', help="Whether or not to remap node IDs.") argparser.add_argument("--add-reverse-edges", action='store_true', diff --git a/python/graphstorm/utils.py b/python/graphstorm/utils.py index a58c9daafd..72bb6465e6 100644 --- a/python/graphstorm/utils.py +++ b/python/graphstorm/utils.py @@ -21,6 +21,7 @@ import resource import logging import psutil +import re import pandas as pd import dgl @@ -37,7 +38,8 @@ def check_graph_name(graph_name): We enforce that the graph name adheres to the Python identifier naming rules as in https://docs.python.org/3/reference/lexical_analysis.html#identifiers, - with the exception that hyphens (-) are permitted. + with the exception that hyphens (-) are permitted + and the name can start with numbers. This helps avoid the cases when an invalid graph name, such as `/graph`, causes unexpected errors. @@ -46,10 +48,12 @@ def check_graph_name(graph_name): graph_name: str Graph Name. """ - assert graph_name.replace('-', '_').isidentifier(), \ + gname = re.sub(r'^\d+', '', graph_name) + assert gname.replace('-', '_').isidentifier(), \ "GraphStorm expects the graph name adheres to the Python" \ "identifier naming rules with the exception that hyphens " \ - f"(-) are permitted. But we get {graph_name}" + f"(-) are permitted and the name can start with numbers. " \ + "But we get {graph_name}" def get_graph_name(part_config): """ Get graph name from graph partition config file diff --git a/tests/unit-tests/test_gsf.py b/tests/unit-tests/test_gsf.py index 6be5958788..1f3e1d58d0 100644 --- a/tests/unit-tests/test_gsf.py +++ b/tests/unit-tests/test_gsf.py @@ -457,6 +457,10 @@ def test_check_graph_name(): check_graph_name(graph_name) graph_name = "graph-name" check_graph_name(graph_name) + graph_name = "123-graph-name" + check_graph_name(graph_name) + graph_name = "_Graph-name" + check_graph_name(graph_name) # test with invalid graph name graph_name = "/graph_name" From c76975d0612715cacc4af01685b7c3a037f95bb6 Mon Sep 17 00:00:00 2001 From: Xiang Song Date: Thu, 26 Sep 2024 11:20:36 -0700 Subject: [PATCH 05/10] update --- .../cli/graph-construction/single-machine-gconstruct.rst | 2 +- .../cli/model-training-inference/distributed/sagemaker.rst | 2 +- .../graphstorm_processing/distributed_executor.py | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/cli/graph-construction/single-machine-gconstruct.rst b/docs/source/cli/graph-construction/single-machine-gconstruct.rst index f0d7849c61..a0b2c465d7 100644 --- a/docs/source/cli/graph-construction/single-machine-gconstruct.rst +++ b/docs/source/cli/graph-construction/single-machine-gconstruct.rst @@ -33,7 +33,7 @@ Full argument list of the ``gconstruct.construct_graph`` command * **-\-num-processes-for-nodes**: the number of processes to process node data simultaneously. Increase this number can speed up node data processing. * **-\-num-processes-for-edges**: the number of processes to process edge data simultaneously. Increase this number can speed up edge data processing. * **-\-output-dir**: (**Required**) the path of the output data files. -* **-\-graph-name**: (**Required**) the name assigned for the graph. The graph name must adhere to the Python identifier naming rules with the exception that hyphens (``-``) are permitted and the name can start with numbers. +* **-\-graph-name**: (**Required**) the name assigned for the graph. The graph name must adhere to the `Python identifier naming rules`_ with the exception that hyphens (``-``) are permitted and the name can start with numbers. * **-\-remap-node-id**: boolean value to decide whether to rename node IDs or not. Adding this argument will set it to be true, otherwise false. * **-\-add-reverse-edges**: boolean value to decide whether to add reverse edges for the given graph. Adding this argument sets it to true; otherwise, it defaults to false. It is **strongly** suggested to include this argument for graph construction, as some nodes in the original data may not have in-degrees, and thus cannot update their presentations by aggregating messages from their neighbors. Adding this arugment helps prevent this issue. * **-\-output-format**: the format of constructed graph, options are ``DGL``, ``DistDGL``. Default is ``DistDGL``. It also accepts multiple graph formats at the same time separated by an space, for example ``--output-format "DGL DistDGL"``. The output format is explained in the :ref:`Output ` section above. diff --git a/docs/source/cli/model-training-inference/distributed/sagemaker.rst b/docs/source/cli/model-training-inference/distributed/sagemaker.rst index ab1c892271..3acb78ee61 100644 --- a/docs/source/cli/model-training-inference/distributed/sagemaker.rst +++ b/docs/source/cli/model-training-inference/distributed/sagemaker.rst @@ -388,7 +388,7 @@ The rest of the arguments are passed on to ``sagemaker_train.py`` or ``sagemaker * **--task-type**: Task type. * **--graph-data-s3**: S3 location of the input graph. -* **--graph-name**: Name of the input graph. The graph name must adhere to the Python identifier naming rules with the exception that hyphens (``-``) are permitted and the name can start with numbers. +* **--graph-name**: Name of the input graph. The graph name must adhere to the `Python identifier naming rules`_ with the exception that hyphens (``-``) are permitted and the name can start with numbers. * **--yaml-s3**: S3 location of yaml file for training and inference. * **--custom-script**: Custom training script provided by customers to run customer training logic. This should be a path to the Python script within the Docker image. * **--output-emb-s3**: S3 location to store GraphStorm generated node embeddings. This is an inference only argument. diff --git a/graphstorm-processing/graphstorm_processing/distributed_executor.py b/graphstorm-processing/graphstorm_processing/distributed_executor.py index 295ccad60b..4c0561ef67 100644 --- a/graphstorm-processing/graphstorm_processing/distributed_executor.py +++ b/graphstorm-processing/graphstorm_processing/distributed_executor.py @@ -54,6 +54,7 @@ import json import logging import os +import re from pathlib import Path import tempfile import time @@ -586,7 +587,8 @@ def check_graph_name(graph_name): graph_name: str Graph Name. """ - assert graph_name.replace("-", "_").isidentifier(), ( + gname = re.sub(r"^\d+", "", graph_name) + assert gname.replace("-", "_").isidentifier(), ( "GraphStorm expects the graph name adheres to the Python" "identifier naming rules with the exception that hyphens " f"(-) are permitted and the name can start with numbers. " From 77c142798622ef93a1eaf6aba7c8f739d7f850b7 Mon Sep 17 00:00:00 2001 From: Xiang Song Date: Thu, 26 Sep 2024 11:24:20 -0700 Subject: [PATCH 06/10] Update --- .../graphstorm_processing/distributed_executor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphstorm-processing/graphstorm_processing/distributed_executor.py b/graphstorm-processing/graphstorm_processing/distributed_executor.py index 4c0561ef67..ca4c710d8e 100644 --- a/graphstorm-processing/graphstorm_processing/distributed_executor.py +++ b/graphstorm-processing/graphstorm_processing/distributed_executor.py @@ -591,8 +591,8 @@ def check_graph_name(graph_name): assert gname.replace("-", "_").isidentifier(), ( "GraphStorm expects the graph name adheres to the Python" "identifier naming rules with the exception that hyphens " - f"(-) are permitted and the name can start with numbers. " - "But we get {graph_name}" + "(-) are permitted and the name can start with numbers. " + f"But we get {graph_name}" ) From 84e92cbf07239f7d697357914b78a981edced94b Mon Sep 17 00:00:00 2001 From: Xiang Song Date: Thu, 26 Sep 2024 11:32:44 -0700 Subject: [PATCH 07/10] Update --- python/graphstorm/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/graphstorm/utils.py b/python/graphstorm/utils.py index 72bb6465e6..c6dd08707a 100644 --- a/python/graphstorm/utils.py +++ b/python/graphstorm/utils.py @@ -20,8 +20,8 @@ import time import resource import logging -import psutil import re +import psutil import pandas as pd import dgl @@ -52,8 +52,8 @@ def check_graph_name(graph_name): assert gname.replace('-', '_').isidentifier(), \ "GraphStorm expects the graph name adheres to the Python" \ "identifier naming rules with the exception that hyphens " \ - f"(-) are permitted and the name can start with numbers. " \ - "But we get {graph_name}" + "(-) are permitted and the name can start with numbers. " \ + f"But we get {graph_name}." def get_graph_name(part_config): """ Get graph name from graph partition config file From e788214a8733d78024c27359b1b0ccb0a73a4cc7 Mon Sep 17 00:00:00 2001 From: "xiang song(charlie.song)" Date: Thu, 26 Sep 2024 13:23:48 -0700 Subject: [PATCH 08/10] Update graphstorm-processing/graphstorm_processing/distributed_executor.py Co-authored-by: Theodore Vasiloudis --- .../graphstorm_processing/distributed_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphstorm-processing/graphstorm_processing/distributed_executor.py b/graphstorm-processing/graphstorm_processing/distributed_executor.py index ca4c710d8e..3bc1bc4936 100644 --- a/graphstorm-processing/graphstorm_processing/distributed_executor.py +++ b/graphstorm-processing/graphstorm_processing/distributed_executor.py @@ -592,7 +592,7 @@ def check_graph_name(graph_name): "GraphStorm expects the graph name adheres to the Python" "identifier naming rules with the exception that hyphens " "(-) are permitted and the name can start with numbers. " - f"But we get {graph_name}" + f"Got: {graph_name}" ) From c8167b37144b85ed4472214a893a4f70554fe21b Mon Sep 17 00:00:00 2001 From: "xiang song(charlie.song)" Date: Thu, 26 Sep 2024 13:23:56 -0700 Subject: [PATCH 09/10] Update python/graphstorm/utils.py Co-authored-by: Theodore Vasiloudis --- python/graphstorm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/graphstorm/utils.py b/python/graphstorm/utils.py index c6dd08707a..8f1511f406 100644 --- a/python/graphstorm/utils.py +++ b/python/graphstorm/utils.py @@ -53,7 +53,7 @@ def check_graph_name(graph_name): "GraphStorm expects the graph name adheres to the Python" \ "identifier naming rules with the exception that hyphens " \ "(-) are permitted and the name can start with numbers. " \ - f"But we get {graph_name}." + f"Got: {graph_name}." def get_graph_name(part_config): """ Get graph name from graph partition config file From 39a65cc336e2d227cb37028f47c7c1ed58ce951d Mon Sep 17 00:00:00 2001 From: Xiang Song Date: Thu, 26 Sep 2024 13:27:04 -0700 Subject: [PATCH 10/10] Update --- tests/unit-tests/test_gsf.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/tests/unit-tests/test_gsf.py b/tests/unit-tests/test_gsf.py index 1f3e1d58d0..031017b886 100644 --- a/tests/unit-tests/test_gsf.py +++ b/tests/unit-tests/test_gsf.py @@ -12,6 +12,7 @@ Unit tests for gsf.py """ +import pytest from graphstorm.gsf import (create_builtin_node_decoder, create_builtin_edge_decoder, @@ -464,28 +465,16 @@ def test_check_graph_name(): # test with invalid graph name graph_name = "/graph_name" - invalid_name = False - try: + with pytest.raises(AssertionError): check_graph_name(graph_name) - except: - invalid_name = True - assert invalid_name graph_name = "|graph_name" - invalid_name = False - try: + with pytest.raises(AssertionError): check_graph_name(graph_name) - except: - invalid_name = True - assert invalid_name graph_name = "\graph_name" - invalid_name = False - try: + with pytest.raises(AssertionError): check_graph_name(graph_name) - except: - invalid_name = True - assert invalid_name if __name__ == '__main__': test_check_graph_name()