From d53568752ee5d9b20b29e91a223e6a62479c8e1d Mon Sep 17 00:00:00 2001 From: Xiang Song Date: Wed, 25 Sep 2024 21:47:33 -0700 Subject: [PATCH] Update --- .../cli/graph-construction/single-machine-gconstruct.rst | 2 +- .../graphstorm_processing/distributed_executor.py | 5 ++++- python/graphstorm/gconstruct/construct_graph.py | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/source/cli/graph-construction/single-machine-gconstruct.rst b/docs/source/cli/graph-construction/single-machine-gconstruct.rst index 45026d1f4e..d0d75c2d32 100644 --- a/docs/source/cli/graph-construction/single-machine-gconstruct.rst +++ b/docs/source/cli/graph-construction/single-machine-gconstruct.rst @@ -33,7 +33,7 @@ Full argument list of the ``gconstruct.construct_graph`` command * **-\-num-processes-for-nodes**: the number of processes to process node data simultaneously. Increase this number can speed up node data processing. * **-\-num-processes-for-edges**: the number of processes to process edge data simultaneously. Increase this number can speed up edge data processing. * **-\-output-dir**: (**Required**) the path of the output data files. -* **-\-graph-name**: (**Required**) the name assigned for the graph. +* **-\-graph-name**: (**Required**) the name assigned for the graph. The graph name must adhere to the Python identifier naming rules with the exception that hyphens (``-``) are permitted. * **-\-remap-node-id**: boolean value to decide whether to rename node IDs or not. Adding this argument will set it to be true, otherwise false. * **-\-add-reverse-edges**: boolean value to decide whether to add reverse edges for the given graph. Adding this argument sets it to true; otherwise, it defaults to false. It is **strongly** suggested to include this argument for graph construction, as some nodes in the original data may not have in-degrees, and thus cannot update their presentations by aggregating messages from their neighbors. 
Adding this arugment helps prevent this issue. * **-\-output-format**: the format of constructed graph, options are ``DGL``, ``DistDGL``. Default is ``DistDGL``. It also accepts multiple graph formats at the same time separated by an space, for example ``--output-format "DGL DistDGL"``. The output format is explained in the :ref:`Output ` section above. diff --git a/graphstorm-processing/graphstorm_processing/distributed_executor.py b/graphstorm-processing/graphstorm_processing/distributed_executor.py index 580e07c8ce..697be1373c 100644 --- a/graphstorm-processing/graphstorm_processing/distributed_executor.py +++ b/graphstorm-processing/graphstorm_processing/distributed_executor.py @@ -540,7 +540,10 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--graph-name", type=str, - help="Name for the graph being processed.", + help="Name for the graph being processed. " + "The graph name must adhere to the Python " + "identifier naming rules with the exception " + "that hyphens (-) are permitted.", required=False, default=None, ) diff --git a/python/graphstorm/gconstruct/construct_graph.py b/python/graphstorm/gconstruct/construct_graph.py index bf21c3f7dd..56d9b4b64e 100644 --- a/python/graphstorm/gconstruct/construct_graph.py +++ b/python/graphstorm/gconstruct/construct_graph.py @@ -910,7 +910,10 @@ def process_graph(args): argparser.add_argument("--output-dir", type=str, required=True, help="The path of the output data folder.") argparser.add_argument("--graph-name", type=str, required=True, - help="The graph name") + help="Name for the graph being processed. " + "The graph name must adhere to the Python " + "identifier naming rules with the exception " + "that hyphens (-) are permitted.",) argparser.add_argument("--remap-node-id", action='store_true', help="Whether or not to remap node IDs.") argparser.add_argument("--add-reverse-edges", action='store_true',