From 8ee4db3dbcc28eaa28d8f94e38065a9ef29419b6 Mon Sep 17 00:00:00 2001 From: jalencato Date: Fri, 29 Mar 2024 16:15:38 -0700 Subject: [PATCH] [Partition Image] Add dependency for gs partition (#790) *Issue #, if available:* *Description of changes:* By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --- docker/Dockerfile.local | 3 ++- python/graphstorm/gpartition/dist_partition_graph.py | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile.local b/docker/Dockerfile.local index 3c68f9e170..5a0354c820 100644 --- a/docker/Dockerfile.local +++ b/docker/Dockerfile.local @@ -9,11 +9,12 @@ RUN apt-get install -y python3-pip git wget psmisc RUN apt-get install -y cmake # Install Pytorch -RUN pip3 install networkx==3.1 +RUN pip3 install networkx==3.1 pydantic RUN pip3 install torch==2.1.0+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 # Install DGL RUN pip3 install dgl==1.0.4+cu117 -f https://data.dgl.ai/wheels/cu117/repo.html +ENV PYTHONPATH="/root/dgl/tools/:${PYTHONPATH}" # Install related Python packages RUN pip3 install ogb==1.3.6 scipy pyarrow boto3 scikit-learn transformers diff --git a/python/graphstorm/gpartition/dist_partition_graph.py b/python/graphstorm/gpartition/dist_partition_graph.py index 96b6f086e4..fdde707342 100644 --- a/python/graphstorm/gpartition/dist_partition_graph.py +++ b/python/graphstorm/gpartition/dist_partition_graph.py @@ -39,7 +39,8 @@ def run_build_dglgraph( ip_list, output_path, metadata_filename, - dgl_tool_path): + dgl_tool_path, + ssh_port): """ Build DistDGL Graph Parameters @@ -54,6 +55,8 @@ def run_build_dglgraph( Output Path metadata_filename: str The filename for the graph partitioning metadata file we'll use to determine data sources. + ssh_port: int + SSH port """ # Get the python interpreter used right now. # If we can not get it we go with the default `python3` @@ -68,7 +71,7 @@ def run_build_dglgraph( "--partitions-dir", partitions_dir, "--ip-config", ip_list, "--out-dir", output_path, - "--ssh-port", "22", + "--ssh-port", f"{ssh_port}", "--python-path", f"{python_bin}", "--log-level", logging.getLevelName(logging.root.getEffectiveLevel()), "--save-orig-nids", @@ -134,7 +137,8 @@ def main(): args.ip_list, os.path.join(output_path, "dist_graph"), args.metadata_filename, - args.dgl_tool_path) + args.dgl_tool_path, + args.ssh_port) logging.info("DGL graph building took %f sec", part_end - time.time()) @@ -153,6 +157,7 @@ def parse_args() -> argparse.Namespace: help="Path to store the partitioned data") argparser.add_argument("--num-parts", type=int, required=True, help="Number of partitions to generate") + argparser.add_argument("--ssh-port", type=int, default=22, help="SSH Port") argparser.add_argument("--dgl-tool-path", type=str, help="The path to dgl/tools") argparser.add_argument("--partition-algorithm", type=str, default="random",