diff --git a/docker/build_docker_oss4local.sh b/docker/build_docker_oss4local.sh
index 15f848f6e5..982cffdf8b 100644
--- a/docker/build_docker_oss4local.sh
+++ b/docker/build_docker_oss4local.sh
@@ -33,6 +33,13 @@ else
     DEVICE_TYPE="$4"
 fi
 
+# process argument 5: build with ParMETIS support ("true" or "false"), default is "false"
+if [ -z "$5" ]; then
+    USE_PARMETIS="false"
+else
+    USE_PARMETIS="$5"
+fi
+
 # Copy scripts and tools codes to the docker folder
 mkdir -p $GSF_HOME"/docker/code"
 cp $SCRIPT_DIR"/local/fetch_and_run.sh" $GSF_HOME"/docker/code/"
@@ -42,7 +49,6 @@ cp -r $GSF_HOME"/inference_scripts" $GSF_HOME"/docker/code/inference_scripts"
 cp -r $GSF_HOME"/tools" $GSF_HOME"/docker/code/tools"
 cp -r $GSF_HOME"/training_scripts" $GSF_HOME"/docker/code/training_scripts"
 
-
 # Build OSS docker for EC2 instances that an pull ECR docker images
 DOCKER_FULLNAME="${IMAGE_NAME}:${TAG}-${DEVICE_TYPE}"
 
@@ -55,7 +61,7 @@ elif [[ $DEVICE_TYPE = "cpu" ]]; then
         docker login --username AWS --password-stdin public.ecr.aws
     SOURCE_IMAGE="public.ecr.aws/ubuntu/ubuntu:22.04_stable"
 else
-    echo >&2 -e "Image type can only be \"gpu\" or \"cpu\", but got \""$DEVICE_TYPE"\""
+    echo >&2 -e "Image type can only be \"gpu\" or \"cpu\", but got '$DEVICE_TYPE'"
     # remove the temporary code folder
     rm -rf code
     exit 1
@@ -65,6 +71,7 @@ fi
 DOCKER_BUILDKIT=1 docker build \
     --build-arg DEVICE=$DEVICE_TYPE \
     --build-arg SOURCE=${SOURCE_IMAGE} \
+    --build-arg USE_PARMETIS="${USE_PARMETIS}" \
     -f "${GSF_HOME}/docker/local/Dockerfile.local" . -t $DOCKER_FULLNAME
 
 # remove the temporary code folder
diff --git a/docker/local/Dockerfile.local b/docker/local/Dockerfile.local
index 3886ac1c6c..ee4992fcff 100644
--- a/docker/local/Dockerfile.local
+++ b/docker/local/Dockerfile.local
@@ -1,4 +1,5 @@
 ARG DEVICE=gpu
+ARG USE_PARMETIS=false
 ARG SOURCE
 
 FROM ${SOURCE} as base
@@ -48,6 +49,12 @@ ARG OGB_VERSION=1.3.6
 ARG TORCH_VERSION=2.3
 ARG TRANSFORMERS_VERSION=4.28.1
 
+# Download the DGL source tree (single branch only) and expose its tools on PYTHONPATH
+RUN cd /root; git clone --branch v${DGL_VERSION} --single-branch https://github.com/dmlc/dgl.git
+ENV DGL_HOME=/root/dgl
+ENV DGLBACKEND=pytorch
+ENV PYTHONPATH="/root/dgl/tools/:${PYTHONPATH}"
+
 FROM base as base-cpu
 
 # Install torch, DGL, and GSF deps that require torch
@@ -75,18 +82,50 @@ RUN TORCH_MAJOR_MINOR=$(echo $TORCH_VERSION | cut -c1-3) && \
     transformers==${TRANSFORMERS_VERSION} \
     && rm -rf /root/.cache
 
-FROM base-${DEVICE} as runtime
+FROM base-${DEVICE} as parmetis-true
 
-ENV PYTHONPATH="/root/dgl/tools/:${PYTHONPATH}"
+# Install MPI and the build toolchain needed to compile GKlib/METIS/ParMETIS
+RUN apt update && apt install -y --no-install-recommends \
+    build-essential \
+    cmake \
+    libopenmpi-dev \
+    openmpi-bin \
+    && rm -rf /var/lib/apt/lists/*
 
-# Download DGL source code
-RUN cd /root; git clone --branch v${DGL_VERSION} https://github.com/dmlc/dgl.git
+RUN pip install \
+    pyyaml \
+    && rm -rf /root/.cache
 
-# Copy GraphStorm source and add to PYTHONPATH
-RUN mkdir -p /graphstorm
-COPY code/python/graphstorm /graphstorm/python/graphstorm
-ENV PYTHONPATH="/graphstorm/python/:${PYTHONPATH}"
+# Install GKLib
+RUN cd /root && \
+    git clone --single-branch --branch master https://github.com/KarypisLab/GKlib && \
+    cd GKlib && \
+    make && \
+    make install
 
+# Install Metis (shared library, 64-bit indices) under /root/local
+RUN cd /root && \
+    git clone --single-branch --branch master https://github.com/KarypisLab/METIS.git && \
+    cd METIS && \
+    make config shared=1 cc=gcc prefix=/root/local i64=1 && \
+    make install
+
+# Install Parmetis (PM4GNN build, compiled with mpicc) under /root/local
+RUN cd /root && \
+    git clone --single-branch --branch main https://github.com/KarypisLab/PM4GNN.git && \
+    cd PM4GNN && \
+    make config cc=mpicc prefix=/root/local && \
+    make install
+
+ENV PATH=$PATH:/root/local/bin
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/local/lib/
+RUN cp /root/local/bin/pm_dglpart /root/local/bin/pm_dglpart3
+
+FROM base-${DEVICE} as parmetis-false
+
+# No additional dependencies when not supporting ParMETIS
+
+FROM parmetis-${USE_PARMETIS} as runtime
 
 # Set up SSH access
 ENV SSH_PORT=2222
@@ -101,11 +140,18 @@ RUN mkdir -p ${SSHDIR} \
 
 EXPOSE ${SSH_PORT}
 
+# Copy GraphStorm source and add to PYTHONPATH
+RUN mkdir -p /graphstorm
+COPY code/python/graphstorm /graphstorm/python/graphstorm
+ENV PYTHONPATH="/graphstorm/python/:${PYTHONPATH}"
+
+
 # Copy GraphStorm scripts and tools
 COPY code/examples /graphstorm/examples
 COPY code/inference_scripts /graphstorm/inference_scripts
 COPY code/tools /graphstorm/tools
 COPY code/training_scripts /graphstorm/training_scripts
 COPY code/fetch_and_run.sh /graphstorm/fetch_and_run.sh
+RUN chmod +x "/graphstorm/fetch_and_run.sh"
 
 CMD ["/usr/sbin/sshd", "-D"]