Skip to content

Commit

Permalink
[GSProcessing] SageMaker image for v0.2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
thvasilo committed Nov 9, 2023
1 parent 6563c05 commit 36a7d49
Showing 1 changed file with 45 additions and 0 deletions.
45 changes: 45 additions & 0 deletions graphstorm-processing/docker/0.2.1/sagemaker/Dockerfile.cpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# syntax=docker/dockerfile:1
# Shared base stage for the prod and test images. Built on the SageMaker Spark
# processing image; the tag names the 3.4 CPU build for Python 3.9 (py39), v1.0.
FROM 153931337802.dkr.ecr.us-west-2.amazonaws.com/sagemaker-spark-processing:3.4-cpu-py39-v1.0 AS base

# Python runtime and locale settings:
#   PYTHONDONTWRITEBYTECODE - Python won't try to write .pyc or .pyo files on
#                             the import of source modules
#   PYTHONUNBUFFERED        - force stdin, stdout and stderr to be totally
#                             unbuffered. Good for logging
#   PYTHONIOENCODING, LANG, LC_ALL - use UTF-8 for I/O and the process locale
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Append /usr/local/lib and /opt/conda/lib to the library search path.
# The ${VAR:+...} form only emits the existing value (and its ':' separator)
# when LD_LIBRARY_PATH is already set, so an unset variable does not produce a
# leading empty entry, which the dynamic linker would treat as the current
# working directory.
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib:/opt/conda/lib"

# Put the conda binaries ahead of everything else on PATH
ENV PATH=/opt/conda/bin:$PATH

# GSProcessing requirements, registered with pipenv.
# The pipenv/pip cache is removed in the same layer so it never reaches the image.
RUN pipenv install \
        pip==23.1.2 \
        setuptools \
        wheel \
        spacy==3.6.0 \
        pyspark==3.4.1 \
        pyarrow==13.0.0 \
        joblib==1.3.1 \
        psutil==5.9.5 \
        pandas==1.3.5 \
        boto3==1.28.38 \
        protobuf==3.20.3 \
        mock==5.1.0 \
    && rm -rf /root/.cache
# Run pipenv sync in its own layer, before any project code is copied in, so
# the base libraries stay independent from our editable code and remain
# cacheable. The spaCy en_core_web_lg model is downloaded in the same layer.
RUN pipenv sync --system \
    && python3 -m spacy download en_core_web_lg \
    && rm -rf /root/.cache

# Graphloader codebase: copy the build context's code/ directory under the
# Spark installation directory.
COPY code/ /usr/lib/spark/code/

# Base container assumes SPARK_HOME is the workdir
ENV SPARK_HOME=/usr/lib/spark
WORKDIR $SPARK_HOME

# Ensure our python3 installation is the one used
# NOTE(review): this only appends an alias to the build user's ~/.bashrc. Bash
# normally reads that file for interactive shells only, so the alias presumably
# does not affect the non-interactive ENTRYPOINT below or the `python3` calls in
# later RUN steps - confirm whether it has any effect.
RUN echo 'alias python3=python3.9' >> ~/.bashrc

# Starts framework: run docker-entry.sh with bash (exec form). A CMD defined in
# a stage built on this one is passed to the script as its arguments.
ENTRYPOINT ["bash", "/usr/lib/spark/code/docker-entry.sh"]

# Production image: install the pre-built GSProcessing wheel found under
# /usr/lib/spark/code/, then delete the wheel file.
# --no-cache-dir keeps pip's download/build cache out of the image layer.
FROM base AS prod
RUN python3 -m pip install --no-cache-dir /usr/lib/spark/code/graphstorm_processing-*.whl \
    && rm /usr/lib/spark/code/graphstorm_processing-*.whl
# Default argument handed to the ENTRYPOINT inherited from the base stage
CMD ["gs-processing"]

# Test image: install GSProcessing from the source tree under
# /usr/lib/spark/code/graphstorm-processing/ and default to running pytest.
# --no-cache-dir keeps pip's download/build cache out of the image layer.
FROM base AS test
RUN python3 -m pip install --no-cache-dir /usr/lib/spark/code/graphstorm-processing/
# The relative test path is resolved against the WORKDIR inherited from base
CMD ["sh", "-c", "pytest ./code/tests/"]

0 comments on commit 36a7d49

Please sign in to comment.