Skip to content

Commit

Permalink
Merge pull request #47 from kbase/dev-build_cache
Browse files Browse the repository at this point in the history
Allow for caching chown steps
  • Loading branch information
MrCreosote authored Jul 12, 2024
2 parents 41dff3f + 5d56b49 commit 37578d1
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 16 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ cdr/cdm/jupyter/

# Ignore Gradle build output directory
build
/.project
/.pydevproject*
37 changes: 21 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ FROM bitnami/spark:3.5.1
# https://github.com/bitnami/containers/tree/main/bitnami/spark#installing-additional-jars
USER root

# Add a dedicated unprivileged system account (group "spark", user "spark_user").
# UID 1001 has no /etc/passwd entry in the bitnami/spark image, which causes various issues.
# Background:
# https://github.com/bitnami/containers/issues/52698
# https://github.com/bitnami/containers/pull/52661
RUN groupadd --system spark && useradd --system --gid spark spark_user

RUN apt-get update && apt-get install -y \
# GCC required to resolve error during JupyterLab installation: psutil could not be installed from sources because gcc is not installed.
gcc curl git \
Expand All @@ -22,46 +29,44 @@ ENV GRADLE_JARS_DIR=gradle_jars
# Run the Gradle wrapper against the /gradle project
RUN /gradle/gradlew --project-dir /gradle build
# Copy everything under /gradle/${GRADLE_JARS_DIR} into Spark's jars directory
RUN cp -R /gradle/${GRADLE_JARS_DIR}/* /opt/bitnami/spark/jars/

# Give the non-root user ownership of the whole /opt/bitnami tree
RUN chown -R spark_user:spark /opt/bitnami

# Install pipenv; --no-cache-dir keeps pip's download cache out of the image layer
RUN pip3 install --no-cache-dir pipenv

# Install Python dependencies.
# The Pipfile* glob picks up Pipfile and, when present in the build context, Pipfile.lock;
# they are copied into the current working directory.
COPY Pipfile* ./
# --system installs the locked packages into the system Python instead of a virtualenv
RUN pipenv sync --system

# Jupyter config, runtime, and data locations, all rooted at /.jupyter
ENV JUPYTER_CONFIG_DIR=/.jupyter \
    JUPYTER_RUNTIME_DIR=/.jupyter/runtime \
    JUPYTER_DATA_DIR=/.jupyter/data
# Create the directories, then hand the tree to the non-root user
RUN mkdir -p "${JUPYTER_CONFIG_DIR}" "${JUPYTER_RUNTIME_DIR}" "${JUPYTER_DATA_DIR}"
RUN chown -R spark_user:spark /.jupyter

# Add the project sources to the image and append /src to the Python import path
COPY ./src/ /src
# key=value form; the space-separated ENV syntax is legacy
ENV PYTHONPATH="${PYTHONPATH}:/src"

# Copy the startup script into the IPython default profile's startup directory so that
# the pre-built functions are loaded automatically in Jupyter Notebook
COPY ./src/notebook_utils/startup.py /.ipython/profile_default/startup/
# Give the non-root user ownership of the whole /.ipython tree
RUN chown -R spark_user:spark /.ipython

# Copy the helper scripts and make every *.sh script executable for all users
COPY ./scripts/ /opt/scripts/
RUN chmod a+x /opt/scripts/*.sh

# Copy the configuration files
COPY ./config/ /opt/config/

# Chown only these specific paths rather than all of /opt:
# /opt/bitnami was already chowned in an earlier step
RUN chown -R spark_user:spark /src /opt/scripts /opt/config

# This is the shared directory between the spark master, worker and driver containers
ENV CDM_SHARED_DIR=/cdm_shared_workspace
# NOTE(review): mode 777 makes the directory world-writable; presumably this is needed so
# containers running under different UIDs can share it - confirm before tightening.
RUN mkdir -p ${CDM_SHARED_DIR} && chmod -R 777 ${CDM_SHARED_DIR}

# Set up Jupyter directories
# NOTE(review): these instructions repeat the Jupyter ENV/mkdir group that appears earlier
# in this listing; this looks like the removed side of the diff - confirm before relying on it.
ENV JUPYTER_CONFIG_DIR=/.jupyter
ENV JUPYTER_RUNTIME_DIR=/.jupyter/runtime
ENV JUPYTER_DATA_DIR=/.jupyter/data
RUN mkdir -p ${JUPYTER_CONFIG_DIR} ${JUPYTER_RUNTIME_DIR} ${JUPYTER_DATA_DIR}
# Give the non-root user ownership of the shared workspace directory
RUN chown -R spark_user:spark $CDM_SHARED_DIR

# Switch back to non-root user

# Create a non-root user
# User 1001 is not defined in /etc/passwd in the bitnami/spark image, causing various issues.
# References:
# https://github.com/bitnami/containers/issues/52698
# https://github.com/bitnami/containers/pull/52661
# NOTE(review): the same groupadd/useradd command already appears near the top of this
# listing; this looks like the removed side of the diff - confirm before relying on it.
RUN groupadd -r spark && useradd -r -g spark spark_user

# Change ownership of the .ipython, .jupyter, etc. directories to the non-root user
# NOTE(review): every path listed here is also chowned by an individual step earlier in this
# listing; presumably this single bulk chown is what those steps replaced - confirm.
RUN chown -R spark_user:spark /.ipython /.jupyter /src /opt/config /opt/scripts /cdm_shared_workspace /opt/bitnami
USER spark_user

# Exec-form (JSON array) entrypoint: the startup script is invoked directly, with no wrapping shell
ENTRYPOINT ["/opt/scripts/entrypoint.sh"]

0 comments on commit 37578d1

Please sign in to comment.