Skip to content

Commit

Permalink
setup jupyterhub config
Browse files Browse the repository at this point in the history
  • Loading branch information
Tianhao-Gu committed Sep 5, 2024
1 parent a208fa7 commit 7cfdc7b
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 7 deletions.
30 changes: 25 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,19 @@ RUN groupadd -r spark && useradd -r -g spark spark_user

RUN apt-get update && apt-get install -y \
# GCC required to resolve error during JupyterLab installation: psutil could not be installed from sources because gcc is not installed.
gcc curl git npm nodejs graphviz graphviz-dev libgdal-dev build-essential python3-dev\
gcc \
curl \
git \
wget \
vim \
npm \
nodejs \
graphviz \
graphviz-dev \
libgdal-dev \
build-essential \
python3-dev \
sudo \
&& rm -rf /var/lib/apt/lists/*

ENV HADOOP_AWS_VER=3.3.4
Expand Down Expand Up @@ -42,22 +54,26 @@ RUN pipenv sync --system

RUN chown -R spark_user:spark /opt/bitnami

# Set up Jupyter Lab directories
# Set up JupyterLab directories
ENV JUPYTER_CONFIG_DIR=/.jupyter
ENV JUPYTER_RUNTIME_DIR=/.jupyter/runtime
ENV JUPYTER_DATA_DIR=/.jupyter/data
RUN mkdir -p ${JUPYTER_CONFIG_DIR} ${JUPYTER_RUNTIME_DIR} ${JUPYTER_DATA_DIR}
RUN chown -R spark_user:spark /.jupyter

# Set up Jupyter Hub directories
# Set up JupyterHub directories
ENV JUPYTERHUB_CONFIG_DIR=/srv/jupyterhub
RUN mkdir -p ${JUPYTERHUB_CONFIG_DIR}
COPY ./src/notebook_utils/startup.py ${JUPYTERHUB_CONFIG_DIR}/startup.py
COPY ./config/jupyterhub_config.py ${JUPYTERHUB_CONFIG_DIR}/jupyterhub_config.py
COPY ./scripts/spawn_notebook.sh ${JUPYTERHUB_CONFIG_DIR}/spawn_notebook.sh
RUN chmod +x ${JUPYTERHUB_CONFIG_DIR}/spawn_notebook.sh
RUN chown -R spark_user:spark ${JUPYTERHUB_CONFIG_DIR}

# Jupyter Hub user home directory
RUN mkdir -p /jupyterhub/users_home
RUN chown -R spark_user:spark /jupyterhub/users_home
ENV JUPYTERHUB_USER_HOME=/jupyterhub/users_home
RUN mkdir -p $JUPYTERHUB_USER_HOME
RUN chown -R spark_user:spark $JUPYTERHUB_USER_HOME

RUN npm install -g configurable-http-proxy

Expand All @@ -82,6 +98,10 @@ ENV CDM_SHARED_DIR=/cdm_shared_workspace
RUN mkdir -p ${CDM_SHARED_DIR} && chmod -R 777 ${CDM_SHARED_DIR}
RUN chown -R spark_user:spark $CDM_SHARED_DIR

# Allow spark_user to use sudo without a password
# TODO: use `sudospawner` in JupyterHub to avoid this (https://jupyterhub.readthedocs.io/en/stable/howto/configuration/config-sudo.html)
# The rule is written to a dedicated drop-in file instead of being appended to
# /etc/sudoers, given the restrictive mode sudo expects, and syntax-checked with
# visudo so that a malformed rule fails the image build rather than breaking sudo at runtime.
RUN echo "spark_user ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/spark_user \
    && chmod 0440 /etc/sudoers.d/spark_user \
    && visudo -cf /etc/sudoers.d/spark_user

# Switch back to non-root user
USER spark_user

Expand Down
26 changes: 26 additions & 0 deletions config/jupyterhub_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
This is the JupyterHub configuration file. It is used to configure the JupyterHub server.
Refer to the JupyterHub documentation for more information:
https://jupyterhub.readthedocs.io/en/latest/tutorial/getting-started/config-basics.html
"""
import os

from jupyterhub_config.custom_spawner import VirtualEnvSpawner

# `get_config` is not imported: JupyterHub injects it into this file's
# namespace when the file is loaded with `jupyterhub -f <path>`.
c = get_config()

# Set the authenticator class
# TODO: Change the authenticator class to a secure one (e.g. GitHubOAuthenticator)
c.JupyterHub.authenticator_class = 'jupyterhub.auth.DummyAuthenticator'
c.Authenticator.allowed_users = {'spark_user', 'test_user1', 'test_user2'}
# Required setting: a missing JUPYTERHUB_ADMIN_PASSWORD aborts startup with a
# KeyError that names the variable.
c.DummyAuthenticator.password = os.environ['JUPYTERHUB_ADMIN_PASSWORD']

c.Authenticator.admin_users = {'spark_user'}

c.JupyterHub.spawner_class = VirtualEnvSpawner

# Set the JupyterHub IP address and port
c.JupyterHub.ip = '0.0.0.0'
# Index os.environ directly (as for the password above) so that an unset
# NOTEBOOK_PORT fails with a KeyError naming the variable, instead of the
# opaque TypeError that int(None) would raise.
c.JupyterHub.port = int(os.environ['NOTEBOOK_PORT'])

c.JupyterHub.log_level = 'DEBUG'
4 changes: 3 additions & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ services:
- JUPYTER_MODE=jupyterhub
- YARN_RESOURCE_MANAGER_URL=http://yarn-resourcemanager:8032
- SPARK_MASTER_URL=spark://spark-master:7077
- SPARK_DRIVER_HOST=dev-jupterhub
- SPARK_DRIVER_HOST=dev-jupyterhub
- MINIO_URL=http://minio:9002
- MINIO_ACCESS_KEY=minio-readwrite
- MINIO_SECRET_KEY=minio123
Expand All @@ -207,6 +207,7 @@ services:
- POSTGRES_DB=hive
- POSTGRES_URL=postgres:5432
- USAGE_MODE=dev
- JUPYTERHUB_ADMIN_PASSWORD=testpassword123
volumes:
- ./cdr/cdm/jupyter:/cdm_shared_workspace
- ./cdr/cdm/jupyter/jupyterhub/users_home:/jupyterhub/users_home
Expand Down Expand Up @@ -237,6 +238,7 @@ services:
- POSTGRES_PASSWORD=hivepassword
- POSTGRES_DB=hive
- POSTGRES_URL=postgres:5432
- JUPYTERHUB_ADMIN_PASSWORD=testpassword123
volumes:
- ./cdr/cdm/jupyter/jupyterhub/users_home:/jupyterhub/users_home

Expand Down
2 changes: 1 addition & 1 deletion scripts/notebook_entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ if [ "$JUPYTER_MODE" = "jupyterlab" ]; then
elif [ "$JUPYTER_MODE" = "jupyterhub" ]; then
echo "starting jupyterhub"

echo "TO BE IMPLEMENTED"
jupyterhub -f "$JUPYTERHUB_CONFIG_DIR"/jupyterhub_config.py
else
echo "ERROR: JUPYTER_MODE is not set to jupyterlab or jupyterhub. Please set JUPYTER_MODE to either jupyterlab or jupyterhub."
exit 1
Expand Down
9 changes: 9 additions & 0 deletions scripts/spawn_notebook.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Start the single-user notebook server from inside the user's home directory.
# Reads JUPYTERHUB_USER and JUPYTERHUB_USER_HOME from the environment and
# forwards every command-line argument to jupyterhub-singleuser.

USERNAME=${JUPYTERHUB_USER}

echo "Starting Jupyter Notebook for user: $USERNAME"
# Quote the path so names containing whitespace or glob characters are not
# split or expanded, and stop if the directory cannot be entered rather than
# silently launching the server from whatever the current directory is.
cd "$JUPYTERHUB_USER_HOME/$USERNAME" || {
    echo "ERROR: cannot change to user home directory: $JUPYTERHUB_USER_HOME/$USERNAME" >&2
    exit 1
}

# Start the notebook server with current user
exec jupyterhub-singleuser "$@"
17 changes: 17 additions & 0 deletions src/jupyterhub_config/custom_spawner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from jupyterhub.spawner import SimpleLocalProcessSpawner


class VirtualEnvSpawner(SimpleLocalProcessSpawner):
    """
    JupyterHub spawner derived from ``SimpleLocalProcessSpawner``.

    The class adds no behaviour of its own at present: ``start`` hands
    straight off to the parent implementation.
    NOTE(review): the class name suggests per-user virtual environment
    setup is intended to live here; none is implemented yet.
    """

    def start(self):
        """
        Start the single-user server for this spawner's user.

        Returns:
            Whatever the parent class's ``start`` returns, passed through
            unchanged.
        """
        parent_result = super().start()
        return parent_result

Check warning on line 17 in src/jupyterhub_config/custom_spawner.py

View check run for this annotation

Codecov / codecov/patch

src/jupyterhub_config/custom_spawner.py#L17

Added line #L17 was not covered by tests
1 change: 1 addition & 0 deletions test/src/jupyterhub_config/custom_spawner_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from jupyterhub_config.custom_spawner import *

0 comments on commit 7cfdc7b

Please sign in to comment.