Skip to content

Commit

Permalink
Merge pull request #85 from kbase/dev_jupyterhub
Browse files Browse the repository at this point in the history
assign correct minio access key
  • Loading branch information
Tianhao-Gu authored Sep 12, 2024
2 parents a0fae4d + 5bded0b commit 77e1718
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 45 deletions.
5 changes: 3 additions & 2 deletions config/jupyterhub_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
c.NativeAuthenticator.minimum_password_length = 8

# Set up the admin user
c.Authenticator.admin_users = {'spark_user'}
admin_user = 'spark_user'
c.Authenticator.admin_users = {admin_user}
# TODO set admin user password to os.environ['JUPYTERHUB_ADMIN_PASSWORD'] automatically - currently spark_user is created manually with the signup page
# Allow any user who can successfully authenticate to access the JupyterHub server
# ref: https://jupyterhub.readthedocs.io/en/latest/reference/api/auth.html#jupyterhub.auth.Authenticator.allow_all
Expand All @@ -36,7 +37,7 @@

# Create a group to indicate users with read/write access to MinIO
c.JupyterHub.load_groups = {
'minio_rw': [],
VirtualEnvSpawner.RW_MINIO_GROUP: [],
}

# Set the JupyterHub IP address and port
Expand Down
44 changes: 7 additions & 37 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,57 +181,27 @@ services:
volumes:
- ./cdr/cdm/jupyter/user_shared_workspace:/cdm_shared_workspace/user_shared_workspace

dev_jupyterhub:
build:
context: .
dockerfile: Dockerfile
container_name: dev-jupyterhub
ports:
- "4043:4043"
depends_on:
- spark-master
- minio-create-bucket
environment:
- NOTEBOOK_PORT=4043
- JUPYTER_MODE=jupyterhub
- YARN_RESOURCE_MANAGER_URL=http://yarn-resourcemanager:8032
- SPARK_MASTER_URL=spark://spark-master:7077
- SPARK_DRIVER_HOST=dev-jupyterhub
- MINIO_URL=http://minio:9002
- MINIO_ACCESS_KEY=minio-readwrite
- MINIO_SECRET_KEY=minio123
- S3_YARN_BUCKET=yarn
- MAX_EXECUTORS=4
- POSTGRES_USER=hive
- POSTGRES_PASSWORD=hivepassword
- POSTGRES_DB=hive
- POSTGRES_URL=postgres:5432
- USAGE_MODE=dev
- JUPYTERHUB_ADMIN_PASSWORD=testpassword123
volumes:
- ./cdr/cdm/jupyter:/cdm_shared_workspace
- ./cdr/cdm/jupyter/jupyterhub_secrets:/jupyterhub_secrets
- ./cdr/cdm/jupyter/jupyterhub/users_home:/jupyterhub/users_home

user_jupyterhub:
cdm_jupyterhub:
build:
context: .
dockerfile: Dockerfile
container_name: user-jupyterhub
container_name: cdm-jupyterhub
ports:
- "4044:4044"
- "4043:4043"
depends_on:
- spark-master
- minio-create-bucket
environment:
- NOTEBOOK_PORT=4044
- NOTEBOOK_PORT=4043
- JUPYTER_MODE=jupyterhub
- YARN_RESOURCE_MANAGER_URL=http://yarn-resourcemanager:8032
- SPARK_MASTER_URL=spark://spark-master:7077
- SPARK_DRIVER_HOST=user-jupyterhub
- SPARK_DRIVER_HOST=cdm-jupyterhub
- MINIO_URL=http://minio:9002
- MINIO_ACCESS_KEY=minio-readonly
- MINIO_SECRET_KEY=minio123
- MINIO_RW_ACCESS_KEY=minio-readwrite
- MINIO_RW_SECRET_KEY=minio123
- S3_YARN_BUCKET=yarn
- JUPYTER_MODE=jupyterhub
- MAX_EXECUTORS=4
Expand Down
23 changes: 20 additions & 3 deletions src/jupyterhub_config/custom_spawner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class VirtualEnvSpawner(SimpleLocalProcessSpawner):
for each user, configuring their workspace based on their admin status.
"""

RW_MINIO_GROUP = 'minio_rw'

def start(self):
"""
Start the JupyterHub server for the user. This method ensures that the
Expand Down Expand Up @@ -118,8 +120,8 @@ def _ensure_user_directory(self, user_dir: Path, username: str):
# Change the directory's ownership to the user
os.chown(user_dir, uid, gid)

# Set directory permissions to 700: Owner (rwx), Group (---), Others (---)
os.chmod(user_dir, 0o700)
# Set directory permissions to 750: Owner (rwx), Group (r-x), Others (---)
os.chmod(user_dir, 0o750)

else:
self.log.info(f'Reusing user directory for {username}')
Expand Down Expand Up @@ -179,6 +181,21 @@ def _configure_environment(self, user_dir: Path, user_env_dir: Path, username: s
self.environment['PYTHONSTARTUP'] = os.path.join(os.environ['JUPYTERHUB_CONFIG_DIR'], 'startup.py')
self.environment['JUPYTERHUB_USER'] = username

group_names = [group.name for group in self.user.groups]
self.log.info(f'User {self.user.name} groups: {group_names}')

if self.user.admin or self.RW_MINIO_GROUP in group_names:
self.log.info(f'MinIO read/write user detected: {username}. Setting up minio_rw credentials.')
self.environment['MINIO_ACCESS_KEY'] = self.environment['MINIO_RW_ACCESS_KEY']
self.environment['MINIO_SECRET_KEY'] = self.environment['MINIO_RW_SECRET_KEY']
else:
self.log.info(f'Non-admin user detected: {username}. Removing admin credentials.')
self.environment.pop('MINIO_RW_ACCESS_KEY', None)
self.environment.pop('MINIO_RW_SECRET_KEY', None)

# TODO: add an allowlist of environment variables to pass to the user's environment
self.environment.pop('JUPYTERHUB_ADMIN_PASSWORD', None)

self.log.info(f"Environment variables for {username}: {self.environment}")

def _configure_notebook_dir(self, username: str, user_dir: Path):
Expand All @@ -192,4 +209,4 @@ def _configure_notebook_dir(self, username: str, user_dir: Path):
self.notebook_dir = '/cdm_shared_workspace'
else:
self.log.info(f'Non-admin user detected: {username}. Setting up user-specific workspace.')
self.notebook_dir = str(user_dir)
self.notebook_dir = str(user_dir)
70 changes: 67 additions & 3 deletions test/src/jupyterhub_config/custom_spawner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ def spawner():
spawner = VirtualEnvSpawner()
spawner.user = MagicMock()
spawner.user.name = 'testuser'
spawner.user.admin = False

return spawner


Expand Down Expand Up @@ -190,8 +192,8 @@ def test_ensure_user_directory_with_logging(mock_chown, mock_getpwnam, caplog):

# Check directory permissions
st = os.stat(user_dir)
# Permissions should be 0o700 (rwx------)
assert (st.st_mode & 0o777) == 0o700
# Permissions should be 0o750 (rwxr-x---)
assert (st.st_mode & 0o777) == 0o750

# Check log messages
assert f'Getting user info for {username}' in caplog.text
Expand Down Expand Up @@ -335,10 +337,13 @@ def test_reuse_virtual_environment(mock_run, caplog, spawner):
'PATH': '/usr/local/bin:/usr/bin:/bin',
'PYTHONPATH': '/usr/local/lib/python3.11/site-packages',
'JUPYTERHUB_CONFIG_DIR': '/etc/jupyterhub',
'MINIO_RW_ACCESS_KEY': 'minio_rw_access',
'MINIO_RW_SECRET_KEY': 'minio_rw_secret',
'JUPYTERHUB_ADMIN_PASSWORD': 'admin_password',
'EXISTING_VAR': 'existing_value',
'OVERWRITE_VAR': 'original_value'
})
def test_configure_environment(spawner, caplog):
def test_configure_environment_non_admin(spawner, caplog):
user_dir = Path('/home/testuser')
user_env_dir = Path('/home/testuser/.venv')
username = 'testuser'
Expand All @@ -363,8 +368,67 @@ def test_configure_environment(spawner, caplog):
assert spawner.environment['PYTHONSTARTUP'] == '/etc/jupyterhub/startup.py'
assert spawner.environment['JUPYTERHUB_USER'] == username

# Check that the admin credentials are removed for non-admin users
assert 'MINIO_RW_ACCESS_KEY' not in spawner.environment
assert 'MINIO_RW_SECRET_KEY' not in spawner.environment
assert 'JUPYTERHUB_ADMIN_PASSWORD' not in spawner.environment

assert f"Environment variables for {username}" in caplog.text
assert str(spawner.environment) in caplog.text
assert f'Non-admin user detected: {username}. Removing admin credentials.' in caplog.text

@patch.dict(os.environ, {
    'PATH': '/usr/local/bin:/usr/bin:/bin',
    'PYTHONPATH': '/usr/local/lib/python3.11/site-packages',
    'JUPYTERHUB_CONFIG_DIR': '/etc/jupyterhub',
    'MINIO_RW_ACCESS_KEY': 'minio_rw_access',
    'MINIO_RW_SECRET_KEY': 'minio_rw_secret',
})
def test_configure_minio_environment_admin(spawner, caplog):
    """Check that an admin user ends up with the MinIO read/write credentials.

    The fixture's user is flipped to admin before the environment is
    configured. Afterwards both the ``MINIO_RW_*`` variables and the plain
    ``MINIO_*`` variables must carry the read/write values, and the
    read/write log line must have been emitted.
    """
    spawner.user.admin = True

    username = 'testadminuser'
    home_dir = Path('/home/testadminuser')
    venv_dir = Path('/home/testuser/.venv')

    with caplog.at_level(logging.INFO):
        spawner._configure_environment(home_dir, venv_dir, username)

    # Each variable is paired with the value it must hold after configuration.
    expected_env = (
        ('MINIO_RW_ACCESS_KEY', 'minio_rw_access'),
        ('MINIO_ACCESS_KEY', 'minio_rw_access'),
        ('MINIO_RW_SECRET_KEY', 'minio_rw_secret'),
        ('MINIO_SECRET_KEY', 'minio_rw_secret'),
    )
    for env_name, env_value in expected_env:
        assert spawner.environment[env_name] == env_value

    assert f'MinIO read/write user detected: {username}. Setting up minio_rw credentials.' in caplog.text


@patch.dict(os.environ, {
    'PATH': '/usr/local/bin:/usr/bin:/bin',
    'PYTHONPATH': '/usr/local/lib/python3.11/site-packages',
    'JUPYTERHUB_CONFIG_DIR': '/etc/jupyterhub',
    'MINIO_RW_ACCESS_KEY': 'minio_rw_access',
    'MINIO_RW_SECRET_KEY': 'minio_rw_secret',
})
def test_configure_environment_minio_rw_group(spawner, caplog):
    """Check that membership in the minio_rw group grants read/write credentials.

    The user keeps the admin flag set by the fixture; only the group list is
    replaced with a single group named ``minio_rw``. After configuration both
    the ``MINIO_RW_*`` variables and the plain ``MINIO_*`` variables must carry
    the read/write values, and the read/write log line must have been emitted.
    """
    # The name is assigned after construction: MagicMock(name=...) would
    # label the mock itself rather than set its ``name`` attribute.
    rw_group = MagicMock()
    rw_group.name = 'minio_rw'
    spawner.user.groups = [rw_group]

    username = 'testadminuser'
    home_dir = Path('/home/testadminuser')
    venv_dir = Path('/home/testuser/.venv')

    with caplog.at_level(logging.INFO):
        spawner._configure_environment(home_dir, venv_dir, username)

    # Each variable is paired with the value it must hold after configuration.
    expected_env = (
        ('MINIO_RW_ACCESS_KEY', 'minio_rw_access'),
        ('MINIO_ACCESS_KEY', 'minio_rw_access'),
        ('MINIO_RW_SECRET_KEY', 'minio_rw_secret'),
        ('MINIO_SECRET_KEY', 'minio_rw_secret'),
    )
    for env_name, env_value in expected_env:
        assert spawner.environment[env_name] == env_value

    assert f'MinIO read/write user detected: {username}. Setting up minio_rw credentials.' in caplog.text


@patch.dict(os.environ, {}, clear=True) # Clear the environment for the test
Expand Down

0 comments on commit 77e1718

Please sign in to comment.