From 8e3131354b41024feb65e59368f5b7bf70df8a6b Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Wed, 11 Sep 2024 18:56:23 -0500 Subject: [PATCH 1/3] assign correct minio access key --- config/jupyterhub_config.py | 5 +- docker-compose.yaml | 44 ++----------- src/jupyterhub_config/custom_spawner.py | 19 +++++- .../jupyterhub_config/custom_spawner_test.py | 66 ++++++++++++++++++- 4 files changed, 93 insertions(+), 41 deletions(-) diff --git a/config/jupyterhub_config.py b/config/jupyterhub_config.py index caf0612..c7d6087 100644 --- a/config/jupyterhub_config.py +++ b/config/jupyterhub_config.py @@ -23,7 +23,8 @@ c.NativeAuthenticator.minimum_password_length = 8 # Set up the admin user -c.Authenticator.admin_users = {'spark_user'} +admin_user = 'spark_user' +c.Authenticator.admin_users = {admin_user} # TODO set admin user password to os.environ['JUPYTERHUB_ADMIN_PASSWORD'] automatically - currently spark_user is created manually with the signup page # Allow user who can successfully authenticate to access the JupyterHub server # ref: https://jupyterhub.readthedocs.io/en/latest/reference/api/auth.html#jupyterhub.auth.Authenticator.allow_all @@ -36,7 +37,7 @@ # Create a group to indicate users with read/write access to MinIO c.JupyterHub.load_groups = { - 'minio_rw': [], + VirtualEnvSpawner.RW_MINIO_GROUP: [], } # Set the JupyterHub IP address and port diff --git a/docker-compose.yaml b/docker-compose.yaml index 47a7014..a0fbe9e 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -181,57 +181,27 @@ services: volumes: - ./cdr/cdm/jupyter/user_shared_workspace:/cdm_shared_workspace/user_shared_workspace - dev_jupyterhub: - build: - context: . - dockerfile: Dockerfile - container_name: dev-jupyterhub - ports: - - "4043:4043" - depends_on: - - spark-master - - minio-create-bucket - environment: - - NOTEBOOK_PORT=4043 - - JUPYTER_MODE=jupyterhub - - YARN_RESOURCE_MANAGER_URL=http://yarn-resourcemanager:8032 - - SPARK_MASTER_URL=spark://spark-master:7077 - - SPARK_DRIVER_HOST=dev-jupyterhub - - MINIO_URL=http://minio:9002 - - MINIO_ACCESS_KEY=minio-readwrite - - MINIO_SECRET_KEY=minio123 - - S3_YARN_BUCKET=yarn - - MAX_EXECUTORS=4 - - POSTGRES_USER=hive - - POSTGRES_PASSWORD=hivepassword - - POSTGRES_DB=hive - - POSTGRES_URL=postgres:5432 - - USAGE_MODE=dev - - JUPYTERHUB_ADMIN_PASSWORD=testpassword123 - volumes: - - ./cdr/cdm/jupyter:/cdm_shared_workspace - - ./cdr/cdm/jupyter/jupyterhub_secrets:/jupyterhub_secrets - - ./cdr/cdm/jupyter/jupyterhub/users_home:/jupyterhub/users_home - - user_jupyterhub: + cdm_jupyterhub: build: context: . dockerfile: Dockerfile - container_name: user-jupyterhub + container_name: cdm-jupyterhub ports: - - "4044:4044" + - "4043:4043" depends_on: - spark-master - minio-create-bucket environment: - - NOTEBOOK_PORT=4044 + - NOTEBOOK_PORT=4043 - JUPYTER_MODE=jupyterhub - YARN_RESOURCE_MANAGER_URL=http://yarn-resourcemanager:8032 - SPARK_MASTER_URL=spark://spark-master:7077 - - SPARK_DRIVER_HOST=user-jupyterhub + - SPARK_DRIVER_HOST=cdm-jupyterhub - MINIO_URL=http://minio:9002 - MINIO_ACCESS_KEY=minio-readonly - MINIO_SECRET_KEY=minio123 + - MINIO_RW_ACCESS_KEY=minio-readwrite + - MINIO_RW_SECRET_KEY=minio123 - S3_YARN_BUCKET=yarn - JUPYTER_MODE=jupyterhub - MAX_EXECUTORS=4 diff --git a/src/jupyterhub_config/custom_spawner.py b/src/jupyterhub_config/custom_spawner.py index f5dd6b7..064b9cc 100644 --- a/src/jupyterhub_config/custom_spawner.py +++ b/src/jupyterhub_config/custom_spawner.py @@ -15,6 +15,8 @@ class VirtualEnvSpawner(SimpleLocalProcessSpawner): for each user, configuring their workspace based on their admin status. """ + RW_MINIO_GROUP = 'minio_rw' + def start(self): """ Start the JupyterHub server for the user. This method ensures that the @@ -179,6 +181,21 @@ def _configure_environment(self, user_dir: Path, user_env_dir: Path, username: s self.environment['PYTHONSTARTUP'] = os.path.join(os.environ['JUPYTERHUB_CONFIG_DIR'], 'startup.py') self.environment['JUPYTERHUB_USER'] = username + group_names = [group.name for group in self.user.groups] + self.log.info(f'User {self.user.name} groups: {group_names}') + + if self.user.admin or self.RW_MINIO_GROUP in group_names: + self.log.info(f'MinIO read/write user detected: {username}. Setting up minio_rw credentials.') + self.environment['MINIO_ACCESS_KEY'] = self.environment['MINIO_RW_ACCESS_KEY'] + self.environment['MINIO_SECRET_KEY'] = self.environment['MINIO_RW_SECRET_KEY'] + else: + self.log.info(f'Non-admin user detected: {username}. Removing admin credentials.') + self.environment.pop('MINIO_RW_ACCESS_KEY', None) + self.environment.pop('MINIO_RW_SECRET_KEY', None) + + # TODO: add a white list of environment variables to pass to the user's environment + self.environment.pop('JUPYTERHUB_ADMIN_PASSWORD', None) + self.log.info(f"Environment variables for {username}: {self.environment}") def _configure_notebook_dir(self, username: str, user_dir: Path): @@ -192,4 +209,4 @@ def _configure_notebook_dir(self, username: str, user_dir: Path): self.notebook_dir = '/cdm_shared_workspace' else: self.log.info(f'Non-admin user detected: {username}. Setting up user-specific workspace.') - self.notebook_dir = str(user_dir) \ No newline at end of file + self.notebook_dir = str(user_dir) diff --git a/test/src/jupyterhub_config/custom_spawner_test.py b/test/src/jupyterhub_config/custom_spawner_test.py index f98e56d..ba9bd91 100644 --- a/test/src/jupyterhub_config/custom_spawner_test.py +++ b/test/src/jupyterhub_config/custom_spawner_test.py @@ -17,6 +17,8 @@ def spawner(): spawner = VirtualEnvSpawner() spawner.user = MagicMock() spawner.user.name = 'testuser' + spawner.user.admin = False + return spawner @@ -335,10 +337,13 @@ def test_reuse_virtual_environment(mock_run, caplog, spawner): 'PATH': '/usr/local/bin:/usr/bin:/bin', 'PYTHONPATH': '/usr/local/lib/python3.11/site-packages', 'JUPYTERHUB_CONFIG_DIR': '/etc/jupyterhub', + 'MINIO_RW_ACCESS_KEY': 'minio_rw_access', + 'MINIO_RW_SECRET_KEY': 'minio_rw_secret', + 'JUPYTERHUB_ADMIN_PASSWORD': 'admin_password', 'EXISTING_VAR': 'existing_value', 'OVERWRITE_VAR': 'original_value' }) -def test_configure_environment(spawner, caplog): +def test_configure_environment_non_admin(spawner, caplog): user_dir = Path('/home/testuser') user_env_dir = Path('/home/testuser/.venv') username = 'testuser' @@ -363,8 +368,67 @@ def test_configure_environment(spawner, caplog): assert spawner.environment['PYTHONSTARTUP'] == '/etc/jupyterhub/startup.py' assert spawner.environment['JUPYTERHUB_USER'] == username + # Check that the admin credentials are removed for non-admin users + assert 'MINIO_RW_ACCESS_KEY' not in spawner.environment + assert 'MINIO_RW_SECRET_KEY' not in spawner.environment + assert 'JUPYTERHUB_ADMIN_PASSWORD' not in spawner.environment + assert f"Environment variables for {username}" in caplog.text assert str(spawner.environment) in caplog.text + assert f'Non-admin user detected: {username}. Removing admin credentials.' in caplog.text + +@patch.dict(os.environ, { + 'PATH': '/usr/local/bin:/usr/bin:/bin', + 'PYTHONPATH': '/usr/local/lib/python3.11/site-packages', + 'JUPYTERHUB_CONFIG_DIR': '/etc/jupyterhub', + 'MINIO_RW_ACCESS_KEY': 'minio_rw_access', + 'MINIO_RW_SECRET_KEY': 'minio_rw_secret', +}) +def test_configure_environment_admin(spawner, caplog): + spawner.user.admin = True + user_dir = Path('/home/testadminuser') + user_env_dir = Path('/home/testuser/.venv') + username = 'testadminuser' + + with caplog.at_level(logging.INFO): + spawner._configure_environment(user_dir, user_env_dir, username) + + assert spawner.environment['MINIO_RW_ACCESS_KEY'] == 'minio_rw_access' + assert spawner.environment['MINIO_ACCESS_KEY'] == 'minio_rw_access' + + assert spawner.environment['MINIO_RW_SECRET_KEY'] == 'minio_rw_secret' + assert spawner.environment['MINIO_SECRET_KEY'] == 'minio_rw_secret' + + assert f'MinIO read/write user detected: {username}. Setting up minio_rw credentials.' in caplog.text + + +@patch.dict(os.environ, { + 'PATH': '/usr/local/bin:/usr/bin:/bin', + 'PYTHONPATH': '/usr/local/lib/python3.11/site-packages', + 'JUPYTERHUB_CONFIG_DIR': '/etc/jupyterhub', + 'MINIO_RW_ACCESS_KEY': 'minio_rw_access', + 'MINIO_RW_SECRET_KEY': 'minio_rw_secret', +}) +def test_configure_environment_minio_rw_group(spawner, caplog): + # Mock the user's groups to include the minio_rw group + mock_group_minio_rw = MagicMock() + mock_group_minio_rw.name = 'minio_rw' + spawner.user.groups = [mock_group_minio_rw] + + user_dir = Path('/home/testadminuser') + user_env_dir = Path('/home/testuser/.venv') + username = 'testadminuser' + + with caplog.at_level(logging.INFO): + spawner._configure_environment(user_dir, user_env_dir, username) + + assert spawner.environment['MINIO_RW_ACCESS_KEY'] == 'minio_rw_access' + assert spawner.environment['MINIO_ACCESS_KEY'] == 'minio_rw_access' + + assert spawner.environment['MINIO_RW_SECRET_KEY'] == 'minio_rw_secret' + assert spawner.environment['MINIO_SECRET_KEY'] == 'minio_rw_secret' + + assert f'MinIO read/write user detected: {username}. Setting up minio_rw credentials.' in caplog.text @patch.dict(os.environ, {}, clear=True) # Clear the environment for the test From 91671623f0e6b25624a0f46b63bd1bf4b7d96c20 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Wed, 11 Sep 2024 22:29:38 -0500 Subject: [PATCH 2/3] change default permission --- src/jupyterhub_config/custom_spawner.py | 4 ++-- test/src/jupyterhub_config/custom_spawner_test.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/jupyterhub_config/custom_spawner.py b/src/jupyterhub_config/custom_spawner.py index 064b9cc..0ba1e02 100644 --- a/src/jupyterhub_config/custom_spawner.py +++ b/src/jupyterhub_config/custom_spawner.py @@ -120,8 +120,8 @@ def _ensure_user_directory(self, user_dir: Path, username: str): # Change the directory's ownership to the user os.chown(user_dir, uid, gid) - # Set directory permissions to 700: Owner (rwx), Group (---), Others (---) - os.chmod(user_dir, 0o700) + # Set directory permissions to 750: Owner (rwx), Group (r-x), Others (---) + os.chmod(user_dir, 0o750) else: self.log.info(f'Reusing user directory for {username}') diff --git a/test/src/jupyterhub_config/custom_spawner_test.py b/test/src/jupyterhub_config/custom_spawner_test.py index ba9bd91..9d7cc3e 100644 --- a/test/src/jupyterhub_config/custom_spawner_test.py +++ b/test/src/jupyterhub_config/custom_spawner_test.py @@ -192,8 +192,8 @@ def test_ensure_user_directory_with_logging(mock_chown, mock_getpwnam, caplog): # Check directory permissions st = os.stat(user_dir) - # Permissions should be 0o700 (rwx------) - assert (st.st_mode & 0o777) == 0o700 + # Permissions should be 0o750 (rwxr-x---) + assert (st.st_mode & 0o777) == 0o750 # Check log messages assert f'Getting user info for {username}' in caplog.text From 5bded0bfe37509ac61970bcd9c3200d9d2216da2 Mon Sep 17 00:00:00 2001 From: Tianhao Gu Date: Thu, 12 Sep 2024 11:21:02 -0500 Subject: [PATCH 3/3] Update test/src/jupyterhub_config/custom_spawner_test.py Co-authored-by: MrCreosote --- test/src/jupyterhub_config/custom_spawner_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/jupyterhub_config/custom_spawner_test.py b/test/src/jupyterhub_config/custom_spawner_test.py index 9d7cc3e..0577b99 100644 --- a/test/src/jupyterhub_config/custom_spawner_test.py +++ b/test/src/jupyterhub_config/custom_spawner_test.py @@ -384,7 +384,7 @@ def test_configure_environment_non_admin(spawner, caplog): 'MINIO_RW_ACCESS_KEY': 'minio_rw_access', 'MINIO_RW_SECRET_KEY': 'minio_rw_secret', }) -def test_configure_environment_admin(spawner, caplog): +def test_configure_minio_environment_admin(spawner, caplog): spawner.user.admin = True user_dir = Path('/home/testadminuser') user_env_dir = Path('/home/testuser/.venv')