From 68c6e2e9e63454beda055a401968aa800ddb239d Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 14 Sep 2020 09:22:45 -0700 Subject: [PATCH 1/9] Pin traitlets 4, separate idle culler, try jh master --- jupyter-base/Dockerfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/jupyter-base/Dockerfile b/jupyter-base/Dockerfile index 6e86c9f..0e403f2 100644 --- a/jupyter-base/Dockerfile +++ b/jupyter-base/Dockerfile @@ -50,6 +50,7 @@ RUN \ oauthlib \ pamela \ psycopg2 \ + pycurl \ pyopenssl \ pyrsistent \ python-dateutil \ @@ -58,7 +59,7 @@ RUN \ ruamel.yaml.clib \ sqlalchemy \ tornado \ - traitlets \ + traitlets=4.3.3 \ zipp # Install JupyterHub @@ -69,9 +70,7 @@ RUN \ npm install -g configurable-http-proxy && \ git clone https://github.com/jupyterhub/jupyterhub.git && \ cd jupyterhub && \ - git checkout tags/1.1.0 && \ +# git checkout tags/1.1.0 && \ /opt/anaconda3/bin/python setup.py js && \ /opt/anaconda3/bin/pip --no-cache-dir install . && \ - cp examples/cull-idle/cull_idle_servers.py /opt/anaconda3/bin/. && \ - chmod u+x /opt/anaconda3/bin/cull_idle_servers.py && \ rm -rf ~/.cache ~/.npm From 35f995c93aa467c2018a487421e9ff4febbecfb0 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 14 Sep 2020 09:23:37 -0700 Subject: [PATCH 2/9] Add key for idle culler --- jupyter-nersc/web-jupyterhub/docker-entrypoint.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyter-nersc/web-jupyterhub/docker-entrypoint.sh b/jupyter-nersc/web-jupyterhub/docker-entrypoint.sh index 68aa8c2..570fd2c 100644 --- a/jupyter-nersc/web-jupyterhub/docker-entrypoint.sh +++ b/jupyter-nersc/web-jupyterhub/docker-entrypoint.sh @@ -30,5 +30,6 @@ file_env 'CONFIGPROXY_AUTH_TOKEN' file_env 'JUPYTERHUB_CRYPT_KEY' file_env 'ANNOUNCEMENT_JUPYTERHUB_API_TOKEN' file_env 'NBVIEWER_JUPYTERHUB_API_TOKEN' +file_env 'IDLE_CULLER_JUPYTERHUB_API_TOKEN' exec "$@" From dad5047dc67853533f1393043a041f8494608d59 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 14 Sep 2020 09:24:22 -0700 Subject: [PATCH 3/9] Inclusive language and separate idle culler --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index b401966..dc779e3 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -393,7 +393,7 @@ def comma_split(string): { 'name': 'cull-idle', 'admin': True, - 'command': 'cull_idle_servers.py --timeout=57600'.split(), + 'api_token': os.environ["IDLE_CULLER_JUPYTERHUB_API_TOKEN"] }, { 'name': 'announcement', @@ -860,7 +860,7 @@ def comma_split(string): # # .. versionadded: 0.9 #c.Authenticator.blacklist = set() -c.Authenticator.blacklist = set(comma_split(os.environ.get("BLACKLIST"))) +c.Authenticator.blocked_users = set(comma_split(os.environ.get("BLOCKED_USERS"))) ## Enable persisting auth_state (if available). # @@ -901,7 +901,7 @@ def comma_split(string): # # If empty, does not perform any additional restriction. #c.Authenticator.whitelist = set() -c.Authenticator.whitelist = set(comma_split(os.environ.get("WHITELIST"))) +c.Authenticator.allowed_users = set(comma_split(os.environ.get("ALLOWED_USERS"))) #------------------------------------------------------------------------------ # LocalAuthenticator(Authenticator) configuration From fcad0c45c72ec436ff78db8dc06cd79f8a45cece Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 14 Sep 2020 09:24:56 -0700 Subject: [PATCH 4/9] Separate idle culler as service --- jupyter-nersc/app-idle-culler/Dockerfile | 14 ++++++++++ jupyter-nersc/app-idle-culler/build.sh | 28 +++++++++++++++++++ .../app-idle-culler/docker-entrypoint.sh | 28 +++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 jupyter-nersc/app-idle-culler/Dockerfile create mode 100644 jupyter-nersc/app-idle-culler/build.sh create mode 100644 jupyter-nersc/app-idle-culler/docker-entrypoint.sh diff --git a/jupyter-nersc/app-idle-culler/Dockerfile b/jupyter-nersc/app-idle-culler/Dockerfile new file mode 100644 index 0000000..2da3951 --- /dev/null +++ b/jupyter-nersc/app-idle-culler/Dockerfile @@ -0,0 +1,14 @@ +ARG branch=unknown + +FROM registry.spin.nersc.gov/das/jupyter-base-${branch}:latest +LABEL maintainer="Rollin Thomas " + +RUN \ + pip install --no-cache-dir jupyterhub-idle-culler + +WORKDIR /srv + +ADD docker-entrypoint.sh ./ +RUN chmod +x docker-entrypoint.sh +ENTRYPOINT ["./docker-entrypoint.sh"] +CMD ["python3", "-m", "jupyterhub_idle_culler", "--timeout=57600", "--cull-every=3600", "--url=http://web-jupyterhub:8081/hub/api"] diff --git a/jupyter-nersc/app-idle-culler/build.sh b/jupyter-nersc/app-idle-culler/build.sh new file mode 100644 index 0000000..b734bcf --- /dev/null +++ b/jupyter-nersc/app-idle-culler/build.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +imcmd="" +for command in docker podman; do + if [ $(command -v $command) ]; then + imcmd=$command + break + fi +done +if [ -n "$imcmd" ]; then + echo "Using $imcmd" +else + echo "No image command defined" + exit 1 +fi + +format="" +if [ "$imcmd" == "podman" ]; then + format="--format docker" +fi + +branch=$(git symbolic-ref --short HEAD) + +$imcmd build \ + --build-arg branch=$branch \ + $format \ + "$@" \ + --tag registry.spin.nersc.gov/das/app-idle-culler.jupyter-nersc-$branch:latest . diff --git a/jupyter-nersc/app-idle-culler/docker-entrypoint.sh b/jupyter-nersc/app-idle-culler/docker-entrypoint.sh new file mode 100644 index 0000000..a2c1ead --- /dev/null +++ b/jupyter-nersc/app-idle-culler/docker-entrypoint.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# file_env VAR [DEFAULT] +# ---------------------- +# Treat the value of VAR_FILE as the path to a secrets file and initialize VAR +# with the contents of that file. From postgres docker-entrypoint.sh. + +file_env() { + local var="$1" + local fileVar="${var}_FILE" + local def="${2:-}" + if [ "${!var:-}" ] && [ "${!fileVar:-}" ]; then + echo >&2 "error: both $var and $fileVar are set (but are exclusive)" + exit 1 + fi + local val="$def" + if [ "${!var:-}" ]; then + val="${!var}" + elif [ "${!fileVar:-}" ]; then + val="$(< "${!fileVar}")" + fi + export "$var"="$val" + unset "$fileVar" +} + +file_env 'JUPYTERHUB_API_TOKEN' + +exec "$@" From c07c5109a3dcf4b9267bcc191ce00ce934f2992b Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 14 Sep 2020 12:21:08 -0700 Subject: [PATCH 5/9] Why did we invent html --- jupyter-nersc/web-jupyterhub/templates/login.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jupyter-nersc/web-jupyterhub/templates/login.html b/jupyter-nersc/web-jupyterhub/templates/login.html index 70984ff..9c1e58d 100644 --- a/jupyter-nersc/web-jupyterhub/templates/login.html +++ b/jupyter-nersc/web-jupyterhub/templates/login.html @@ -66,10 +66,12 @@ /> +

From 025c92e3b1425d8c13f6c88d23cb74b091889e8b Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 14 Sep 2020 12:48:14 -0700 Subject: [PATCH 6/9] Switch to new ansible-installed setups --- .../web-jupyterhub/jupyterhub_config.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index dc779e3..f21d092 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1090,19 +1090,19 @@ def comma_split(string): "gerty-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "/global/common/cori_cle7/software/jupyter/cori/20-09/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["gerty.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/global/common/cori_cle7/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/cori/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } ), "gerty-exclusive-node-cpu": ( "nerscslurmspawner.NERSCExclusiveSlurmSpawner", { - "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "cmd": ["/global/common/cori_cle7/software/jupyter/cori/20-09/bin/jupyterhub-singleuser"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "http_timeout": 300, "startup_poll_interval": 30.0, @@ -1111,29 +1111,29 @@ def comma_split(string): "req_runtime": "240", "req_qos": "regular", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/global/common/cori_cle7/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/cori/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "batchspawner_singleuser_cmd" : " ".join([ "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", + "/global/common/cori_cle7/software/jupyter/cori/20-09/bin/batchspawner-singleuser", ]) } ), "cori-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "/global/common/cori_cle7/software/jupyter/cori/20-09/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "environment": {"OMP_NUM_THREADS" : "2", "PYTHONFAULTHANDLER": "1"}, "remote_hosts": ["corijupyter.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/cori/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } ), "cori-shared-node-gpu": ( "nerscslurmspawner.NERSCExclusiveGPUSlurmSpawner", { - "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "cmd": ["/global/common/cori_cle7/software/jupyter/cgpu/20-09/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "startup_poll_interval": 30.0, @@ -1141,16 +1141,16 @@ def comma_split(string): "req_homedir": "/tmp", "req_runtime": "240", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/cgpu/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "batchspawner_singleuser_cmd" : " ".join([ "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", + "/global/common/cori_cle7/software/jupyter/cgpu/20-09/bin/batchspawner-singleuser", ]) } ), "cori-exclusive-node-cpu": ( "nerscslurmspawner.NERSCExclusiveSlurmSpawner", { - "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "cmd": ["/global/common/cori_cle7/software/jupyter/cori/20-09/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "startup_poll_interval": 30.0, @@ -1158,16 +1158,16 @@ def comma_split(string): "req_homedir": "/tmp", "req_runtime": "240", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/cori/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "batchspawner_singleuser_cmd" : " ".join([ "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", + "/global/common/cori_cle7/software/jupyter/cori/20-09/bin/batchspawner-singleuser", ]) } ), "cori-exclusive-node-largemem": ( "nerscslurmspawner.NERSCExclusiveLargeMemSlurmSpawner", { - "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "cmd": ["/global/common/cori_cle7/software/jupyter/cori/20-09/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "startup_poll_interval": 30.0, @@ -1175,16 +1175,16 @@ def comma_split(string): "req_homedir": "/tmp", "req_runtime": "480", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/cori/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "batchspawner_singleuser_cmd" : " ".join([ "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", + "/global/common/cori_cle7/software/jupyter/cori/20-09/bin/batchspawner-singleuser", ]) } ), "cori-configurable-gpu": ( "nerscslurmspawner.NERSCConfigurableGPUSlurmSpawner", { - "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "cmd": ["/global/common/cori_cle7/software/jupyter/cgpu/20-09/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "startup_poll_interval": 30.0, @@ -1193,23 +1193,23 @@ def comma_split(string): "req_ngpus": "1", "req_runtime": "240", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/cgpu/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "batchspawner_singleuser_cmd" : " ".join([ "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", + "/global/common/cori_cle7/software/jupyter/cgpu/20-09/bin/batchspawner-singleuser", ]) } ), "spin-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "/global/common/cori_cle7/software/jupyter/cori/20-09/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["app-notebooks"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/global/common/cori_cle7/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/cori/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } ) From ae734fc7e31633b3daa3e66e227e8abdda24f58c Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 22 Sep 2020 08:24:42 -0700 Subject: [PATCH 7/9] Enable DGX access woohoo --- .../web-jupyterhub/jupyterhub_config.py | 24 ++++ .../web-jupyterhub/nerscslurmspawner.py | 103 ++++++++++++++++++ jupyter-nersc/web-jupyterhub/nerscspawner.py | 12 ++ 3 files changed, 139 insertions(+) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index f21d092..30fe37b 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1019,6 +1019,7 @@ def comma_split(string): { "name": "cori-exclusive-node-cpu" }, { "name": "cori-exclusive-node-largemem" }, { "name": "cori-configurable-gpu" }, + { "name": "cori-configurable-dgx" }, { "name": "spin-shared-node-cpu" }, ] @@ -1064,6 +1065,11 @@ def comma_split(string): "name": "gpu", "description": "Configurable GPU", "roles": ["gpu"], + }, + { + "name": "dgx", + "description": "Configurable DGX", + "roles": ["dgx"], } ], "resources": "Use multiple compute nodes with specialized settings.", @@ -1200,6 +1206,24 @@ def comma_split(string): ]) } ), + "cori-configurable-dgx": ( + "nerscslurmspawner.NERSCConfigurableDGXSlurmSpawner", { + "cmd": ["/global/common/cori_cle7/software/jupyter/cgpu/20-09/bin/jupyterhub-singleuser"], + "args": ["--transport=ipc"], + "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", + "startup_poll_interval": 30.0, + "req_remote_host": "cori19-224.nersc.gov", + "req_homedir": "/tmp", + "req_ngpus": "1", + "req_runtime": "240", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", + "path": "/global/common/cori_cle7/software/jupyter/cgpu/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "batchspawner_singleuser_cmd" : " ".join([ + "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori_cle7/software/jupyter/cgpu/20-09/bin/batchspawner-singleuser", + ]) + } + ), "spin-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 6b88afa..791a36b 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -411,6 +411,109 @@ def options_from_form(self, formdata): # continue # yield allocation +class NERSCConfigurableDGXSlurmSpawner(NERSCSlurmSpawner): + + batch_submit_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/dgx-wrapper.sh sbatch").tag(config=True) + batch_query_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/dgx-wrapper.sh squeue -h -j {job_id} -o '%T\ %B-144.nersc.gov'").tag(config=True) + batch_cancel_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/dgx-wrapper.sh scancel {job_id}").tag(config=True) + + batch_script = Unicode("""#!/bin/bash +#SBATCH --account={{ account }} +#SBATCH --constraint=dgx +#SBATCH --job-name=jupyter +#SBATCH --nodes={{ nodes }} +#SBATCH --ntasks-per-node={{ ntasks_per_node }} +#SBATCH --cpus-per-task={{ cpus_per_task }} +#SBATCH --gpus-per-task={{ gpus_per_task }} +#SBATCH --time={{ runtime }} +{{ env_text }} +unset XDG_RUNTIME_DIR +{{ cmd }}""").tag(config=True) + + async def options_form(self, spawner): + form = "" + + # Account + + form += dedent(""" + + + """) + +# # QOS, would be nice to constrain from qos + +# form += dedent(""" +# +# +# """) + +# # GPUs per node, should come from model + +# form += dedent(""" +# +# +# """) + + # Nodes, should come from model + + form += dedent(""" + + + """) + + # Number of tasks per node, should come from model + + form += dedent(""" + + + """) + + # Number of CPUs per task, should come from model + + form += dedent(""" + + + """) + + # Number of GPUs per task, should come from model + + form += dedent(""" + + + """) + + # Time, should come from model + + form += dedent(""" + + + """) + + return form + + def options_from_form(self, formdata): + options = dict() + options["account"] = formdata["account"][0] +# options["qos"] = formdata["qos"][0] +# options["ngpus"] = formdata["ngpus"][0] + options["ntasks_per_node"] = formdata["ntasks-per-node"][0] + options["cpus_per_task"] = formdata["cpus-per-task"][0] + options["gpus_per_task"] = formdata["gpus-per-task"][0] + options["runtime"] = formdata["runtime"][0] + return options class NERSCConfigurableSlurmSpawner(NERSCSlurmSpawner): diff --git a/jupyter-nersc/web-jupyterhub/nerscspawner.py b/jupyter-nersc/web-jupyterhub/nerscspawner.py index 8049baf..8f20e87 100644 --- a/jupyter-nersc/web-jupyterhub/nerscspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscspawner.py @@ -45,6 +45,8 @@ def check_role(self, auth_state, role): return self.check_role_cori_exclusive_node_cpu(auth_state) if role == "cmem": return self.check_role_cmem(auth_state) + if role == "dgx": + return self.check_role_dgx(auth_state) return False def check_role_cori_exclusive_node_cpu(self, auth_state): @@ -82,6 +84,16 @@ def default_gpu_repo(self, auth_state): return allocation["computeAllocation"]["repoName"] return None + def check_role_dgx(self, auth_state): + return self.default_dgx_repo(auth_state) is not None + + def default_dgx_repo(self, auth_state): + for allocation in self.user_allocations(auth_state): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] in ["dgx"]: + return allocation["computeAllocation"]["repoName"] + return None + def user_allocations(self, auth_state, repos=[]): for allocation in auth_state["userdata"].get("userAllocations", []): if repos and allocation["computeAllocation"]["repoName"] not in repos: From b980fb7bd9ec84fc73f7a279865ad2856befd05c Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 22 Sep 2020 08:25:49 -0700 Subject: [PATCH 8/9] Up to 8 hours --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 30fe37b..bdccca3 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1162,7 +1162,7 @@ def comma_split(string): "startup_poll_interval": 30.0, "req_remote_host": "cori19-224.nersc.gov", "req_homedir": "/tmp", - "req_runtime": "240", + "req_runtime": "480", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/global/common/cori_cle7/software/jupyter/cori/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "batchspawner_singleuser_cmd" : " ".join([ From 7792969de33d514240fcc50a5bd85940e63c868f Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 24 Sep 2020 13:38:31 -0700 Subject: [PATCH 9/9] Adjust qos on Gerty to match Cori --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index bdccca3..c898ac7 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1115,7 +1115,7 @@ def comma_split(string): "req_remote_host": "gerty.nersc.gov", "req_homedir": "/tmp", "req_runtime": "240", - "req_qos": "regular", + "req_qos": "jupyter", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/global/common/cori_cle7/software/jupyter/cori/20-09/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "batchspawner_singleuser_cmd" : " ".join([