From 1070d48b7559943f017da364ffd0f8fdd13ac2c7 Mon Sep 17 00:00:00 2001 From: William Krinsman Date: Thu, 12 Dec 2019 10:23:57 -0800 Subject: [PATCH 01/60] Test fork at krinsman/jupyterhub --- jupyter-base/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jupyter-base/Dockerfile b/jupyter-base/Dockerfile index 0d4411c..27f4b4d 100644 --- a/jupyter-base/Dockerfile +++ b/jupyter-base/Dockerfile @@ -60,7 +60,8 @@ WORKDIR /tmp RUN \ npm install -g configurable-http-proxy && \ # git clone https://github.com/jupyterhub/jupyterhub.git && \ - git clone https://github.com/rcthomas/jupyterhub.git && \ +# git clone https://github.com/rcthomas/jupyterhub.git && \ + git clone https://github.com/krinsman/jupyterhub.git && \ cd jupyterhub && \ # git checkout tags/1.0.0 && \ git checkout auth-state-to-spawner && \ From 77ab9dcbc1ee75891cbc67e472232c745d7c5a90 Mon Sep 17 00:00:00 2001 From: krinsman <31422259+krinsman@users.noreply.github.com> Date: Thu, 12 Dec 2019 14:03:16 -0800 Subject: [PATCH 02/60] forgot to comment out relevant line in config --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index a2e2517..068dfd1 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1200,4 +1200,4 @@ def auth_state_hook(spawner, auth_state): c.JupyterHub.authenticate_prometheus = False -#c.JupyterHub.default_server_name = 'cori-shared-node-cpu' +c.JupyterHub.default_server_name = 'cori-shared-node-cpu' From d10a62f386aad9016dd5980038728f1db0e3c2be Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 16 Dec 2019 10:06:43 -0800 Subject: [PATCH 03/60] Using master now --- jupyter-base/Dockerfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/jupyter-base/Dockerfile b/jupyter-base/Dockerfile index 0d4411c..7cff375 100644 --- a/jupyter-base/Dockerfile +++ b/jupyter-base/Dockerfile @@ -59,11 +59,8 @@ ENV PATH=/opt/anaconda3/bin:$PATH WORKDIR /tmp RUN \ npm install -g configurable-http-proxy && \ -# git clone https://github.com/jupyterhub/jupyterhub.git && \ - git clone https://github.com/rcthomas/jupyterhub.git && \ + git clone https://github.com/jupyterhub/jupyterhub.git && \ cd jupyterhub && \ -# git checkout tags/1.0.0 && \ - git checkout auth-state-to-spawner && \ /opt/anaconda3/bin/python setup.py js && \ /opt/anaconda3/bin/pip --no-cache-dir install . && \ cp examples/cull-idle/cull_idle_servers.py /opt/anaconda3/bin/. && \ From 69b70c8f2499ad23f0b497b3c13259ffbc94ba2f Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 16 Dec 2019 12:09:58 -0800 Subject: [PATCH 04/60] Upgrade to 0.4.1 --- jupyter-nersc/web-announcement/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-announcement/Dockerfile b/jupyter-nersc/web-announcement/Dockerfile index 1d05094..b50af16 100644 --- a/jupyter-nersc/web-announcement/Dockerfile +++ b/jupyter-nersc/web-announcement/Dockerfile @@ -4,7 +4,7 @@ FROM registry.spin.nersc.gov/das/jupyter-base-${branch}:latest LABEL maintainer="Rollin Thomas " RUN \ - pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.3.1 + pip install git+https://github.com/rcthomas/jupyterhub-announcement.git@0.4.1 WORKDIR /srv From 806a3640bd89c3cbd5f00b5eb0d75ca3aa46a906 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 22 Jan 2020 09:27:48 -0800 Subject: [PATCH 05/60] Add needed xrender --- jupyter-nersc/app-notebooks/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyter-nersc/app-notebooks/Dockerfile b/jupyter-nersc/app-notebooks/Dockerfile index a18427d..86fc70f 100644 --- a/jupyter-nersc/app-notebooks/Dockerfile +++ b/jupyter-nersc/app-notebooks/Dockerfile @@ -23,6 +23,7 @@ RUN \ ldap-utils \ libnss-ldapd \ libpam-ldap \ + libxrender-dev \ nscd \ openssh-server \ supervisor \ From f873d55d2d2c9ea713b14a21ef8d79335fa4b80d Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 22 Jan 2020 09:28:07 -0800 Subject: [PATCH 06/60] MOre from conda less from the pip --- jupyter-base/Dockerfile | 42 ++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/jupyter-base/Dockerfile b/jupyter-base/Dockerfile index 0400312..1d8e6ef 100644 --- a/jupyter-base/Dockerfile +++ b/jupyter-base/Dockerfile @@ -35,23 +35,31 @@ RUN \ echo "python 3.7.3" >> /opt/anaconda3/conda-meta/pinned && \ /opt/anaconda3/bin/conda update --yes conda && \ /opt/anaconda3/bin/conda install --yes \ - alembic \ - cryptography \ - decorator \ - entrypoints \ - jinja2 \ - mako \ - markupsafe \ - nodejs \ - oauthlib \ - pamela \ - psycopg2 \ - pyopenssl \ - python-dateutil \ - python-editor \ - sqlalchemy \ - tornado \ - traitlets + alembic \ + attrs \ + certipy \ + cryptography \ + decorator \ + entrypoints \ + jinja2 \ + jsonschema \ + mako \ + markupsafe \ + more-itertools \ + nodejs \ + oauthlib \ + pamela \ + psycopg2 \ + pyopenssl \ + pyrsistent \ + python-dateutil \ + python-editor \ + ruamel.yaml \ + ruamel.yaml.clib \ + sqlalchemy \ + tornado \ + traitlets \ + zipp # Install JupyterHub From 9a6446678dfaadc21f0fc5c32e019b9be549aacd Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 4 Feb 2020 14:53:16 -0800 Subject: [PATCH 07/60] Make beautiful buttons work with JH 1.1 --- jupyter-base/Dockerfile | 6 ++-- jupyter-nersc/web-jupyterhub/nerscspawner.py | 28 +++++++++---------- .../web-jupyterhub/templates/home.html | 10 +++---- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/jupyter-base/Dockerfile b/jupyter-base/Dockerfile index 1d8e6ef..e969b8e 100644 --- a/jupyter-base/Dockerfile +++ b/jupyter-base/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 LABEL maintainer="Rollin Thomas " # Base Ubuntu packages @@ -67,9 +67,9 @@ ENV PATH=/opt/anaconda3/bin:$PATH WORKDIR /tmp RUN \ npm install -g configurable-http-proxy && \ -# git clone https://github.com/jupyterhub/jupyterhub.git && \ - git clone https://github.com/krinsman/jupyterhub.git && \ + git clone https://github.com/jupyterhub/jupyterhub.git && \ cd jupyterhub && \ + git checkout tags/1.1.0 && \ /opt/anaconda3/bin/python setup.py js && \ /opt/anaconda3/bin/pip --no-cache-dir install . && \ cp examples/cull-idle/cull_idle_servers.py /opt/anaconda3/bin/. && \ diff --git a/jupyter-nersc/web-jupyterhub/nerscspawner.py b/jupyter-nersc/web-jupyterhub/nerscspawner.py index f083a27..de7353d 100644 --- a/jupyter-nersc/web-jupyterhub/nerscspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscspawner.py @@ -28,23 +28,21 @@ class NERSCSpawner(WrapSpawner): child_profile = Unicode() - userdata = Dict() - - def check_roles(self, roles): + def check_roles(self, auth_state, roles): """User has one or more of these roles""" if roles: for role in roles: - if self.check_role(role): + if self.check_role(auth_state, role): return True return False else: return True - def check_role(self, role): + def check_role(self, auth_state, role): if role == "gpu": - return self.check_role_gpu() + return self.check_role_gpu(auth_state) if role == "staff": - return self.check_role_staff() + return self.check_role_staff(auth_state) if role == "cori-exclusive-node-cpu": return self.check_role_cori_exclusive_node_cpu() return False @@ -56,23 +54,23 @@ def check_role_cori_exclusive_node_cpu(self): else: return True - def check_role_gpu(self): - return self.default_gpu_repo() is not None + def check_role_gpu(self, auth_state): + return self.default_gpu_repo(auth_state) is not None - def check_role_staff(self): - for allocation in self.user_allocations(["nstaff"]): + def check_role_staff(self, auth_state): + for allocation in self.user_allocations(auth_state, ["nstaff"]): return True return False - def default_gpu_repo(self): - for allocation in self.user_allocations(["nstaff", "m1759", "dasrepo"]): + def default_gpu_repo(self, auth_state): + for allocation in self.user_allocations(auth_state, ["nstaff", "m1759", "dasrepo"]): for qos in allocation["userAllocationQos"]: if qos["qos"]["qos"] == "gpu": return allocation["computeAllocation"]["repoName"] return None - def user_allocations(self, repos=[]): - for allocation in self.userdata.get("userAllocations", []): + def user_allocations(self, auth_state, repos=[]): + for allocation in auth_state["userdata"].get("userAllocations", []): if repos and allocation["computeAllocation"]["repoName"] not in repos: continue yield allocation diff --git a/jupyter-nersc/web-jupyterhub/templates/home.html b/jupyter-nersc/web-jupyterhub/templates/home.html index 7c3dbce..b490866 100644 --- a/jupyter-nersc/web-jupyterhub/templates/home.html +++ b/jupyter-nersc/web-jupyterhub/templates/home.html @@ -19,7 +19,7 @@ {% for setup in user.spawner.setups -%} - {% for arch in setup.architectures if user.spawner.check_roles(arch.roles) %} + {% for arch in setup.architectures if user.spawner.check_roles(auth_state, arch.roles) %} {{ arch.description }} {% endfor %} {%- endfor %} @@ -28,7 +28,7 @@ {% macro spawner_table_body() -%} - {% for system in user.spawner.systems if user.spawner.check_roles(system.roles) -%} + {% for system in user.spawner.systems if user.spawner.check_roles(auth_state, system.roles) -%} {{ spawner_table_body_row(system) }} {%- endfor %} @@ -44,7 +44,7 @@ {%- endmacro %} {% macro spawner_table_body_row_data(system, setup) -%} -{% for arch in setup.architectures if user.spawner.check_roles(arch.roles) %} +{% for arch in setup.architectures if user.spawner.check_roles(auth_state, arch.roles) %} {% set profile_key = [system.name, setup.name, arch.name] | join("-") %} {% set profile = user.spawner.profiles | selectattr("name", "equalto", profile_key) | first %} @@ -71,7 +71,7 @@ Resources {% for setup in user.spawner.setups %} {% set counter = [] %} - {% for arch in setup.architectures if user.spawner.check_roles(arch.roles) %} + {% for arch in setup.architectures if user.spawner.check_roles(auth_state, arch.roles) %} {% if counter.append('1') %}{% endif %} {% endfor %} {% if counter | length %} @@ -82,7 +82,7 @@ Use Cases {% for setup in user.spawner.setups %} {% set counter = [] %} - {% for arch in setup.architectures if user.spawner.check_roles(arch.roles) %} + {% for arch in setup.architectures if user.spawner.check_roles(auth_state, arch.roles) %} {% if counter.append('1') %}{% endif %} {% endfor %} {% if counter | length %} From 9cd0393c17ff44a900caa39d9b0ff5fc3301cb0e Mon Sep 17 00:00:00 2001 From: William Krinsman Date: Tue, 4 Feb 2020 15:55:07 -0800 Subject: [PATCH 08/60] Update NBViewer source repo branch, and try to use master branch of clonenotebooks, now that default_server_name seems to be working --- jupyter-nersc/web-nbviewer/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter-nersc/web-nbviewer/Dockerfile b/jupyter-nersc/web-nbviewer/Dockerfile index b3b5ca5..e8cff9f 100644 --- a/jupyter-nersc/web-nbviewer/Dockerfile +++ b/jupyter-nersc/web-nbviewer/Dockerfile @@ -24,7 +24,7 @@ RUN \ WORKDIR /repos RUN \ - git clone --single-branch --branch step7 https://github.com/krinsman/nbviewer.git && \ + git clone --single-branch --branch develop https://github.com/krinsman/nbviewer.git && \ cd nbviewer && \ # --no-dependencies flag because we don't actually need pylibmc or elasticsearch to run this (without # elasticsearch or memcached) and everything else in requirements.txt is already installed @@ -37,7 +37,7 @@ RUN \ RUN \ git clone https://github.com/krinsman/clonenotebooks.git && \ cd clonenotebooks && \ - git checkout NERSC && \ +# git checkout NERSC && \ pip install -e . --no-cache-dir && \ cd .. From f464f98e1c322619491aee237f5e7b83d5c0e7ce Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 4 Feb 2020 15:55:11 -0800 Subject: [PATCH 09/60] Update config to 0.4.1 format --- .../web-announcement/announcement_config.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/jupyter-nersc/web-announcement/announcement_config.py b/jupyter-nersc/web-announcement/announcement_config.py index ba3fc72..09eb9ef 100644 --- a/jupyter-nersc/web-announcement/announcement_config.py +++ b/jupyter-nersc/web-announcement/announcement_config.py @@ -50,6 +50,12 @@ # AnnouncementQueue(LoggingConfigurable) configuration #------------------------------------------------------------------------------ +## Number of days to retain announcements. +# +# Announcements that have been in the queue for this many days are purged from +# the queue. +#c.AnnouncementQueue.lifetime_days = 7.0 + ## File path where announcements persist as JSON. # # For a persistent announcement queue, this parameter must be set to a non-empty @@ -70,3 +76,15 @@ #c.AnnouncementQueue.persist_path = '' c.AnnouncementQueue.persist_path = 'announcements.json' +#------------------------------------------------------------------------------ +# SSLContext(Configurable) configuration +#------------------------------------------------------------------------------ + +## SSL CA, use with keyfile and certfile +#c.SSLContext.cafile = '' + +## SSL cert, use with keyfile +#c.SSLContext.certfile = '' + +## SSL key, use with certfile +#c.SSLContext.keyfile = '' From 9f34c9e310b69989c2fcb4cc7c428293d08e4971 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 5 Feb 2020 09:37:17 -0800 Subject: [PATCH 10/60] Adjust sanic to run off master (bug in release) --- jupyter-nersc/web-offline/Dockerfile | 35 ++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/jupyter-nersc/web-offline/Dockerfile b/jupyter-nersc/web-offline/Dockerfile index abb2f55..7941fd8 100644 --- a/jupyter-nersc/web-offline/Dockerfile +++ b/jupyter-nersc/web-offline/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 LABEL maintainer="Rollin Thomas " # Base Ubuntu packages @@ -12,6 +12,7 @@ RUN \ apt-get --yes install \ bzip2 \ curl \ + git \ tzdata \ vim @@ -36,11 +37,35 @@ ENV PATH=/opt/anaconda3/bin:$PATH # Packages +# RUN \ +# conda install --yes \ +# --channel=conda-forge \ +# jinja2 \ +# sanic + +# Temporary off master, sanic bug: https://github.com/huge-success/sanic/issues/1773 + RUN \ - conda install --yes \ - --channel=conda-forge \ - jinja2 \ - sanic + conda install --yes \ + --channel=conda-forge \ + aiofiles \ + brotlipy \ + h11=0.8.1 \ + h2 \ + hpack \ + hstspreload \ + httptools \ + httpx=0.9.3 \ + hyperframe \ + jinja2 \ + markupsafe \ + multidict \ + rfc3986 \ + sniffio \ + ujson \ + uvloop \ + websockets && \ + pip install --no-cache-dir git+https://github.com/huge-success/sanic # Application From 0365754212deb9794b31543b231f6c8c5744ecc4 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 5 Feb 2020 09:46:28 -0800 Subject: [PATCH 11/60] Up to 18.04 --- jupyter-nersc/app-monitoring/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/app-monitoring/Dockerfile b/jupyter-nersc/app-monitoring/Dockerfile index 7d6e874..100be4d 100644 --- a/jupyter-nersc/app-monitoring/Dockerfile +++ b/jupyter-nersc/app-monitoring/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 LABEL maintainer="Rollin Thomas " # Base Ubuntu packages From b8be01d7f4c2c7ac841f574454bdebe20969a3b9 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 24 Feb 2020 16:23:25 -0600 Subject: [PATCH 12/60] Set up configurable GPU jobs --- .../web-jupyterhub/jupyterhub_config.py | 28 ++++++ .../web-jupyterhub/nerscslurmspawner.py | 94 +++++++++++++++++++ jupyter-nersc/web-jupyterhub/nerscspawner.py | 6 +- 3 files changed, 125 insertions(+), 3 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 068dfd1..906eac9 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1010,6 +1010,7 @@ def comma_split(string): { "name": "cori-shared-node-cpu" }, { "name": "cori-exclusive-node-cpu" }, { "name": "cori-exclusive-node-gpu" }, + { "name": "cori-configurable-gpu" }, { "name": "spin-shared-node-cpu" }, ] @@ -1043,6 +1044,18 @@ def comma_split(string): "resources": "Use your own node within a job allocation using defaults.", "use_cases": "Visualization, analytics, machine learning that is compute or memory intensive but can be done on a single node." }, + { + "name": "configurable", + "architectures": [ + { + "name": "gpu", + "description": "Configurable GPU", + "roles": ["gpu"], + } + ], + "resources": "Use multiple compute nodes with specialized settings.", + "use_cases": "Multi-node analytics jobs, jobs in reservations, custom project charging, and more." + }, ] c.NERSCSpawner.systems = [ @@ -1130,6 +1143,21 @@ def comma_split(string): "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), + "cori-configurable-gpu": ( + "nerscslurmspawner.NERSCConfigurableGPUSlurmSpawner", { + "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], + "args": ["--transport=ipc"], + "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", + "startup_poll_interval": 30.0, + "req_remote_host": "cori19-224.nersc.gov", + "req_homedir": "/tmp", + "req_ngpus": "1", + "req_runtime": "240", + "hub_api_url": "http://{}:8081/hub/api".format(ip), + "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + } + ), "spin-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 0e168c2..2f9cff1 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -201,6 +201,100 @@ def user_allocations(self, repos=[]): yield allocation +class NERSCConfigurableGPUSlurmSpawner(NERSCSlurmSpawner): + + batch_submit_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh sbatch").tag(config=True) + batch_query_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh squeue -h -j {job_id} -o '%T\ %B-144.nersc.gov'").tag(config=True) + batch_cancel_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh scancel {job_id}").tag(config=True) + + batch_script = Unicode("""#!/bin/bash +#SBATCH --account={{ account }} +#SBATCH --constraint=gpu +#SBATCH --gres=gpu:{{ ngpus }} +#SBATCH --job-name=jupyter +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ runtime }} +{{ env_text }} +unset XDG_RUNTIME_DIR +{{ cmd }}""").tag(config=True) + + async def options_form(self, spawner): + form = "" + + # Account + + form += dedent(""" + + + """) + + # GPUs per node, should come from model + + form += dedent(""" + + + """) + + # Nodes, should come from model + + form += dedent(""" + + + """) + + # Time, should come from model + + form += dedent(""" + + + """) + + return form + + def options_from_form(self, formdata): + options = dict() + options["account"] = formdata["account"][0] + options["ngpus"] = formdata["ngpus"][0] + options["nodes"] = formdata["nodes"][0] + options["time"] = formdata["time"][0] + return options + +# # Have to override this to call get_auth_state() I think +# async def _get_batch_script(self, **subvars): +# """Format batch script from vars""" +# auth_state = await self.user.get_auth_state() +# self.userdata = auth_state["userdata"] +# # subvars["account"] = self.default_gpu_repo() +# return format_template(self.batch_script, **subvars) + +# def default_gpu_repo(self): +# for allocation in self.user_allocations(["nstaff", "m1759", "dasrepo"]): +# for qos in allocation["userAllocationQos"]: +# if qos["qos"]["qos"] == "gpu": +# return allocation["computeAllocation"]["repoName"] +# return None + +# def user_allocations(self, repos=[]): +# for allocation in self.userdata["userAllocations"]: +# if repos and allocation["computeAllocation"]["repoName"] not in repos: +# continue +# yield allocation + + + class NERSCConfigurableSlurmSpawner(NERSCSlurmSpawner): req_image = Unicode("", diff --git a/jupyter-nersc/web-jupyterhub/nerscspawner.py b/jupyter-nersc/web-jupyterhub/nerscspawner.py index de7353d..78db4f2 100644 --- a/jupyter-nersc/web-jupyterhub/nerscspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscspawner.py @@ -97,9 +97,9 @@ def construct_child(self): self.select_profile(self.child_profile) super().construct_child() # self.child_spawner.orm_spawner = self.orm_spawner ### IS THIS KOSHER?!?!!? -# self.options_form = self.child_spawner.options_form # another one... -# self.options_from_form = self.child_spawner.options_from_form -# self.child_spawner.user_options = self.user_options + self.options_form = self.child_spawner.options_form # another one... + self.options_from_form = self.child_spawner.options_from_form + self.child_spawner.user_options = self.user_options # ### Think we need to do this to get JUPYTERHUB_OAUTH_CALLBACK_URL set properly def load_child_class(self, state): From 6fb3b0422e3db85405c522c3a219579045259d84 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Mon, 24 Feb 2020 16:24:01 -0600 Subject: [PATCH 13/60] Make more room --- jupyter-nersc/web-jupyterhub/templates/home.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/templates/home.html b/jupyter-nersc/web-jupyterhub/templates/home.html index b490866..ed07c3a 100644 --- a/jupyter-nersc/web-jupyterhub/templates/home.html +++ b/jupyter-nersc/web-jupyterhub/templates/home.html @@ -4,9 +4,9 @@ {% endif %} {% macro spawner_table() -%} -
+
-
+
{{ spawner_table_header() }} {{ spawner_table_body() }} From 3c8e161587b9e3d52c098920964deb6a2b500b5c Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 17 Mar 2020 11:05:03 -0700 Subject: [PATCH 14/60] Override of CHP to force web-jupyterhub:8081 route --- jupyter-nersc/web-jupyterhub/spinproxy.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 jupyter-nersc/web-jupyterhub/spinproxy.py diff --git a/jupyter-nersc/web-jupyterhub/spinproxy.py b/jupyter-nersc/web-jupyterhub/spinproxy.py new file mode 100644 index 0000000..d9c3a74 --- /dev/null +++ b/jupyter-nersc/web-jupyterhub/spinproxy.py @@ -0,0 +1,13 @@ + +from jupyterhub.proxy import ConfigurableHTTPProxy + +class ConfigurableHTTPProxySpin(ConfigurableHTTPProxy): + + def add_hub_route(self, hub): + """Add the default route for the Hub""" + self.log.debug("url %s, api_url %s", hub.url, hub.api_url) + host = "http://web-jupyterhub:8081" + self.log.info("Adding default route for Hub: %s => %s", hub.routespec, host) + return self.add_route(hub.routespec, host, {'hub': True}) +# self.log.info("Adding default route for Hub: %s => %s", hub.routespec, hub.host) +# return self.add_route(hub.routespec, self.hub.host, {'hub': True}) From 5ae68b22f5a5454f6a9160e8060a2370bcfde3a4 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 17 Mar 2020 11:12:26 -0700 Subject: [PATCH 15/60] Changes for closing of 8081 --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 906eac9..1256337 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -233,6 +233,7 @@ def comma_split(string): # # .. versionadded:: 0.9 #c.JupyterHub.hub_bind_url = '' +c.JupyterHub.hub_bind_url = 'http://web-jupyterhub:8081' ## The ip or hostname for proxies and spawners to use for connecting to the Hub. # @@ -248,7 +249,6 @@ def comma_split(string): # # .. versionadded:: 0.8 #c.JupyterHub.hub_connect_ip = '' -c.JupyterHub.hub_connect_ip = ip ## DEPRECATED # @@ -272,6 +272,7 @@ def comma_split(string): # # .. versionadded:: 0.9 #c.JupyterHub.hub_connect_url = '' +c.JupyterHub.hub_connect_url = "https://jupyter-stage.nersc.gov/hub" ## The ip address for the Hub process to *bind* to. # @@ -996,6 +997,9 @@ def comma_split(string): # Additional ConfigurableHTTPProxy configuration #------------------------------------------------------------------------------ +from spinproxy import ConfigurableHTTPProxySpin +c.JupyterHub.proxy_class = ConfigurableHTTPProxySpin + c.ConfigurableHTTPProxy.should_start = False c.ConfigurableHTTPProxy.api_url = 'http://web-proxy:8001' @@ -1110,7 +1114,8 @@ def comma_split(string): "environment": {"OMP_NUM_THREADS" : "2", "PYTHONFAULTHANDLER": "1"}, "remote_hosts": ["corijupyter.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", - "hub_api_url": "http://{}:8081/hub/api".format(ip), +# "hub_api_url": "http://{}:8081/hub/api".format(ip), + "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } @@ -1154,7 +1159,8 @@ def comma_split(string): "req_homedir": "/tmp", "req_ngpus": "1", "req_runtime": "240", - "hub_api_url": "http://{}:8081/hub/api".format(ip), + #"hub_api_url": "http://{}:8081/hub/api".format(ip), + "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), @@ -1187,7 +1193,7 @@ async def setup(spawner): client_keys=[(k,c)], known_hosts=None) as conn: result = await conn.run("myquota -c $HOME") retcode = result.exit_status - except asyncssh.misc.ConnectionLost: + except: spawner.log.warning(f"Problem connecting to {remote_host} to check quota oh well") retcode = 0 if retcode: From dcf91aac93c108d3e24fae9546b1b44cacb6219a Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 17 Mar 2020 11:12:44 -0700 Subject: [PATCH 16/60] Add proxy override --- jupyter-nersc/web-jupyterhub/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyter-nersc/web-jupyterhub/Dockerfile b/jupyter-nersc/web-jupyterhub/Dockerfile index a1ca275..d91e149 100644 --- a/jupyter-nersc/web-jupyterhub/Dockerfile +++ b/jupyter-nersc/web-jupyterhub/Dockerfile @@ -22,6 +22,7 @@ ENV PYTHONPATH=/srv ADD nerscspawner.py . ADD nerscslurmspawner.py . ADD iris.py . +ADD spinproxy.py . # Hub scripts From 2d119be541f858612bbb5da2dfe5d98c9d2fa469 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 17 Mar 2020 11:13:02 -0700 Subject: [PATCH 17/60] Multi GPU interface --- .../web-jupyterhub/nerscslurmspawner.py | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 2f9cff1..405ed99 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -207,12 +207,16 @@ class NERSCConfigurableGPUSlurmSpawner(NERSCSlurmSpawner): batch_query_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh squeue -h -j {job_id} -o '%T\ %B-144.nersc.gov'").tag(config=True) batch_cancel_cmd = Unicode("/bin/bash -l /global/common/cori/das/jupyterhub/esslurm-wrapper.sh scancel {job_id}").tag(config=True) +#SBATCH --gres=gpu:{{ ngpus }} + batch_script = Unicode("""#!/bin/bash #SBATCH --account={{ account }} #SBATCH --constraint=gpu -#SBATCH --gres=gpu:{{ ngpus }} #SBATCH --job-name=jupyter #SBATCH --nodes={{ nodes }} +#SBATCH --ntasks-per-node={{ ntasks_per_node }} +#SBATCH --cpus-per-task={{ cpus_per_task }} +#SBATCH --gpus-per-task={{ gpus_per_task }} #SBATCH --time={{ runtime }} {{ env_text }} unset XDG_RUNTIME_DIR @@ -241,24 +245,45 @@ async def options_form(self, spawner): """) - # GPUs per node, should come from model +# # GPUs per node, should come from model - form += dedent(""" - - - """) +# form += dedent(""" +# +# +# """) # Nodes, should come from model form += dedent(""" - + """) + # Number of tasks per node, should come from model + + form += dedent(""" + + + """) + + # Number of CPUs per task, should come from model + + form += dedent(""" + + + """) + + # Number of GPUs per task, should come from model + + form += dedent(""" + + + """) + # Time, should come from model form += dedent(""" - + """) @@ -267,8 +292,10 @@ async def options_form(self, spawner): def options_from_form(self, formdata): options = dict() options["account"] = formdata["account"][0] - options["ngpus"] = formdata["ngpus"][0] - options["nodes"] = formdata["nodes"][0] +# options["ngpus"] = formdata["ngpus"][0] + options["ntasks_per_node"] = formdata["ntasks-per-node"][0] + options["cpus_per_task"] = formdata["cpus-per-task"][0] + options["gpus_per_task"] = formdata["gpus-per-task"][0] options["time"] = formdata["time"][0] return options From bbe248c24e04580b0466645f26beff990557c1bc Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 18 Mar 2020 11:26:26 -0700 Subject: [PATCH 18/60] Replace api url with public api url --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 1256337..f22ae88 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1086,7 +1086,7 @@ def comma_split(string): "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["gerty.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", - "hub_api_url": "http://{}:8081/hub/api".format(ip), + "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", "path": "/global/common/cori_cle7/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } @@ -1102,7 +1102,7 @@ def comma_split(string): "req_homedir": "/tmp", "req_runtime": "240", "req_qos": "regular", - "hub_api_url": "http://{}:8081/hub/api".format(ip), + "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), @@ -1114,7 +1114,6 @@ def comma_split(string): "environment": {"OMP_NUM_THREADS" : "2", "PYTHONFAULTHANDLER": "1"}, "remote_hosts": ["corijupyter.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", -# "hub_api_url": "http://{}:8081/hub/api".format(ip), "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' @@ -1130,7 +1129,7 @@ def comma_split(string): "req_remote_host": "cori19-224.nersc.gov", "req_homedir": "/tmp", "req_runtime": "240", - "hub_api_url": "http://{}:8081/hub/api".format(ip), + "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), @@ -1144,7 +1143,7 @@ def comma_split(string): "req_remote_host": "cori19-224.nersc.gov", "req_homedir": "/tmp", "req_runtime": "240", - "hub_api_url": "http://{}:8081/hub/api".format(ip), + "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), @@ -1159,7 +1158,6 @@ def comma_split(string): "req_homedir": "/tmp", "req_ngpus": "1", "req_runtime": "240", - #"hub_api_url": "http://{}:8081/hub/api".format(ip), "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } @@ -1172,7 +1170,7 @@ def comma_split(string): "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["app-notebooks"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", - "hub_api_url": "http://{}:8081/hub/api".format(ip), + "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", "path": "/global/common/cori_cle7/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } From d6b96113f87ceaff5bb4db7dc372836f76ffcb93 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 18 Mar 2020 11:27:39 -0700 Subject: [PATCH 19/60] No longer need this handy hack --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index f22ae88..f958ca0 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -4,7 +4,6 @@ import sys import asyncssh -import requests from tornado import web from jupyterhub.utils import url_path_join @@ -20,8 +19,6 @@ def comma_split(string): else: return list() -ip = requests.get('https://v4.ifconfig.co/json').json()['ip'] - #------------------------------------------------------------------------------ # Application(SingletonConfigurable) configuration #------------------------------------------------------------------------------ From 487736db7661bf8ca3ff3e7cf8c055231bd6bfe6 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 20 Mar 2020 14:09:32 -0700 Subject: [PATCH 20/60] Proof of concept submission key --- jupyter-nersc/web-jupyterhub/iris.py | 1 + jupyter-nersc/web-jupyterhub/nerscslurmspawner.py | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/jupyter-nersc/web-jupyterhub/iris.py b/jupyter-nersc/web-jupyterhub/iris.py index 21ab9e0..26dbb88 100644 --- a/jupyter-nersc/web-jupyterhub/iris.py +++ b/jupyter-nersc/web-jupyterhub/iris.py @@ -12,6 +12,7 @@ async def query_user(self, name): query = dedent(""" query {{ systemInfo {{ + uid users(name: "{}") {{ baseRepos {{ computeAllocation {{ diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 405ed99..10c8f47 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -1,5 +1,6 @@ from textwrap import dedent +import time import asyncssh from traitlets import default, Unicode @@ -149,6 +150,7 @@ def get_env(self): class NERSCExclusiveSlurmSpawner(NERSCSlurmSpawner): batch_script = Unicode("""#!/bin/bash +#SBATCH --comment={{ cookie }} {%- if constraint %} #SBATCH --constraint={{ constraint }} {%- endif %} @@ -161,6 +163,15 @@ class NERSCExclusiveSlurmSpawner(NERSCSlurmSpawner): unset XDG_RUNTIME_DIR {{ cmd }}""").tag(config=True) + # Have to override this to call get_auth_state() I think + async def _get_batch_script(self, **subvars): + """Format batch script from vars""" + auth_state = await self.user.get_auth_state() + self.userdata = auth_state["userdata"] + uid = self.userdata["uid"] + subvars["cookie"] = int(time.time()) ^ (uid ** 2) + return format_template(self.batch_script, **subvars) + class NERSCExclusiveGPUSlurmSpawner(NERSCSlurmSpawner): From dc8102e7a423d42fb3ed5f76a39e6bd1ef610f21 Mon Sep 17 00:00:00 2001 From: krinsman <31422259+krinsman@users.noreply.github.com> Date: Tue, 24 Mar 2020 16:16:27 -0700 Subject: [PATCH 21/60] Update Dockerfile (NBViewer GitHub repo) All changes needed to run clonenotebooks have been merged into NBViewer, so I deleted the `develop` branch on my fork. In any case, any further changes I will be able to merge into NBViewer master, so it seems like cloning from that repo should be enough. --- jupyter-nersc/web-nbviewer/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jupyter-nersc/web-nbviewer/Dockerfile b/jupyter-nersc/web-nbviewer/Dockerfile index e8cff9f..05d5753 100644 --- a/jupyter-nersc/web-nbviewer/Dockerfile +++ b/jupyter-nersc/web-nbviewer/Dockerfile @@ -24,7 +24,7 @@ RUN \ WORKDIR /repos RUN \ - git clone --single-branch --branch develop https://github.com/krinsman/nbviewer.git && \ + git clone https://github.com/jupyter/nbviewer.git && \ cd nbviewer && \ # --no-dependencies flag because we don't actually need pylibmc or elasticsearch to run this (without # elasticsearch or memcached) and everything else in requirements.txt is already installed @@ -37,7 +37,6 @@ RUN \ RUN \ git clone https://github.com/krinsman/clonenotebooks.git && \ cd clonenotebooks && \ -# git checkout NERSC && \ pip install -e . --no-cache-dir && \ cd .. From 0cd224f4fb4616228d4e4d6cf21533e6b48f0707 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 25 Mar 2020 16:24:21 -0700 Subject: [PATCH 22/60] Add uid --- jupyter-nersc/web-jupyterhub/iris.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/iris.py b/jupyter-nersc/web-jupyterhub/iris.py index 26dbb88..8a7a857 100644 --- a/jupyter-nersc/web-jupyterhub/iris.py +++ b/jupyter-nersc/web-jupyterhub/iris.py @@ -12,8 +12,8 @@ async def query_user(self, name): query = dedent(""" query {{ systemInfo {{ - uid users(name: "{}") {{ + uid baseRepos {{ computeAllocation {{ repoName From cdb222ba1d77f32c6a96957b9508eb40062e6aed Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 17 Apr 2020 16:53:42 -0700 Subject: [PATCH 23/60] Change to shared GPU node name --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index f958ca0..5cb2d95 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1009,8 +1009,7 @@ def comma_split(string): { "name": "gerty-shared-node-cpu" }, { "name": "gerty-exclusive-node-cpu" }, { "name": "cori-shared-node-cpu" }, - { "name": "cori-exclusive-node-cpu" }, - { "name": "cori-exclusive-node-gpu" }, + { "name": "cori-shared-node-gpu" }, { "name": "cori-configurable-gpu" }, { "name": "spin-shared-node-cpu" }, ] From 539a25e044322e8d090ec39df28b5af796ffaecf Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 17 Apr 2020 16:56:11 -0700 Subject: [PATCH 24/60] Reorder configs --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 5cb2d95..861e81a 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1009,8 +1009,8 @@ def comma_split(string): { "name": "gerty-shared-node-cpu" }, { "name": "gerty-exclusive-node-cpu" }, { "name": "cori-shared-node-cpu" }, + { "name": "cori-exclusive-node-cpu" }, { "name": "cori-shared-node-gpu" }, - { "name": "cori-configurable-gpu" }, { "name": "spin-shared-node-cpu" }, ] From 2fb749c476115ea9ebb5c36281db2907e7851570 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 17 Apr 2020 16:59:45 -0700 Subject: [PATCH 25/60] Propagate GPU shared name change, perserve configurable --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 861e81a..67a2e0b 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1011,6 +1011,7 @@ def comma_split(string): { "name": "cori-shared-node-cpu" }, { "name": "cori-exclusive-node-cpu" }, { "name": "cori-shared-node-gpu" }, + { "name": "cori-configurable-gpu" }, { "name": "spin-shared-node-cpu" }, ] @@ -1022,6 +1023,11 @@ def comma_split(string): "name": "cpu", "description": "Shared CPU Node", "roles": [], + }, + { + "name": "gpu", + "description": "Shared GPU Node", + "roles": ["gpu"], } ], "resources": "Use a node shared with other users' notebooks but outside the batch queues.", @@ -1034,12 +1040,7 @@ def comma_split(string): "name": "cpu", "description": "Exclusive CPU Node", "roles": ["cori-exclusive-node-cpu"], - }, - { - "name": "gpu", - "description": "Exclusive GPU Node", - "roles": ["gpu"], - } + } ], "resources": "Use your own node within a job allocation using defaults.", "use_cases": "Visualization, analytics, machine learning that is compute or memory intensive but can be done on a single node." From 06f0689bcffa51e6f6f70ff3cb9e74a00c7fb804 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 17 Apr 2020 17:05:37 -0700 Subject: [PATCH 26/60] Kind of pointless adjustments --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 67a2e0b..33c6036 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1009,8 +1009,8 @@ def comma_split(string): { "name": "gerty-shared-node-cpu" }, { "name": "gerty-exclusive-node-cpu" }, { "name": "cori-shared-node-cpu" }, - { "name": "cori-exclusive-node-cpu" }, { "name": "cori-shared-node-gpu" }, + { "name": "cori-exclusive-node-cpu" }, { "name": "cori-configurable-gpu" }, { "name": "spin-shared-node-cpu" }, ] @@ -1116,12 +1116,12 @@ def comma_split(string): "ssh_keyfile": '/certs/{username}.key' } ), - "cori-exclusive-node-cpu": ( - "nerscslurmspawner.NERSCExclusiveSlurmSpawner", { + "cori-shared-node-gpu": ( + "nerscslurmspawner.NERSCExclusiveGPUSlurmSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], + "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", - "http_timeout": 300, "startup_poll_interval": 30.0, "req_remote_host": "cori19-224.nersc.gov", "req_homedir": "/tmp", @@ -1130,12 +1130,12 @@ def comma_split(string): "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), - "cori-exclusive-node-gpu": ( - "nerscslurmspawner.NERSCExclusiveGPUSlurmSpawner", { + "cori-exclusive-node-cpu": ( + "nerscslurmspawner.NERSCExclusiveSlurmSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], - "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", + "http_timeout": 300, "startup_poll_interval": 30.0, "req_remote_host": "cori19-224.nersc.gov", "req_homedir": "/tmp", From 5a127e061f2c3dd7324542c93e836c87842f3a47 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 17 Apr 2020 17:10:53 -0700 Subject: [PATCH 27/60] Work around 8AM resv, look for m3502 first --- .../web-jupyterhub/nerscslurmspawner.py | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 10c8f47..8a48b8a 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -1,4 +1,5 @@ +import datetime from textwrap import dedent import time @@ -190,16 +191,41 @@ class NERSCExclusiveGPUSlurmSpawner(NERSCSlurmSpawner): unset XDG_RUNTIME_DIR {{ cmd }}""").tag(config=True) + # For Cori GPU + def default_deadline(self): + return datetime.datetime.today().replace(hour=8, minute=0, second=0, + microsecond=0) + + def minutes_from_deadline(self, now, deadline=None): + deadline = deadline or self.default_deadline() + return int((deadline - now).total_seconds() / 60.0) + + def validate_runtime(self, runtime, now=None, deadline=None, slop=2): + now = now or datetime.datetime.now() + if now.weekday() > 4: + return runtime + window = self.minutes_from_deadline(now, deadline) + if 0 <= window <= int(runtime): + return str(max(window - slop, 0)) + else: + return runtime + # Have to override this to call get_auth_state() I think async def _get_batch_script(self, **subvars): """Format batch script from vars""" auth_state = await self.user.get_auth_state() self.userdata = auth_state["userdata"] subvars["account"] = self.default_gpu_repo() + subvars["runtime"] = self.validate_runtime(subvars["runtime"]) return format_template(self.batch_script, **subvars) def default_gpu_repo(self): - for allocation in self.user_allocations(["nstaff", "m1759", "dasrepo"]): + # Search for training account first + for allocation in self.user_allocations(["m3502"]): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] == "gpu": + return allocation["computeAllocation"]["repoName"] + for allocation in self.user_allocations(): for qos in allocation["userAllocationQos"]: if qos["qos"]["qos"] == "gpu": return allocation["computeAllocation"]["repoName"] From a21940bc529144a0ca8de1194682dfc888f2f855 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 22 Apr 2020 18:26:40 -0700 Subject: [PATCH 28/60] Needed test branch --- jupyter-nersc/web-nbviewer/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/jupyter-nersc/web-nbviewer/Dockerfile b/jupyter-nersc/web-nbviewer/Dockerfile index 05d5753..eaf0f56 100644 --- a/jupyter-nersc/web-nbviewer/Dockerfile +++ b/jupyter-nersc/web-nbviewer/Dockerfile @@ -34,9 +34,11 @@ RUN \ invoke less && \ cd .. +RUN echo 1 RUN \ - git clone https://github.com/krinsman/clonenotebooks.git && \ + git clone https://github.com/NERSC/clonenotebooks.git && \ cd clonenotebooks && \ + git checkout stale_username_cache_test && \ pip install -e . --no-cache-dir && \ cd .. From 7cbe250a7ae003b920a89ba378818e0f92a9aa5d Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 08:10:22 -0700 Subject: [PATCH 29/60] Change of URL for miniconda installer --- jupyter-base/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-base/Dockerfile b/jupyter-base/Dockerfile index e969b8e..b711cc2 100644 --- a/jupyter-base/Dockerfile +++ b/jupyter-base/Dockerfile @@ -29,7 +29,7 @@ RUN \ # Python 3 Miniconda and dependencies for JupyterHub we can get via conda RUN \ - curl -s -o /tmp/miniconda3.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + curl -s -o /tmp/miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash /tmp/miniconda3.sh -b -p /opt/anaconda3 && \ rm -rf /tmp/miniconda3.sh && \ echo "python 3.7.3" >> /opt/anaconda3/conda-meta/pinned && \ From 7d5c90ed5dfb01aef69dd30ebc711081906f1b07 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 08:10:54 -0700 Subject: [PATCH 30/60] Change to URL and allow conda update again --- jupyter-nersc/app-monitoring/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter-nersc/app-monitoring/Dockerfile b/jupyter-nersc/app-monitoring/Dockerfile index 100be4d..98811ab 100644 --- a/jupyter-nersc/app-monitoring/Dockerfile +++ b/jupyter-nersc/app-monitoring/Dockerfile @@ -25,10 +25,10 @@ RUN \ # Python 3 Miniconda RUN \ - curl -s -o /tmp/miniconda3.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + curl -s -o /tmp/miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash /tmp/miniconda3.sh -f -b -p /opt/anaconda3 && \ rm -rf /tmp/miniconda3.sh && \ -# /opt/anaconda3/bin/conda update --yes conda && \ + /opt/anaconda3/bin/conda update --yes conda && \ /opt/anaconda3/bin/pip install --no-cache-dir \ pika From a282013284c6b8f453120dd15d907c134dceb8d6 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 08:12:44 -0700 Subject: [PATCH 31/60] Change layout, need to make this nicer one day --- jupyter-nersc/web-jupyterhub/templates/home.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/templates/home.html b/jupyter-nersc/web-jupyterhub/templates/home.html index ed07c3a..215379e 100644 --- a/jupyter-nersc/web-jupyterhub/templates/home.html +++ b/jupyter-nersc/web-jupyterhub/templates/home.html @@ -4,9 +4,9 @@ {% endif %} {% macro spawner_table() -%} -
+
-
+
{{ spawner_table_header() }} {{ spawner_table_body() }} From 2788a54af7d160e619456cba2f61a9cee77c70ad Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 08:13:06 -0700 Subject: [PATCH 32/60] Start of our little home page for nbviewer --- jupyter-nersc/web-nbviewer/frontpage.json | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 jupyter-nersc/web-nbviewer/frontpage.json diff --git a/jupyter-nersc/web-nbviewer/frontpage.json b/jupyter-nersc/web-nbviewer/frontpage.json new file mode 100644 index 0000000..58bcc8a --- /dev/null +++ b/jupyter-nersc/web-nbviewer/frontpage.json @@ -0,0 +1,7 @@ +{ + "title": "NERSC NBViewer", + "subtitle": "A simple way to share Jupyter Notebooks", + "text": "Curious? Read more about it here.
Enter the location of a Jupyter Notebook to have it rendered here:", + "show_input": true, + "sections":[] +} From 5150e70875ea663416325f7460485a3c3d679bcb Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 08:13:44 -0700 Subject: [PATCH 33/60] Switch back to master, add frontpage --- jupyter-nersc/web-nbviewer/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jupyter-nersc/web-nbviewer/Dockerfile b/jupyter-nersc/web-nbviewer/Dockerfile index eaf0f56..956ac98 100644 --- a/jupyter-nersc/web-nbviewer/Dockerfile +++ b/jupyter-nersc/web-nbviewer/Dockerfile @@ -38,12 +38,13 @@ RUN echo 1 RUN \ git clone https://github.com/NERSC/clonenotebooks.git && \ cd clonenotebooks && \ - git checkout stale_username_cache_test && \ pip install -e . --no-cache-dir && \ cd .. WORKDIR /srv +ADD frontpage.json ./ + ADD docker-entrypoint.sh nbviewer_config.py ./ RUN chmod +x docker-entrypoint.sh ENTRYPOINT ["./docker-entrypoint.sh"] From df91ac71cd0e9dbb0ecbae5597202ffd5f71792d Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 08:13:59 -0700 Subject: [PATCH 34/60] Add the frontpage --- jupyter-nersc/web-nbviewer/nbviewer_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyter-nersc/web-nbviewer/nbviewer_config.py b/jupyter-nersc/web-nbviewer/nbviewer_config.py index cd680f8..342d6f2 100644 --- a/jupyter-nersc/web-nbviewer/nbviewer_config.py +++ b/jupyter-nersc/web-nbviewer/nbviewer_config.py @@ -13,3 +13,4 @@ c.NBViewer.static_path = "/repos/clonenotebooks/static" c.NBViewer.index_handler = "clonenotebooks.renderers.IndexRenderingHandler" +c.NBViewer.frontpage = "/srv/frontpage.json" From f99f73e413f1d855c501e116c2f966a429bb92f2 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 08:25:58 -0700 Subject: [PATCH 35/60] Change URL for miniconda --- jupyter-nersc/web-offline/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-offline/Dockerfile b/jupyter-nersc/web-offline/Dockerfile index 7941fd8..22f24f1 100644 --- a/jupyter-nersc/web-offline/Dockerfile +++ b/jupyter-nersc/web-offline/Dockerfile @@ -26,7 +26,7 @@ RUN \ # Miniconda RUN \ - curl -s -o /tmp/miniconda3.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + curl -s -o /tmp/miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash /tmp/miniconda3.sh -b -p /opt/anaconda3 && \ rm -rf /tmp/miniconda3.sh && \ echo "python 3.7.3" >> /opt/anaconda3/conda-meta/pinned && \ From 51de19dc02759ab85248c1e41bc7668a348fb4f6 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 09:01:09 -0700 Subject: [PATCH 36/60] Make subdomain come from env var if not default --- .../web-jupyterhub/jupyterhub_config.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 33c6036..1ffb63c 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -7,6 +7,7 @@ from tornado import web from jupyterhub.utils import url_path_join +nersc_jupyterhub_subdomain = os.environ.get("NERSC_JUPYTERHUB_SUBDOMAIN", "jupyter") def comma_split(string): """Handle env variables that may be None, empty string, or have spaces""" @@ -269,7 +270,7 @@ def comma_split(string): # # .. versionadded:: 0.9 #c.JupyterHub.hub_connect_url = '' -c.JupyterHub.hub_connect_url = "https://jupyter-stage.nersc.gov/hub" +c.JupyterHub.hub_connect_url = f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub" ## The ip address for the Hub process to *bind* to. # @@ -1083,7 +1084,7 @@ def comma_split(string): "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["gerty.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", - "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/global/common/cori_cle7/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } @@ -1099,7 +1100,7 @@ def comma_split(string): "req_homedir": "/tmp", "req_runtime": "240", "req_qos": "regular", - "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), @@ -1111,7 +1112,7 @@ def comma_split(string): "environment": {"OMP_NUM_THREADS" : "2", "PYTHONFAULTHANDLER": "1"}, "remote_hosts": ["corijupyter.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", - "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } @@ -1126,7 +1127,7 @@ def comma_split(string): "req_remote_host": "cori19-224.nersc.gov", "req_homedir": "/tmp", "req_runtime": "240", - "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), @@ -1140,7 +1141,7 @@ def comma_split(string): "req_remote_host": "cori19-224.nersc.gov", "req_homedir": "/tmp", "req_runtime": "240", - "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), @@ -1155,7 +1156,7 @@ def comma_split(string): "req_homedir": "/tmp", "req_ngpus": "1", "req_runtime": "240", - "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", } ), @@ -1167,7 +1168,7 @@ def comma_split(string): "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["app-notebooks"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", - "hub_api_url": "https://jupyter-stage.nersc.gov/hub/api", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/global/common/cori_cle7/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } From ecb2e37e7af8799d567f5f86f0d0acfbb3ef1bb7 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 09:01:46 -0700 Subject: [PATCH 37/60] We'll update this later --- jupyter-nersc/web-nbviewer/frontpage.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-nbviewer/frontpage.json b/jupyter-nersc/web-nbviewer/frontpage.json index 58bcc8a..a50a2b4 100644 --- a/jupyter-nersc/web-nbviewer/frontpage.json +++ b/jupyter-nersc/web-nbviewer/frontpage.json @@ -1,7 +1,7 @@ { "title": "NERSC NBViewer", "subtitle": "A simple way to share Jupyter Notebooks", - "text": "Curious? Read more about it here.
Enter the location of a Jupyter Notebook to have it rendered here:", + "text": "Enter the location of a Jupyter Notebook to have it rendered here:", "show_input": true, "sections":[] } From e3c4d779cbe17e91b8856678eb03d8e4fe9f6a33 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 24 Apr 2020 09:19:47 -0700 Subject: [PATCH 38/60] Update message --- jupyter-nersc/web-nbviewer/frontpage.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-nbviewer/frontpage.json b/jupyter-nersc/web-nbviewer/frontpage.json index a50a2b4..14fbdc9 100644 --- a/jupyter-nersc/web-nbviewer/frontpage.json +++ b/jupyter-nersc/web-nbviewer/frontpage.json @@ -1,7 +1,7 @@ { "title": "NERSC NBViewer", "subtitle": "A simple way to share Jupyter Notebooks", - "text": "Enter the location of a Jupyter Notebook to have it rendered here:", + "text": "This is an experimental service! Documentation coming soon.
Enter the location of a Jupyter Notebook to have it rendered here:", "show_input": true, "sections":[] } From 63844be8ab424f9302c63e7ecc51fa17963ddc42 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 20 May 2020 18:17:21 -0700 Subject: [PATCH 39/60] Take away the workaround --- .../web-jupyterhub/nerscslurmspawner.py | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 8a48b8a..cc595eb 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -1,5 +1,4 @@ -import datetime from textwrap import dedent import time @@ -191,32 +190,12 @@ class NERSCExclusiveGPUSlurmSpawner(NERSCSlurmSpawner): unset XDG_RUNTIME_DIR {{ cmd }}""").tag(config=True) - # For Cori GPU - def default_deadline(self): - return datetime.datetime.today().replace(hour=8, minute=0, second=0, - microsecond=0) - - def minutes_from_deadline(self, now, deadline=None): - deadline = deadline or self.default_deadline() - return int((deadline - now).total_seconds() / 60.0) - - def validate_runtime(self, runtime, now=None, deadline=None, slop=2): - now = now or datetime.datetime.now() - if now.weekday() > 4: - return runtime - window = self.minutes_from_deadline(now, deadline) - if 0 <= window <= int(runtime): - return str(max(window - slop, 0)) - else: - return runtime - # Have to override this to call get_auth_state() I think async def _get_batch_script(self, **subvars): """Format batch script from vars""" auth_state = await self.user.get_auth_state() self.userdata = auth_state["userdata"] subvars["account"] = self.default_gpu_repo() - subvars["runtime"] = self.validate_runtime(subvars["runtime"]) return format_template(self.batch_script, **subvars) def default_gpu_repo(self): From d98099e6ba6f4d3b0dad0147f2d756c5828d8fb5 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 21 May 2020 12:51:08 -0700 Subject: [PATCH 40/60] Work w/podman or docker, remove forced --no-cache --- jupyter-base/build.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/jupyter-base/build.sh b/jupyter-base/build.sh index e6265ec..161545e 100644 --- a/jupyter-base/build.sh +++ b/jupyter-base/build.sh @@ -1,7 +1,21 @@ #!/bin/bash +imcmd="" +for command in docker podman; do + if [ $(command -v $command) ]; then + imcmd=$command + break + fi +done +if [ -n "$imcmd" ]; then + echo "Using $imcmd" +else + echo "No image command defined" + exit 1 +fi + branch=$(git symbolic-ref --short HEAD) -docker build \ - --no-cache \ +$imcmd build \ + "$@" --tag registry.spin.nersc.gov/das/jupyter-base-$branch:latest . From c18d0080e7e5c521a1f2e69bf77628ebb1fb427e Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 21 May 2020 12:55:11 -0700 Subject: [PATCH 41/60] Forgot \ --- jupyter-base/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-base/build.sh b/jupyter-base/build.sh index 161545e..6b34fda 100644 --- a/jupyter-base/build.sh +++ b/jupyter-base/build.sh @@ -17,5 +17,5 @@ fi branch=$(git symbolic-ref --short HEAD) $imcmd build \ - "$@" + "$@" \ --tag registry.spin.nersc.gov/das/jupyter-base-$branch:latest . From a989ef19e782edc2e8bdb9dadd76c55da3443962 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 21 May 2020 13:11:45 -0700 Subject: [PATCH 42/60] Support podman or docker --- jupyter-nersc/web-jupyterhub/build.sh | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/build.sh b/jupyter-nersc/web-jupyterhub/build.sh index b0340fc..86f8551 100644 --- a/jupyter-nersc/web-jupyterhub/build.sh +++ b/jupyter-nersc/web-jupyterhub/build.sh @@ -1,8 +1,22 @@ #!/bin/bash +imcmd="" +for command in docker podman; do + if [ $(command -v $command) ]; then + imcmd=$command + break + fi +done +if [ -n "$imcmd" ]; then + echo "Using $imcmd" +else + echo "No image command defined" + exit 1 +fi + branch=$(git symbolic-ref --short HEAD) -docker build \ +$imcmd build \ --build-arg branch=$branch \ "$@" \ --tag registry.spin.nersc.gov/das/web-jupyterhub.jupyter-nersc-$branch:latest . From 0b373d43c4944b721e3b6d29a942e787ac620814 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 5 Jun 2020 14:09:52 -0700 Subject: [PATCH 43/60] Set batchspawner version to 1.0.0rc0 --- jupyter-nersc/web-jupyterhub/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/Dockerfile b/jupyter-nersc/web-jupyterhub/Dockerfile index d91e149..bbda83c 100644 --- a/jupyter-nersc/web-jupyterhub/Dockerfile +++ b/jupyter-nersc/web-jupyterhub/Dockerfile @@ -7,9 +7,9 @@ WORKDIR /srv # Authenticator and spawner RUN \ - pip install git+https://github.com/nersc/sshapiauthenticator.git && \ - pip install git+https://github.com/jupyterhub/batchspawner.git@4747946 && \ - pip install git+https://github.com/jupyterhub/wrapspawner.git && \ + pip install git+https://github.com/nersc/sshapiauthenticator.git && \ + pip install git+https://github.com/jupyterhub/batchspawner.git@v1.0.0-rc0 && \ + pip install git+https://github.com/jupyterhub/wrapspawner.git && \ pip install git+https://github.com/nersc/sshspawner.git # Customized templates From 8c5671f511363a665c90c6003e5df562ee468a39 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 5 Jun 2020 14:10:10 -0700 Subject: [PATCH 44/60] WARNING!!! TEST SETUP FOR THE RC TEST!!! --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 1ffb63c..70007e5 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1133,8 +1133,9 @@ def comma_split(string): ), "cori-exclusive-node-cpu": ( "nerscslurmspawner.NERSCExclusiveSlurmSpawner", { - "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], + # "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + # "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], + "cmd": ["/global/homes/r/rthomas/.conda/envs/bsp1/bin/jupyter-labhub"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "http_timeout": 300, "startup_poll_interval": 30.0, @@ -1143,6 +1144,10 @@ def comma_split(string): "req_runtime": "240", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "batchspawner_singleuser_cmd" : " ".join([ + "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/homes/r/rthomas/.conda/envs/bsp1/bin/batchspawner-singleuser" + ]) } ), "cori-configurable-gpu": ( @@ -1230,4 +1235,10 @@ def auth_state_hook(spawner, auth_state): c.JupyterHub.authenticate_prometheus = False +### Default server name + c.JupyterHub.default_server_name = 'cori-shared-node-cpu' + +### Need to import batchspawner for the /hub/api/batchspawner callback to work. + +import batchspawner From ebc876ffefd339cdf7cdd8a586243086c70e7adb Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 5 Jun 2020 14:50:24 -0700 Subject: [PATCH 45/60] Allow special for m1759 --- .../web-jupyterhub/nerscslurmspawner.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index cc595eb..9799e0d 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -185,6 +185,7 @@ class NERSCExclusiveGPUSlurmSpawner(NERSCSlurmSpawner): #SBATCH --gres=gpu:1 #SBATCH --job-name=jupyter #SBATCH --nodes={{ nodes }} +#SBATCH --qos={{ qos }} #SBATCH --time={{ runtime }} {{ env_text }} unset XDG_RUNTIME_DIR @@ -196,10 +197,16 @@ async def _get_batch_script(self, **subvars): auth_state = await self.user.get_auth_state() self.userdata = auth_state["userdata"] subvars["account"] = self.default_gpu_repo() + subvars["qos"] = self.gpu_qos() return format_template(self.batch_script, **subvars) def default_gpu_repo(self): - # Search for training account first + # special m1759 people + for allocation in self.user_allocations(["m1759"]): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] == "gpu_special_m1759": + return allocation["computeAllocation"]["repoName"] + # training for allocation in self.user_allocations(["m3502"]): for qos in allocation["userAllocationQos"]: if qos["qos"]["qos"] == "gpu": @@ -210,6 +217,14 @@ def default_gpu_repo(self): return allocation["computeAllocation"]["repoName"] return None + def gpu_qos(self): + # special m1759 people, only special people there + for allocation in self.user_allocations(["m1759"]): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] == "gpu_special_m1759": + return "gpu_special_m1759" + return "regular" + def user_allocations(self, repos=[]): for allocation in self.userdata["userAllocations"]: if repos and allocation["computeAllocation"]["repoName"] not in repos: From 3c6635f60c3102e135f1f3de72372028548f7ee3 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 5 Jun 2020 15:09:09 -0700 Subject: [PATCH 46/60] Wrong qos name --- jupyter-nersc/web-jupyterhub/nerscslurmspawner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 9799e0d..9f7b23a 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -222,7 +222,7 @@ def gpu_qos(self): for allocation in self.user_allocations(["m1759"]): for qos in allocation["userAllocationQos"]: if qos["qos"]["qos"] == "gpu_special_m1759": - return "gpu_special_m1759" + return "special" return "regular" def user_allocations(self, repos=[]): From 5bf75780053ec1c0d71a8307046725ba1e4a6600 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Sat, 13 Jun 2020 08:26:58 -0700 Subject: [PATCH 47/60] WIP Config is still using test stuff, not ready for prod --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 70007e5..5ae0f1b 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -614,6 +614,9 @@ def comma_split(string): # across upgrades, so if you are using the callable take care to verify it # continues to work after upgrades! #c.Spawner.environment = {} +c.Spawner.environment = { + "JUPYTER_RUNTIME_DIR" = lambda spawner: f"/tmp/jupyter-runtime-{spawner.user.name}" +} ## Timeout (in seconds) before giving up on a spawned HTTP server # From b5dc9e366299868fe5291901d07281180d4232a1 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Sat, 13 Jun 2020 08:32:43 -0700 Subject: [PATCH 48/60] WIP still have test settings --- .../web-jupyterhub/jupyterhub_config.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 5ae0f1b..5ab71d7 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -615,7 +615,7 @@ def comma_split(string): # continues to work after upgrades! #c.Spawner.environment = {} c.Spawner.environment = { - "JUPYTER_RUNTIME_DIR" = lambda spawner: f"/tmp/jupyter-runtime-{spawner.user.name}" + "JUPYTER_RUNTIME_DIR": lambda spawner: f"/tmp/jupyter-runtime-{spawner.user.name}" } ## Timeout (in seconds) before giving up on a spawned HTTP server @@ -1082,13 +1082,13 @@ def comma_split(string): "gerty-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/common/cori_cle7/software/jupyter/19-11/bin/jupyter-labhub"], + "/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["gerty.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/global/common/cori_cle7/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "ssh_keyfile": '/certs/{username}.key' } ), @@ -1122,8 +1122,9 @@ def comma_split(string): ), "cori-shared-node-gpu": ( "nerscslurmspawner.NERSCExclusiveGPUSlurmSpawner", { - "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], +# "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", +# "/usr/common/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "startup_poll_interval": 30.0, @@ -1131,7 +1132,11 @@ def comma_split(string): "req_homedir": "/tmp", "req_runtime": "240", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "batchspawner_singleuser_cmd" : " ".join([ + "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", + ]) } ), "cori-exclusive-node-cpu": ( From e3c66bfed5b47f4c101106b65c7d94e1fda88389 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 23 Jun 2020 12:20:18 -0700 Subject: [PATCH 49/60] Redact configurable for now --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 5ab71d7..516f2b0 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1015,7 +1015,7 @@ def comma_split(string): { "name": "cori-shared-node-cpu" }, { "name": "cori-shared-node-gpu" }, { "name": "cori-exclusive-node-cpu" }, - { "name": "cori-configurable-gpu" }, +# { "name": "cori-configurable-gpu" }, { "name": "spin-shared-node-cpu" }, ] From df7e2fc91f5fde736aad3622dbb4f24e99035575 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 23 Jun 2020 12:34:02 -0700 Subject: [PATCH 50/60] REmove configurable altogether --- .../web-jupyterhub/jupyterhub_config.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 516f2b0..7403e35 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1015,7 +1015,7 @@ def comma_split(string): { "name": "cori-shared-node-cpu" }, { "name": "cori-shared-node-gpu" }, { "name": "cori-exclusive-node-cpu" }, -# { "name": "cori-configurable-gpu" }, + { "name": "cori-configurable-gpu" }, { "name": "spin-shared-node-cpu" }, ] @@ -1049,18 +1049,18 @@ def comma_split(string): "resources": "Use your own node within a job allocation using defaults.", "use_cases": "Visualization, analytics, machine learning that is compute or memory intensive but can be done on a single node." }, - { - "name": "configurable", - "architectures": [ - { - "name": "gpu", - "description": "Configurable GPU", - "roles": ["gpu"], - } - ], - "resources": "Use multiple compute nodes with specialized settings.", - "use_cases": "Multi-node analytics jobs, jobs in reservations, custom project charging, and more." - }, +# { +# "name": "configurable", +# "architectures": [ +# { +# "name": "gpu", +# "description": "Configurable GPU", +# "roles": ["gpu"], +# } +# ], +# "resources": "Use multiple compute nodes with specialized settings.", +# "use_cases": "Multi-node analytics jobs, jobs in reservations, custom project charging, and more." +# }, ] c.NERSCSpawner.systems = [ From 126fa142c1e866560a057d29ac0f612cb3f4bdcd Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 23 Jun 2020 12:54:57 -0700 Subject: [PATCH 51/60] Update gerty exclusive to jupyter 2. --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 7403e35..be53423 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1094,8 +1094,7 @@ def comma_split(string): ), "gerty-exclusive-node-cpu": ( "nerscslurmspawner.NERSCExclusiveSlurmSpawner", { - "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], + "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "http_timeout": 300, "startup_poll_interval": 30.0, @@ -1104,7 +1103,11 @@ def comma_split(string): "req_runtime": "240", "req_qos": "regular", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/global/common/cori_cle7/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "batchspawner_singleuser_cmd" : " ".join([ + "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", + ]) } ), "cori-shared-node-cpu": ( From b8ff8ab27f8c5026791604fd35b2370471f4aaa7 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 23 Jun 2020 13:00:32 -0700 Subject: [PATCH 52/60] Embrace 2.0 for Cori GPU and Cori CPU compute --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index be53423..d38241b 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1125,8 +1125,6 @@ def comma_split(string): ), "cori-shared-node-gpu": ( "nerscslurmspawner.NERSCExclusiveGPUSlurmSpawner", { -# "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", -# "/usr/common/software/jupyter/20-06/bin/jupyterhub-singleuser"], "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", @@ -1144,9 +1142,7 @@ def comma_split(string): ), "cori-exclusive-node-cpu": ( "nerscslurmspawner.NERSCExclusiveSlurmSpawner", { - # "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - # "/usr/common/software/jupyter/19-11/bin/jupyter-labhub"], - "cmd": ["/global/homes/r/rthomas/.conda/envs/bsp1/bin/jupyter-labhub"], + "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", "http_timeout": 300, "startup_poll_interval": 30.0, @@ -1154,10 +1150,10 @@ def comma_split(string): "req_homedir": "/tmp", "req_runtime": "240", "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", - "path": "/usr/common/software/jupyter/19-11/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "path": "/usr/common/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", "batchspawner_singleuser_cmd" : " ".join([ "/global/common/cori/das/jupyterhub/jupyter-launcher.sh", - "/global/homes/r/rthomas/.conda/envs/bsp1/bin/batchspawner-singleuser" + "/global/common/cori_cle7/software/jupyter/20-06/bin/batchspawner-singleuser", ]) } ), From 44d15481648928bddbd507eae6ea757cbb825933 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Tue, 23 Jun 2020 13:13:52 -0700 Subject: [PATCH 53/60] Up time out to 3 min --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index d38241b..4dfa29d 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -755,7 +755,7 @@ def comma_split(string): # takes longer than this. start should return when the server process is started # and its location is known. #c.Spawner.start_timeout = 60 -c.Spawner.start_timeout = 120 +c.Spawner.start_timeout = 180 #------------------------------------------------------------------------------ # LocalProcessSpawner(Spawner) configuration From a3028901faec49bc366914b3478b05efc5d6925f Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 25 Jun 2020 07:46:08 -0700 Subject: [PATCH 54/60] For testing JupyterLab 2 --- .../web-jupyterhub/jupyterhub_config.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 4dfa29d..5350307 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1013,6 +1013,7 @@ def comma_split(string): { "name": "gerty-shared-node-cpu" }, { "name": "gerty-exclusive-node-cpu" }, { "name": "cori-shared-node-cpu" }, + { "name": "cori2-shared-node-cpu" }, { "name": "cori-shared-node-gpu" }, { "name": "cori-exclusive-node-cpu" }, { "name": "cori-configurable-gpu" }, @@ -1072,6 +1073,10 @@ def comma_split(string): "name": "cori", "roles": [] }, + { + "name": "cori2", + "roles": [] + }, { "name": "spin", "roles": [] @@ -1123,6 +1128,19 @@ def comma_split(string): "ssh_keyfile": '/certs/{username}.key' } ), + "cori2-shared-node-cpu": ( + "sshspawner.sshspawner.SSHSpawner", { + "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "args": ["--transport=ipc"], + "environment": {"OMP_NUM_THREADS" : "2", "PYTHONFAULTHANDLER": "1"}, + "remote_hosts": ["corijupyter.nersc.gov"], + "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", + "hub_api_url": f"https://{nersc_jupyterhub_subdomain}.nersc.gov/hub/api", + "path": "/global/common/cori_cle7/software/jupyter/20-06/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "ssh_keyfile": '/certs/{username}.key' + } + ), "cori-shared-node-gpu": ( "nerscslurmspawner.NERSCExclusiveGPUSlurmSpawner", { "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], From 55fee686599f1e2b8c9fef40f5353162850133b8 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 25 Jun 2020 07:46:37 -0700 Subject: [PATCH 55/60] Handle gpu4sci --- jupyter-nersc/web-jupyterhub/nerscspawner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscspawner.py b/jupyter-nersc/web-jupyterhub/nerscspawner.py index 78db4f2..d596eeb 100644 --- a/jupyter-nersc/web-jupyterhub/nerscspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscspawner.py @@ -63,7 +63,7 @@ def check_role_staff(self, auth_state): return False def default_gpu_repo(self, auth_state): - for allocation in self.user_allocations(auth_state, ["nstaff", "m1759", "dasrepo"]): + for allocation in self.user_allocations(auth_state, ["nstaff", "m1759", "dasrepo", "gpu4sci"]): for qos in allocation["userAllocationQos"]: if qos["qos"]["qos"] == "gpu": return allocation["computeAllocation"]["repoName"] From b3709af16c274fe8fd3501dcd74157792dbae2b0 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 25 Jun 2020 07:46:56 -0700 Subject: [PATCH 56/60] Handle gpu4sci --- jupyter-nersc/web-jupyterhub/nerscslurmspawner.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py index 9f7b23a..a7730ca 100644 --- a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -201,6 +201,11 @@ async def _get_batch_script(self, **subvars): return format_template(self.batch_script, **subvars) def default_gpu_repo(self): + # training + for allocation in self.user_allocations(["gpu4sci"]): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] == "gpu": + return allocation["computeAllocation"]["repoName"] # special m1759 people for allocation in self.user_allocations(["m1759"]): for qos in allocation["userAllocationQos"]: @@ -218,6 +223,11 @@ def default_gpu_repo(self): return None def gpu_qos(self): + # training + for allocation in self.user_allocations(["gpu4sci"]): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] == "gpu": + return "regular" # special m1759 people, only special people there for allocation in self.user_allocations(["m1759"]): for qos in allocation["userAllocationQos"]: From f78e84915d0c17ec20db381fcda1fb7ec3b2e573 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 25 Jun 2020 07:47:14 -0700 Subject: [PATCH 57/60] Temporary advisory for J1->2 --- jupyter-nersc/web-jupyterhub/templates/home.html | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/jupyter-nersc/web-jupyterhub/templates/home.html b/jupyter-nersc/web-jupyterhub/templates/home.html index 215379e..a0853f4 100644 --- a/jupyter-nersc/web-jupyterhub/templates/home.html +++ b/jupyter-nersc/web-jupyterhub/templates/home.html @@ -89,6 +89,17 @@
{% endif %} {% endfor %} + +
+ {%- endmacro %} @@ -102,6 +113,8 @@ {{ spawner_table() }} {% endif %} + + {% endblock %} From 290a7e88a7525a240ad986f0806cd5d63089bdcd Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 25 Jun 2020 07:52:23 -0700 Subject: [PATCH 58/60] Make runtime dir random --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 5350307..59c0de8 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -2,6 +2,7 @@ import os import sys +from uuid import uuid4 import asyncssh from tornado import web @@ -615,7 +616,7 @@ def comma_split(string): # continues to work after upgrades! #c.Spawner.environment = {} c.Spawner.environment = { - "JUPYTER_RUNTIME_DIR": lambda spawner: f"/tmp/jupyter-runtime-{spawner.user.name}" + "JUPYTER_RUNTIME_DIR": lambda spawner: f"/tmp/jupyter-runtime-{uuid4()}" } ## Timeout (in seconds) before giving up on a spawned HTTP server From bb26b2702b62ffd2eb2d3a5d30ad8cc174ff3952 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Sun, 28 Jun 2020 11:56:20 -0700 Subject: [PATCH 59/60] Overdue tweak --- jupyter-nersc/web-jupyterhub/jupyterhub_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 59c0de8..ffac7b4 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -1162,8 +1162,8 @@ def comma_split(string): "cori-exclusive-node-cpu": ( "nerscslurmspawner.NERSCExclusiveSlurmSpawner", { "cmd": ["/global/common/cori_cle7/software/jupyter/20-06/bin/jupyterhub-singleuser"], + "args": ["--transport=ipc"], "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", - "http_timeout": 300, "startup_poll_interval": 30.0, "req_remote_host": "cori19-224.nersc.gov", "req_homedir": "/tmp", From 5fc3c4675cd4a3036759754fb72f314abf2eb9d0 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Sun, 28 Jun 2020 11:56:58 -0700 Subject: [PATCH 60/60] The logic only gets more complicated --- jupyter-nersc/web-jupyterhub/nerscspawner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/jupyter-nersc/web-jupyterhub/nerscspawner.py b/jupyter-nersc/web-jupyterhub/nerscspawner.py index d596eeb..32cd8a9 100644 --- a/jupyter-nersc/web-jupyterhub/nerscspawner.py +++ b/jupyter-nersc/web-jupyterhub/nerscspawner.py @@ -63,9 +63,10 @@ def check_role_staff(self, auth_state): return False def default_gpu_repo(self, auth_state): - for allocation in self.user_allocations(auth_state, ["nstaff", "m1759", "dasrepo", "gpu4sci"]): +# for allocation in self.user_allocations(auth_state, ["nstaff", "m1759", "dasrepo", "gpu4sci"]): + for allocation in self.user_allocations(auth_state): for qos in allocation["userAllocationQos"]: - if qos["qos"]["qos"] == "gpu": + if qos["qos"]["qos"] in ["gpu", "gpu_special_m1759"]: return allocation["computeAllocation"]["repoName"] return None
{{ setup.use_cases }}
+ What is the "Cori2" option?
+
    +
  • Click the "Cori2" button if you are willing to give JupyterLab 2.1 a test drive on the Cori shared CPU nodes.
  • +
  • JupyterLab 1.2 is still the default for Cori shared CPU nodes.
  • +
  • At the July maintenance, JupyterLab 2 will replace JupyterLab 1.
  • +
  • If you have any issues with JupyterLab 2, please let us know by ticket at https://help.nersc.gov
  • +
+