diff --git a/jupyter-base/Dockerfile b/jupyter-base/Dockerfile index ee00a48..05a5de4 100644 --- a/jupyter-base/Dockerfile +++ b/jupyter-base/Dockerfile @@ -32,10 +32,11 @@ RUN \ curl -s -o /tmp/miniconda3.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash /tmp/miniconda3.sh -f -b -p /opt/anaconda3 && \ rm -rf /tmp/miniconda3.sh && \ - echo "python 3.6.*" >> /opt/anaconda3/conda-meta/pinned && \ + echo "python 3.7.*" >> /opt/anaconda3/conda-meta/pinned && \ /opt/anaconda3/bin/conda update --yes conda && \ /opt/anaconda3/bin/conda install --yes \ alembic \ + cryptography \ decorator \ entrypoints \ jinja2 \ @@ -58,8 +59,11 @@ ENV PATH=/opt/anaconda3/bin:$PATH WORKDIR /tmp RUN \ npm install -g configurable-http-proxy && \ - git clone https://github.com/jupyterhub/jupyterhub.git && \ +# git clone https://github.com/jupyterhub/jupyterhub.git && \ + git clone https://github.com/rcthomas/jupyterhub.git && \ cd jupyterhub && \ +# git checkout tags/1.0.0 && \ + git checkout auth-state-to-spawner && \ /opt/anaconda3/bin/python setup.py js && \ /opt/anaconda3/bin/pip --no-cache-dir install . && \ cp examples/cull-idle/cull_idle_servers.py /opt/anaconda3/bin/. 
&& \ diff --git a/jupyter-nersc/app-monitoring/Dockerfile b/jupyter-nersc/app-monitoring/Dockerfile index bbdd021..67cc5d5 100644 --- a/jupyter-nersc/app-monitoring/Dockerfile +++ b/jupyter-nersc/app-monitoring/Dockerfile @@ -28,7 +28,6 @@ RUN \ curl -s -o /tmp/miniconda3.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash /tmp/miniconda3.sh -f -b -p /opt/anaconda3 && \ rm -rf /tmp/miniconda3.sh && \ - echo "python 3.6.*" >> /opt/anaconda3/conda-meta/pinned && \ /opt/anaconda3/bin/conda update --yes conda && \ /opt/anaconda3/bin/pip install --no-cache-dir \ pika==0.13.1 diff --git a/jupyter-nersc/web-jupyterhub/Dockerfile b/jupyter-nersc/web-jupyterhub/Dockerfile index 06eeb37..d503df1 100644 --- a/jupyter-nersc/web-jupyterhub/Dockerfile +++ b/jupyter-nersc/web-jupyterhub/Dockerfile @@ -6,19 +6,25 @@ WORKDIR /srv # Authenticator and spawner +# pip install git+https://github.com/nersc/nerscspawner.git && \ + RUN \ pip install git+https://github.com/nersc/sshapiauthenticator.git && \ + pip install git+https://github.com/jupyterhub/batchspawner.git@4747946 && \ pip install git+https://github.com/jupyterhub/wrapspawner.git && \ - pip install git+https://github.com/nersc/nerscspawner.git && \ pip install git+https://github.com/nersc/sshspawner.git # Customized templates ADD templates templates -# Announcement service +# FIXME Install this stuff +ENV PYTHONPATH=/srv +ADD nerscspawner.py . +ADD nerscslurmspawner.py . ADD announcement.py . +ADD iris.py . # Hub scripts diff --git a/jupyter-nersc/web-jupyterhub/hub-scripts/kill-cori.sh b/jupyter-nersc/web-jupyterhub/hub-scripts/kill-cori.sh new file mode 100644 index 0000000..3e49ed5 --- /dev/null +++ b/jupyter-nersc/web-jupyterhub/hub-scripts/kill-cori.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Shut down all but the newest jupyterhub server running. + +hostname=$1 +username=$2 +cert=/certs/$username.key +echo $username $cert +if [ ! -f $cert ]; then + echo " ... 
no cert for $username"
+  exit 1
+fi
+/usr/bin/ssh \
+  -i $cert \
+  -l $username \
+  -o PreferredAuthentications=publickey \
+  -o StrictHostKeyChecking=no \
+  -p 22 \
+  $hostname \
+  /global/common/shared/das/jupyterhub/kill-my-old-jupyters.sh
diff --git a/jupyter-nersc/web-jupyterhub/hub-scripts/scram-user.sh b/jupyter-nersc/web-jupyterhub/hub-scripts/scram-user.sh
new file mode 100644
index 0000000..8529d3e
--- /dev/null
+++ b/jupyter-nersc/web-jupyterhub/hub-scripts/scram-user.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Useful when JupyterHub thinks a user has no server running
+# but actually they do, and they still have a cert.
+
+hostname=$1
+username=$2
+cert=/certs/$username.key
+echo $username $cert
+if [ ! -f $cert ]; then
+  echo " ... SKIPPED no cert for $username"
+  exit 1
+fi
+for i in 1 2 3
+do
+  /usr/bin/ssh \
+    -i $cert \
+    -l $username \
+    -o PreferredAuthentications=publickey \
+    -o StrictHostKeyChecking=no \
+    -p 22 \
+    $hostname \
+    killall -u $username
+  sleep 1
+done
diff --git a/jupyter-nersc/web-jupyterhub/iris.py b/jupyter-nersc/web-jupyterhub/iris.py
new file mode 100644
index 0000000..c42c8e0
--- /dev/null
+++ b/jupyter-nersc/web-jupyterhub/iris.py
@@ -0,0 +1,47 @@
+
+from textwrap import dedent
+
+from tornado import escape, httpclient
+
+class Iris:
+
+    def __init__(self, iris_url="https://iris.nersc.gov/graphql"):
+        self.iris_url = iris_url
+
+    async def query_user(self, name):
+        query = dedent("""
+        query {{
+          systemInfo {{
+            users(name: "{}") {{
+              baseRepos {{
+                computeAllocation {{
+                  repoName
+                }}
+              }}
+              userAllocations {{
+                computeAllocation {{
+                  repoName
+                }}
+                userAllocationQos {{
+                  qos {{
+                    qos
+                  }}
+                }}
+              }}
+            }}
+          }}
+        }}""".format(name)).strip()
+        data = await self.query(query)
+        return data["data"]["systemInfo"]["users"][0]
+
+    async def query(self, query):
+        client = httpclient.AsyncHTTPClient()
+        request = self.format_request(query)
+        response = await client.fetch(request)
+        return escape.json_decode(response.body)
+
+    def
format_request(self, query): + return httpclient.HTTPRequest(self.iris_url, + method="POST", + headers={"Content-Type": "application/json"}, + body=escape.json_encode({"query": query})) diff --git a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py index 3711dee..8b25aec 100644 --- a/jupyter-nersc/web-jupyterhub/jupyterhub_config.py +++ b/jupyter-nersc/web-jupyterhub/jupyterhub_config.py @@ -410,7 +410,8 @@ def comma_split(string): # # Should be a subclass of Spawner. #c.JupyterHub.spawner_class = 'jupyterhub.spawner.LocalProcessSpawner' -c.JupyterHub.spawner_class = 'nerscspawner.nerscspawner.NERSCSpawner' +#c.JupyterHub.spawner_class = 'nerscspawner.nerscspawner.NERSCSpawner' +c.JupyterHub.spawner_class = 'nerscspawner.NERSCSpawner' ## Path to SSL certificate file for the public facing interface of the proxy # @@ -588,6 +589,7 @@ def comma_split(string): # process's environment (such as `CONFIGPROXY_AUTH_TOKEN`) is not passed to the # single-user server's process. #c.Spawner.env_keep = ['PATH', 'PYTHONPATH', 'CONDA_ROOT', 'CONDA_DEFAULT_ENV', 'VIRTUAL_ENV', 'LANG', 'LC_ALL'] +c.Spawner.env_keep = ['PATH', 'CONDA_ROOT', 'CONDA_DEFAULT_ENV', 'VIRTUAL_ENV', 'LANG', 'LC_ALL'] ## Extra environment variables to set for the single-user server's process. # @@ -607,13 +609,13 @@ def comma_split(string): # across upgrades, so if you are using the callable take care to verify it # continues to work after upgrades! #c.Spawner.environment = {} -c.Spawner.environment = {"OMP_NUM_THREADS" : "2"} ## Timeout (in seconds) before giving up on a spawned HTTP server # # Once a server has successfully been spawned, this is the amount of time we # wait before assuming that the server is unable to accept connections. #c.Spawner.http_timeout = 30 +c.Spawner.http_timeout = 300 ## The IP address (or hostname) the single-user server should listen on. # @@ -745,6 +747,7 @@ def comma_split(string): # takes longer than this. 
start should return when the server process is started # and its location is known. #c.Spawner.start_timeout = 60 +c.Spawner.start_timeout = 900 #------------------------------------------------------------------------------ # LocalProcessSpawner(Spawner) configuration @@ -865,6 +868,7 @@ def comma_split(string): # # New in JupyterHub 0.8 #c.Authenticator.enable_auth_state = False +c.Authenticator.enable_auth_state = True ## Dictionary mapping authenticator usernames to JupyterHub users. # @@ -987,8 +991,11 @@ def comma_split(string): #------------------------------------------------------------------------------ c.NERSCSpawner.profiles = [ - { "name": "cori-shared-node-cpu" }, - { "name": "spin-shared-node-cpu" }, + { "name": "gerty-shared-node-cpu" }, + { "name": "cori-shared-node-cpu" }, + { "name": "cori-exclusive-node-cpu" }, + { "name": "cori-exclusive-node-gpu" }, + { "name": "spin-shared-node-cpu" }, ] c.NERSCSpawner.setups = [ @@ -998,23 +1005,64 @@ def comma_split(string): { "name": "cpu", "description": "Shared CPU Node", + "roles": [], } ], "resources": "Use a node shared with other users' notebooks but outside the batch queues.", "use_cases": "Visualization and analytics that are not memory intensive and can run on just a few cores." - } + }, + { + "name": "exclusive-node", + "architectures": [ + { + "name": "cpu", + "description": "Exclusive CPU Node", + "roles": ["cori-exclusive-node-cpu"], + }, + { + "name": "gpu", + "description": "Exclusive GPU Node", + "roles": ["gpu"], + } + ], + "resources": "Use your own node within a job allocation using defaults.", + "use_cases": "Visualization, analytics, machine learning that is compute or memory intensive but can be done on a single node." 
+ }, ] c.NERSCSpawner.systems = [ - { "name": "cori" }, - { "name": "spin" } + { + "name": "gerty", + "roles": ["staff"] + }, + { + "name": "cori", + "roles": [] + }, + { + "name": "spin", + "roles": [] + } ] c.NERSCSpawner.spawners = { + "gerty-shared-node-cpu": ( + "sshspawner.sshspawner.SSHSpawner", { + "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori/software/python/3.6-anaconda-5.2/bin/jupyter-labhub"], + "environment": {"OMP_NUM_THREADS" : "2"}, + "remote_hosts": ["gert01.nersc.gov"], + "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", + "hub_api_url": "http://{}:8081/hub/api".format(ip), + "path": "/global/common/cori/software/python/3.6-anaconda-5.2/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + "ssh_keyfile": '/certs/{username}.key' + } + ), "cori-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", "/global/common/cori/software/python/3.6-anaconda-5.2/bin/jupyter-labhub"], + "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["corijupyter.nersc.gov"], "remote_port_command": "/usr/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", "hub_api_url": "http://{}:8081/hub/api".format(ip), @@ -1022,10 +1070,38 @@ def comma_split(string): "ssh_keyfile": '/certs/{username}.key' } ), + "cori-exclusive-node-cpu": ( + "nerscslurmspawner.NERSCExclusiveSlurmSpawner", { + "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori/software/python/3.6-anaconda-5.2/bin/jupyter-labhub"], + "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", + "http_timeout": 300, + "startup_poll_interval": 30.0, + "req_remote_host": "cori19-224.nersc.gov", + "req_homedir": "/tmp", + "req_runtime": "240", + "hub_api_url": 
"http://{}:8081/hub/api".format(ip), + "path": "/global/common/cori/software/python/3.6-anaconda-5.2/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + } + ), + "cori-exclusive-node-gpu": ( + "nerscslurmspawner.NERSCExclusiveGPUSlurmSpawner", { + "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", + "/global/common/cori/software/python/3.6-anaconda-5.2/bin/jupyter-labhub"], + "exec_prefix": "/usr/bin/ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /certs/{username}.key {remote_host}", + "startup_poll_interval": 30.0, + "req_remote_host": "cori19-224.nersc.gov", + "req_homedir": "/tmp", + "req_runtime": "240", + "hub_api_url": "http://{}:8081/hub/api".format(ip), + "path": "/global/common/cori/software/python/3.6-anaconda-5.2/bin:/global/common/cori/das/jupyterhub:/usr/common/usg/bin:/usr/bin:/bin", + } + ), "spin-shared-node-cpu": ( "sshspawner.sshspawner.SSHSpawner", { "cmd": ["/global/common/cori/das/jupyterhub/jupyter-launcher.sh", "/opt/anaconda3/bin/jupyter-labhub"], + "environment": {"OMP_NUM_THREADS" : "2"}, "remote_hosts": ["app-notebooks"], "remote_port_command": "/opt/anaconda3/bin/python /global/common/cori/das/jupyterhub/new-get-port.py --ip", "hub_api_url": "http://{}:8081/hub/api".format(ip), @@ -1069,6 +1145,32 @@ async def setup(spawner): # c.Spawner.pre_spawn_hook = setup +### + +from iris import Iris + +async def post_auth_hook(authenticator, handler, authentication): + iris = Iris() + userdata = await iris.query_user(authentication["name"]) + if authentication["auth_state"] is None: + authentication["auth_state"] = {} + authentication["auth_state"]["userdata"] = userdata + return authentication + +c.Authenticator.post_auth_hook = post_auth_hook + +### + +def auth_state_hook(spawner, auth_state): + spawner.userdata = auth_state["userdata"] + +c.Spawner.auth_state_hook = auth_state_hook + +### Prometheus + +c.JupyterHub.authenticate_prometheus = False + + ## 
c.NERSCSpawner.spawners = [ ## ("spin", "sshspawner.sshspawner.SSHSpawner", { ## "remote_hosts" : ["jupyter"], diff --git a/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py new file mode 100644 index 0000000..f0d5b1e --- /dev/null +++ b/jupyter-nersc/web-jupyterhub/nerscslurmspawner.py @@ -0,0 +1,373 @@ + +from textwrap import dedent + +import asyncssh +from traitlets import default, Unicode +from tornado import escape, httpclient + +from batchspawner import format_template, BatchSpawnerRegexStates + +class NERSCSlurmSpawner(BatchSpawnerRegexStates): + """Spawner that connects to a job-submit (login node) and submits a job to + start a process running in the Slurm batch queue. + NOTE Right now we allow the hub to pre-select a random port but when multiple + users are on the same compute node, a la shared-interactive, we need to control + the port selected deterministically or ensure they don't collide in some way. + This has been done in later versions of BatchSpawner.""" + + exec_prefix = Unicode( + "ssh -q -o StrictHostKeyChecking=no -o preferredauthentications=publickey -l {username} -i /tmp/{username}.key {remote_host}", + config=True) + + req_constraint = Unicode('haswell', + help="""Users specify which features are required by their job + using the constraint option, which is required at NERSC on Cori/Gerty.""" + ).tag(config=True) + + req_nodes = Unicode('1', + help="Number of nodes", + config=True) + + req_qos = Unicode('jupyter', + help="QoS name to submit job to resource manager" + ).tag(config=True) + + req_remote_host = Unicode('remote_host', + help="""The SSH remote host to spawn sessions on.""" + ).tag(config=True) + + hub_api_url = Unicode().tag(config=True) + + path = Unicode().tag(config=True) + + req_homedir = Unicode().tag(config=True) + + req_env_text = Unicode() + + @default("req_env_text") + def _req_env_text(self): + env = self.get_env() + text = "" + for item in env.items(): + text += 
'export %s=%s\n' % item + return text + +# sim_url=Unicode("https://sim-stage.nersc.gov/graphql").tag(config=True) + sim_url=Unicode("https://sim.nersc.gov/graphql").tag(config=True) + + async def query_sim_accounts(self, name): #rename + query = dedent(""" + query {{ + systemInfo {{ + users(name: "{}") {{ + baseRepos {{ + computeAllocation {{ + repoName + }} + }} + }} + }} + systemInfo {{ + users(name: "{}") {{ + userAllocations {{ + computeAllocation {{ + repoName + }} + }} + }} + }} + }}""".format(name, name)).strip() + data = await self.query_sim(query) + user = data["data"]["systemInfo"]["users"][0] + default_account = user["baseRepos"][0]["computeAllocation"]["repoName"] + accounts = [a["computeAllocation"]["repoName"] for a in user["userAllocations"]] + accounts.sort() + accounts.remove(default_account) + accounts.insert(0, default_account) + return accounts + + async def query_sim(self, query): + http_client = httpclient.AsyncHTTPClient() + request = self.sim_request(query) + response = await http_client.fetch(request) + return escape.json_decode(response.body) + + def sim_request(self, query): + return httpclient.HTTPRequest(self.sim_url, + method="POST", + headers={"Content-Type": "application/json"}, + body=escape.json_encode({"query": query})) + + # outputs line like "Submitted batch job 209" + batch_submit_cmd = Unicode("/usr/bin/sbatch").tag(config=True) + # outputs status and exec node like "RUNNING hostname" + batch_query_cmd = Unicode("/usr/bin/python /global/common/cori/das/jupyterhub/new-get-ip.py {job_id}").tag(config=True) + batch_cancel_cmd = Unicode("/usr/bin/scancel {job_id}").tag(config=True) + # use long-form states: PENDING, CONFIGURING = pending + # RUNNING, COMPLETING = running + state_pending_re = Unicode(r'^(?:PENDING|CONFIGURING)').tag(config=True) + state_running_re = Unicode(r'^(?:RUNNING|COMPLETING)').tag(config=True) + state_exechost_re = Unicode(r'\s+((?:[\w_-]+\.?)+)$').tag(config=True) + + def parse_job_id(self, output): + # 
make sure jobid is really a number + try: + id = output.split(' ')[-1] + int(id) + except Exception as e: + self.log.error("SlurmSpawner unable to parse job ID from text: " + output) + raise e + return id + + # This is based on SSH Spawner + def get_env(self): + """Add user environment variables""" + env = super().get_env() + + env.update(dict( + JPY_USER=self.user.name, + #JPY_BASE_URL=self.user.server.base_url, + JPY_HUB_PREFIX=self.hub.server.base_url, + JUPYTERHUB_PREFIX=self.hub.server.base_url, + # PATH=self.path + # NERSC local mod + PATH=self.path + )) + + if self.notebook_dir: + env['NOTEBOOK_DIR'] = self.notebook_dir + + hub_api_url = self.hub.api_url + if self.hub_api_url != '': + hub_api_url = self.hub_api_url + + env['JPY_HUB_API_URL'] = hub_api_url + env['JUPYTERHUB_API_URL'] = hub_api_url + + return env + +class NERSCExclusiveSlurmSpawner(NERSCSlurmSpawner): + + batch_script = Unicode("""#!/bin/bash +{%- if constraint %} +#SBATCH --constraint={{ constraint }} +{%- endif %} +#SBATCH --job-name=jupyter +#SBATCH --nodes={{ nodes }} +#SBATCH --qos={{ qos }} +#SBATCH --sdn +#SBATCH --time={{ runtime }} +{{ env_text }} +unset XDG_RUNTIME_DIR +{{ cmd }}""").tag(config=True) + + +class NERSCExclusiveGPUSlurmSpawner(NERSCSlurmSpawner): + + batch_submit_cmd = Unicode("/global/common/cori/das/jupyterhub/esslurm-wrapper.sh sbatch").tag(config=True) + batch_query_cmd = Unicode("/global/common/cori/das/jupyterhub/esslurm-wrapper.sh squeue -h -j {job_id} -o '%T\ %B-144.nersc.gov'").tag(config=True) + batch_cancel_cmd = Unicode("/global/common/cori/das/jupyterhub/esslurm-wrapper.sh scancel {job_id}").tag(config=True) + + batch_script = Unicode("""#!/bin/bash +#SBATCH --account={{ account }} +#SBATCH --constraint=gpu +#SBATCH --gres=gpu:1 +#SBATCH --job-name=jupyter +#SBATCH --mem=30GB +#SBATCH --nodes={{ nodes }} +#SBATCH --time={{ runtime }} +{{ env_text }} +unset XDG_RUNTIME_DIR +{{ cmd }}""").tag(config=True) + + # Have to override this to call get_auth_state() I 
think + async def _get_batch_script(self, **subvars): + """Format batch script from vars""" + auth_state = await self.user.get_auth_state() + accounts = auth_state["accounts"] + if "nstaff" in accounts: + account = "nstaff" + elif "dasrepo" in accounts: + account = "dasrepo" + elif "m1759" in accounts: + account = "m1759" + else: + account = accounts[0] + subvars["account"] = account + return format_template(self.batch_script, **subvars) + + +class NERSCConfigurableSlurmSpawner(NERSCSlurmSpawner): + + req_image = Unicode("", + help="Shifter image", + config=True) + + req_reservation = Unicode("", + help="Reservation.", + config=True) + + async def options_form(self, spawner): + form = "" + + # Account + + form += dedent(""" + + + """) + + # Nodes, should come from model + + form += dedent(""" + + + """) + + # Time, should come from model + + form += dedent(""" + + + """) + + # QOS, should come from model + + form += dedent(""" + + + """) + + # Constraint, should come from model + + form += dedent(""" + + + """) + + # Reservation + + form += dedent(""" + + + """) + + # Images + + form += dedent(""" + + + """) + + return form + + async def query_reservations(self, name): + # Should filter on username + remote_host = self.req_remote_host + keyfile = "/certs/{}.key".format(name) + certfile = keyfile + "-cert.pub" + k = asyncssh.read_private_key(keyfile) + c = asyncssh.read_certificate(certfile) + async with asyncssh.connect(remote_host, + username=name, + client_keys=[(k,c)], + known_hosts=None) as conn: + result = await conn.run("/usr/bin/scontrol show reservation --oneliner") + reservations = [""] + for line in result.stdout.split("\n"): + columns = line.split() + for column in columns: + key, value = column.split("=", 1) + if key == "ReservationName": + reservations.append(value) + break + return reservations + + async def query_images(self, name): + # Some better filtering is needed... 
+ remote_host = self.req_remote_host + keyfile = "/certs/{}.key".format(name) + certfile = keyfile + "-cert.pub" + k = asyncssh.read_private_key(keyfile) + c = asyncssh.read_certificate(certfile) + async with asyncssh.connect(remote_host, + username=name, + client_keys=[(k,c)], + known_hosts=None) as conn: + result = await conn.run("/usr/bin/shifterimg images") + images = [""] + for line in result.stdout.split("\n"): + columns = line.split() + if columns: + image_name = columns[-1] + if image_name.find("jupyterlab") < 0: + continue + images.append(image_name) + return images + + def options_from_form(self, formdata): + options = dict() + options["account"] = formdata["account"][0] + options["constraint"] = formdata["constraint"][0] + options["image"] = formdata["image"][0] + options["nodes"] = formdata["nodes"][0] + options["qos"] = formdata["qos"][0] + options["reservation"] = formdata["reservation"][0] + options["time"] = formdata["time"][0] + return options + + batch_script = Unicode("""#!/bin/bash +#SBATCH --account={{ account }} +{%- if constraint %} +#SBATCH --constraint={{ constraint }} +{%- endif %} +{%- if image %} +#SBATCH --image={{ image }} +{%- endif %} +#SBATCH --job-name=jupyter +#SBATCH --nodes={{ nodes }} +#SBATCH --output=jupyter-%j.log +#SBATCH --qos={{ qos }} +{%- if reservation %} +#SBATCH --reservation={{ reservation }} +{%- endif %} +#SBATCH --sdn +#SBATCH --time={{ time }} +{{ env_text }} +unset XDG_RUNTIME_DIR +{% if image %} +shifter jupyter-labhub {{ cmd.split()[2:] | join(" ") }} +{% else %} +{{ cmd }} +{% endif %}""").tag(config=True) diff --git a/jupyter-nersc/web-jupyterhub/nerscspawner.py b/jupyter-nersc/web-jupyterhub/nerscspawner.py new file mode 100644 index 0000000..79825bf --- /dev/null +++ b/jupyter-nersc/web-jupyterhub/nerscspawner.py @@ -0,0 +1,123 @@ + +import os + +from jupyterhub.spawner import LocalProcessSpawner + +from tornado import httpclient +from traitlets import List, Dict, Unicode, observe + +from wrapspawner 
import WrapSpawner + +class NERSCSpawner(WrapSpawner): + + profiles = List(Dict(), + help="TBD", + config=True) + + setups = List(Dict(), + help="TBD", + config=True) + + systems = List(Dict(), + help="TBD", + config=True) + + spawners = Dict( + help="TBD", + config=True) + + child_profile = Unicode() + + userdata = Dict() + + def check_roles(self, roles): + """User has one or more of these roles""" + if roles: + for role in roles: + if self.check_role(role): + return True + return False + else: + return True + + def check_role(self, role): + if role == "gpu": + return self.check_role_gpu() + if role == "staff": + return self.check_role_staff() + if role == "cori-exclusive-node-cpu": + return self.check_role_cori_exclusive_node_cpu() + return False + + def check_role_cori_exclusive_node_cpu(self): + users = os.environ.get("CORI_EXCLUSIVE_NODE_CPU_USERS") + if users: + return self.user.name in users.split(",") + else: + return True + + def check_role_gpu(self): + return self.default_gpu_repo() is not None + + def check_role_staff(self): + for allocation in self.user_allocations(["nstaff"]): + return True + return False + + def default_gpu_repo(self): + for allocation in self.user_allocations(["nstaff", "m1759", "dasrepo"]): + for qos in allocation["userAllocationQos"]: + if qos["qos"]["qos"] == "gpu": + return allocation["computeAllocation"]["repoName"] + return None + + def user_allocations(self, repos=[]): + for allocation in self.userdata["userAllocations"]: + if repos and allocation["computeAllocation"]["repoName"] not in repos: + continue + yield allocation + + def select_profile(self, profile): + self.log.debug("select_profile: " + profile) + if profile in self.spawners: + self.child_class, self.child_config = self.spawners[profile] + else: + self.child_class, self.child_config = LocalProcessSpawner, {} + + def construct_child(self): + self.log.debug("construct_child called") + # self.child_profile = self.user_options.get('profile', "") + self.child_profile = 
self.name + self.select_profile(self.child_profile) + super().construct_child() + self.child_spawner.orm_spawner = self.orm_spawner ### IS THIS KOSHER?!?!!? + self.options_form = self.child_spawner.options_form # another one... + self.options_from_form = self.child_spawner.options_from_form + self.child_spawner.user_options = self.user_options + ### Think we need to do this to get JUPYTERHUB_OAUTH_CALLBACK_URL set properly + + def load_child_class(self, state): + self.log.debug("load_child_class called") + try: + self.child_profile = state['profile'] + except KeyError: + self.child_profile = '' + self.select_profile(self.child_profile) + + def get_state(self): + state = super().get_state() + state['profile'] = self.child_profile + return state + + def clear_state(self): + super().clear_state() + self.child_profile = '' + + @property + def model_updated(self): + return True + + @observe("user_options") + def _observe_user_options(self, change): + self.log.debug("user_options observed: " + str(change)) + diff --git a/jupyter-nersc/web-jupyterhub/templates/home.html b/jupyter-nersc/web-jupyterhub/templates/home.html index 478d72c..7c3dbce 100644 --- a/jupyter-nersc/web-jupyterhub/templates/home.html +++ b/jupyter-nersc/web-jupyterhub/templates/home.html @@ -19,7 +19,7 @@ {% for setup in user.spawner.setups -%} - {% for arch in setup.architectures %} + {% for arch in setup.architectures if user.spawner.check_roles(arch.roles) %} {{ arch.description }} {% endfor %} {%- endfor %} @@ -28,7 +28,7 @@ {% macro spawner_table_body() -%} - {% for system in user.spawner.systems -%} + {% for system in user.spawner.systems if user.spawner.check_roles(system.roles) -%} {{ spawner_table_body_row(system) }} {%- endfor %} @@ -44,10 +44,11 @@ {%- endmacro %} {% macro spawner_table_body_row_data(system, setup) -%} -{% for arch in setup.architectures -%} +{% for arch in setup.architectures if user.spawner.check_roles(arch.roles) %} {% set profile_key = [system.name, setup.name, 
arch.name] | join("-") %} - {% if user.spawner.profiles | selectattr("name", "equalto", profile_key) | first %} + {% set profile = user.spawner.profiles | selectattr("name", "equalto", profile_key) | first %} + {% if profile %} {% if profile_key in user.spawners or profile_key in user.orm_user.orm_spawners %} {% set spawner = user.spawners[profile_key] | default(user.orm_user.orm_spawners[profile_key]) %} stop @@ -63,16 +64,30 @@ {%- endfor %} {%- endmacro %} +{# HORRIBLE below, should migrate logic into spawner #} + {% macro spawner_table_footer() -%} Resources {% for setup in user.spawner.setups %} - {{ setup.resources }} + {% set counter = [] %} + {% for arch in setup.architectures if user.spawner.check_roles(arch.roles) %} + {% if counter.append('1') %}{% endif %} + {% endfor %} + {% if counter | length %} + {{ setup.resources }} + {% endif %} {% endfor %} Use Cases {% for setup in user.spawner.setups %} - {{ setup.use_cases }} + {% set counter = [] %} + {% for arch in setup.architectures if user.spawner.check_roles(arch.roles) %} + {% if counter.append('1') %}{% endif %} + {% endfor %} + {% if counter | length %} + {{ setup.use_cases }} + {% endif %} {% endfor %} {%- endmacro %} diff --git a/jupyter-nersc/web-offline/Dockerfile b/jupyter-nersc/web-offline/Dockerfile index 72a684e..41a6a52 100644 --- a/jupyter-nersc/web-offline/Dockerfile +++ b/jupyter-nersc/web-offline/Dockerfile @@ -28,7 +28,7 @@ RUN \ curl -s -o /tmp/miniconda3.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash /tmp/miniconda3.sh -f -b -p /opt/anaconda3 && \ rm -rf /tmp/miniconda3.sh && \ - echo "python 3.6.*" >> /opt/anaconda3/conda-meta/pinned && \ + echo "python 3.7.*" >> /opt/anaconda3/conda-meta/pinned && \ /opt/anaconda3/bin/conda update --yes conda && \ /opt/anaconda3/bin/conda install --yes \ flask \