Skip to content

Commit

Permalink
Merge pull request #135 from kbase/dev-service
Browse files Browse the repository at this point in the history
Load JAWS conf file to NERSC
  • Loading branch information
MrCreosote authored Dec 18, 2024
2 parents 365c295 + d7bbbc9 commit 38079c7
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 17 deletions.
6 changes: 3 additions & 3 deletions cdmtaskservice/app_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ async def build_app(
logr.info("Done")
logr.info("Setting up NERSC manager and installing code at NERSC...")
remote_code_loc = Path(cfg.nersc_remote_code_dir) / VERSION
nerscman = await NERSCManager.create(sfapi_client.get_client, remote_code_loc)
nerscman = await NERSCManager.create(
sfapi_client.get_client, remote_code_loc, cfg.jaws_token, cfg.jaws_group
)
logr.info("Done")
logr.info("Initializing S3 client... ")
s3 = await S3Client.create(
Expand All @@ -98,8 +100,6 @@ async def build_app(
s3,
s3_external,
coman,
cfg.jaws_token,
cfg.jaws_group,
cfg.service_root_url,
s3_insecure_ssl=cfg.s3_allow_insecure,
)
Expand Down
6 changes: 0 additions & 6 deletions cdmtaskservice/jobflows/nersc_jaws.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ def __init__(
s3_client: S3Client,
s3_external_client: S3Client,
coro_manager: CoroutineWrangler,
jaws_token: str,
jaws_group: str,
service_root_url: str,
s3_insecure_ssl: bool = False,
):
Expand All @@ -51,8 +49,6 @@ def __init__(
that may not be accessible from the current process, but is accessible to remote
processes at NERSC.
coro_manager - a coroutine manager.
jaws_token - a token for the JGI JAWS system.
jaws_group - the group to use for running JAWS jobs.
service_root_url - the URL of the service root, used for constructing service callbacks.
s3_insecure_ssl - whether to skip checking the SSL certificate for the S3 instance,
leaving the service open to MITM attacks.
Expand All @@ -64,8 +60,6 @@ def __init__(
self._s3ext = _not_falsy(s3_external_client, "s3_external_client")
self._s3insecure = s3_insecure_ssl
self._coman = _not_falsy(coro_manager, "coro_manager")
self._jtoken = _require_string(jaws_token, "jaws_token")
self._jgroup = _require_string(jaws_group, "jaws_group")
self._callback_root = _require_string(service_root_url, "service_root_url")

async def start_job(self, job: models.Job, objmeta: list[S3ObjectMeta]):
Expand Down
38 changes: 30 additions & 8 deletions cdmtaskservice/nersc/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@

_CTS_SCRATCH_ROOT_DIR = Path("cdm_task_service")


# Name of the JAWS configuration file uploaded to NERSC during remote code setup.
# It is uploaded with no parent directory, which places it in the remote home directory.
_JAWS_CONF_FILENAME = "jaws.conf"
# Contents of the JAWS configuration file. The {token} and {group} placeholders are
# filled in via str.format() with the JAWS token and JAWS group before upload.
# The rendered text contains a credential, so treat it as a secret.
_JAWS_CONF_TEMPLATE = """
[USER]
token = {token}
default_team = {group}
"""


# TODO PROD add start and end time to task output and record
# TODO NERSCFEATURE if NERSC puts python 3.11 on the dtns revert to regular load
_PYTHON_LOAD_HACK = "module use /global/common/software/nersc/pe/modulefiles/latest"
Expand Down Expand Up @@ -98,6 +107,8 @@ async def create(
cls,
client_provider: Callable[[], AsyncClient],
nersc_code_path: Path,
jaws_token: str,
jaws_group: str,
) -> Self:
"""
Create the NERSC manager.
Expand All @@ -106,9 +117,11 @@ async def create(
the user associated with the client does not change.
nersc_code_path - the path in which to store remote code at NERSC. It is advised to
include version information in the path to avoid code conflicts.
jaws_token - a token for the JGI JAWS system.
jaws_group - the group to use for running JAWS jobs.
"""
nm = NERSCManager(client_provider, nersc_code_path)
await nm._setup_remote_code()
await nm._setup_remote_code(jaws_token, jaws_group)
return nm

def __init__(
Expand All @@ -126,7 +139,7 @@ def _check_path(self, path: Path, name: str):
raise ValueError(f"{name} must be absolute to the NERSC root dir")
return path

async def _setup_remote_code(self):
async def _setup_remote_code(self, jaws_token: str, jaws_group: str):
# TODO RELIABILITY atomically write files. For these small ones probably doesn't matter?
cli = self._client_provider()
perlmutter = await cli.compute(Machine.perlmutter)
Expand All @@ -145,7 +158,15 @@ async def _setup_remote_code(self):
perlmutter,
self._nersc_code_path / _PROCESS_DATA_XFER_MANIFEST_FILENAME,
bio=io.BytesIO(_PROCESS_DATA_XFER_MANIFEST.encode()),
make_exe=True,
chmod="u+x",
))
tg.create_task(self._upload_file_to_nersc(
perlmutter,
Path(_JAWS_CONF_FILENAME), # No path puts it in the home dir
bio=io.BytesIO(
_JAWS_CONF_TEMPLATE.format(token=jaws_token, group=jaws_group).encode()
),
chmod = "600"
))
res = tg.create_task(dt.run('bash -c "echo $SCRATCH"'))
if _PIP_DEPENDENCIES:
Expand Down Expand Up @@ -176,10 +197,11 @@ async def _upload_file_to_nersc(
target: Path,
file: Path = None,
bio: io.BytesIO = None,
make_exe: bool = False,
chmod: str = None,
):
cmd = f'bash -c "mkdir -p {target.parent}"'
await compute.run(cmd)
if target.parent != Path("."):
cmd = f'bash -c "mkdir -p {target.parent}"'
await compute.run(cmd)
# skip some API calls vs. the upload example in the NERSC docs
# don't use a directory as the target or it makes an API call
asrp = AsyncRemotePath(path=target, compute=compute)
Expand All @@ -190,8 +212,8 @@ async def _upload_file_to_nersc(
await asrp.upload(f)
else:
await asrp.upload(bio)
if make_exe:
cmd = f'bash -c "chmod u+x {target}"'
if chmod:
cmd = f'bash -c "chmod {chmod} {target}"'
await compute.run(cmd)

async def download_s3_files(
Expand Down

0 comments on commit 38079c7

Please sign in to comment.