Skip to content

Commit

Permalink
Add a cli command to backfill manifests for all workspaces
Browse files Browse the repository at this point in the history
I have manually tested this using the db from the test backend. I have
not included automated tests, as a) this is a one off migration command,
b) they would be quite hard to write and c) this is an offline process
that is not mission critical
  • Loading branch information
bloodearnest committed Apr 24, 2024
1 parent c29ed94 commit f8f43a9
Showing 1 changed file with 96 additions and 0 deletions.
96 changes: 96 additions & 0 deletions jobrunner/cli/manifests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
Ops utility for backfilling manifest.json files from db
"""
import argparse

from jobrunner.executors import local
from jobrunner.lib import database
from jobrunner.models import Job


def main(workspaces=None):
conn = database.get_connection()
workspaces = [
w["workspace"] for w in conn.execute("SELECT DISTINCT(workspace) FROM job;")
]

n_workspaces = len(workspaces)
for i, workspace in enumerate(workspaces):
print(f"workspace {i+1}/{n_workspaces}: {workspace}")

level4_dir = local.get_medium_privacy_workspace(workspace)

sentinel = level4_dir / ".manifest-backfill"
if sentinel.exists():
print(" - already done, skipping")
continue

write_manifest(workspace)

sentinel.touch()


def write_manifest(workspace):
conn = database.get_connection()
workspace_dir = local.get_high_privacy_workspace(workspace)
level4_dir = local.get_medium_privacy_workspace(workspace)

job_ids = conn.execute(
"""
SELECT id FROM job
WHERE workspace = ?
AND outputs != ''
AND completed_at IS NOT NULL
AND state = 'succeeded'
ORDER BY completed_at DESC;
""",
(workspace,),
)

outputs = {}

for row in job_ids:
job_id = row["id"]
job = database.find_one(Job, id=job_id)

for output, level in job.outputs.items():
if output in outputs:
# older version of the file, ingnore
continue

abspath = workspace_dir / output

# testing only
abspath.parent.mkdir(parents=True, exist_ok=True)
abspath.touch()

# use presence of message file to detect excluded files
message_file = level4_dir / (output + ".txt")
excluded = message_file.exists()

metadata = local.get_output_metadata(
abspath,
level,
job_id=job_id,
job_request=job.job_request_id,
action=job.action,
commit=job.commit,
excluded=excluded,
)

outputs[output] = metadata

manifest = local.read_manifest_file(level4_dir, workspace)
manifest["outputs"] = outputs
print(f" - writing manifest for {workspace} with {len(outputs)} outputs")
local.write_manifest_file(level4_dir, manifest)


def run():
parser = argparse.ArgumentParser(description=__doc__.partition("\n\n")[0])
args = parser.parse_args()
main(**vars(args))


if __name__ == "__main__":
run()

0 comments on commit f8f43a9

Please sign in to comment.