Skip to content

Commit

Permalink
Segregate docker-worker caches by docker image
Browse files Browse the repository at this point in the history
Sharing caches across different docker images runs the risk of
different versions of tools in different tasks trying to access the same
storage, and breaking in exciting ways (e.g. mercurial clones are not
downgrade-safe, so sharing a cache across all tasks with possibly
different hg versions doesn't work reliably).  Splitting them based on
the docker image used by the task means less data is shared than would
be possible, but seems safer given we can't know what hg version is
on each image.
  • Loading branch information
jcristau committed Nov 27, 2023
1 parent 890d23a commit 26e0dc8
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 11 deletions.
6 changes: 0 additions & 6 deletions src/taskgraph/transforms/job/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,6 @@ def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False):
if sparse:
cache_name += "-sparse"

# Workers using Mercurial >= 5.8 will enable revlog-compression-zstd, which
# workers using older versions can't understand, so they can't share cache.
# At the moment, only docker workers use the newer version.
if is_docker:
cache_name += "-hg58"

add_cache(job, taskdesc, cache_name, checkoutdir)

env = taskdesc["worker"].setdefault("env", {})
Expand Down
14 changes: 10 additions & 4 deletions src/taskgraph/transforms/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,8 @@ def build_docker_worker_payload(config, task, task_def):
out_of_tree_image.encode("utf-8")
).hexdigest()
suffix += name_hash[0:12]
else:
suffix += "-<docker-image>"

else:
suffix = cache_version
Expand All @@ -539,13 +541,15 @@ def build_docker_worker_payload(config, task, task_def):
suffix=suffix,
)
caches[name] = cache["mount-point"]
task_def["scopes"].append("docker-worker:cache:%s" % name)
task_def["scopes"].append(
{"task-reference": "docker-worker:cache:%s" % name}
)

# Assertion: only run-task is interested in this.
if run_task:
payload["env"]["TASKCLUSTER_CACHES"] = ";".join(sorted(caches.values()))

payload["cache"] = caches
payload["cache"] = {"task-reference": caches}

# And send down volumes information to run-task as well.
if run_task and worker.get("volumes"):
Expand Down Expand Up @@ -1343,7 +1347,9 @@ def check_run_task_caches(config, tasks):
main_command = command[0] if isinstance(command[0], str) else ""
run_task = main_command.endswith("run-task")

for cache in payload.get("cache", {}):
for cache in payload.get("cache", {}).get(
"task-reference", payload.get("cache", {})
):
if not cache.startswith(cache_prefix):
raise Exception(
"{} is using a cache ({}) which is not appropriate "
Expand All @@ -1364,7 +1370,7 @@ def check_run_task_caches(config, tasks):
"cache name"
)

if not cache.endswith(suffix):
if suffix not in cache:
raise Exception(
f"{task['label']} is using a cache ({cache}) reserved for run-task "
"but the cache name is not dependent on the contents "
Expand Down
2 changes: 1 addition & 1 deletion test/test_transforms_job_run_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def assert_docker_worker(task):
"caches": [
{
"mount-point": "/builds/worker/checkouts",
"name": "checkouts-hg58",
"name": "checkouts",
"skip-untrusted": False,
"type": "persistent",
}
Expand Down

0 comments on commit 26e0dc8

Please sign in to comment.