From da0f103eaee19409b651ea430f5438e1b784e453 Mon Sep 17 00:00:00 2001
From: Julien Cristau <jcristau@mozilla.com>
Date: Wed, 22 Nov 2023 14:25:46 +0100
Subject: [PATCH] Segregate docker-worker caches by docker image

Sharing caches across different docker images runs the risk of
different versions of tools in different tasks trying to access the same
storage, and breaking in exciting ways (e.g. mercurial clones are not
downgrade-safe, so sharing a cache across all tasks with possibly
different hg versions doesn't work reliably).  Splitting them based on
the docker image used by the task means less data is shared than would
be possible, but seems safer given we can't know what hg version is
on each image.
---
 src/taskgraph/transforms/job/common.py |  6 ------
 src/taskgraph/transforms/task.py       | 30 +++++++++++++++-----------
 test/test_transforms_job_run_task.py   |  2 +-
 3 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/src/taskgraph/transforms/job/common.py b/src/taskgraph/transforms/job/common.py
index 04708daf8..b75f9e9ff 100644
--- a/src/taskgraph/transforms/job/common.py
+++ b/src/taskgraph/transforms/job/common.py
@@ -130,12 +130,6 @@ def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False):
     if sparse:
         cache_name += "-sparse"
 
-    # Workers using Mercurial >= 5.8 will enable revlog-compression-zstd, which
-    # workers using older versions can't understand, so they can't share cache.
-    # At the moment, only docker workers use the newer version.
-    if is_docker:
-        cache_name += "-hg58"
-
     add_cache(job, taskdesc, cache_name, checkoutdir)
 
     env = taskdesc["worker"].setdefault("env", {})
diff --git a/src/taskgraph/transforms/task.py b/src/taskgraph/transforms/task.py
index c55de7851..f7916ad85 100644
--- a/src/taskgraph/transforms/task.py
+++ b/src/taskgraph/transforms/task.py
@@ -487,19 +487,19 @@ def build_docker_worker_payload(config, task, task_def):
 
         # run-task knows how to validate caches.
         #
-        # To help ensure new run-task features and bug fixes don't interfere
-        # with existing caches, we seed the hash of run-task into cache names.
-        # So, any time run-task changes, we should get a fresh set of caches.
-        # This means run-task can make changes to cache interaction at any time
-        # without regards for backwards or future compatibility.
+        # To help ensure new run-task features and bug fixes, as well as the
+        # versions of tools such as mercurial or git, don't interfere with
+        # existing caches, we seed the underlying docker-image task id into
+        # cache names, for tasks using in-tree Docker images.
         #
         # But this mechanism only works for in-tree Docker images that are built
         # with the current run-task! For out-of-tree Docker images, we have no
         # way of knowing their content of run-task. So, in addition to varying
         # cache names by the contents of run-task, we also take the Docker image
-        # name into consideration. This means that different Docker images will
-        # never share the same cache. This is a bit unfortunate. But it is the
-        # safest thing to do. Fortunately, most images are defined in-tree.
+        # name into consideration.
+        #
+        # This means that different Docker images will never share the same
+        # cache.  This is a bit unfortunate, but is the safest thing to do.
         #
         # For out-of-tree Docker images, we don't strictly need to incorporate
         # the run-task content into the cache name. However, doing so preserves
@@ -520,6 +520,8 @@ def build_docker_worker_payload(config, task, task_def):
                     out_of_tree_image.encode("utf-8")
                 ).hexdigest()
                 suffix += name_hash[0:12]
+            else:
+                suffix += "-<docker-image>"
 
         else:
             suffix = cache_version
@@ -539,13 +541,15 @@ def build_docker_worker_payload(config, task, task_def):
                 suffix=suffix,
             )
             caches[name] = cache["mount-point"]
-            task_def["scopes"].append("docker-worker:cache:%s" % name)
+            task_def["scopes"].append(
+                {"task-reference": "docker-worker:cache:%s" % name}
+            )
 
         # Assertion: only run-task is interested in this.
         if run_task:
             payload["env"]["TASKCLUSTER_CACHES"] = ";".join(sorted(caches.values()))
 
-        payload["cache"] = caches
+        payload["cache"] = {"task-reference": caches}
 
     # And send down volumes information to run-task as well.
     if run_task and worker.get("volumes"):
@@ -1343,7 +1347,9 @@ def check_run_task_caches(config, tasks):
         main_command = command[0] if isinstance(command[0], str) else ""
         run_task = main_command.endswith("run-task")
 
-        for cache in payload.get("cache", {}):
+        for cache in payload.get("cache", {}).get(
+            "task-reference", payload.get("cache", {})
+        ):
             if not cache.startswith(cache_prefix):
                 raise Exception(
                     "{} is using a cache ({}) which is not appropriate "
@@ -1364,7 +1370,7 @@ def check_run_task_caches(config, tasks):
                     "cache name"
                 )
 
-            if not cache.endswith(suffix):
+            if suffix not in cache:
                 raise Exception(
                     f"{task['label']} is using a cache ({cache}) reserved for run-task "
                     "but the cache name is not dependent on the contents "
diff --git a/test/test_transforms_job_run_task.py b/test/test_transforms_job_run_task.py
index 457353118..a7d28aab9 100644
--- a/test/test_transforms_job_run_task.py
+++ b/test/test_transforms_job_run_task.py
@@ -53,7 +53,7 @@ def assert_docker_worker(task):
             "caches": [
                 {
                     "mount-point": "/builds/worker/checkouts",
-                    "name": "checkouts-hg58",
+                    "name": "checkouts",
                     "skip-untrusted": False,
                     "type": "persistent",
                 }