From 13bf171f9c6ef6a3fcc167c6d9523cd6029a1556 Mon Sep 17 00:00:00 2001 From: Lubos Mjachky Date: Fri, 3 May 2024 14:45:51 +0200 Subject: [PATCH] Address pending comments and clean up the code [noissue] --- .../commands/container-handle-image-data.py | 61 +++++++++++-------- .../app/migrations/0039_manifest_data.py | 5 +- pulp_container/app/models.py | 2 +- pulp_container/app/registry.py | 17 ++++-- pulp_container/app/registry_api.py | 2 +- pulp_container/app/tasks/sync_stages.py | 5 +- pulp_container/app/utils.py | 2 +- 7 files changed, 56 insertions(+), 38 deletions(-) diff --git a/pulp_container/app/management/commands/container-handle-image-data.py b/pulp_container/app/management/commands/container-handle-image-data.py index 786374fce..a8635f1e7 100644 --- a/pulp_container/app/management/commands/container-handle-image-data.py +++ b/pulp_container/app/management/commands/container-handle-image-data.py @@ -1,16 +1,17 @@ -import json - from json.decoder import JSONDecodeError from gettext import gettext as _ from contextlib import suppress +from django.conf import settings from django.core.exceptions import ObjectDoesNotExist from django.core.management import BaseCommand from django.db.models import Q -from pulp_container.app.models import Manifest +from pulpcore.plugin.cache import SyncContentCache + +from pulp_container.app.models import ContainerDistribution, Manifest from pulp_container.app.utils import get_content_data @@ -21,14 +22,16 @@ class Command(BaseCommand): """ A management command to handle the initialization of empty DB fields for container images. - This command now initializes flags describing the image nature. + This command initializes flags describing the image nature and moves the manifest's artifact + data into the database. Manifests stored inside Pulp are of various natures. The nature of an image can be determined from JSON-formatted image manifest annotations or image configuration labels. These data are stored inside artifacts. This command reads data from the storage backend and populates the 'annotations', 'labels', - 'is_bootable', and 'is_flatpak' fields on the Manifest model. + 'is_bootable', 'is_flatpak', and 'data' fields on the Manifest model. Note that the Redis + cache will be flushed if there is any. """ help = _(__doc__) @@ -36,15 +39,18 @@ class Command(BaseCommand): def handle(self, *args, **options): manifests_updated_count = 0 - manifests = Manifest.objects.filter(Q(data="") | Q(annotations={}, labels={})) - manifests = manifests.exclude( + manifests_v1 = Manifest.objects.filter(data__isnull=True, media_type=MEDIA_TYPE.MANIFEST_V1) + manifests_updated_count += self.update_manifests(manifests_v1) + + manifests_v2 = Manifest.objects.filter(Q(data__isnull=True) | Q(annotations={}, labels={})) + manifests_v2 = manifests_v2.exclude( media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI, MEDIA_TYPE.MANIFEST_V1] ) - manifests_updated_count += self.update_manifests(manifests) + manifests_updated_count += self.update_manifests(manifests_v2) manifest_lists = Manifest.objects.filter( Q(media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI]), - Q(data="") | Q(annotations={}), + Q(data__isnull=True) | Q(annotations={}), ) manifests_updated_count += self.update_manifests(manifest_lists) @@ -52,23 +58,12 @@ def handle(self, *args, **options): self.style.SUCCESS("Successfully updated %d manifests." % manifests_updated_count) ) - def init_manifest(self, manifest): - has_initialized_data = manifest.data != "" - if has_initialized_data: - manifest_data = json.loads(manifest.data) - else: - manifest_artifact = manifest._artifacts.get() - manifest_data, raw_bytes_data = get_content_data(manifest_artifact) - manifest.data = raw_bytes_data.decode("utf-8") - manifest._artifacts.clear() - - manifest.annotations = manifest_data.get("annotations", {}) + if settings.CACHE_ENABLED and manifests_updated_count != 0: + base_paths = ContainerDistribution.objects.values_list("base_path", flat=True) + if base_paths: + SyncContentCache().delete(base_key=base_paths) - has_annotations = bool(manifest.annotations) - has_labels = manifest.init_labels() - has_image_nature = manifest.init_image_nature() - - return has_annotations or has_labels or has_image_nature or (not has_initialized_data) + self.stdout.write(self.style.SUCCESS("Successfully flushed the cache.")) def update_manifests(self, manifests_qs): manifests_updated_count = 0 @@ -88,6 +83,7 @@ def update_manifests(self, manifests_qs): ) manifests_updated_count += len(manifests_to_update) manifests_to_update.clear() + if manifests_to_update: fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"] manifests_qs.model.objects.bulk_update( @@ -97,3 +93,18 @@ def update_manifests(self, manifests_qs): manifests_updated_count += len(manifests_to_update) return manifests_updated_count + + def init_manifest(self, manifest): + if not manifest.data: + manifest_artifact = manifest._artifacts.get() + manifest_data, raw_bytes_data = get_content_data(manifest_artifact) + manifest.data = raw_bytes_data.decode("utf-8") + + if not (manifest.annotations or manifest.labels): + manifest.init_metadata(manifest_data) + + manifest._artifacts.clear() + + return True + + return False diff --git a/pulp_container/app/migrations/0039_manifest_data.py b/pulp_container/app/migrations/0039_manifest_data.py index c58f61a39..c223c3bdc 100644 --- a/pulp_container/app/migrations/0039_manifest_data.py +++ b/pulp_container/app/migrations/0039_manifest_data.py @@ -6,7 +6,8 @@ def print_warning_for_initializing_manifest_data(apps, schema_editor): warnings.warn( - "Run 'pulpcore-manager container-handle-image-data' to move the manifests' data from artifacts to the new 'data' database field." + "Run 'pulpcore-manager container-handle-image-data' to move the manifests' " + "data from artifacts to the new 'data' database field." ) @@ -20,7 +21,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name="manifest", name="data", - field=models.TextField(default=""), + field=models.TextField(null=True), ), migrations.RunPython( print_warning_for_initializing_manifest_data, diff --git a/pulp_container/app/models.py b/pulp_container/app/models.py index 3ef866368..8e7695c65 100644 --- a/pulp_container/app/models.py +++ b/pulp_container/app/models.py @@ -99,7 +99,7 @@ class Manifest(Content): digest = models.TextField(db_index=True) schema_version = models.IntegerField() media_type = models.TextField(choices=MANIFEST_CHOICES) - data = models.TextField(default="") + data = models.TextField(null=True) annotations = models.JSONField(default=dict) labels = models.JSONField(default=dict) diff --git a/pulp_container/app/registry.py b/pulp_container/app/registry.py index 71aa64026..b35c3ba57 100644 --- a/pulp_container/app/registry.py +++ b/pulp_container/app/registry.py @@ -276,15 +276,22 @@ async def get_by_digest(self, request): try: if content_type == "manifests": - manifest = await Manifest.objects.aget(digest=digest) + manifest = await Manifest.objects.prefetch_related("contentartifact_set").aget( + digest=digest + ) headers = { "Content-Type": manifest.media_type, "Docker-Content-Digest": manifest.digest, } # TODO: BACKWARD COMPATIBILITY - remove after migrating to artifactless manifest if not manifest.data: - saved_artifact = await manifest._artifacts.aget() - return await Registry._dispatch(saved_artifact, headers) + if saved_artifact := await manifest._artifacts.afirst(): + return await Registry._dispatch(saved_artifact, headers) + else: + ca = await sync_to_async(lambda x: x[0])(manifest.contentartifact_set.all()) + return await self._stream_content_artifact( + request, web.StreamResponse(), ca + ) # END OF BACKWARD COMPATIBILITY return web.Response(text=manifest.data, headers=headers) elif content_type == "blobs": @@ -393,7 +400,7 @@ async def download_manifest(self, run_pipeline=False): if media_type not in (MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI): # add the manifest and blobs to the repository to be able to stream it # in the next round when a client approaches the registry - await self.init_pending_content(digest, manifest_data, raw_text_data, media_type) + await self.init_pending_content(digest, manifest_data, media_type, raw_text_data) return raw_text_data, digest, media_type @@ -431,7 +438,7 @@ async def run_pipeline(self, raw_text_manifest_data): }, ) - async def init_pending_content(self, digest, manifest_data, raw_text_data, media_type): + async def init_pending_content(self, digest, manifest_data, media_type, raw_text_data): if config := manifest_data.get("config", None): config_digest = config["digest"] config_blob = await self.save_config_blob(config_digest) diff --git a/pulp_container/app/registry_api.py b/pulp_container/app/registry_api.py index e4533a14b..73d157377 100644 --- a/pulp_container/app/registry_api.py +++ b/pulp_container/app/registry_api.py @@ -1133,7 +1133,7 @@ def put(self, request, path, pk=None): artifact = self.receive_artifact(chunk) manifest_digest = "sha256:{id}".format(id=artifact.sha256) - with storage.open(artifact.file.name) as artifact_file: + with storage.open(artifact.file.name, mode="rb") as artifact_file: raw_bytes_data = artifact_file.read() raw_text_data = raw_bytes_data.decode("utf-8") diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 882aa8624..05530bc87 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -86,7 +86,7 @@ async def _check_for_existing_manifest(self, download_tag): content_data = json.loads(raw_text_data) # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest - elif saved_artifact := await manifest._artifacts.aget(): + elif saved_artifact := await manifest._artifacts.afirst(): content_data, raw_bytes_data = await sync_to_async(get_content_data)(saved_artifact) raw_text_data = raw_bytes_data.decode("utf-8") # if artifact is not available (due to reclaim space) we will download it again @@ -478,7 +478,7 @@ async def create_listed_manifest(self, manifest_data): if manifest.data: content_data = json.loads(manifest.data) # TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest - elif saved_artifact := await manifest._artifacts.aget(): + elif saved_artifact := await manifest._artifacts.afirst(): content_data, _ = await sync_to_async(get_content_data)(saved_artifact) # if artifact is not available (due to reclaim space) we will download it again else: @@ -486,7 +486,6 @@ async def create_listed_manifest(self, manifest_data): manifest_url, digest ) # END OF BACKWARD COMPATIBILITY - else: content_data, manifest = await self._download_and_instantiate_manifest( manifest_url, digest diff --git a/pulp_container/app/utils.py b/pulp_container/app/utils.py index 91116ad1b..4548d0930 100644 --- a/pulp_container/app/utils.py +++ b/pulp_container/app/utils.py @@ -308,7 +308,7 @@ async def save_artifact(artifact_attributes): def get_content_data(saved_artifact): - with storage.open(saved_artifact.file.name) as file: + with storage.open(saved_artifact.file.name, mode="rb") as file: raw_data = file.read() content_data = json.loads(raw_data) return content_data, raw_data