Skip to content

Commit

Permalink
Address pending comments and clean up the code
Browse files Browse the repository at this point in the history
[noissue]
  • Loading branch information
lubosmj committed May 6, 2024
1 parent 28a7d74 commit 13bf171
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 38 deletions.
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import json

from json.decoder import JSONDecodeError

from gettext import gettext as _

from contextlib import suppress

from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.core.management import BaseCommand
from django.db.models import Q

from pulp_container.app.models import Manifest
from pulpcore.plugin.cache import SyncContentCache

from pulp_container.app.models import ContainerDistribution, Manifest

from pulp_container.app.utils import get_content_data

Expand All @@ -21,54 +22,48 @@ class Command(BaseCommand):
"""
A management command to handle the initialization of empty DB fields for container images.
This command now initializes flags describing the image nature.
This command initializes flags describing the image nature and moves the manifest's artifact
data into the database.
Manifests stored inside Pulp are of various natures. The nature of an image can be determined
from JSON-formatted image manifest annotations or image configuration labels. These data are
stored inside artifacts.
This command reads data from the storage backend and populates the 'annotations', 'labels',
'is_bootable', and 'is_flatpak' fields on the Manifest model.
'is_bootable', 'is_flatpak', and 'data' fields on the Manifest model. Note that the Redis
cache will be flushed if there is any.
"""

help = _(__doc__)

def handle(self, *args, **options):
manifests_updated_count = 0

manifests = Manifest.objects.filter(Q(data="") | Q(annotations={}, labels={}))
manifests = manifests.exclude(
manifests_v1 = Manifest.objects.filter(data__isnull=True, media_type=MEDIA_TYPE.MANIFEST_V1)
manifests_updated_count += self.update_manifests(manifests_v1)

manifests_v2 = Manifest.objects.filter(Q(data__isnull=True) | Q(annotations={}, labels={}))
manifests_v2 = manifests_v2.exclude(
media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI, MEDIA_TYPE.MANIFEST_V1]
)
manifests_updated_count += self.update_manifests(manifests)
manifests_updated_count += self.update_manifests(manifests_v2)

manifest_lists = Manifest.objects.filter(
Q(media_type__in=[MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI]),
Q(data="") | Q(annotations={}),
Q(data__isnull=True) | Q(annotations={}),
)
manifests_updated_count += self.update_manifests(manifest_lists)

self.stdout.write(
self.style.SUCCESS("Successfully updated %d manifests." % manifests_updated_count)
)

def init_manifest(self, manifest):
has_initialized_data = manifest.data != ""
if has_initialized_data:
manifest_data = json.loads(manifest.data)
else:
manifest_artifact = manifest._artifacts.get()
manifest_data, raw_bytes_data = get_content_data(manifest_artifact)
manifest.data = raw_bytes_data.decode("utf-8")
manifest._artifacts.clear()

manifest.annotations = manifest_data.get("annotations", {})
if settings.CACHE_ENABLED and manifests_updated_count != 0:
base_paths = ContainerDistribution.objects.values_list("base_path", flat=True)
if base_paths:
SyncContentCache().delete(base_key=base_paths)

has_annotations = bool(manifest.annotations)
has_labels = manifest.init_labels()
has_image_nature = manifest.init_image_nature()

return has_annotations or has_labels or has_image_nature or (not has_initialized_data)
self.stdout.write(self.style.SUCCESS("Successfully flushed the cache."))

def update_manifests(self, manifests_qs):
manifests_updated_count = 0
Expand All @@ -88,6 +83,7 @@ def update_manifests(self, manifests_qs):
)
manifests_updated_count += len(manifests_to_update)
manifests_to_update.clear()

if manifests_to_update:
fields_to_update = ["annotations", "labels", "is_bootable", "is_flatpak", "data"]
manifests_qs.model.objects.bulk_update(
Expand All @@ -97,3 +93,18 @@ def update_manifests(self, manifests_qs):
manifests_updated_count += len(manifests_to_update)

return manifests_updated_count

def init_manifest(self, manifest):
if not manifest.data:
manifest_artifact = manifest._artifacts.get()
manifest_data, raw_bytes_data = get_content_data(manifest_artifact)
manifest.data = raw_bytes_data.decode("utf-8")

if not (manifest.annotations or manifest.labels):
manifest.init_metadata(manifest_data)

manifest._artifacts.clear()

return True

return False
5 changes: 3 additions & 2 deletions pulp_container/app/migrations/0039_manifest_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

def print_warning_for_initializing_manifest_data(apps, schema_editor):
warnings.warn(
"Run 'pulpcore-manager container-handle-image-data' to move the manifests' data from artifacts to the new 'data' database field."
"Run 'pulpcore-manager container-handle-image-data' to move the manifests' "
"data from artifacts to the new 'data' database field."
)


Expand All @@ -20,7 +21,7 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name="manifest",
name="data",
field=models.TextField(default=""),
field=models.TextField(null=True),
),
migrations.RunPython(
print_warning_for_initializing_manifest_data,
Expand Down
2 changes: 1 addition & 1 deletion pulp_container/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class Manifest(Content):
digest = models.TextField(db_index=True)
schema_version = models.IntegerField()
media_type = models.TextField(choices=MANIFEST_CHOICES)
data = models.TextField(default="")
data = models.TextField(null=True)

annotations = models.JSONField(default=dict)
labels = models.JSONField(default=dict)
Expand Down
17 changes: 12 additions & 5 deletions pulp_container/app/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,15 +276,22 @@ async def get_by_digest(self, request):

try:
if content_type == "manifests":
manifest = await Manifest.objects.aget(digest=digest)
manifest = await Manifest.objects.prefetch_related("contentartifact_set").aget(
digest=digest
)
headers = {
"Content-Type": manifest.media_type,
"Docker-Content-Digest": manifest.digest,
}
# TODO: BACKWARD COMPATIBILITY - remove after migrating to artifactless manifest
if not manifest.data:
saved_artifact = await manifest._artifacts.aget()
return await Registry._dispatch(saved_artifact, headers)
if saved_artifact := await manifest._artifacts.afirst():
return await Registry._dispatch(saved_artifact, headers)
else:
ca = await sync_to_async(lambda x: x[0])(manifest.contentartifact_set.all())
return await self._stream_content_artifact(
request, web.StreamResponse(), ca
)
# END OF BACKWARD COMPATIBILITY
return web.Response(text=manifest.data, headers=headers)
elif content_type == "blobs":
Expand Down Expand Up @@ -393,7 +400,7 @@ async def download_manifest(self, run_pipeline=False):
if media_type not in (MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI):
# add the manifest and blobs to the repository to be able to stream it
# in the next round when a client approaches the registry
await self.init_pending_content(digest, manifest_data, raw_text_data, media_type)
await self.init_pending_content(digest, manifest_data, media_type, raw_text_data)

return raw_text_data, digest, media_type

Expand Down Expand Up @@ -431,7 +438,7 @@ async def run_pipeline(self, raw_text_manifest_data):
},
)

async def init_pending_content(self, digest, manifest_data, raw_text_data, media_type):
async def init_pending_content(self, digest, manifest_data, media_type, raw_text_data):
if config := manifest_data.get("config", None):
config_digest = config["digest"]
config_blob = await self.save_config_blob(config_digest)
Expand Down
2 changes: 1 addition & 1 deletion pulp_container/app/registry_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1133,7 +1133,7 @@ def put(self, request, path, pk=None):
artifact = self.receive_artifact(chunk)
manifest_digest = "sha256:{id}".format(id=artifact.sha256)

with storage.open(artifact.file.name) as artifact_file:
with storage.open(artifact.file.name, mode="rb") as artifact_file:
raw_bytes_data = artifact_file.read()

raw_text_data = raw_bytes_data.decode("utf-8")
Expand Down
5 changes: 2 additions & 3 deletions pulp_container/app/tasks/sync_stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ async def _check_for_existing_manifest(self, download_tag):
content_data = json.loads(raw_text_data)

# TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest
elif saved_artifact := await manifest._artifacts.aget():
elif saved_artifact := await manifest._artifacts.afirst():
content_data, raw_bytes_data = await sync_to_async(get_content_data)(saved_artifact)
raw_text_data = raw_bytes_data.decode("utf-8")
# if artifact is not available (due to reclaim space) we will download it again
Expand Down Expand Up @@ -478,15 +478,14 @@ async def create_listed_manifest(self, manifest_data):
if manifest.data:
content_data = json.loads(manifest.data)
# TODO: BACKWARD COMPATIBILITY - remove after fully migrating to artifactless manifest
elif saved_artifact := await manifest._artifacts.aget():
elif saved_artifact := await manifest._artifacts.afirst():
content_data, _ = await sync_to_async(get_content_data)(saved_artifact)
# if artifact is not available (due to reclaim space) we will download it again
else:
content_data, manifest = await self._download_and_instantiate_manifest(
manifest_url, digest
)
# END OF BACKWARD COMPATIBILITY

else:
content_data, manifest = await self._download_and_instantiate_manifest(
manifest_url, digest
Expand Down
2 changes: 1 addition & 1 deletion pulp_container/app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ async def save_artifact(artifact_attributes):


def get_content_data(saved_artifact):
with storage.open(saved_artifact.file.name) as file:
with storage.open(saved_artifact.file.name, mode="rb") as file:
raw_data = file.read()
content_data = json.loads(raw_data)
return content_data, raw_data

0 comments on commit 13bf171

Please sign in to comment.