From 63db367083b2522aff889cdae614c652c6d51e53 Mon Sep 17 00:00:00 2001 From: Tobias Grigo Date: Wed, 4 Sep 2024 23:43:42 +0200 Subject: [PATCH] Prefetch package artifacts and remote artifacts closes #1148 --- CHANGES/1148.feature | 1 + .../app/serializers/content_serializers.py | 14 +++--- pulp_deb/app/tasks/publishing.py | 47 +++++++++++++++---- pulp_deb/tests/unit/test_models.py | 8 +++- 4 files changed, 51 insertions(+), 19 deletions(-) create mode 100644 CHANGES/1148.feature diff --git a/CHANGES/1148.feature b/CHANGES/1148.feature new file mode 100644 index 000000000..e0461506a --- /dev/null +++ b/CHANGES/1148.feature @@ -0,0 +1 @@ +Improved performance by prefetching relevant Artifacts and RemoteArtifacts during publishing, reducing the number of database calls. diff --git a/pulp_deb/app/serializers/content_serializers.py b/pulp_deb/app/serializers/content_serializers.py index d7d15e2e2..96d5ba226 100644 --- a/pulp_deb/app/serializers/content_serializers.py +++ b/pulp_deb/app/serializers/content_serializers.py @@ -13,7 +13,7 @@ ValidationError, Serializer, ) -from pulpcore.plugin.models import Artifact, Content, CreatedResource, RemoteArtifact +from pulpcore.plugin.models import Artifact, Content, CreatedResource from pulpcore.plugin.serializers import ( ContentChecksumSerializer, MultipleArtifactContentSerializer, @@ -466,7 +466,7 @@ def from822(cls, data, **kwargs): package_fields["custom_fields"] = custom_fields return cls(data=package_fields, **kwargs) - def to822(self, component=""): + def to822(self, component="", artifact_dict=None, remote_artifact_dict=None): """Create deb822.Package object from model.""" ret = deb822.Packages() @@ -479,11 +479,11 @@ def to822(self, component=""): if custom_fields: ret.update(custom_fields) - try: - artifact = self.instance._artifacts.get() - artifact.touch() # Orphan cleanup protection until we are done! - except Artifact.DoesNotExist: - artifact = RemoteArtifact.objects.filter(sha256=self.instance.sha256).first() + artifact = None + if artifact_dict and self.instance.sha256 in artifact_dict: + artifact = artifact_dict[self.instance.sha256] + elif remote_artifact_dict and self.instance.sha256 in remote_artifact_dict: + artifact = remote_artifact_dict[self.instance.sha256] if artifact: ret["MD5sum"] = artifact.md5 if artifact.md5 else None diff --git a/pulp_deb/app/tasks/publishing.py b/pulp_deb/app/tasks/publishing.py index 82f9a68dc..cc4ac93e2 100644 --- a/pulp_deb/app/tasks/publishing.py +++ b/pulp_deb/app/tasks/publishing.py @@ -16,8 +16,10 @@ from django.forms.models import model_to_dict from pulpcore.plugin.models import ( + Artifact, PublishedArtifact, PublishedMetadata, + RemoteArtifact, RepositoryVersion, ) @@ -147,8 +149,11 @@ def publish( packages = Package.objects.filter( pk__in=repo_version.content.order_by("-pulp_created") + ).prefetch_related("contentartifact_set", "_artifacts") + artifact_dict, remote_artifact_dict = _batch_fetch_artifacts(packages) + release_helper.components[component].add_packages( + packages, artifact_dict, remote_artifact_dict ) - release_helper.components[component].add_packages(packages) source_packages = SourcePackage.objects.filter( pk__in=repo_version.content.order_by("-pulp_created"), @@ -248,12 +253,19 @@ def publish( ) for component in components: - packages = [ - prc.package - for prc in package_release_components - if prc.release_component.component == component - ] - release_helper.components[component].add_packages(packages) + packages = Package.objects.filter( + pk__in=[ + prc.package.pk + for prc in package_release_components + if prc.release_component.component == component + ] + ).prefetch_related("contentartifact_set", "_artifacts") + artifact_dict, remote_artifact_dict = _batch_fetch_artifacts(packages) + release_helper.components[component].add_packages( + packages, + artifact_dict, + remote_artifact_dict, + ) source_packages = [ drc.source_package @@ -311,13 +323,17 @@ def __init__(self, parent, component): source_index_path, ) - def add_packages(self, packages): + def add_packages(self, packages, artifact_dict, remote_artifact_dict): published_artifacts = [] package_data = [] + content_artifacts = { + package.pk: list(package.contentartifact_set.all()) for package in packages + } + for package in packages: with suppress(IntegrityError): - content_artifact = package.contentartifact_set.get() + content_artifact = content_artifacts.get(package.pk, [None])[0] relative_path = package.filename(self.component) published_artifact = PublishedArtifact( @@ -335,7 +351,7 @@ def add_packages(self, packages): for package, architecture in package_data: package_serializer = Package822Serializer(package, context={"request": None}) try: - package_serializer.to822(self.component).dump( + package_serializer.to822(self.component, artifact_dict, remote_artifact_dict).dump( self.package_index_files[architecture][0] ) except KeyError: @@ -557,3 +573,14 @@ def _fetch_file_checksum(file_path, index, checksum): checksum_type = CHECKSUM_TYPE_MAP[checksum] hashed_path = Path(file_path).parents[0] / "by-hash" / checksum_type / digest return hashed_path + + +def _batch_fetch_artifacts(packages): + sha256_values = [package.sha256 for package in packages if package.sha256] + artifacts = Artifact.objects.filter(sha256__in=sha256_values) + artifact_dict = {artifact.sha256: artifact for artifact in artifacts} + + remote_artifacts = RemoteArtifact.objects.filter(sha256__in=sha256_values) + remote_artifact_dict = {artifact.sha256: artifact for artifact in remote_artifacts} + + return artifact_dict, remote_artifact_dict diff --git a/pulp_deb/tests/unit/test_models.py b/pulp_deb/tests/unit/test_models.py index 75a6789fb..d786dd46f 100644 --- a/pulp_deb/tests/unit/test_models.py +++ b/pulp_deb/tests/unit/test_models.py @@ -62,8 +62,9 @@ def test_filename_with_component(self): def test_to822(self): """Test if package transforms correctly into 822dict.""" + artifact_dict = {self.package1.sha256: self.artifact1} package_dict = Package822Serializer(self.package1, context={"request": None}).to822( - "joetunn" + "joetunn", artifact_dict=artifact_dict ) self.assertEqual(package_dict["package"], self.package1.package) self.assertEqual(package_dict["version"], self.package1.version) @@ -77,7 +78,10 @@ def test_to822(self): def test_to822_dump(self): """Test dump to package index.""" + artifact_dict = {self.package1.sha256: self.artifact1} self.assertEqual( - Package822Serializer(self.package1, context={"request": None}).to822().dump(), + Package822Serializer(self.package1, context={"request": None}) + .to822(artifact_dict=artifact_dict) + .dump(), self.PACKAGE_PARAGRAPH, )