Skip to content

Commit

Permalink
Add support for the platform attribute of gems
Browse files Browse the repository at this point in the history
This also adds a management command to repair all the existing gemspec
artifacts and properly set the platform on existing content units.

fixes pulp#130
  • Loading branch information
mdellweg committed Jul 31, 2023
1 parent e006a49 commit 4b3264d
Show file tree
Hide file tree
Showing 10 changed files with 221 additions and 75 deletions.
1 change: 1 addition & 0 deletions CHANGES/130.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added a datarepair-gemspec-platform command to regenerate the gemspec artifacts and properly set the platform attribute on existing gems.
1 change: 1 addition & 0 deletions CHANGES/130.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added support for gems with a platform that is not "ruby".
64 changes: 64 additions & 0 deletions pulp_gem/app/management/commands/datarepair-gemspec-platform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from gettext import gettext as _

from django.core.management import BaseCommand
from django.db import transaction

from pulp_gem.app.models import GemContent
from pulp_gem.app.serializers import _artifact_from_data
from pulp_gem.specs import analyse_gem


class Command(BaseCommand):
    """
    Django management command to repair gems created prior to 0.2.0.

    Every gem whose platform is still the placeholder "UNKNOWN" is analysed again
    from its gem artifact. The platform found there is stored on the content unit,
    the relative paths of both content artifacts are recomputed from the updated
    gem, and the gemspec artifact is regenerated.
    """

    help = "This script repairs gem metadata created before 0.2.0 if artifacts are available."

    def add_arguments(self, parser):
        """Set up arguments."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help=_("Don't modify anything, just collect results."),
        )

    def handle(self, *args, **options):
        """
        Repair all gems with an "UNKNOWN" platform and report the outcome.

        A failure on one gem is reported and counted, but does not stop the
        processing of the remaining gems.
        """
        dry_run = options["dry_run"]
        failed_gems = 0
        repaired_gems = 0

        gem_qs = GemContent.objects.filter(platform="UNKNOWN")
        count = gem_qs.count()
        self.stdout.write(f"Gems to repair: {count}")
        if count == 0:
            return

        for gem in gem_qs.iterator():
            try:
                self._repair_gem(gem, dry_run)
            except Exception as e:
                # Deliberately broad: this is the per-gem boundary of a best-effort
                # repair; the reason is reported and the loop moves on.
                failed_gems += 1
                self.stdout.write(f"Failed to migrate gem '{gem.name}' '{gem.ext_version}': {e}")
            else:
                repaired_gems += 1

        self.stdout.write(f"Successfully repaired gems: {repaired_gems}")
        self.stdout.write(f"Gems failed to repair: {failed_gems}")

    def _repair_gem(self, gem, dry_run):
        """
        Recompute platform, relative paths and gemspec artifact for a single gem.

        Args:
            gem: The GemContent instance to repair.
            dry_run (bool): If true, run all lookups and checks but persist nothing.

        Raises:
            ValueError: If the gem artifact is missing or does not match the gem.
        """
        # Both lookups use the paths derived from the not yet repaired gem.
        content_artifact = gem.contentartifact_set.get(relative_path=gem.relative_path)
        artifact = content_artifact.artifact
        if artifact is None:
            raise ValueError("The gem artifact is not available.")
        spec_content_artifact = gem.contentartifact_set.get(relative_path=gem.gemspec_path)
        gem_info, gemspec_data = analyse_gem(artifact.file)

        # Explicit checks instead of assert statements: asserts are removed when
        # Python runs with -O and carry no message for the failure report.
        found_name = gem_info["name"]
        if found_name != gem.name:
            raise ValueError(f"The artifact contains a gem named '{found_name}'.")
        found_version = gem_info["version"]
        if found_version != gem.version:
            raise ValueError(f"The artifact contains a gem of version '{found_version}'.")

        # The paths are properties of the gem; read them again after the platform
        # has been set to get the new values.
        gem.platform = gem_info["platform"]
        content_artifact.relative_path = gem.relative_path
        spec_content_artifact.relative_path = gem.gemspec_path

        if dry_run:
            return

        # NOTE(review): _artifact_from_data presumably stores a new artifact - confirm.
        # It is only called when changes are persisted, so that --dry-run stays
        # read-only, and (as before) outside of the transaction.
        spec_content_artifact.artifact = _artifact_from_data(gemspec_data)
        with transaction.atomic():
            gem.save(update_fields=["platform"])
            content_artifact.save(update_fields=["relative_path"])
            spec_content_artifact.save(update_fields=["relative_path", "artifact"])
19 changes: 19 additions & 0 deletions pulp_gem/app/migrations/0008_gemcontent_platform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.1 on 2023-07-27 19:59

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the 'platform' text column to the 'gemcontent' model."""

    # Must run after the previous migration of the 'gem' app.
    dependencies = [("gem", "0007_DATA_fix_prerelease")]

    operations = [
        # The default is only used to fill the new column on rows that already
        # exist; preserve_default=False keeps it out of the resulting field state.
        migrations.AddField(
            model_name="gemcontent",
            name="platform",
            field=models.TextField(default="UNKNOWN"),
            preserve_default=False,
        ),
    ]
25 changes: 17 additions & 8 deletions pulp_gem/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,14 @@ class GemContent(Content):
Content of this type represents a ruby gem file
with its spec data.
Fields:
name (str): The name of the gem.
version (str): The version of the gem.
"""

TYPE = "gem"
repo_key_fields = ("name", "version")
repo_key_fields = ("name", "version", "platform")

name = models.TextField(blank=False, null=False)
version = models.TextField(blank=False, null=False)
platform = models.TextField(blank=False, null=False)
checksum = models.CharField(max_length=64, null=False, db_index=True)
prerelease = models.BooleanField(default=False)
dependencies = HStoreField(default=dict)
Expand All @@ -79,12 +75,22 @@ class GemContent(Content):
@property
def relative_path(self):
"""The relative path this gem is stored under for the content app."""
return f"gems/{self.name}-{self.version}.gem"
return f"gems/{self.name}-{self.ext_version}.gem"

@property
def gemspec_path(self):
"""The path for this gem's gemspec for the content app."""
return f"quick/Marshal.4.8/{self.name}-{self.version}.gemspec.rz"
return f"quick/Marshal.4.8/{self.name}-{self.ext_version}.gemspec.rz"

@property
def ext_version(self):
    """
    The version of this gem, extended by "-{platform}" unless the platform is "ruby".
    """
    # "UNKNOWN" is treated like "ruby" as a temporary workaround; remove it
    # together with the datarepair-gemspec-platform command.
    if self.platform in ("ruby", "UNKNOWN"):
        return f"{self.version}"
    return f"{self.version}-{self.platform}"

@staticmethod
def init_from_artifact_and_relative_path(artifact, relative_path):
Expand All @@ -103,6 +109,9 @@ def init_from_artifact_and_relative_path(artifact, relative_path):
artifacts = {relative_path: artifact, spec_relative_path: None}
return content, artifacts

def __str__(self):
    """Return a short label built from the gem name and its extended version."""
    label = f"<GemContent {self.name}-{self.ext_version}>"
    return label

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
unique_together = ("checksum",)
Expand Down
2 changes: 2 additions & 0 deletions pulp_gem/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class GemContentSerializer(MultipleArtifactContentSerializer):
checksum = CharField(help_text=_("SHA256 checksum of the gem"), read_only=True)
name = CharField(help_text=_("Name of the gem"), read_only=True)
version = CharField(help_text=_("Version of the gem"), read_only=True)
platform = CharField(help_text=_("Platform of the gem"), read_only=True)
prerelease = BooleanField(help_text=_("Whether the gem is a prerelease"), read_only=True)
dependencies = HStoreField(read_only=True)
required_ruby_version = CharField(
Expand Down Expand Up @@ -163,6 +164,7 @@ class Meta:
"checksum",
"name",
"version",
"platform",
"prerelease",
"dependencies",
"required_ruby_version",
Expand Down
25 changes: 14 additions & 11 deletions pulp_gem/app/tasks/publishing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import shutil

from gettext import gettext as _
from packaging import version

from django.conf import settings
from django.core.files import File
Expand All @@ -22,7 +21,7 @@
)

from pulp_gem.app.models import GemContent, GemPublication
from pulp_gem.specs import write_specs, Key
from pulp_gem.specs import ruby_ver_cmp, write_specs, Key


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -122,20 +121,22 @@ def publish(repository_version_pk):
gemspecs = []
for content in (
GemContent.objects.filter(pk__in=publication.repository_version.content)
.only("name", "version")
.only("name", "version", "platform")
.order_by("-pulp_created")
.iterator()
):
if content.prerelease:
prerelease_specs.append(Key(content.name, content.version))
prerelease_specs.append(Key(content.name, content.version, content.platform))
else:
specs.append(Key(content.name, content.version))
old_ver = latest_versions.get(content.name)
if old_ver is None or version.parse(old_ver) < version.parse(content.version):
latest_versions[content.name] = content.version
specs.append(Key(content.name, content.version, content.platform))
old_ver = latest_versions.get((content.name, content.platform))
if old_ver is None or ruby_ver_cmp(old_ver, content.version) < 0:
latest_versions[(content.name, content.platform)] = content.version
gems.append(content.relative_path)
gemspecs.append(content.gemspec_path)
latest_specs = [Key(name, ver) for name, ver in latest_versions.items()]
latest_specs = [
Key(name, ver, platform) for (name, platform), ver in latest_versions.items()
]

_publish_specs(specs, "specs.4.8", publication)
_publish_specs(latest_specs, "latest_specs.4.8", publication)
Expand All @@ -150,16 +151,18 @@ def publish(repository_version_pk):
os.mkdir("info")
for name in names_qs:
lines = []
version_list = []
for gem in gems_qs.filter(name=name):
deps = ",".join((f"{key}:{value}" for key, value in gem.dependencies.items()))
line = f"{gem.version} {deps}|checksum:{gem.checksum}"
line = f"{gem.ext_version} {deps}|checksum:{gem.checksum}"
if gem.required_ruby_version:
line += f",ruby:{gem.required_ruby_version}"
if gem.required_rubygems_version:
line += f",rubygems:{gem.required_rubygems_version}"
lines.append(line)
version_list.append(gem.ext_version)
info_metadata = _publish_compact_index(lines, f"info/{name}", publication)
versions = ",".join(gems_qs.filter(name=name).values_list("version", flat=True))
versions = ",".join(version_list)
if "md5" in settings.ALLOWED_CONTENT_CHECKSUMS:
md5_sum = info_metadata._artifacts.first().md5
else:
Expand Down
105 changes: 69 additions & 36 deletions pulp_gem/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging

from aiohttp import ClientConnectionError
from gettext import gettext as _
from urllib.parse import urljoin

Expand All @@ -16,11 +17,11 @@
from pulp_gem.app.models import GemContent, GemRemote
from pulp_gem.specs import (
NAME_REGEX,
VERSION_REGEX,
PRERELEASE_VERSION_REGEX,
read_versions,
read_info,
ruby_ver_includes,
split_ext_version,
)


Expand Down Expand Up @@ -72,54 +73,86 @@ async def run(self):
"""
# Interpret policy to download Artifacts or not
deferred_download = self.remote.policy != Remote.IMMEDIATE
remote_url = self.remote.url

# Read filters from remote
includes = self.remote.includes
excludes = self.remote.excludes
prereleases = self.remote.prereleases

async with ProgressReport(
message="Downloading versions list", total=1
) as pr_download_versions:
versions_url = urljoin(self.remote.url, "versions")
versions_url = urljoin(remote_url, "versions")
versions_downloader = self.remote.get_downloader(url=versions_url)
versions_result = await versions_downloader.run()
try:
versions_result = await versions_downloader.run()
except ClientConnectionError as e:
raise Exception(f"Could not connect to host {e.host}")
await pr_download_versions.aincrement()

async with ProgressReport(message="Parsing versions list") as pr_parse_versions:
async with ProgressReport(message="Parsing versions info") as pr_parse_info:
async for name, versions, md5_sum in read_versions(versions_result.path):
async for name, ext_versions, md5_sum in read_versions(versions_result.path):
await pr_parse_versions.aincrement()

# Skip conditions based on the gem name
# =====================================
if not NAME_REGEX.fullmatch(name):
log.warn(f"Skipping invalid gem name: '{name}'.")
continue
if not self.remote.prereleases:
versions = [
version for version in versions if VERSION_REGEX.fullmatch(version)
]
if includes is not None:
if name not in includes:
continue
include_versions = includes[name]
else:
versions = [
version
for version in versions
if PRERELEASE_VERSION_REGEX.fullmatch(version)
]
if self.remote.includes:
if name not in self.remote.includes:
include_versions = None
if excludes is not None and name in excludes:
exclude_versions = excludes[name]
if exclude_versions is None:
continue
version_requirements = self.remote.includes[name]
if version_requirements is not None:
versions = [
version
for version in versions
if ruby_ver_includes(version_requirements, version)
]
if self.remote.excludes:
if name in self.remote.excludes:
version_requirements = self.remote.excludes[name]
if version_requirements is None:
continue
versions = [
version
for version in versions
if not ruby_ver_includes(version_requirements, version)
]
if not versions:
else:
exclude_versions = None

# Skip conditions based on the gem version
# ========================================

# The list 'ext_versions' contains "{version}[-{platform}]" entries!
# This dict maps each ext_version to a payload dict with the keys
# {version, platform, prerelease}.
versions_info = {
ext_version: split_ext_version(ext_version) for ext_version in ext_versions
}
if not prereleases:
# Prerelease versions are already sanitized.
versions_info = {
k: v for k, v in versions_info.items() if not v["prerelease"]
}
else:
# Sanitize versions.
# TODO Logging?
versions_info = {
k: v
for k, v in versions_info.items()
if PRERELEASE_VERSION_REGEX.fullmatch(v["version"])
}
if include_versions is not None:
versions_info = {
k: v
for k, v in versions_info.items()
if ruby_ver_includes(include_versions, v["version"])
}
if exclude_versions is not None:
versions_info = {
k: v
for k, v in versions_info.items()
if not ruby_ver_includes(exclude_versions, v["version"])
}

if not versions_info:
# No version left for this name; skip even reading the info file
continue

info_url = urljoin(urljoin(self.remote.url, "info/"), name)
if "md5" in settings.ALLOWED_CONTENT_CHECKSUMS:
extra_kwargs = {"expected_digests": {"md5": md5_sum}}
Expand All @@ -128,13 +161,13 @@ async def run(self):
log.warn(f"Checksum of info file for '{name}' could not be validated.")
info_downloader = self.remote.get_downloader(url=info_url, **extra_kwargs)
info_result = await info_downloader.run()
async for gem_info in read_info(info_result.path, versions):
async for gem_info in read_info(info_result.path, versions_info):
gem_info["name"] = name
gem = GemContent(**gem_info)
gem_path = gem.relative_path
gem_url = urljoin(self.remote.url, gem_path)
gem_url = urljoin(remote_url, gem_path)
gemspec_path = gem.gemspec_path
gemspec_url = urljoin(self.remote.url, gemspec_path)
gemspec_url = urljoin(remote_url, gemspec_path)

da_gem = DeclarativeArtifact(
artifact=Artifact(sha256=gem_info["checksum"]),
Expand Down
Loading

0 comments on commit 4b3264d

Please sign in to comment.