Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix namespace detection from distro IDs #3770

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,10 @@
debian.DebianInstalledMd5sumFilelistHandler,
debian.DebianInstalledStatusDatabaseHandler,

distro.EtcOsReleaseHandler,

rpm.RpmLicenseFilesHandler,
rpm.RpmMarinerContainerManifestHandler
rpm.RpmMarinerContainerManifestHandler,
]

if on_linux:
Expand Down
26 changes: 13 additions & 13 deletions src/packagedcode/alpine.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class AlpineApkArchiveHandler(models.DatafileHandler):
datasource_id = 'alpine_apk_archive'
path_patterns = ('*.apk',)
filetypes = ('gzip compressed data',)
default_package_type = 'alpine'
default_package_type = 'apk'
description = 'Alpine Linux .apk package archive'
documentation_url = 'https://wiki.alpinelinux.org/wiki/Alpine_package_format'

Expand All @@ -59,7 +59,7 @@ def get_license_detections_and_expression(package):
class AlpineInstalledDatabaseHandler(models.DatafileHandler):
datasource_id = 'alpine_installed_db'
path_patterns = ('*lib/apk/db/installed',)
default_package_type = 'alpine'
default_package_type = 'apk'
description = 'Alpine Linux installed package database'

@classmethod
Expand All @@ -73,19 +73,17 @@ def parse(cls, location, package_only=False):

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
# get the root resource of the rootfs
levels_up = len('lib/apk/db/installed'.split('/'))
root_resource = get_ancestor(
levels_up=levels_up,
resource=resource,
codebase=codebase,
)
root_resource = cls.get_root_resource_for_rootfs(resource, codebase)

package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
package_uid = package.package_uid
namespace = cls.get_distro_identifier_rootfs(root_resource, codebase)
if namespace:
package.namespace = namespace

package_uid = package.refresh_and_get_package_uid()

cls.populate_license_fields(package)

Expand Down Expand Up @@ -119,18 +117,20 @@ def assemble(cls, package_data, resource, codebase, package_adder):

dependent_packages = package_data.dependencies
if dependent_packages:
yield from models.Dependency.from_dependent_packages(
for dep in models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=resource.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
)
):
dep.update_namespace(namespace)
yield dep


class AlpineApkbuildHandler(models.DatafileHandler):
datasource_id = 'alpine_apkbuild'
path_patterns = ('*APKBUILD',)
default_package_type = 'alpine'
default_package_type = 'apk'
description = 'Alpine Linux APKBUILD package script'
documentation_url = 'https://wiki.alpinelinux.org/wiki/APKBUILD_Reference'

Expand Down
50 changes: 23 additions & 27 deletions src/packagedcode/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,37 +246,33 @@ def parse(cls, location, package_only=False):

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
# get the root resource of the rootfs
levels_up = len('var/lib/dpkg/status'.split('/'))
root_resource = get_ancestor(
levels_up=levels_up,
resource=resource,
codebase=codebase,
)
root_resource = cls.get_root_resource_for_rootfs(resource, codebase)

package_name = package_data.name

package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
namespace = cls.get_distro_identifier_rootfs(root_resource, codebase)
if namespace:
package.namespace = namespace

package_uid = package.refresh_and_get_package_uid()

package_file_references = []
package_file_references.extend(package_data.file_references)
package_uid = package.package_uid

dependencies = []
dependent_packages = package_data.dependencies
if dependent_packages:
deps = list(
models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=resource.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
)
)
dependencies.extend(deps)
for dep in models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=resource.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
):
dep.update_namespace(namespace)
dependencies.append(dep)

# Multi-Arch can be: "foreign", "same", "allowed", "all", "optional" or
# empty/non-present. See https://wiki.debian.org/Multiarch/HOWTO
Expand Down Expand Up @@ -341,15 +337,15 @@ def assemble(cls, package_data, resource, codebase, package_adder):
# yield possible dependencies
dependent_packages = package_data.dependencies
if dependent_packages:
deps = list(
models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=res.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
)
)
dependencies.extend(deps)
for dep in models.Dependency.from_dependent_packages(
dependent_packages=dependent_packages,
datafile_path=res.path,
datasource_id=package_data.datasource_id,
package_uid=package_uid,
):
if namespace and not dep.namespace:
dep.namespace = namespace
dependencies.append(dep)

resources.append(res)

Expand Down
30 changes: 20 additions & 10 deletions src/packagedcode/distro.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,34 @@ class EtcOsReleaseHandler(models.NonAssemblableDatafileHandler):
@classmethod
def parse(cls, location, package_only=False):
distro = Distro.from_os_release_file(location)
distro_identifier = distro.identifier
distro_identifier = None
if distro:
distro_identifier = distro.identifier

if not distro_identifier:
return

pretty_name = distro.pretty_name and distro.pretty_name.lower() or ''

# TODO: It is misleading to use package data fields
# name and namespace to store distro/os information,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

infomration -> information

# we should consider using extra_data fields instead.

if distro_identifier == 'debian':
namespace = 'debian'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure there is no namespace?

name = 'debian'

if 'distroless' in pretty_name:
name = 'distroless'
elif pretty_name.startswith('debian'):
name = 'distroless'
namespace = 'distroless'
else:
namespace = 'debian'

elif distro_identifier == 'ubuntu' and distro.id_like == 'debian':
namespace = 'debian'
name = 'ubuntu'
namespace = 'ubuntu'
name = 'debian'

elif distro_identifier.startswith('fedora') or distro.id_like == 'fedora':
namespace = distro_identifier
name = distro.id_like or distro_identifier
elif distro_identifier.startswith('fedora') or distro.id_like == 'fedora':
name = distro_identifier or distro.id_like
namespace = distro.id_like

else:
# FIXME: this needs to be seriously updated
Expand Down
59 changes: 59 additions & 0 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
licensing = None

from packagedcode.licensing import get_declared_license_expression_spdx
from packagedcode.utils import get_ancestor

"""
This module contain data models for package and dependencies, abstracting and
Expand Down Expand Up @@ -419,6 +420,24 @@ def __attrs_post_init__(self, *args, **kwargs):
if not self.dependency_uid:
self.dependency_uid = build_package_uid(self.purl)

def refresh_dependency_uid(self):
    """
    Recompute ``dependency_uid`` from the current ``purl``, replacing any
    previously stored value.
    """
    refreshed_uid = build_package_uid(self.purl)
    self.dependency_uid = refreshed_uid

def update_namespace(self, namespace):
    """
    Set the namespace of this dependency ``purl`` to ``namespace`` and
    refresh ``dependency_uid`` so that it matches the updated purl.

    Do nothing when ``namespace`` is empty or None. Every other purl
    component (type, name, version, qualifiers and subpath) is kept as-is.
    """
    if not namespace:
        return

    purl = PackageURL.from_string(self.purl)
    # Rebuild through the constructor rather than mutating in place.
    # Carry over the subpath too, otherwise it is silently dropped
    # from the updated purl.
    new_purl = PackageURL(
        type=purl.type,
        namespace=namespace,
        name=purl.name,
        version=purl.version,
        qualifiers=purl.qualifiers,
        subpath=purl.subpath,
    )
    self.purl = new_purl.to_string()
    # the uid is derived from the purl, so it must be recomputed
    self.refresh_dependency_uid()

@classmethod
def from_dependent_package(
cls,
Expand Down Expand Up @@ -1461,6 +1480,42 @@ def get_top_level_resources(cls, manifest_resource, codebase):
"""
pass

@classmethod
def get_root_resource_for_rootfs(cls, resource, codebase):
    """
    Return the ancestor of ``resource`` in ``codebase`` found by walking up
    as many levels as there are path segments in the first entry of
    ``cls.path_patterns``.

    For instance with a first pattern of
    '*usr/lib/sysimage/rpm/Packages.db' this walks up five levels.
    """
    # the first pattern is the reference used to compute the depth
    reference_pattern = cls.path_patterns[0]

    # one more segment than there are separators in the pattern
    segments_count = reference_pattern.count('/') + 1

    return get_ancestor(
        levels_up=segments_count,
        resource=resource,
        codebase=codebase,
    )

@classmethod
def get_distro_identifier_rootfs(cls, root_resource, codebase):
    """
    Return the "namespace" value of the first package data entry found on
    an os-release resource under ``root_resource`` in ``codebase``, or None
    when no such resource carries package data.

    The 'etc/os-release' path is looked up first, then
    'usr/lib/os-release'.
    """
    rootfs_path = root_resource.path

    for relative_path in ('etc/os-release', 'usr/lib/os-release',):
        candidate_path = '/'.join((rootfs_path, relative_path))
        candidate = codebase.get_resource(candidate_path)
        if not candidate:
            continue

        package_data = candidate.package_data
        if not package_data:
            continue

        # there can be only one distro: only the first entry is considered
        distro = package_data[0]
        if distro:
            return distro.get("namespace")

    return None


class NonAssemblableDatafileHandler(DatafileHandler):
"""
Expand Down Expand Up @@ -1535,6 +1590,10 @@ def __attrs_post_init__(self, *args, **kwargs):
if not self.package_uid:
self.package_uid = build_package_uid(self.purl)

def refresh_and_get_package_uid(self):
    """
    Recompute ``package_uid`` from the current ``purl``, store it on this
    package and return the stored value.
    """
    refreshed_uid = build_package_uid(self.purl)
    self.package_uid = refreshed_uid
    return self.package_uid

def to_dict(self):
return super().to_dict(with_details=False)

Expand Down
57 changes: 15 additions & 42 deletions src/packagedcode/rpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from packagedcode.rpm_installed import collect_installed_rpmdb_xmlish_from_rpmdb_loc
from packagedcode.rpm_installed import parse_rpm_xmlish
from packagedcode.utils import build_description
from packagedcode.utils import get_ancestor

from scancode.api import get_licenses

TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)
Expand Down Expand Up @@ -143,54 +143,29 @@ def parse(cls, location, package_only=False):
package_type=cls.default_package_type,
package_only=package_only,
)
# TODO: package_data.namespace = cls.default_package_namespace
return package_data

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
# get the root resource of the rootfs
# take the 1st pattern as a reference
# for instance: '*usr/lib/sysimage/rpm/Packages.db'
base_path_patterns = cls.path_patterns[0]

# how many levels up are there to the root of the rootfs?
levels_up = len(base_path_patterns.split('/'))

root_resource = get_ancestor(
levels_up=levels_up,
resource=resource,
codebase=codebase,
)

root_resource = cls.get_root_resource_for_rootfs(resource, codebase)

package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
package_uid = package.package_uid

root_path = root_resource.path
# get etc/os-release for namespace
namespace = None
os_release_rootfs_paths = ('etc/os-release', 'usr/lib/os-release',)
for os_release_rootfs_path in os_release_rootfs_paths:
os_release_path = '/'.join([root_path, os_release_rootfs_path])
os_release_res = codebase.get_resource(os_release_path)
if not os_release_res:
continue
# there can be only one distro
distro = os_release_res.package_data and os_release_res.package_data[0]
if distro:
namespace = distro.namespace
break
namespace = cls.get_distro_identifier_rootfs(root_resource, codebase)
if namespace:
package.namespace = namespace

package.namespace = namespace
package_uid = package.refresh_and_get_package_uid()

# tag files from refs
resources = []
missing_file_references = []
# a file ref extends from the root of the filesystem
for ref in package.file_references:
ref_path = '/'.join([root_path, ref.path])
ref_path = '/'.join([root_resource.path, ref.path])
res = codebase.get_resource(ref_path)
if not res:
missing_file_references.append(ref)
Expand All @@ -216,8 +191,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
datasource_id=package_data.datasource_id,
package_uid=package_uid,
):
if not dep.namespace:
dep.namespace = namespace
dep.update_namespace(namespace)
yield dep

for resource in resources:
Expand Down Expand Up @@ -424,19 +398,18 @@ def parse(cls, location, package_only=False):
@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):

levels_up = len('var/lib/rpmmanifest/container-manifest-2'.split('/'))
root_resource = get_ancestor(
levels_up=levels_up,
resource=resource,
codebase=codebase,
)
root_resource = cls.get_root_resource_for_rootfs(resource, codebase)
package_name = package_data.name

package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
package_uid = package.package_uid
namespace = cls.get_distro_identifier_rootfs(root_resource, codebase)
if namespace:
package.namespace = namespace

package_uid = package.refresh_and_get_package_uid()

assemblable_paths = tuple(set([
f'*usr/share/licenses/{package_name}/COPYING*',
Expand Down
Loading
Loading