Update package assembly to consider other manifests #3874

Open · wants to merge 4 commits into develop
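The recurring pattern in the diffs below is that `assemble()` implementations stop hard-coding datafile names such as `('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock')` and instead build `datafile_name_patterns` from the handlers' own `path_patterns`, keeping the patterns in one place. A minimal sketch of the idea with simplified stand-in handler classes (the real handlers and their exact `path_patterns` values live in `packagedcode`):

```python
from fnmatch import fnmatchcase


# Stand-ins for the real CargoTomlHandler / CargoLockHandler; the actual
# path_patterns values may differ.
class CargoTomlHandlerSketch:
    path_patterns = ('*/Cargo.toml',)


class CargoLockHandlerSketch:
    path_patterns = ('*/Cargo.lock',)


# Single source of truth: concatenate the handlers' patterns instead of
# repeating literal file names in every assemble() implementation.
datafile_name_patterns = (
    CargoTomlHandlerSketch.path_patterns + CargoLockHandlerSketch.path_patterns
)


def is_companion_datafile(name, path, patterns=datafile_name_patterns):
    # A sibling datafile belongs to the same package if its name or its
    # full path matches one of the patterns (see the matching change in
    # models.assemble_from_many_datafiles further down).
    return any(
        fnmatchcase(name, pattern) or fnmatchcase(path, pattern)
        for pattern in patterns
    )


assert is_companion_datafile('Cargo.lock', 'project/Cargo.lock')
assert not is_companion_datafile('pyproject.toml', 'project/pyproject.toml')
```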
2 changes: 0 additions & 2 deletions src/licensedcode/plugin_license.py
@@ -217,8 +217,6 @@ def process_codebase(self, codebase, license_text=False, license_diagnostics=Fal
f'before: {license_expressions_before}\n'
f'after : {license_expressions_after}'
)

#raise Exception()

license_detections = collect_license_detections(
codebase=codebase,
11 changes: 7 additions & 4 deletions src/packagedcode/about.py
@@ -79,9 +79,11 @@ def parse(cls, location, package_only=False):
declared_license_expression = package_data.get('license_expression')

owner = package_data.get('owner')
if not isinstance(owner, str):
owner = repr(owner)
parties = [models.Party(type=models.party_person, name=owner, role='owner')]
parties = []
if owner:
if not isinstance(owner, str):
owner = repr(owner)
parties.append(models.Party(type=models.party_person, name=owner, role='owner'))

# FIXME: also include notice_file and license_file(s) as file_references
file_references = []
@@ -157,7 +159,8 @@ def assemble(cls, package_data, resource, codebase, package_adder):
missing = sorted(file_references_by_path.values(), key=lambda r: r.path)
package.extra_data['missing_file_references'] = missing
else:
package.extra_data['missing_file_references'] = package_data.file_references[:]
if package.file_references:
package.extra_data['missing_file_references'] = package_data.file_references[:]

# we yield this as we do not want this further processed
yield resource
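The owner handling above previously always created a Party, wrapping a non-string owner in `repr()`, so a missing owner turned into a party literally named 'None'; the new code only creates a Party when an owner value is present. A standalone sketch of the new behavior, with a simplified stand-in for `models.Party` and a plain 'person' string in place of `models.party_person`:

```python
from dataclasses import dataclass


@dataclass
class Party:
    # Simplified stand-in for packagedcode.models.Party.
    type: str
    name: str
    role: str


def build_parties(owner):
    parties = []
    if owner:
        if not isinstance(owner, str):
            # Non-string owners (lists, mappings) are stringified.
            owner = repr(owner)
        parties.append(Party(type='person', name=owner, role='owner'))
    return parties


assert build_parties(None) == []
assert build_parties('Jane Doe')[0].name == 'Jane Doe'
assert build_parties(['Org A', 'Org B'])[0].name == "['Org A', 'Org B']"
```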
9 changes: 7 additions & 2 deletions src/packagedcode/cargo.py
@@ -39,13 +39,18 @@ def logger_debug(*args):


class CargoBaseHandler(models.DatafileHandler):

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Cargo.toml and possible Cargo.lock datafiles. Also
support cargo workspaces where we have multiple packages from
a repository and some shared information present at top-level.
"""
datafile_name_patterns = (
CargoLockHandler.path_patterns + CargoTomlHandler.path_patterns
)

workspace = package_data.extra_data.get('workspace', {})
workspace_members = workspace.get("members", [])
workspace_package_data = workspace.get("package", {})
@@ -89,14 +94,14 @@ def assemble(cls, package_data, resource, codebase, package_adder):
resource.save(codebase)

yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
datafile_name_patterns=datafile_name_patterns,
directory=workspace_directory,
codebase=codebase,
package_adder=package_adder,
)
else:
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
datafile_name_patterns=datafile_name_patterns,
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
5 changes: 4 additions & 1 deletion src/packagedcode/chef.py
@@ -146,8 +146,11 @@ def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Package from Chef metadata.rb, then from metadata.json files.
"""
datafile_name_patterns = (
ChefMetadataRbHandler.path_patterns + ChefMetadataJsonHandler.path_patterns
)
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('metadata.rb', 'metadata.json',),
datafile_name_patterns=datafile_name_patterns,
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
7 changes: 4 additions & 3 deletions src/packagedcode/cocoapods.py
@@ -13,6 +13,7 @@
import logging
import sys
from functools import partial
from fnmatch import fnmatchcase

import saneyaml
from packageurl import PackageURL
@@ -142,18 +143,18 @@ def assemble(cls, package_data, resource, codebase, package_adder):
else:
# do we have more than one podspec?
parent = resource.parent(codebase)
podspec_path_pattern = PodspecHandler.path_patterns[0]
sibling_podspecs = [
r for r in parent.children(codebase)
if r.name.endswith('.podspec')
if fnmatchcase(r.name, podspec_path_pattern)
]

siblings_counts = len(sibling_podspecs)
has_single_podspec = siblings_counts == 1
has_multiple_podspec = siblings_counts > 1

datafile_name_patterns = (
'Podfile.lock',
'Podfile',
PodfileHandler.path_patterns + PodfileLockHandler.path_patterns
)

if has_single_podspec:
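In the cocoapods change above, the sibling-podspec test now reuses `PodspecHandler.path_patterns[0]` with `fnmatchcase` instead of duplicating the suffix check `r.name.endswith('.podspec')`; behavior stays essentially the same, but the pattern now comes from the handler declaration. A quick sketch, assuming that first pattern is a `'*.podspec'`-style glob (an assumption; check the real handler):

```python
from fnmatch import fnmatchcase

# Assumed value of PodspecHandler.path_patterns[0].
podspec_path_pattern = '*.podspec'

names = ['RxSwift.podspec', 'RxSwift.PODSPEC', 'Podfile', 'a.podspec.json']

# fnmatchcase is case-sensitive and must match the whole name, which
# mirrors the old endswith('.podspec') check for typical podspec names.
matched = [n for n in names if fnmatchcase(n, podspec_path_pattern)]
assert matched == ['RxSwift.podspec']
```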
5 changes: 4 additions & 1 deletion src/packagedcode/golang.py
@@ -32,8 +32,11 @@ def assemble(cls, package_data, resource, codebase, package_adder):
"""
Always use go.mod first, then go.sum
"""
datafile_name_patterns = (
GoModHandler.path_patterns + GoSumHandler.path_patterns
)
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('go.mod', 'go.sum',),
datafile_name_patterns=datafile_name_patterns,
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
35 changes: 22 additions & 13 deletions src/packagedcode/licensing.py
@@ -31,6 +31,7 @@
from licensedcode import query

from packagedcode.utils import combine_expressions
from packagedcode.models import PackageData
from summarycode.classify import check_resource_name_start_and_end
from summarycode.classify import LEGAL_STARTS_ENDS
from summarycode.classify import README_STARTS_ENDS
@@ -351,32 +352,40 @@ def add_license_from_sibling_file(resource, codebase):
if not resource.is_file:
return

package_data = resource.package_data
if not package_data:
package_data_mappings = resource.package_data
if not package_data_mappings:
return

for pkg in package_data:
for pkg in package_data_mappings:
pkg_license_detections = pkg["license_detections"]
if pkg_license_detections:
return

license_detections, license_expression = get_license_detections_from_sibling_file(
resource=resource,
codebase=codebase,
)
package_data_mapping = resource.package_data[0]
package_data = PackageData.from_data(package_data_mapping)
license_detections = None

# We do not want license detections populated from sibling files for
# package manifests which are not primary package manifests and have no
# purls (example: dependency lockfiles/requirements/other build manifests)
if package_data.purl and package_data.can_assemble:
license_detections, license_expression = get_license_detections_from_sibling_file(
resource=resource,
codebase=codebase,
)

if not license_detections:
return

package = resource.package_data[0]
package["license_detections"] = license_detections
package["declared_license_expression"] = license_expression
package["declared_license_expression_spdx"] = str(build_spdx_license_expression(
license_expression=package["declared_license_expression"],
package_data_mapping["license_detections"] = license_detections
package_data_mapping["declared_license_expression"] = license_expression
package_data_mapping["declared_license_expression_spdx"] = str(build_spdx_license_expression(
license_expression=package_data_mapping["declared_license_expression"],
licensing=get_cache().licensing,
))

codebase.save_resource(resource)
return package
return package_data_mapping


def is_legal_or_readme(resource):
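The reworked `add_license_from_sibling_file` above now only pulls license detections from sibling files when the datafile's package data has a purl and comes from an assemblable handler (`can_assemble`), so lockfiles, requirements files and other secondary manifests no longer inherit licenses from neighboring LICENSE or README files. A reduced sketch of that guard, using a stand-in object and a stand-in detector instead of the real `PackageData` and `get_license_detections_from_sibling_file`:

```python
from dataclasses import dataclass


@dataclass
class PackageDataSketch:
    # Stand-in for the two PackageData attributes the guard reads.
    purl: str
    can_assemble: bool


def detect_from_siblings():
    # Stand-in for get_license_detections_from_sibling_file().
    return ['mit-detection'], 'mit'


def license_from_siblings(package_data):
    # Only primary, assemblable manifests with a purl get sibling licenses.
    if package_data.purl and package_data.can_assemble:
        return detect_from_siblings()
    return None


# A lockfile/requirements-style datafile without a purl gets nothing.
assert license_from_siblings(PackageDataSketch(purl='', can_assemble=True)) is None
# A primary manifest with a purl from an assemblable handler does.
assert license_from_siblings(
    PackageDataSketch(purl='pkg:pypi/foo@1.0', can_assemble=True)
) == (['mit-detection'], 'mit')
```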
2 changes: 1 addition & 1 deletion src/packagedcode/maven.py
@@ -72,7 +72,7 @@ def assemble(cls, package_data, resource, codebase, package_adder=models.add_to_

# This order is important as we want pom.xml to be used for package
# creation and then to update from MANIFEST later
manifest_path_pattern = '*/META-INF/MANIFEST.MF'
manifest_path_pattern = JavaJarManifestHandler.path_patterns[0]
nested_pom_xml_path_pattern = '*/META-INF/maven/**/pom.xml'
datafile_name_patterns = (nested_pom_xml_path_pattern, manifest_path_pattern)

63 changes: 44 additions & 19 deletions src/packagedcode/models.py
@@ -41,7 +41,6 @@
except ImportError:
licensing = None

from packagedcode.licensing import get_declared_license_expression_spdx

"""
This module contain data models for package and dependencies, abstracting and
@@ -755,27 +754,23 @@ def from_data(cls, package_data, package_only=False):

Skip the license/copyright detection step if `package_only` is True.
"""
if "purl" in package_data:
package_data.pop("purl")
package_mapping = package_data.copy()
if "purl" in package_mapping:
package_mapping.pop("purl")

package_data = cls(**package_data)
package_data_obj = cls(**package_mapping)

if not package_only:
package_data.populate_license_fields()
package_data.populate_holder_field()
package_data_obj.populate_license_fields()
package_data_obj.populate_holder_field()
else:
package_data.normalize_extracted_license_statement()
package_data_obj.normalize_extracted_license_statement()

return package_data
return package_data_obj

@property
def can_assemble(self):
from packagedcode import HANDLER_BY_DATASOURCE_ID
handler = HANDLER_BY_DATASOURCE_ID.get(self.datasource_id)
if issubclass(handler, NonAssemblableDatafileHandler):
return False

return True
return is_from_assemblable_handler(self.datasource_id)

def normalize_extracted_license_statement(self):
"""
@@ -836,6 +831,8 @@ def populate_license_fields(self):
object, and add the declared_license_expression (and the spdx expression)
and corresponding LicenseDetection data.
"""
from packagedcode.licensing import get_declared_license_expression_spdx

if not self.declared_license_expression and self.extracted_license_statement:

self.license_detections, self.declared_license_expression = \
@@ -976,6 +973,22 @@ def get_license_detections_and_expression(self):
)


def is_from_assemblable_handler(datasource_id):
"""
Return True if the datafile handler corresponding to `datasource_id`
supports assembly into a Package instance.
"""
if not datasource_id:
return False

from packagedcode import HANDLER_BY_DATASOURCE_ID
handler = HANDLER_BY_DATASOURCE_ID.get(datasource_id)
if issubclass(handler, NonAssemblableDatafileHandler):
return False

return True


def get_default_relation_license(datasource_id):
from packagedcode import HANDLER_BY_DATASOURCE_ID
handler = HANDLER_BY_DATASOURCE_ID.get(datasource_id, None)
@@ -1282,7 +1295,7 @@ def assemble_from_many(
else:
# FIXME: What if the package_data is NOT for the same package as package?
# FIXME: What if the update did not do anything? (it does return True or False)
# FIXME: There we would be missing out packges AND/OR errors
# FIXME: There we would be missing out packages AND/OR errors
package.update(
package_data=package_data,
datafile_path=resource.path,
@@ -1312,7 +1325,7 @@ def assemble_from_many(
yield package
yield from dependencies

# Associate Package to Resources and yield them
# Associate Package to the manifest resources and yield them
for resource in resources:
package_adder(package_uid, resource, codebase)
yield resource
@@ -1321,11 +1334,12 @@
package_adder(package_uid, resource, codebase)
yield resource

# the whole parent subtree of the base_resource is for this package
# the whole parent subtree of the base_resource is for this package,
# so assign resources to package
if package_uid:
for res in base_resource.walk(codebase):
package_adder(package_uid, res, codebase)
yield res

if parent_resource:
package_adder(package_uid, parent_resource, codebase)
yield parent_resource
@@ -1371,7 +1385,10 @@ def assemble_from_many_datafiles(
# we iterate on datafile_name_patterns because their order matters
for datafile_name_pattern in datafile_name_patterns:
for sibling in siblings:
if fnmatchcase(sibling.name, datafile_name_pattern):
if (
fnmatchcase(sibling.name, datafile_name_pattern) or
fnmatchcase(sibling.location, datafile_name_pattern)
):
for package_data in sibling.package_data:
package_data = PackageData.from_dict(package_data)
pkgdata_resources.append((package_data, sibling,))
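The matching change just above is what makes handler-derived patterns usable by `assemble_from_many_datafiles`: bare-name patterns such as `'go.sum'` match a resource name, while path-style patterns such as `'*/Cargo.lock'` can only match a full location, so each sibling is now tested on both. A small sketch of the two cases (pattern values are illustrative):

```python
from fnmatch import fnmatchcase


def sibling_matches(name, location, pattern):
    # Same test as in assemble_from_many_datafiles: accept the sibling if
    # either its bare name or its full location matches the pattern.
    return fnmatchcase(name, pattern) or fnmatchcase(location, pattern)


# A plain name pattern matches on the name alone...
assert sibling_matches('go.sum', '/scan/project/go.sum', 'go.sum')
# ...while a path-style handler pattern never matches a bare name and
# needs the location to match.
assert not fnmatchcase('Cargo.lock', '*/Cargo.lock')
assert sibling_matches('Cargo.lock', '/scan/project/Cargo.lock', '*/Cargo.lock')
```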
@@ -1467,6 +1484,8 @@ def populate_license_fields(cls, package_data):
object, and add the declared_license_expression (and the spdx expression)
and corresponding LicenseDetection data.
"""
from packagedcode.licensing import get_declared_license_expression_spdx

if not package_data.declared_license_expression and package_data.extracted_license_statement:

package_data.license_detections, package_data.declared_license_expression = \
@@ -1758,6 +1777,12 @@ def update(
return True

def refresh_license_expressions(self, default_relation='AND'):
"""
Re-populate this package's declared and other license expressions
from its license detections and other license detections.
"""
from packagedcode.licensing import get_declared_license_expression_spdx

if self.license_detections:
self.declared_license_expression = str(combine_expressions(
expressions=[
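Two smaller threads run through the models.py changes above: `from_data` now copies the incoming mapping before popping `purl`, so the caller's dict is no longer mutated as a side effect, and the module-level import of `get_declared_license_expression_spdx` moves into the functions that use it, presumably to avoid a circular import now that licensing.py imports `PackageData` from this module. A minimal sketch of the copy-before-pop change:

```python
def strip_purl(package_data):
    # Copy first: popping from the caller's mapping would silently change
    # it for every later reader of the same dict (the pre-change behavior).
    package_mapping = package_data.copy()
    package_mapping.pop('purl', None)
    return package_mapping


original = {'purl': 'pkg:pypi/foo@1.0', 'name': 'foo', 'version': '1.0'}
stripped = strip_purl(original)

assert 'purl' not in stripped
# The caller still sees its purl.
assert original['purl'] == 'pkg:pypi/foo@1.0'
```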
5 changes: 2 additions & 3 deletions src/packagedcode/npm.py
@@ -161,7 +161,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
for npm_res in cls.walk_npm(resource=workspace_root, codebase=codebase):
if package_uid and package_uid not in npm_res.for_packages:
package_adder(package_uid, npm_res, codebase)
yield npm_res

yield package_resource

elif workspaces:
@@ -190,7 +190,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
for npm_res in cls.walk_npm(resource=workspace_root, codebase=codebase):
if package_uid and not npm_res.for_packages:
package_adder(package_uid, npm_res, codebase)
yield npm_res

yield package_resource

else:
@@ -257,7 +257,6 @@ def create_packages_from_workspaces(
for npm_res in cls.walk_npm(resource=member_root, codebase=codebase):
if package_uid and package_uid not in npm_res.for_packages:
package_adder(package_uid, npm_res, codebase)
yield npm_res

yield from cls.yield_npm_dependencies_and_resources(
package_resource=workspace_member,
6 changes: 2 additions & 4 deletions src/packagedcode/phpcomposer.py
@@ -26,10 +26,8 @@ class BasePhpComposerHandler(models.DatafileHandler):

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
datafile_name_patterns = (
'composer.json',
'composer.lock',
)
datafile_name_patterns = \
PhpComposerJsonHandler.path_patterns + PhpComposerLockHandler.path_patterns

if resource.has_parent():
dir_resource = resource.parent(codebase)