From 4458b27b99ccbc63a98b63561f9346e14722787f Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Tue, 22 Sep 2020 13:02:31 +0200 Subject: [PATCH] Add new attributes to the report * license version (for licenses that are versioned, like Apache-1.0 vs Apache-2.0), * SPDX ID without the version suffix, * link to license file, * copyright holder, * copyright year All mentioned attributes are computed on a best-effort basis; that is, there is no guarantee that the implementation will find the respective values even if they are available in the package metadata or license file. Also: - add safeguard around read-package-tree - sort packages by name and version before printing the report --- cli-options.js | 2 +- formatters/table.js | 5 +++ helpers/extract-copyright.js | 59 ++++++++++++++++++++++++++++++ helpers/extract-license-id.js | 2 +- helpers/extract-license-version.js | 23 ++++++++++++ helpers/get-package-details.js | 12 ++++-- helpers/npm-list.js | 2 +- lib.js | 48 ++++++++++++++++++++---- tests/get-package-details.test.js | 6 +-- 9 files changed, 141 insertions(+), 18 deletions(-) create mode 100644 helpers/extract-copyright.js create mode 100644 helpers/extract-license-version.js diff --git a/cli-options.js b/cli-options.js index 3a46180..53f7f3b 100644 --- a/cli-options.js +++ b/cli-options.js @@ -12,7 +12,7 @@ module.exports = { include: { description: 'List of properties to include', type: 'array', - choices: ['id', 'name', 'version', 'license', 'licenseId', 'licenseFullName', 'licenseFilePath', 'path', 'repository', 'author', 'homepage', 'dependencyLevel', 'description'], + choices: ['id', 'name', 'version', 'license', 'licenseId', 'licenseIdWithoutVersion', 'licenseFullName', 'licenseVersion', 'licenseFilePath', 'licenseLink', 'copyrightYear', 'copyrightHolder', 'path', 'repository', 'author', 'homepage', 'dependencyLevel', 'description'], default: ['id', 'name', 'version', 'license', 'repository', 'author', 'homepage', 'dependencyLevel'] }, production: { diff 
--git a/formatters/table.js b/formatters/table.js index d4f8bda..6ca3a7b 100644 --- a/formatters/table.js +++ b/formatters/table.js @@ -21,9 +21,14 @@ module.exports = function ({data, header}) { name: 'Package Name', version: 'Version', licenseId: 'SPDX ID', + licenseIdWithoutVersion: 'SPDX ID (without version)', + licenseVersion: 'License Version', licenseFullName: 'SPDX Full Name', licenseFilePath: 'Path to license file', license: 'License', + licenseLink: 'License Link', + copyrightYear: 'Copyright Year', + copyrightHolder: 'Copyright Holder', homepage: 'Homepage', repository: 'Repository', author: 'Author', diff --git a/helpers/extract-copyright.js b/helpers/extract-copyright.js new file mode 100644 index 0000000..a2708a2 --- /dev/null +++ b/helpers/extract-copyright.js @@ -0,0 +1,59 @@ +const fsPromises = require('fs').promises + +const { isString, isObject, isArray, compact } = require('lodash') + +/** + * Inspects the license file and tries to heuristically determine the copyright holder and the copyright year from it. + * + * @param {string[]} licenseFilePaths the paths to the license files; only the first one is inspected + * @returns {Promise<{copyrightYear: string, copyrightHolder: string}>} the copyright information parsed from the license file + */ +module.exports = async function extractCopyright(licenseFilePaths) { + if (!licenseFilePaths || licenseFilePaths.length === 0) { + return {} + } + const licenseFilePath = licenseFilePaths[0] + let handle + try { + handle = await fsPromises.open(licenseFilePath, 'r') + const fullFile = await handle.readFile({ encoding: 'utf-8' }) + const lines = fullFile.split('\n') + // The copyright line should be somewhere at the start, inspect the first few lines. 
+ for (let i = 0; i < Math.min(lines.length, 5); i++) { + const line = lines[i] + const matchWithRange = /copyright(?:.*)(\d{4}\s*-\s*\d{4})(?:[,;.]?)\s+(.*)$/i.exec(line) + if (matchWithRange) { + return cleanUp({ copyrightYear: matchWithRange[1], copyrightHolder: matchWithRange[2] }) + } + const matchWithYear = /copyright(?:.*)(\d{4})(?:[,;.]?)\s+(.*)$/i.exec(line) + if (matchWithYear) { + return cleanUp({ copyrightYear: matchWithYear[1], copyrightHolder: matchWithYear[2] }) + } + const matchWithoutYear = /copyright\s+(.*)$/i.exec(line) + if (matchWithoutYear) { + return cleanUp({ copyrightYear: null, copyrightHolder: matchWithoutYear[1] }) + } + } + } catch (e) { + console.warn('Could not open license file to parse copyright information.', e) + } finally { + if (handle) { + await handle.close() + } + } + return {} +} + +function cleanUp(copyright) { + const patterns = [ + /\s*All rights reserved.\s*/ig, + /\s*\([^\s]+@[^\s]+\)/ig, // matches "(email-address@domain.tld)" + /\s*<[^\s]+@[^\s]+>/ig, // matches "<email-address@domain.tld>" + /\s*<http[^\s]+>/ig, // matches "<http(s)://domain.tld>" + /\s*\([cC]\)/ig + ] + patterns.forEach(p => { + copyright.copyrightHolder = copyright.copyrightHolder.replace(p, '') + }) + return copyright +} diff --git a/helpers/extract-license-id.js b/helpers/extract-license-id.js index 01f7985..8e6b033 100644 --- a/helpers/extract-license-id.js +++ b/helpers/extract-license-id.js @@ -1,7 +1,7 @@ const { isString, isObject, isArray, compact } = require('lodash') /** - * Deal with all the crazy stuff the "license" field in package.json can have and return only the SPDX ID (if any) + * Deal with all the wild stuff the "license" field in package.json can have and return only the SPDX ID (if any). 
 * * @param {*} license * @returns {string} diff --git a/helpers/extract-license-version.js b/helpers/extract-license-version.js new file mode 100644 index 0000000..839038b --- /dev/null +++ b/helpers/extract-license-version.js @@ -0,0 +1,23 @@ +const { isString, isObject, isArray, compact } = require('lodash') + +/** + * Takes an SPDX identifier like Apache-1.0 and splits it into "Apache" and "1.0". + * + * @param {string} spdxId an SPDX identifier + * @returns {{licenseIdWithoutVersion: string, licenseVersion: string}} the SPDX ID parsed into individual parts. For + * unversioned licenses, licenseIdWithoutVersion will contain the input and licenseVersion will be + * null. + */ +module.exports = function extractLicenseText(spdxId) { + const match = /^(.*?)-(\d[\d\.]+)$/.exec(spdxId) + if (match) { + return { + licenseIdWithoutVersion: match[1], + licenseVersion: match[2] + } + } + return { + licenseIdWithoutVersion: spdxId, + licenseVersion: null + } +} diff --git a/helpers/get-package-details.js b/helpers/get-package-details.js index 93a7636..aa825ea 100644 --- a/helpers/get-package-details.js +++ b/helpers/get-package-details.js @@ -10,7 +10,11 @@ module.exports = async function (path) { if (!path) { throw new Error('You must specify a path') } - const raw = await readPackageTree(path) - return raw.package - -} \ No newline at end of file + try { + const raw = await readPackageTree(path) + return raw.package + } catch (e) { + console.error(`Reading package tree failed for ${path}.`, e); + return null; + } +} diff --git a/helpers/npm-list.js b/helpers/npm-list.js index ad36811..f790d80 100644 --- a/helpers/npm-list.js +++ b/helpers/npm-list.js @@ -13,7 +13,7 @@ const optionsToArgv = require('./options-to-args') module.exports = function (opts = {}) { const blackListOpts = ['format'] const options = optionsToArgv(opts, blackListOpts) - + return new Promise((resolve, reject) => { debug('Got these options: %s', JSON.stringify(options, null, 2)) diff --git 
a/lib.js b/lib.js index bd91c79..8c53663 100644 --- a/lib.js +++ b/lib.js @@ -1,8 +1,11 @@ +const { chain, compact, sortBy } = require('lodash') const promisify = require('util').promisify const npmLs = require('./helpers/npm-list') const getPackageDetails = require('./helpers/get-package-details') const getExpandedLicName = require('./helpers/get-spdx-full-name') -const extractLicenseText = require('./helpers/extract-license-id') +const extractLicenseId = require('./helpers/extract-license-id') +const extractLicenseVersion = require('./helpers/extract-license-version') +const extractCopyright = require('./helpers/extract-copyright') const glob = promisify(require('glob')) /** @@ -12,24 +15,39 @@ const glob = promisify(require('glob')) */ module.exports = async function (options = {}) { const pathList = await npmLs(options) - return await Promise.all(pathList.map(async (path, index) => { + const results = await Promise.all(pathList.map(async (path, index) => { const pkg = await getPackageDetails(path) - const licShortName = extractLicenseText(pkg.license || pkg.licenses || pkg.licence || pkg.licences) + if (!pkg) { + return null; + } + const repository = (pkg.repository || {}).url + const licShortName = extractLicenseId(pkg.license || pkg.licenses || pkg.licence || pkg.licences) const licLongName = getExpandedLicName(licShortName) || 'unknown' + const { licenseIdWithoutVersion, licenseVersion } = extractLicenseVersion(licShortName) // find any local licences files and build a path to them - const licFilePath = await glob('+(license**|licence**)', {cwd: path, nocase: true, nodir: true}) - .then(files => files.map(file => `${path}/${file}`)) + const allLicenseFiles = await glob('+(license**|licence**)', {cwd: path, nocase: true, nodir: true}) + const licenseFilePaths = allLicenseFiles.map(file => `${path}/${file}`) + const licenseLink = + repository && allLicenseFiles.length > 0 ? 
+ `${repositoryToHttp(repository)}/${allLicenseFiles[0]}` : + '' + const { copyrightYear, copyrightHolder } = await extractCopyright(licenseFilePaths) return { id: index, name: pkg.name, version: pkg.version, licenseId: licShortName, + licenseIdWithoutVersion, + licenseVersion, licenseFullName: licLongName, - licenseFilePath: licFilePath || [], + licenseFilePath: licenseFilePaths || [], license: `${licLongName} (${licShortName || '?'})`, - repository: (pkg.repository || {}).url, + licenseLink, + copyrightYear, + copyrightHolder, + repository, author: (pkg.author || {}).name, homepage: pkg.homepage, path, @@ -37,5 +55,19 @@ module.exports = async function (options = {}) { description: pkg.description } })) + return chain(results).compact().sortBy(['name', 'version']).value() +} -} \ No newline at end of file +function repositoryToHttp(repositoryUrl) { + if (repositoryUrl) { + // The branch "master" might not actually be the default branch of the project but + // the link will still resolve. If there is no master branch, GitHub will pick the correct default branch and show: + // "Branch not found, redirected to default branch." + // Naturally, for projects not hosted on GitHub we might be out of luck. 
+ return repositoryUrl + .replace(/^git\+/, '') + .replace(/^ssh:\/\/git@/, 'https://') + .replace(/^git:\/\//, 'https://') + .replace(/\.git/, '/blob/master') + } +} diff --git a/tests/get-package-details.test.js b/tests/get-package-details.test.js index 401df94..44e4349 100644 --- a/tests/get-package-details.test.js +++ b/tests/get-package-details.test.js @@ -22,11 +22,11 @@ test('Returns details for a package at a given path', async (t) => { t.is(actual.description, expected.description) }) -test('Should fail if the path does not exist', async (t) => { +test('Should return null if the path does not exist', async (t) => { const path = './some-fake-path' try { - await getPackageDetails(path) - t.fail('Expected an exception') + const actual = await getPackageDetails(path) + t.is(actual, null) } catch (err) { t.pass() }