diff --git a/package.json b/package.json
index d6f774d..3beab82 100644
--- a/package.json
+++ b/package.json
@@ -52,6 +52,7 @@
     "yargs": "16.0.3"
   },
   "devDependencies": {
+    "@types/cheerio": "0.22.23",
     "@types/jest": "26.0.13",
     "@types/lodash": "4.14.161",
     "@types/node": "14.6.3",
diff --git a/src/lib/license-text/non-spdx.ts b/src/lib/license-text/non-spdx.ts
index eb7bac2..25f5091 100644
--- a/src/lib/license-text/non-spdx.ts
+++ b/src/lib/license-text/non-spdx.ts
@@ -14,7 +14,7 @@ export async function fetchNonSpdxLicenseTextAndUrl(
       'utf-8',
     );
     const licenseUrl = nonSpdxLicenseUrls[licenseId];
-    return { licenseText, licenseUrl };
+    return { licenseText: `LICENSE TEXT\n${licenseText}`, licenseUrl };
   } catch (e) {
     debug(`Did not fetch license text successfully. Error: ${e}`);
     throw e;
diff --git a/src/lib/license-text/spdx.ts b/src/lib/license-text/spdx.ts
index 9ce6aa1..64a1a50 100644
--- a/src/lib/license-text/spdx.ts
+++ b/src/lib/license-text/spdx.ts
@@ -19,8 +19,20 @@ export async function fetchSpdxLicenseTextAndUrl(
       throw new Error(rawHtml);
     }
     const $ = cheerio.load(rawHtml);
-    const licenseText = $('body').text();
-    return { licenseText, licenseUrl };
+    const licenseText = $('[property="spdx:licenseText"]')
+      .text()
+      .replace(/\n\s*\n/g, '\n');
+    const licenseTextHeader = $('[property="spdx:standardLicenseHeader"]')
+      .text()
+      .replace(/\n\s*\n/g, '\n');
+    return {
+      licenseText: `LICENSE TEXT\n${licenseText}${
+        licenseTextHeader
+          ? `\nSTANDARD LICENSE HEADER\n ${licenseTextHeader}`
+          : ''
+      }`,
+      licenseUrl,
+    };
   } catch (e) {
     debug(`Did not fetch license text successfully. Error: ${e}`);
     throw e;
diff --git a/test/lib/__snapshots__/fetch-non-spdx-license-text.test.ts.snap b/test/lib/__snapshots__/fetch-non-spdx-license-text.test.ts.snap
index 1a3006f..d95397c 100644
--- a/test/lib/__snapshots__/fetch-non-spdx-license-text.test.ts.snap
+++ b/test/lib/__snapshots__/fetch-non-spdx-license-text.test.ts.snap
@@ -2,7 +2,8 @@
 
 exports[`ASPSecurityKit-Khosla-Tech license fetched locally as expected 1`] = `
 Object {
-  "licenseText": "

KHOSLA TECH - END USER AGREEMENT

+ "licenseText": "LICENSE TEXT +

KHOSLA TECH - END USER AGREEMENT

IMPORTANT – PLEASE READ THIS AGREEMENT!