Skip to content

Commit

Permalink
Add workaround for Pixar license being lame (#626)
Browse files Browse the repository at this point in the history
The Pixar license is an almost exact copy of Apache-2.0, but doesn't
actually have enough changes compared to the Apache-2.0 license to be
fuzzy matched if the apache license text has the appendix at the end
removed (eg, doesn't even have Pixar in the title), so this PR just adds
a workaround specifically for this case.

Resolves: #625
  • Loading branch information
Jake-Shadle authored Mar 6, 2024
1 parent 272ef5b commit 63e48ee
Show file tree
Hide file tree
Showing 10 changed files with 502 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

<!-- next-header -->
## [Unreleased] - ReleaseDate
### Fixed
- [PR#626](https://github.com/EmbarkStudios/cargo-deny/pull/626) resolved [#625](https://github.com/EmbarkStudios/cargo-deny/issues/625) by explicitly checking that a license identified as Pixar was actually (probably) the Pixar license, instead of a normal Apache-2.0 license.

## [0.14.15] - 2024-02-28
### Added
- [PR#618](https://github.com/EmbarkStudios/cargo-deny/pull/618) added metadata notes to diagnostics when a license is rejected, as well as removing span information for accepted licenses unless the log level is `info` or higher to make the diagnostic clearer by default.
Expand Down
10 changes: 9 additions & 1 deletion src/licenses.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ fn evaluate_expression(
),
);

let mut notes = Vec::new();
let mut notes = krate_lic_nfo.notes.clone();

for ((reason, accepted), failed_req) in reasons.into_iter().zip(expr.requirements()) {
if accepted && ctx.log_level < log::LevelFilter::Info {
Expand All @@ -241,6 +241,8 @@ fn evaluate_expression(
if let Some(id) = failed_req.req.license.id() {
notes.push(format!("{} - {}:", id.name, id.full_name));

let len = notes.len();

if id.is_deprecated() {
notes.push(" - **DEPRECATED**".into());
}
Expand All @@ -256,7 +258,13 @@ fn evaluate_expression(
if id.is_copyleft() {
notes.push(" - Copyleft".into());
}

if len == notes.len() {
notes.push(" - No additional metadata available for license".into());
}
} else {
// This would only happen if askalono used a newer license list than spdx, but we update
// both simultaneously
notes.push(format!("{} is not an SPDX license", failed_req.req));
}
}
Expand Down
33 changes: 32 additions & 1 deletion src/licenses/gather.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ struct LicensePack {
struct GatheredExpr {
synthesized_toml: String,
failures: Vec<Label>,
notes: Vec<String>,
expr: spdx::Expression,
file_sources: Vec<String>,
}
Expand Down Expand Up @@ -220,6 +221,7 @@ impl LicensePack {
}

let mut failures = Vec::new();
let mut notes = Vec::new();
synth_toml.push_str("license-files = [\n");

for lic_contents in &self.license_files {
Expand All @@ -237,7 +239,27 @@ impl LicensePack {
let text = askalono::TextData::new(&data.content);
match strat.scan(&text) {
Ok(lic_match) => {
if let Some(identified) = lic_match.license {
if let Some(mut identified) = lic_match.license {
// See https://github.com/EmbarkStudios/cargo-deny/issues/625
// but the Pixar license is just a _slightly_ modified Apache-2.0 license, and since
// the apache 2.0 license is so common, and the modification of removing the appendix,
// which causes askalono to think it is pixar instead is probably common enough we need
// to just explicitly handle it. Really this should be fixed in askalono but that library
// is basically abandoned at this point and should be replaced https://github.com/EmbarkStudios/spdx/issues/67
if identified.name == "Pixar" {
// Very loose, but just check if the title is actually for the pixar license or not
if !data
.content
.trim_start()
.starts_with("Modified Apache 2.0 License")
{
// emit a note about this, just in case
notes.push(format!("'{}' fuzzy matched to Pixar license, but it actually a normal Apache-2.0 license", lic_contents.path));

identified.name = "Apache-2.0";
}
}

// askalano doesn't report any matches below the confidence threshold
// but we want to see what it thinks the license is if the confidence
// is somewhat ok at least
Expand Down Expand Up @@ -312,6 +334,7 @@ impl LicensePack {
Ok(GatheredExpr {
synthesized_toml: synth_toml,
failures,
notes,
expr: spdx::Expression::parse(&expr).unwrap(),
file_sources: sources,
})
Expand Down Expand Up @@ -358,6 +381,8 @@ pub struct KrateLicense<'a> {
pub krate: &'a Krate,
pub lic_info: LicenseInfo,

pub(crate) notes: Vec<String>,

// Reasons for why the license was determined (or not!) when
// gathering the license information
pub(crate) labels: SmallVec<[Label; 1]>,
Expand Down Expand Up @@ -579,6 +604,7 @@ impl Gatherer {
},
},
labels,
notes: Vec::new(),
};
}
}
Expand Down Expand Up @@ -616,6 +642,7 @@ impl Gatherer {
},
},
labels,
notes: Vec::new(),
};
}
Err(err) => {
Expand Down Expand Up @@ -653,6 +680,7 @@ impl Gatherer {
},
},
labels,
notes: Vec::new(),
};
}
}
Expand All @@ -678,6 +706,7 @@ impl Gatherer {
Ok(GatheredExpr {
synthesized_toml,
failures,
notes,
expr,
file_sources,
}) => {
Expand Down Expand Up @@ -721,6 +750,7 @@ impl Gatherer {
},
},
labels,
notes,
};
}
Err((new_toml, lic_file_lables)) => {
Expand Down Expand Up @@ -763,6 +793,7 @@ impl Gatherer {
krate,
lic_info: LicenseInfo::Unlicensed,
labels,
notes: Vec::new(),
}
})
.collect();
Expand Down
42 changes: 42 additions & 0 deletions tests/licenses.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,45 @@ include-dev = true

insta::assert_json_snapshot!(diags);
}

/// Ensures that an Apache-2.0 licenses without the appendix are not misidentified
/// as Pixar, because Pixar is an almost exact copy of Apache-2.0. Fuck I hate licenses so much.
#[test]
fn forces_apache_over_pixar() {
let mut cmd = krates::Cmd::new();
cmd.manifest_path("tests/test_data/so-annoying/Cargo.toml");

let krates: Krates = krates::Builder::new()
.build(cmd, krates::NoneFilter)
.unwrap();

let gatherer = licenses::Gatherer::default()
.with_store(store())
.with_confidence_threshold(0.8);

let cfg = tu::Config::new(
r#"
allow = ['Apache-2.0']
"#,
);

let diags = tu::gather_diagnostics_with_files::<Config, _, _>(
&krates,
func_name!(),
cfg,
codespan::Files::new(),
|ctx, _cs, tx, files| {
let summary = gatherer.gather(ctx.krates, files, Some(&ctx.cfg));
crate::licenses::check(
ctx,
summary,
diag::ErrorSink {
overrides: None,
channel: tx,
},
);
},
);

insta::assert_json_snapshot!(diags);
}
53 changes: 53 additions & 0 deletions tests/snapshots/licenses__forces_apache_over_pixar.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
---
source: tests/licenses.rs
expression: diags
---
[
{
"fields": {
"code": "rejected",
"graphs": [
{
"Krate": {
"name": "so-annoying",
"version": "0.1.0"
}
}
],
"labels": [
{
"column": 12,
"line": 4,
"message": "license expression was not specified",
"span": ""
},
{
"column": 15,
"line": 5,
"message": "license expression retrieved via LICENSE-APACHE, LICENSE-PIXAR",
"span": "Apache-2.0 AND Pixar"
},
{
"column": 15,
"line": 5,
"message": "accepted: license is explicitly allowed",
"span": "Apache-2.0"
},
{
"column": 30,
"line": 5,
"message": "rejected: not explicitly allowed",
"span": "Pixar"
}
],
"message": "failed to satisfy license requirements",
"notes": [
"'LICENSE-APACHE' fuzzy matched to Pixar license, but it actually a normal Apache-2.0 license",
"Pixar - Pixar License:",
" - No additional metadata available for license"
],
"severity": "error"
},
"type": "diagnostic"
}
]
7 changes: 7 additions & 0 deletions tests/test_data/so-annoying/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions tests/test_data/so-annoying/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[package]
name = "so-annoying"
version = "0.1.0"
edition = "2021"
publish = false
Loading

0 comments on commit 63e48ee

Please sign in to comment.