From e2043d102109e95b11fd82025d4c5e8a129fbc07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20F=C3=A4rnstrand?= Date: Fri, 29 Nov 2024 14:11:37 +0100 Subject: [PATCH 1/2] Introduce scan errors and report them upon exit The only error for now is failing to read the code file. But the types allow expanding on this later --- README.md | 7 +++- example-files/not-utf-8-file.ts | Bin 0 -> 128 bytes src/main.rs | 67 +++++++++++++++++++++++++------- 3 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 example-files/not-utf-8-file.ts diff --git a/README.md b/README.md index 7cd971b..43ea0f8 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Where `[PATHS]...` is a list of files or directory to check. ### Example usage ```console -$ unicop example-files/homoglyph.js example-files/invisible.js +$ unicop example-files/homoglyph.js example-files/invisible.js example-files/not-utf-8-file.ts ? failed × found disallowed character LATIN LETTER RETROFLEX CLICK in identifier ╭─[example-files/homoglyph.js:4:18] @@ -71,7 +71,10 @@ $ unicop example-files/homoglyph.js example-files/invisible.js · ╰── HANGUL JUNGSEONG FILLER 6 │ ]; ╰──── -Scanned 486 unicode code points in 2 files, resulting in 3 rule violations +Error while scanning example-files/not-utf-8-file.ts: Failed to read file (stream did not contain valid UTF-8) +Scanned 486 unicode code points in 2 files, resulting in: + 3 rule violations + 1 other error ``` diff --git a/example-files/not-utf-8-file.ts b/example-files/not-utf-8-file.ts new file mode 100644 index 0000000000000000000000000000000000000000..cee08dfbc5fe48efbbf01eca3436304a87ccc2e8 GIT binary patch literal 128 zcmV-`0Du2+B6s}A{*SyI%t;{qtI<4G iI@2-9{Q#s3M8j(-KRu8D literal 0 HcmV?d00001 diff --git a/src/main.rs b/src/main.rs index 397f018..92cb6be 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::fmt; use std::fs; use std::io; use std::path::{Path, PathBuf}; @@ -170,6 +171,7 @@ fn main() -> anyhow::Result<()> { }; let mut num_files_scanned: u64 = 0; + let mut num_errors: u64 = 0; let mut global_scan_stats = ScanStats { num_unicode_code_points: 0, num_rule_violations: 0, @@ -181,11 +183,18 @@ fn main() -> anyhow::Result<()> { Ok(entry) if entry.file_type().is_file() => { let entry_path = entry.path(); dispatcher.user_config = get_user_config(entry_path)?; - if let Some(scan_stats) = check_file(&dispatcher, entry_path) { - num_files_scanned += 1; - global_scan_stats.num_unicode_code_points += - scan_stats.num_unicode_code_points; - global_scan_stats.num_rule_violations += scan_stats.num_rule_violations; + match check_file(&dispatcher, entry_path) { + Ok(Some(scan_stats)) => { + num_files_scanned += 1; + global_scan_stats.num_unicode_code_points += + scan_stats.num_unicode_code_points; + global_scan_stats.num_rule_violations += scan_stats.num_rule_violations; + } + Ok(None) => (), + Err(e) => { + num_errors += 1; + eprintln!("Error while scanning {}: {e}", entry_path.display()); + } } } Ok(_) => {} @@ -194,25 +203,57 @@ fn main() -> anyhow::Result<()> { } println!( - "Scanned {} unicode code points in {} files, resulting in {} rule violations", - global_scan_stats.num_unicode_code_points, - num_files_scanned, + "Scanned {} unicode code points in {} files, resulting in:", + global_scan_stats.num_unicode_code_points, num_files_scanned, + ); + println!( + "\t{} rule violations", global_scan_stats.num_rule_violations, ); - if global_scan_stats.num_rule_violations > 0 { + match num_errors { + 1 => println!("\t1 other error"), + _ => println!("\t{num_errors} other errors"), + } + if global_scan_stats.num_rule_violations > 0 || num_errors > 0 { std::process::exit(1); } Ok(()) } +#[derive(Debug)] +enum ScanError { + /// Failed to read the source code file + ReadFile(io::Error), +} + +impl fmt::Display for ScanError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use ScanError::*; + match self { + ReadFile(ref e) => write!(f, "Failed to read file ({e})"), + } + } +} + +impl std::error::Error for ScanError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use ScanError::*; + match &self { + ReadFile(e) => e.source(), + } + } +} + /// Scans a single file at `path` using the rules defined in `dispatcher`. /// /// If the file was actually scanned (matched a language in the rule dispatcher), /// then stats about the scan are returned. -fn check_file(dispatcher: &RuleDispatcher, path: &Path) -> Option { - let lang = dispatcher.language(path)?; +fn check_file(dispatcher: &RuleDispatcher, path: &Path) -> Result, ScanError> { + let Some(lang) = dispatcher.language(path) else { + return Ok(None); + }; let filename = path.display().to_string(); - let src = fs::read_to_string(path).unwrap(); + let src = fs::read_to_string(path).map_err(ScanError::ReadFile)?; let named_source = NamedSource::new(&filename, src.clone()); let mut parser = tree_sitter::Parser::new(); parser @@ -275,7 +316,7 @@ fn check_file(dispatcher: &RuleDispatcher, path: &Path) -> Option { print!("{:?}", report); } - Some(scan_stats) + Ok(Some(scan_stats)) } /// Statistics about unicop scans. From f98d50d1d0325823fa4f9532ffc621d4d8359e21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20F=C3=A4rnstrand?= Date: Fri, 29 Nov 2024 14:41:06 +0100 Subject: [PATCH 2/2] Change format of the printed stats --- README.md | 5 ++--- src/main.rs | 21 ++++++++++----------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 43ea0f8..55e68c2 100644 --- a/README.md +++ b/README.md @@ -72,9 +72,8 @@ $ unicop example-files/homoglyph.js example-files/invisible.js example-files/not 6 │ ]; ╰──── Error while scanning example-files/not-utf-8-file.ts: Failed to read file (stream did not contain valid UTF-8) -Scanned 486 unicode code points in 2 files, resulting in: - 3 rule violations - 1 other error +Scanned 486 unicode code points in 2 files, resulting in 3 rule violations +Failed to scan 1 file ``` diff --git a/src/main.rs b/src/main.rs index 92cb6be..95bc76c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -171,7 +171,7 @@ fn main() -> anyhow::Result<()> { }; let mut num_files_scanned: u64 = 0; - let mut num_errors: u64 = 0; + let mut num_failed_files: u64 = 0; let mut global_scan_stats = ScanStats { num_unicode_code_points: 0, num_rule_violations: 0, @@ -192,7 +192,7 @@ fn main() -> anyhow::Result<()> { } Ok(None) => (), Err(e) => { - num_errors += 1; + num_failed_files += 1; eprintln!("Error while scanning {}: {e}", entry_path.display()); } } @@ -203,18 +203,17 @@ fn main() -> anyhow::Result<()> { } println!( - "Scanned {} unicode code points in {} files, resulting in:", - global_scan_stats.num_unicode_code_points, num_files_scanned, - ); - println!( - "\t{} rule violations", + "Scanned {} unicode code points in {} files, resulting in {} rule violations", + global_scan_stats.num_unicode_code_points, + num_files_scanned, global_scan_stats.num_rule_violations, ); - match num_errors { - 1 => println!("\t1 other error"), - _ => println!("\t{num_errors} other errors"), + match num_failed_files { + 0 => (), + 1 => println!("Failed to scan 1 file"), + 2.. => println!("Failed to scan {num_failed_files} files"), } - if global_scan_stats.num_rule_violations > 0 || num_errors > 0 { + if global_scan_stats.num_rule_violations > 0 || num_failed_files > 0 { std::process::exit(1); } Ok(())