From f95e0b9519ed48fede31481c974520d4aff1bb7e Mon Sep 17 00:00:00 2001 From: Darrell Roberts Date: Sat, 23 Nov 2024 12:42:35 -0500 Subject: [PATCH 1/9] Use parallel walker instead of rayon --- Cargo.lock | 53 +++++++++------ cli/Cargo.toml | 2 +- cli/src/main.rs | 21 ++---- cli/src/parse.rs | 165 ++++++++++++++++++++++++++++----------------- core/src/parser.rs | 32 +++++---- 5 files changed, 158 insertions(+), 115 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 67b94c3d..eea8b888 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -208,6 +208,28 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -227,6 +249,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.19" @@ -479,26 +510,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rayon" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "regex" version = "1.10.5" @@ -702,11 +713,11 @@ dependencies = [ "anyhow", "clap", "clap_complete", + "crossbeam", "flexi_logger", "ignore", "log", "once_cell", - "rayon", "serde", "toml", "typeshare-core", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index bfcb576c..7cf07351 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -23,7 +23,6 @@ clap = { version = "4.5", features = [ ] } ignore = "0.4" once_cell = "1" -rayon = "1.10" serde = { version = "1", features = ["derive"] } toml = "0.8" typeshare-core = { path = "../core", version = "=1.13.2" } @@ -31,3 +30,4 @@ log.workspace = true flexi_logger.workspace = true anyhow = "1" clap_complete = "4.5.32" +crossbeam = "0.8" diff --git a/cli/src/main.rs b/cli/src/main.rs index 64011fc9..8bda2730 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -19,7 +19,7 @@ use clap_complete::aot::generate; use flexi_logger::AdaptiveFormat; use ignore::{overrides::OverrideBuilder, types::TypesBuilder, WalkBuilder}; use log::{error, info}; -use rayon::iter::ParallelBridge; +use parse::parallel_parse; #[cfg(feature = "go")] use typeshare_core::language::Go; use typeshare_core::{ @@ -34,7 +34,7 @@ use typeshare_core::{ use crate::{ args::{Args, Command}, config::Config, - parse::{all_types, parse_input, parser_inputs}, + parse::all_types, writer::{write_generated, Output}, }; @@ -46,8 +46,6 @@ fn main() -> anyhow::Result<()> { let options = Args::parse(); - info!("typeshare started generating types"); - if let Some(options) = options.subcommand { match options { Command::Completions { shell } => { @@ -70,6 +68,8 @@ fn main() -> anyhow::Result<()> { return Ok(()); } + info!("typeshare started generating types"); + let config = config::load_config(config_file).context("Unable to read configuration file")?; let config = override_configuration(config, &options)?; @@ -140,18 +140,7 @@ fn main() -> anyhow::Result<()> { target_os, }; - // The walker ignores directories that are git-ignored. If you need - // a git-ignored directory to be processed, add the specific directory to - // the list of directories given to typeshare when it's invoked in the - // makefiles - // TODO: The `ignore` walker supports parallel walking. We should use this - // and implement a `ParallelVisitor` that builds up the mapping of parsed - // data. That way both walking and parsing are in parallel. - // https://docs.rs/ignore/latest/ignore/struct.WalkParallel.html - let crate_parsed_data = parse_input( - parser_inputs(walker_builder, language_type, multi_file).par_bridge(), - &parse_context, - )?; + let crate_parsed_data = parallel_parse(&parse_context, walker_builder, language_type); // Collect all the types into a map of the file name they // belong too and the list of type names. Used for generating diff --git a/cli/src/parse.rs b/cli/src/parse.rs index f6e961cf..f12067e0 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -1,10 +1,13 @@ //! Source file parsing. use anyhow::Context; -use ignore::WalkBuilder; -use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use crossbeam::channel::bounded; +use ignore::{DirEntry, WalkBuilder, WalkState}; +use log::error; use std::{ collections::{hash_map::Entry, BTreeMap, HashMap}, + ops::Not, path::PathBuf, + thread, }; use typeshare_core::{ context::{ParseContext, ParseFileContext}, @@ -18,35 +21,28 @@ pub struct ParserInput { /// Rust source file path. file_path: PathBuf, /// File name source from crate for output. - file_name: String, + out_file_name: String, /// The crate name the source file belongs to. crate_name: CrateName, } -/// Walk the source folder and collect all parser inputs. -pub fn parser_inputs( - walker_builder: WalkBuilder, - language_type: SupportedLanguage, +fn mk_parse_input( multi_file: bool, -) -> impl Iterator { - walker_builder - .build() - .filter_map(Result::ok) - .filter(|dir_entry| !dir_entry.path().is_dir()) - .filter_map(move |dir_entry| { - let crate_name = if multi_file { - CrateName::find_crate_name(dir_entry.path())? - } else { - SINGLE_FILE_CRATE_NAME - }; - let file_path = dir_entry.path().to_path_buf(); - let file_name = output_file_name(language_type, &crate_name); - Some(ParserInput { - file_path, - file_name, - crate_name, - }) - }) + language_type: SupportedLanguage, + dir_entry: DirEntry, +) -> Option { + let crate_name = if multi_file { + CrateName::find_crate_name(dir_entry.path())? + } else { + SINGLE_FILE_CRATE_NAME + }; + let file_path = dir_entry.path().to_path_buf(); + let out_file_name = output_file_name(language_type, &crate_name); + Some(ParserInput { + file_path, + out_file_name, + crate_name, + }) } /// The output file name to write to. @@ -87,47 +83,90 @@ pub fn all_types(file_mappings: &BTreeMap) -> CrateTypes ) } -/// Collect all the parsed sources into a mapping of crate name to parsed data. pub fn parse_input( - inputs: impl ParallelIterator, parse_context: &ParseContext, -) -> anyhow::Result> { - inputs - .into_par_iter() - .try_fold( - BTreeMap::new, - |mut parsed_crates: BTreeMap, - ParserInput { - file_path, - file_name, - crate_name, - }| { - let parse_file_context = ParseFileContext { - source_code: std::fs::read_to_string(&file_path) - .with_context(|| format!("Failed to read input: {file_name}"))?, - crate_name: crate_name.clone(), - file_name: file_name.clone(), - file_path, - }; - - let parsed_result = - typeshare_core::parser::parse(parse_context, parse_file_context) - .with_context(|| format!("Failed to parse: {file_name}"))?; - - if let Some(parsed_data) = parsed_result { - parsed_crates - .entry(crate_name) - .or_default() - .add(parsed_data); + parse_input: ParserInput, +) -> anyhow::Result> { + let input_file = parse_input + .file_path + .to_str() + .map(ToOwned::to_owned) + .unwrap_or_default(); + + let parse_file_context = ParseFileContext { + source_code: std::fs::read_to_string(&parse_input.file_path) + .with_context(|| format!("Failed to read input: {input_file}"))?, + crate_name: parse_input.crate_name.clone(), + file_name: parse_input.out_file_name, + file_path: parse_input.file_path, + }; + + let parsed_result = typeshare_core::parser::parse(parse_context, parse_file_context) + .with_context(|| format!("Failed to parse: {input_file}"))?; + + Ok(parsed_result) +} + +fn parse_dir_entry( + parse_context: &ParseContext, + language_type: SupportedLanguage, + dir_entry: DirEntry, +) -> Option { + dir_entry + .path() + .is_dir() + .not() + .then(|| { + let input = mk_parse_input(parse_context.multi_file, language_type, dir_entry)?; + match parse_input(parse_context, input) { + Ok(parsed_data) => parsed_data, + Err(err) => { + error!("Failed to parse {err}"); + None } + } + }) + .flatten() +} - Ok(parsed_crates) - }, - ) - .try_reduce(BTreeMap::new, |mut file_maps, parsed_crates| { - for (crate_name, parsed_data) in parsed_crates { - file_maps.entry(crate_name).or_default().add(parsed_data); +/// Use parallel builder to walk all source directories +/// in parallel. +pub fn parallel_parse( + parse_context: &ParseContext, + walker_builder: WalkBuilder, + language_type: SupportedLanguage, +) -> BTreeMap { + let (tx, rx) = bounded::(100); + + let collector_thread = thread::spawn(move || { + let mut crate_parsed_data: BTreeMap = BTreeMap::new(); + + for parsed_data in rx { + let crate_name = parsed_data.crate_name.clone(); + *crate_parsed_data.entry(crate_name).or_default() += parsed_data; + } + + crate_parsed_data + }); + + walker_builder.build_parallel().run(|| { + let tx = tx.clone(); + let parse_context = &parse_context; + + Box::new(move |direntry_result| match direntry_result { + Ok(dir_entry) => { + if let Some(result) = parse_dir_entry(parse_context, language_type, dir_entry) { + tx.send(result).unwrap(); + } + WalkState::Continue + } + Err(e) => { + error!("Failed to walk {e}"); + WalkState::Quit } - Ok(file_maps) }) + }); + + drop(tx); + collector_thread.join().unwrap() } diff --git a/core/src/parser.rs b/core/src/parser.rs index c278217b..64c78422 100644 --- a/core/src/parser.rs +++ b/core/src/parser.rs @@ -16,6 +16,7 @@ use proc_macro2::Ident; use std::{ collections::{BTreeSet, HashMap, HashSet}, convert::TryFrom, + ops::AddAssign, }; use syn::{ ext::IdentExt, parse::ParseBuffer, punctuated::Punctuated, visit::Visit, Attribute, Expr, @@ -109,6 +110,23 @@ pub struct ParsedData { pub multi_file: bool, } +// The better abstraction here is Semigroup Monoid but such +// traits don't exist in Rust. I'd rather have infix <>. +impl AddAssign for ParsedData { + fn add_assign(&mut self, mut rhs: ParsedData) { + self.structs.append(&mut rhs.structs); + self.enums.append(&mut rhs.enums); + self.aliases.append(&mut rhs.aliases); + self.import_types.extend(rhs.import_types); + self.type_names.extend(rhs.type_names); + self.errors.append(&mut rhs.errors); + + self.file_name = rhs.file_name; + self.crate_name = rhs.crate_name; + self.multi_file = rhs.multi_file; + } +} + impl ParsedData { /// Create a new parsed data. pub fn new(crate_name: CrateName, file_name: String, multi_file: bool) -> Self { @@ -120,20 +138,6 @@ impl ParsedData { } } - /// Add the parsed data from `other` to `self`. - pub fn add(&mut self, mut other: Self) { - self.structs.append(&mut other.structs); - self.enums.append(&mut other.enums); - self.aliases.append(&mut other.aliases); - self.import_types.extend(other.import_types); - self.type_names.extend(other.type_names); - self.errors.append(&mut other.errors); - - self.file_name = other.file_name; - self.crate_name = other.crate_name; - self.multi_file = other.multi_file; - } - pub(crate) fn push(&mut self, rust_thing: RustItem) { match rust_thing { RustItem::Struct(s) => { From 2cf6ea1dab484d4d51c25b31f40ab470cf8dfcbc Mon Sep 17 00:00:00 2001 From: Darrell Roberts Date: Sat, 23 Nov 2024 16:56:17 -0500 Subject: [PATCH 2/9] cleanup --- cli/src/main.rs | 13 ++++--------- cli/src/parse.rs | 5 ++--- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 8bda2730..559fcd78 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -140,27 +140,22 @@ fn main() -> anyhow::Result<()> { target_os, }; - let crate_parsed_data = parallel_parse(&parse_context, walker_builder, language_type); + let parsed_data = parallel_parse(&parse_context, walker_builder, language_type); // Collect all the types into a map of the file name they // belong too and the list of type names. Used for generating // imports in generated files. let import_candidates = if multi_file { - all_types(&crate_parsed_data) + all_types(&parsed_data) } else { HashMap::new() }; - check_parse_errors(&crate_parsed_data)?; + check_parse_errors(&parsed_data)?; info!("typeshare started writing generated types"); - write_generated( - destination, - lang.as_mut(), - crate_parsed_data, - import_candidates, - )?; + write_generated(destination, lang.as_mut(), parsed_data, import_candidates)?; info!("typeshare finished generating types"); Ok(()) diff --git a/cli/src/parse.rs b/cli/src/parse.rs index f12067e0..f1cbe071 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -129,8 +129,7 @@ fn parse_dir_entry( .flatten() } -/// Use parallel builder to walk all source directories -/// in parallel. +/// Use parallel builder to walk all source directories concurrently. pub fn parallel_parse( parse_context: &ParseContext, walker_builder: WalkBuilder, @@ -143,6 +142,7 @@ pub fn parallel_parse( for parsed_data in rx { let crate_name = parsed_data.crate_name.clone(); + // Append each yielded parsed data by its respective crate. *crate_parsed_data.entry(crate_name).or_default() += parsed_data; } @@ -151,7 +151,6 @@ pub fn parallel_parse( walker_builder.build_parallel().run(|| { let tx = tx.clone(); - let parse_context = &parse_context; Box::new(move |direntry_result| match direntry_result { Ok(dir_entry) => { From 3190d6904c743ab94525d0074599588c8cc1c36d Mon Sep 17 00:00:00 2001 From: Darrell Roberts Date: Sat, 23 Nov 2024 20:24:22 -0500 Subject: [PATCH 3/9] cleanup --- cli/src/parse.rs | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/cli/src/parse.rs b/cli/src/parse.rs index f1cbe071..da64950b 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -112,21 +112,15 @@ fn parse_dir_entry( language_type: SupportedLanguage, dir_entry: DirEntry, ) -> Option { - dir_entry - .path() - .is_dir() - .not() - .then(|| { - let input = mk_parse_input(parse_context.multi_file, language_type, dir_entry)?; - match parse_input(parse_context, input) { - Ok(parsed_data) => parsed_data, - Err(err) => { - error!("Failed to parse {err}"); - None - } - } - }) - .flatten() + dir_entry.path().is_dir().not().then_some(())?; + let input = mk_parse_input(parse_context.multi_file, language_type, dir_entry)?; + match parse_input(parse_context, input) { + Ok(parsed_data) => parsed_data, + Err(err) => { + error!("Failed to parse {err}"); + None + } + } } /// Use parallel builder to walk all source directories concurrently. @@ -152,7 +146,7 @@ pub fn parallel_parse( walker_builder.build_parallel().run(|| { let tx = tx.clone(); - Box::new(move |direntry_result| match direntry_result { + Box::new(move |result| match result { Ok(dir_entry) => { if let Some(result) = parse_dir_entry(parse_context, language_type, dir_entry) { tx.send(result).unwrap(); @@ -160,7 +154,7 @@ pub fn parallel_parse( WalkState::Continue } Err(e) => { - error!("Failed to walk {e}"); + error!("Traversing directories failed: {e}"); WalkState::Quit } }) From ce2c9a98f3de546f240afc57d6904b34eae75ce4 Mon Sep 17 00:00:00 2001 From: Darrell Roberts Date: Sat, 23 Nov 2024 20:34:53 -0500 Subject: [PATCH 4/9] update log format --- cli/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 559fcd78..937b0080 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -40,8 +40,8 @@ use crate::{ fn main() -> anyhow::Result<()> { flexi_logger::Logger::try_with_env_or_str("info")? - .adaptive_format_for_stderr(AdaptiveFormat::Detailed) - .adaptive_format_for_stdout(AdaptiveFormat::Detailed) + .adaptive_format_for_stderr(AdaptiveFormat::Opt) + .adaptive_format_for_stdout(AdaptiveFormat::Opt) .start()?; let options = Args::parse(); From a963117fa879ef38c8bd0ccec262a1bf68ca4d71 Mon Sep 17 00:00:00 2001 From: Darrell Roberts Date: Sun, 24 Nov 2024 11:57:57 -0500 Subject: [PATCH 5/9] Avoid cloning all type names. Used only to build CrateTypes map. --- cli/src/main.rs | 69 ++++++++++++++++++++++++++---------------------- cli/src/parse.rs | 26 +++++++++--------- 2 files changed, 49 insertions(+), 46 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 937b0080..68e75e94 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -89,36 +89,6 @@ fn main() -> anyhow::Result<()> { }, }; - let mut types = TypesBuilder::new(); - types - .add("rust", "*.rs") - .context("Failed to add rust type extensions")?; - types.select("rust"); - - // This is guaranteed to always have at least one value by the clap configuration - let first_root = directories - .first() - .expect("directories is empty; this shouldn't be possible"); - - let overrides = OverrideBuilder::new(first_root) - // Don't process files inside of tools/typeshare/ - .add("!**/tools/typeshare/**") - .context("Failed to parse override")? - .build() - .context("Failed to build override")?; - - let mut walker_builder = WalkBuilder::new(first_root); - // Sort walker output for deterministic output across platforms - walker_builder - .sort_by_file_path(Path::cmp) - .types(types.build().context("Failed to build types")?) - .overrides(overrides) - .follow_links(options.follow_links); - - for root in directories.iter().skip(1) { - walker_builder.add(root); - } - let destination = if let Some(ref file) = options.output.file { Output::File(file) } else if let Some(ref folder) = options.output.folder { @@ -140,13 +110,17 @@ fn main() -> anyhow::Result<()> { target_os, }; - let parsed_data = parallel_parse(&parse_context, walker_builder, language_type); + let mut parsed_data = parallel_parse( + &parse_context, + walker_builder(directories, &options)?, + language_type, + ); // Collect all the types into a map of the file name they // belong too and the list of type names. Used for generating // imports in generated files. let import_candidates = if multi_file { - all_types(&parsed_data) + all_types(&mut parsed_data) } else { HashMap::new() }; @@ -161,6 +135,37 @@ fn main() -> anyhow::Result<()> { Ok(()) } +fn walker_builder( + directories: &[std::path::PathBuf], + options: &Args, +) -> anyhow::Result { + let mut types = TypesBuilder::new(); + types + .add("rust", "*.rs") + .context("Failed to add rust type extensions")?; + types.select("rust"); + + let first_root = directories + .first() + .expect("directories is empty; this shouldn't be possible"); + let overrides = OverrideBuilder::new(first_root) + // Don't process files inside of tools/typeshare/ + .add("!**/tools/typeshare/**") + .context("Failed to parse override")? + .build() + .context("Failed to build override")?; + let mut walker_builder = WalkBuilder::new(first_root); + walker_builder + .sort_by_file_path(Path::cmp) + .types(types.build().context("Failed to build types")?) + .overrides(overrides) + .follow_links(options.follow_links); + for root in directories.iter().skip(1) { + walker_builder.add(root); + } + Ok(walker_builder) +} + /// Get the language trait impl for the given supported language and configuration. fn language( language_type: SupportedLanguage, diff --git a/cli/src/parse.rs b/cli/src/parse.rs index da64950b..d989d678 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -4,7 +4,8 @@ use crossbeam::channel::bounded; use ignore::{DirEntry, WalkBuilder, WalkState}; use log::error; use std::{ - collections::{hash_map::Entry, BTreeMap, HashMap}, + collections::{BTreeMap, HashMap}, + mem, ops::Not, path::PathBuf, thread, @@ -63,21 +64,17 @@ fn output_file_name(language_type: SupportedLanguage, crate_name: &CrateName) -> /// Collect all the typeshared types into a mapping of crate names to typeshared types. This /// mapping is used to lookup and generated import statements for generated files. -pub fn all_types(file_mappings: &BTreeMap) -> CrateTypes { +pub fn all_types(file_mappings: &mut BTreeMap) -> CrateTypes { file_mappings - .iter() - .map(|(crate_name, parsed_data)| (crate_name, parsed_data.type_names.clone())) + .iter_mut() + .map(|(crate_name, parsed_data)| (crate_name, mem::take(&mut parsed_data.type_names))) .fold( HashMap::new(), |mut import_map: CrateTypes, (crate_name, type_names)| { - match import_map.entry(crate_name.clone()) { - Entry::Occupied(mut e) => { - e.get_mut().extend(type_names); - } - Entry::Vacant(e) => { - e.insert(type_names); - } - } + import_map + .entry(crate_name.clone()) + .or_default() + .extend(type_names); import_map }, ) @@ -148,8 +145,9 @@ pub fn parallel_parse( Box::new(move |result| match result { Ok(dir_entry) => { - if let Some(result) = parse_dir_entry(parse_context, language_type, dir_entry) { - tx.send(result).unwrap(); + if let Some(parsed_data) = parse_dir_entry(parse_context, language_type, dir_entry) + { + tx.send(parsed_data).unwrap(); } WalkState::Continue } From d290e0d7e16d22d5d95bdb7f0ccd33d035162502 Mon Sep 17 00:00:00 2001 From: Darrell Roberts Date: Sun, 24 Nov 2024 12:48:34 -0500 Subject: [PATCH 6/9] update visibility --- cli/src/parse.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/parse.rs b/cli/src/parse.rs index d989d678..50e304db 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -18,7 +18,7 @@ use typeshare_core::{ }; /// Input data for parsing each source file. -pub struct ParserInput { +struct ParserInput { /// Rust source file path. file_path: PathBuf, /// File name source from crate for output. @@ -80,7 +80,7 @@ pub fn all_types(file_mappings: &mut BTreeMap) -> CrateTy ) } -pub fn parse_input( +fn parse_input( parse_context: &ParseContext, parse_input: ParserInput, ) -> anyhow::Result> { From aea802724a2fe61f8ad42ffc8a167a21b4e7d9d8 Mon Sep 17 00:00:00 2001 From: Darrell Roberts Date: Sun, 24 Nov 2024 14:32:59 -0500 Subject: [PATCH 7/9] improve parser error handling --- cli/src/main.rs | 8 ++- cli/src/parse.rs | 128 ++++++++++++++++++++------------------------- core/src/parser.rs | 2 + 3 files changed, 65 insertions(+), 73 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 68e75e94..bc139f25 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -68,6 +68,12 @@ fn main() -> anyhow::Result<()> { return Ok(()); } + generate_types(config_file, &options).inspect_err(|err| { + error!("Typeshare failed to generate types: {err}"); + }) +} + +fn generate_types(config_file: Option<&Path>, options: &Args) -> anyhow::Result<()> { info!("typeshare started generating types"); let config = config::load_config(config_file).context("Unable to read configuration file")?; @@ -114,7 +120,7 @@ fn main() -> anyhow::Result<()> { &parse_context, walker_builder(directories, &options)?, language_type, - ); + )?; // Collect all the types into a map of the file name they // belong too and the list of type names. Used for generating diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 50e304db..9c22a8e1 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -2,48 +2,47 @@ use anyhow::Context; use crossbeam::channel::bounded; use ignore::{DirEntry, WalkBuilder, WalkState}; -use log::error; use std::{ collections::{BTreeMap, HashMap}, - mem, - ops::Not, - path::PathBuf, - thread, + mem, thread, }; use typeshare_core::{ context::{ParseContext, ParseFileContext}, language::{CrateName, CrateTypes, SupportedLanguage, SINGLE_FILE_CRATE_NAME}, - parser::ParsedData, + parser::{ParseError, ParsedData}, RenameExt, }; -/// Input data for parsing each source file. -struct ParserInput { - /// Rust source file path. - file_path: PathBuf, - /// File name source from crate for output. - out_file_name: String, - /// The crate name the source file belongs to. - crate_name: CrateName, -} - -fn mk_parse_input( +fn parse_file_context( multi_file: bool, language_type: SupportedLanguage, - dir_entry: DirEntry, -) -> Option { + dir_entry: &DirEntry, +) -> anyhow::Result> { let crate_name = if multi_file { - CrateName::find_crate_name(dir_entry.path())? + let Some(crate_name) = CrateName::find_crate_name(dir_entry.path()) else { + return Ok(None); + }; + crate_name } else { SINGLE_FILE_CRATE_NAME }; let file_path = dir_entry.path().to_path_buf(); let out_file_name = output_file_name(language_type, &crate_name); - Some(ParserInput { - file_path, - out_file_name, + + let input_file = file_path + .to_str() + .map(ToOwned::to_owned) + .unwrap_or_default(); + + let parse_file_context = ParseFileContext { + source_code: std::fs::read_to_string(&file_path) + .with_context(|| format!("Failed to read input: {input_file}"))?, crate_name, - }) + file_name: out_file_name, + file_path, + }; + + Ok(Some(parse_file_context)) } /// The output file name to write to. @@ -80,44 +79,23 @@ pub fn all_types(file_mappings: &mut BTreeMap) -> CrateTy ) } -fn parse_input( - parse_context: &ParseContext, - parse_input: ParserInput, -) -> anyhow::Result> { - let input_file = parse_input - .file_path - .to_str() - .map(ToOwned::to_owned) - .unwrap_or_default(); - - let parse_file_context = ParseFileContext { - source_code: std::fs::read_to_string(&parse_input.file_path) - .with_context(|| format!("Failed to read input: {input_file}"))?, - crate_name: parse_input.crate_name.clone(), - file_name: parse_input.out_file_name, - file_path: parse_input.file_path, - }; - - let parsed_result = typeshare_core::parser::parse(parse_context, parse_file_context) - .with_context(|| format!("Failed to parse: {input_file}"))?; - - Ok(parsed_result) -} - fn parse_dir_entry( parse_context: &ParseContext, language_type: SupportedLanguage, - dir_entry: DirEntry, -) -> Option { - dir_entry.path().is_dir().not().then_some(())?; - let input = mk_parse_input(parse_context.multi_file, language_type, dir_entry)?; - match parse_input(parse_context, input) { - Ok(parsed_data) => parsed_data, - Err(err) => { - error!("Failed to parse {err}"); - None - } + dir_entry: &DirEntry, +) -> Result, ParseError> { + if dir_entry.path().is_dir() { + return Ok(None); } + + let Some(parse_file_context) = + parse_file_context(parse_context.multi_file, language_type, &dir_entry) + .map_err(|err| ParseError::IOError(err.to_string()))? + else { + return Ok(None); + }; + + typeshare_core::parser::parse(parse_context, parse_file_context) } /// Use parallel builder to walk all source directories concurrently. @@ -125,35 +103,41 @@ pub fn parallel_parse( parse_context: &ParseContext, walker_builder: WalkBuilder, language_type: SupportedLanguage, -) -> BTreeMap { - let (tx, rx) = bounded::(100); +) -> anyhow::Result> { + let (tx, rx) = bounded::>(100); let collector_thread = thread::spawn(move || { let mut crate_parsed_data: BTreeMap = BTreeMap::new(); - for parsed_data in rx { + for result in rx { + let parsed_data = result?; let crate_name = parsed_data.crate_name.clone(); // Append each yielded parsed data by its respective crate. *crate_parsed_data.entry(crate_name).or_default() += parsed_data; } - crate_parsed_data + Ok(crate_parsed_data) }); walker_builder.build_parallel().run(|| { let tx = tx.clone(); - Box::new(move |result| match result { - Ok(dir_entry) => { - if let Some(parsed_data) = parse_dir_entry(parse_context, language_type, dir_entry) - { - tx.send(parsed_data).unwrap(); + Box::new(move |result| { + let result = result.context("Failed traversing").and_then(|dir_entry| { + parse_dir_entry(parse_context, language_type, &dir_entry) + .with_context(|| format!("Parsing failed: {:?}", dir_entry.path())) + }); + match result { + Ok(parsed_data) => { + if let Some(parsed_data) = parsed_data { + tx.send(Ok(parsed_data)).unwrap(); + } + WalkState::Continue + } + Err(err) => { + tx.send(Err(err)).unwrap(); + WalkState::Quit } - WalkState::Continue - } - Err(e) => { - error!("Traversing directories failed: {e}"); - WalkState::Quit } }) }); diff --git a/core/src/parser.rs b/core/src/parser.rs index 64c78422..01f02a5b 100644 --- a/core/src/parser.rs +++ b/core/src/parser.rs @@ -76,6 +76,8 @@ pub enum ParseError { SerdeContentRequired { enum_ident: String }, #[error("the serde flatten attribute is not currently supported")] SerdeFlattenNotAllowed, + #[error("IO error: {0}")] + IOError(String), } /// Error with it's related data. From 80311393a8687fe2f9d9b1c3888ae450f4eaad63 Mon Sep 17 00:00:00 2001 From: Darrell Roberts Date: Sun, 24 Nov 2024 15:30:08 -0500 Subject: [PATCH 8/9] cleanup control flow, error handling --- cli/src/main.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index bc139f25..d7702486 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -63,14 +63,14 @@ fn main() -> anyhow::Result<()> { let config_file = options.config_file.as_deref(); if options.output.generate_config { - let config = override_configuration(Config::default(), &options)?; - config::store_config(&config, config_file).context("Failed to create new config file")?; - return Ok(()); + override_configuration(Config::default(), &options) + .and_then(|config| config::store_config(&config, config_file)) + .inspect_err(|err| error!("typeshare failed to create new config file: {err}")) + } else { + generate_types(config_file, &options).inspect_err(|err| { + error!("typeshare failed to generate types: {err}"); + }) } - - generate_types(config_file, &options).inspect_err(|err| { - error!("Typeshare failed to generate types: {err}"); - }) } fn generate_types(config_file: Option<&Path>, options: &Args) -> anyhow::Result<()> { From 015aa83d7cffaf39ec3a73057d78cbe7def700be Mon Sep 17 00:00:00 2001 From: Darrell Roberts Date: Sun, 24 Nov 2024 15:48:05 -0500 Subject: [PATCH 9/9] cleanup --- cli/src/main.rs | 4 ++-- cli/src/parse.rs | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index d7702486..aabe985b 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -77,7 +77,7 @@ fn generate_types(config_file: Option<&Path>, options: &Args) -> anyhow::Result< info!("typeshare started generating types"); let config = config::load_config(config_file).context("Unable to read configuration file")?; - let config = override_configuration(config, &options)?; + let config = override_configuration(config, options)?; let directories = options.directories.as_slice(); @@ -118,7 +118,7 @@ fn generate_types(config_file: Option<&Path>, options: &Args) -> anyhow::Result< let mut parsed_data = parallel_parse( &parse_context, - walker_builder(directories, &options)?, + walker_builder(directories, options)?, language_type, )?; diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 9c22a8e1..b59fb4fa 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -89,7 +89,7 @@ fn parse_dir_entry( } let Some(parse_file_context) = - parse_file_context(parse_context.multi_file, language_type, &dir_entry) + parse_file_context(parse_context.multi_file, language_type, dir_entry) .map_err(|err| ParseError::IOError(err.to_string()))? else { return Ok(None); @@ -128,12 +128,11 @@ pub fn parallel_parse( .with_context(|| format!("Parsing failed: {:?}", dir_entry.path())) }); match result { - Ok(parsed_data) => { - if let Some(parsed_data) = parsed_data { - tx.send(Ok(parsed_data)).unwrap(); - } + Ok(Some(parsed_data)) => { + tx.send(Ok(parsed_data)).unwrap(); WalkState::Continue } + Ok(None) => WalkState::Continue, Err(err) => { tx.send(Err(err)).unwrap(); WalkState::Quit