From da9262d27514ede7e535a6d0ed3fc896cb1de44b Mon Sep 17 00:00:00 2001 From: David Edey Date: Wed, 18 Dec 2024 00:47:43 +0000 Subject: [PATCH] tweak: README update --- README.md | 23 ++++++++------ src/lib.rs | 88 +++++++++++++++++++++++++++++------------------------- 2 files changed, 61 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index c172ae3..8351667 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,12 @@ [build status](https://github.com/dhedey/preinterpret/actions?query=branch%3Amain) This crate provides the `preinterpret!` macro, which works as a simple pre-processor to the token stream. It takes inspiration from and effectively combines the [quote](https://crates.io/crates/quote), [paste](https://crates.io/crates/paste) and [syn](https://crates.io/crates/syn) crates, to empower code generation authors and declarative macro writers, bringing: @@ -26,9 +30,9 @@ preinterpret = "0.2" ## User Guide -Preinterpret works with its own very simple language, with two main syntax elements: +Preinterpret works with its own very simple language, with two pieces of syntax: -* **Commands**: `[!command_name! ...input token stream...]` take an input token stream and output a token stream. There are a number of commands which cover a toolkit of useful functions. +* **Commands**: `[!command_name! input token stream...]` take an input token stream and output a token stream. There are a number of commands which cover a toolkit of useful functions. * **Variables**: `[!set! #var_name = token stream...]` defines a variable, and `#var_name` substitutes the variable into another command or the output. Commands can be nested intuitively. The input of all commands (except `[!raw! ...]`) are first interpreted before the command itself executes. @@ -80,10 +84,10 @@ To properly understand how preinterpret works, we need to take a very brief deto In Rust, the input and output to a macro is a [`TokenStream`](https://doc.rust-lang.org/proc_macro/enum.TokenStream.html). A `TokenStream` is simply an iterator of [`TokenTree`](https://doc.rust-lang.org/proc_macro/enum.TokenTree.html)s at a particular nesting level. A token tree is one of four things: -* A [`Group`](https://doc.rust-lang.org/proc_macro/struct.Group.html) - typically `(..)`, `[..]` or `{..}`. It consists of a matched pair of brackets "[`Delimiter`s]` and an internal token stream. There is technically a [confusing](https://github.com/rust-lang/rust/issues/67062) fourth type of group, with transparent brackets; used to encapsulate declarative macro substitutions. This is purposefully ignored/flattened in pre-interpret. -* An [`Ident`](https://doc.rust-lang.org/proc_macro/struct.Ident.html) - An unquoted string, used to identitied something named. Think `MyStruct`, or `do_work` or `my_module`. +* A [`Group`](https://doc.rust-lang.org/proc_macro/struct.Group.html) - typically `(..)`, `[..]` or `{..}`. It consists of a matched pair of [`Delimiter`s](https://doc.rust-lang.org/proc_macro/enum.Delimiter.html) and an internal token stream. There is also a transparent delimiter, used to group the result of token stream substitutions (although it is [confusingly](https://github.com/rust-lang/rust/issues/67062) a little broken in rustc). +* An [`Ident`](https://doc.rust-lang.org/proc_macro/struct.Ident.html) - An unquoted string, used to identify something named. Think `MyStruct`, or `do_work` or `my_module`. Note that keywords such as `struct` or `async` and the values `true` and `false` are classified as idents at this abstraction level. 
* A [`Punct`](https://doc.rust-lang.org/proc_macro/struct.Punct.html) - A single piece of punctuation. Think `!` or `:`. -* A [`Literal`](https://doc.rust-lang.org/proc_macro/struct.Literal.html) - This includes string literals `"my string"`, char literals `'x'` and numeric literals `23` / `51u64`. Note that `true`/`false` are technically idents. +* A [`Literal`](https://doc.rust-lang.org/proc_macro/struct.Literal.html) - This includes string literals `"my string"`, char literals `'x'` and numeric literals `23` / `51u64`. When you return output from a macro, you are outputting back a token stream, which the compiler will interpret. @@ -150,7 +154,6 @@ The following commands output strings, without dropping non-alphanumeric charact * `[!lower! FooBar]` outputs `"foobar"` * `[!capitalize! fooBar]` outputs `"FooBar"` * `[!decapitalize! FooBar]` outputs `"fooBar"` -* `[!insert_spaces! fooBar]` outputs `"foo Bar"` The following commands output strings, whilst also dropping non-alphanumeric characters: @@ -159,6 +162,8 @@ The following commands output strings, whilst also dropping non-alphanumeric cha * `[!camel! foo_bar]` and `[!upper_camel! foo_bar]` are equivalent and output `"FooBar"`. This filters out non-alphanumeric characters. * `[!lower_camel! foo_bar]` outputs `"fooBar"` * `[!kebab! fooBar]` outputs `"foo-bar"` +* `[!title! fooBar]` outputs `"Foo Bar"` +* `[!insert_spaces! fooBar]` outputs `"foo Bar"` > [!NOTE] > @@ -173,7 +178,7 @@ The following commands output strings, whilst also dropping non-alphanumeric cha ### Readability -The preinterpret syntax is intended to be immediately intuitive even for people not familiar with the crate. And it enables developers to make more readable macros: +The preinterpret syntax is intended to be immediately intuitive even for people not familiar with the crate. It enables developers to make more readable macros: * Developers can name clear concepts in their macro output, and re-use them by name, decreasing code duplication. * Developers can use variables to subdivide logic inside the macro, without having to resort to creating lots of small, functional helper macros. diff --git a/src/lib.rs b/src/lib.rs index bf2622c..5a11717 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,8 +7,12 @@ //! [build status](https://github.com/dhedey/preinterpret/actions?query=branch%3Amain) //! //! //! //! This crate provides the `preinterpret!` macro, which works as a simple pre-processor to the token stream. It takes inspiration from and effectively combines the [quote](https://crates.io/crates/quote), [paste](https://crates.io/crates/paste) and [syn](https://crates.io/crates/syn) crates, to empower code generation authors and declarative macro writers, bringing: @@ -26,9 +30,9 @@ //! //! ## User Guide //! -//! Preinterpret works with its own very simple language, with two main syntax elements: +//! Preinterpret works with its own very simple language, with two pieces of syntax: //! -//! * **Commands**: `[!command_name! ...input token stream...]` take an input token stream and output a token stream. There are a number of commands which cover a toolkit of useful functions. +//! * **Commands**: `[!command_name! input token stream...]` take an input token stream and output a token stream. There are a number of commands which cover a toolkit of useful functions. //! * **Variables**: `[!set! #var_name = token stream...]` defines a variable, and `#var_name` substitutes the variable into another command or the output. //! //! Commands can be nested intuitively. 
The input of all commands (except `[!raw! ...]`) are first interpreted before the command itself executes. @@ -80,10 +84,10 @@ //! //! In Rust, the input and output to a macro is a [`TokenStream`](https://doc.rust-lang.org/proc_macro/enum.TokenStream.html). A `TokenStream` is simply an iterator of [`TokenTree`](https://doc.rust-lang.org/proc_macro/enum.TokenTree.html)s at a particular nesting level. A token tree is one of four things: //! -//! * A [`Group`](https://doc.rust-lang.org/proc_macro/struct.Group.html) - typically `(..)`, `[..]` or `{..}`. It consists of a matched pair of brackets "[`Delimiter`s]` and an internal token stream. There is technically a [confusing](https://github.com/rust-lang/rust/issues/67062) fourth type of group, with transparent brackets; used to encapsulate declarative macro substitutions. This is purposefully ignored/flattened in pre-interpret. -//! * An [`Ident`](https://doc.rust-lang.org/proc_macro/struct.Ident.html) - An unquoted string, used to identitied something named. Think `MyStruct`, or `do_work` or `my_module`. +//! * A [`Group`](https://doc.rust-lang.org/proc_macro/struct.Group.html) - typically `(..)`, `[..]` or `{..}`. It consists of a matched pair of [`Delimiter`s](https://doc.rust-lang.org/proc_macro/enum.Delimiter.html) and an internal token stream. There is also a transparent delimiter, used to group the result of token stream substitutions (although it is [confusingly](https://github.com/rust-lang/rust/issues/67062) a little broken in rustc). +//! * An [`Ident`](https://doc.rust-lang.org/proc_macro/struct.Ident.html) - An unquoted string, used to identify something named. Think `MyStruct`, or `do_work` or `my_module`. Note that keywords such as `struct` or `async` and the values `true` and `false` are classified as idents at this abstraction level. //! * A [`Punct`](https://doc.rust-lang.org/proc_macro/struct.Punct.html) - A single piece of punctuation. Think `!` or `:`. -//! * A [`Literal`](https://doc.rust-lang.org/proc_macro/struct.Literal.html) - This includes string literals `"my string"`, char literals `'x'` and numeric literals `23` / `51u64`. Note that `true`/`false` are technically idents. +//! * A [`Literal`](https://doc.rust-lang.org/proc_macro/struct.Literal.html) - This includes string literals `"my string"`, char literals `'x'` and numeric literals `23` / `51u64`. //! //! When you return output from a macro, you are outputting back a token stream, which the compiler will interpret. //! @@ -150,7 +154,6 @@ //! * `[!lower! FooBar]` outputs `"foobar"` //! * `[!capitalize! fooBar]` outputs `"FooBar"` //! * `[!decapitalize! FooBar]` outputs `"fooBar"` -//! * `[!insert_spaces! fooBar]` outputs `"foo Bar"` //! //! The following commands output strings, whilst also dropping non-alphanumeric characters: //! @@ -159,6 +162,8 @@ //! * `[!camel! foo_bar]` and `[!upper_camel! foo_bar]` are equivalent and output `"FooBar"`. This filters out non-alphanumeric characters. //! * `[!lower_camel! foo_bar]` outputs `"fooBar"` //! * `[!kebab! fooBar]` outputs `"foo-bar"` +//! * `[!title! fooBar]` outputs `"Foo Bar"` +//! * `[!insert_spaces! fooBar]` outputs `"foo Bar"` //! //! > [!NOTE] //! > @@ -173,7 +178,7 @@ //! //! ### Readability //! -//! The preinterpret syntax is intended to be immediately intuitive even for people not familiar with the crate. And it enables developers to make more readable macros: +//! The preinterpret syntax is intended to be immediately intuitive even for people not familiar with the crate. 
It enables developers to make more readable macros: //! //! * Developers can name clear concepts in their macro output, and re-use them by name, decreasing code duplication. //! * Developers can use variables to subdivide logic inside the macro, without having to resort to creating lots of small, functional helper macros. @@ -358,17 +363,17 @@ //! //! In more detail: //! -//! * `[!parse! () = ()]` is a more general `[!set!]` which acts like a `let = else { panic!() }`. It takes a `()`-wrapped parse destructuring on the left and a token stream as input on the right. Any `#x` in the parse definition acts as a binding rather than as a substitution. Parse operations look like `[<] Iterations are _not_ supported, but `[!optional] This will handled commas intelligently, and accept intelligent parse-helpers like: -//! * `[ { hello: #a, world?: #b }]` - which can parse `#x` in any order, cope with trailing commas, and permit fields on the RHS not on the LHS -//! * `[ { hello: #a, world?: #b }]` - which can parse fields in any order, cope with trailing commas, and permit fields on the RHS not on the LHS -//! * `[ { #ident, #impl_generics, ... }]` - which calls syn's parse item on the token -//! * `[ ...]`, `[ ...]` and the like to parse idents / literals etc directly from the token stream (rather than token streams). -//! * More tailored examples, such as `[ { impl: #x, type: #y, where: #z }]` which uses syn to parse the generics, and then uses subfields on the result. -//! * Possibly `[ #x]` to parse a group with no brackets, to avoid parser ambguity in some cases -//! * Any complex logic (loops, matching), is delayed lazily until execution logic time - making it much more intuitive. -//! * `[!for! () in () { ... }]` which operates like the rust `for` loop, and uses a parse destructuring on the left, and has support for optional commas between values -//! * `[!match! () => { () => { ... }, () => { ... }, (#fallback) => { ... } }]` which operates like a rust `match` expression, and can replace the function of the branches of declarative macro inputs. -//! * `[!macro_rules! name!() = { ... }]` which can define a declarative macro, but just parses its inputs as a token stream, and uses preinterpret for its heavy lifting. +//! * `[!parse! (DESTRUCTURING) = (INPUT)]` is a more general `[!set!]` which acts like a `let ... = ... else { panic!() }`. It takes a `()`-wrapped parse destructuring on the left and a token stream as input on the right. Any `#x` in the parse definition acts as a binding rather than as a substitution. Parsing will handle commas intelligently, and accept parse operations which do the heavy lifting for the user. Parse operations look like `[!OPERATION! DESTRUCTURING]` with the operation name in `UPPER_SNAKE_CASE`. Some examples might be: +//! * `[!FIELDS! { hello: #a, world?: #b }]` - which can parse fields in any order, cope with trailing commas, and forbid fields in the source stream which aren't in the destructuring. +//! * `[!SUBFIELDS! { hello: #a, world?: #b }]` - which can parse fields in any order, cope with trailing commas, and allow fields in the source stream which aren't in the destructuring. +//! * `[!ITEM! { #ident, #impl_generics, ... }]` - which calls syn's item parsing on the token stream +//! * `[!IDENT! #x]`, `[!LITERAL! #x]`, `[!TYPE! { tokens: #x, path: #y }]` and the like to parse idents / literals etc directly from the token stream (rather than capturing them as raw token streams). 
These will either take just a variable to capture the full token stream, or support an optional-argument style binding, where the developer can request certain sub-patterns or mapped token streams. +//! * More tailored examples, such as `[!GENERICS! { impl: #x, type: #y, where: #z }]` which uses syn to parse the generics, and then uses subfields on the result. +//! * Possibly `[!GROUPED! #x]` to parse a group with no brackets, to avoid parser ambiguity in some cases +//! * `[!OPTIONAL! ...]` might be supported, but other complex logic (loops, matching) is delayed lazily until interpretation time - which feels more intuitive. +//! * `[!for! (DESTRUCTURING) in (INPUT) { ... }]` which operates like the Rust `for` loop, uses a parse destructuring on the left, and supports optional commas between values +//! * `[!match! (INPUT) => { (DESTRUCTURING_1) => { ... }, (DESTRUCTURING_2) => { ... }, (#fallback) => { ... } }]` which operates like a Rust `match` expression, and can replace the role played by the branches of a declarative macro. +//! * `[!macro_rules! name!(DESTRUCTURING) = { ... }]` which can define a declarative macro, but just parses its inputs as a token stream, and uses preinterpret for its heavy lifting. //! //! And then we can end up with syntax like the following: //! @@ -385,39 +390,32 @@ //! }] //! }] //! } -//! -//! // Or can parse input - although loops are kept as a token stream and delegated -//! // to explicit lazy iteration, allowing a more procedural code style, -//! // and clearer compiler errors. +//! my_macro!( +//! MyTrait for MyType, +//! MyTrait for MyType2, +//! ); +//! +//! // It can also parse its input in the declaration. +//! // Repeated sections have to be captured as a stream, and delegated to explicit lazy [!for! ...] binding. +//! // This enforces a more procedural code style, and gives clearer compiler errors. //! preinterpret::preinterpret! { //! [!macro_rules! multi_impl_super_duper!( //! #type_list, -//! ImplOptions [!fields! { -//! hello: #hello, -//! world?: #world (default "Default") +//! ImplOptions [!FIELDS! { +//! greeting: #hello, +//! location: #world, +//! punctuation?: #punct = ("!") // Default +//! }] //! ) = { //! [!for! ( -//! #type [!generics! { impl: #impl_generics, type: #type_generics }] +//! #type [!GENERICS! { impl: #impl_generics, type: #type_generics }] //! ) in (#type_list) { //! impl<#impl_generics> SuperDuper for #type #type_generics { -//! type Hello = #hello; -//! type World = #world; +//! const Hello: &'static str = [!string! #hello " " #world #punct]; //! } //! }] //! }] //! } -//! -//! preinterpret::preinterpret! { -//! [!set! #input = -//! MyTrait for MyType, -//! MyTrait for MyType2, -//! ] -//! -//! [!for! (#trait for #type) in (#input) { -//! impl #trait for #type -//! }] -//! } //! ``` //! //! ### Possible extension: Integer commands @@ -508,6 +506,14 @@ //! The heavy `syn` library is (in basic preinterpret) only needed for literal parsing, and error conversion into compile errors. //! //! We could add a parsing feature to speed up compile times a lot for stacks which don't need the parsing functionality. +//! +//! ## License +//! +//! Licensed under either of the [Apache License, Version 2.0](LICENSE-APACHE) +//! or the [MIT license](LICENSE-MIT) at your option. +//! +//! Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in this crate by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. +//! 
mod command; mod commands; mod internal_prelude;