From eaf676ce0271c805ac9d6de8ef158b4c1680919a Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Fri, 6 Dec 2024 17:01:36 +0100 Subject: [PATCH] Change function representation to be multi-ary (#2112) In the new AST, the representation function application has been changed to be multi-ary by default (as is primop application as well). It just matches the syntax better and it avoids having to desugar n-ary application to nested unary-application, saving space and time. The only cost is that unary application are slighty larger in memory (basically one word for the size of the slice). This commit applies the same treatment to function abstraction since the same arguments apply. Passing by, we also specialize macros like `fun` and `app` for one argument to avoid vec allocation. --- ...tderr_naked_contract_parametrized.ncl.snap | 4 +- ...tderr_non_serializable_print_path.ncl.snap | 4 +- core/src/bytecode/ast/builder.rs | 69 ++++++++++--- core/src/bytecode/ast/compat.rs | 96 ++++++++++++++----- core/src/bytecode/ast/mod.rs | 35 ++++--- core/src/parser/grammar.lalrpop | 13 +-- core/src/parser/utils.rs | 2 +- 7 files changed, 153 insertions(+), 70 deletions(-) diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_naked_contract_parametrized.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_naked_contract_parametrized.ncl.snap index d212c9e1f..465012187 100644 --- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_naked_contract_parametrized.ncl.snap +++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_naked_contract_parametrized.ncl.snap @@ -6,7 +6,7 @@ warning: plain functions as contracts are deprecated ┌─ [INPUTS_PATH]/warnings/naked_contract_parametrized.ncl:6:3 │ 3 │ let C = fun arg label value => value in - │ ---------------------------- this function + │ -------------------- this function · 6 │ [1, 2] | Array (C "hi") │ ^^^^^^ applied to this term @@ -17,7 +17,7 @@ warning: plain functions as contracts are deprecated ┌─ [INPUTS_PATH]/warnings/naked_contract_parametrized.ncl:5:3 │ 3 │ let C = fun arg label value => value in - │ ---------------------------- this function + │ -------------------- this function 4 │ [ 5 │ 1 | C "hi", │ ^ applied to this term diff --git a/cli/tests/snapshot/snapshots/snapshot__export_stderr_non_serializable_print_path.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__export_stderr_non_serializable_print_path.ncl.snap index 0957b1d53..6a22382bc 100644 --- a/cli/tests/snapshot/snapshots/snapshot__export_stderr_non_serializable_print_path.ncl.snap +++ b/cli/tests/snapshot/snapshots/snapshot__export_stderr_non_serializable_print_path.ncl.snap @@ -14,10 +14,10 @@ warning: plain functions as contracts are deprecated = wrap this function using one of the constructors in `std.contract` instead, like `std.contract.from_validator` or `std.contract.custom` error: non serializable term - ┌─ [INPUTS_PATH]/errors/non_serializable_print_path.ncl:8:30 + ┌─ [INPUTS_PATH]/errors/non_serializable_print_path.ncl:8:50 │ 8 │ let SomeParametricContract = fun parameter label value => value - │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + │ ^^^^^^^^^^^^^^ │ = When exporting field `foo.bar.baz[3].inner.qux_miss_param` = Nickel only supports serializing to and from strings, booleans, numbers, enum tags, `null` (depending on the format), as well as records and arrays of serializable values. diff --git a/core/src/bytecode/ast/builder.rs b/core/src/bytecode/ast/builder.rs index 52b7db846..abcb9c07f 100644 --- a/core/src/bytecode/ast/builder.rs +++ b/core/src/bytecode/ast/builder.rs @@ -312,9 +312,21 @@ impl<'ast> Record<'ast> { /// Multi-ary application for types implementing `Into`. #[macro_export] macro_rules! app { - ( $alloc:expr, $f:expr $(, $args:expr )+ $(,)?) => { + // We avoid a vec allocation for unary applications, which are relatively common. + ( $alloc:expr, $f:expr , $arg:expr $(,)?) => { + $crate::bytecode::ast::Ast::from( + $alloc.app( + $crate::bytecode::ast::Ast::from($f), + std::iter::once($crate::bytecode::ast::Ast::from($arg)) + ) + ) + }; + ( $alloc:expr, $f:expr, $arg1:expr $(, $args:expr )+ $(,)?) => { { - let args = vec![$( $crate::bytecode::ast::Ast::from($args) ),+]; + let args = vec![ + $crate::bytecode::ast::Ast::from($arg1) + $(, $crate::bytecode::ast::Ast::from($args) )+ + ]; $crate::bytecode::ast::Ast::from($alloc.app($crate::bytecode::ast::Ast::from($f), args)) } @@ -324,9 +336,21 @@ macro_rules! app { #[macro_export] /// Multi-ary application for types implementing `Into`. macro_rules! primop_app { - ( $alloc: expr, $op:expr $(, $args:expr )+ $(,)?) => { + // We avoid a vec allocation for unary primop applications, which are relatively common. + ( $alloc:expr, $op:expr , $arg:expr $(,)?) => { + $crate::bytecode::ast::Ast::from( + $alloc.prim_op( + $op, + std::iter::once($crate::bytecode::ast::Ast::from($arg)) + ) + ) + }; + ( $alloc:expr, $op:expr, $arg1:expr $(, $args:expr )+ $(,)?) => { { - let args = vec![$( $crate::bytecode::ast::Ast::from($args) ),+]; + let args = vec![ + $crate::bytecode::ast::Ast::from($arg1) + $(, $crate::bytecode::ast::Ast::from($args) )+ + ]; $crate::bytecode::ast::Ast::from($alloc.prim_op($op, args)) } }; @@ -336,20 +360,41 @@ macro_rules! primop_app { /// Multi argument function for types implementing `Into` (for the identifiers), and /// `Into` for the body. macro_rules! fun { - ( $alloc: expr, $id:expr, $body:expr $(,)?) => { + // Auxiliary form for `fun!` where the arguments are clearly delimited syntactically by + // brackets. The issue with the general interface of `fun!` is that it must extract its last + // argument, the body of the function, with an arbitrary number of macro arguments before it. + // This isn't easy to do because the macro parser can't backtrack. The bracketed form uses + // recursion and a separate syntax for arguments to make it work. + // + // For example, calling `fun!(alloc, args=[], arg1, arg2, body)` will expand to `fun!(alloc, + // args=[arg1, arg2], body)`, which is the base case that can be turned into a function. The + // bracket part is used to accumulate the arguments before the body. + ($alloc:expr, args=[ $( $args:expr, )+ ], $body:expr) => { + { + let args = vec![ + $($crate::bytecode::ast::pattern::Pattern::any($crate::identifier::LocIdent::from($args)), )+ + ]; + + $crate::bytecode::ast::Ast::from( + $alloc.fun(args, $crate::bytecode::ast::Ast::from($body)) + ) + } + }; + // Recursive case for the bracketed form. + ($alloc:expr, args=[ $( $args:expr, )* ], $next_arg:expr $(, $rest:expr )+) => { + fun!($alloc, args=[ $( $args, )* $next_arg, ] $(, $rest )+) + }; + // We avoid a vec allocation for unary functions, which are relatively common. + ( $alloc:expr, $arg:expr, $body:expr $(,)?) => { $crate::bytecode::ast::Ast::from( $alloc.fun( - $crate::bytecode::ast::pattern::Pattern::any($crate::identifier::LocIdent::from($id)), + std::iter::once($crate::bytecode::ast::pattern::Pattern::any($crate::identifier::LocIdent::from($arg))), $crate::bytecode::ast::Ast::from($body) ) ) }; - ( $alloc:expr, $id1:expr, $id2:expr $(, $rest:expr )+ $(,)?) => { - fun!( - $alloc, - $id1, - fun!($alloc, $id2, $( $rest ),+) - ) + ( $alloc:expr, $arg1:expr, $arg2:expr $(, $rest:expr )+ $(,)?) => { + fun!($alloc, args=[ $arg1, $arg2, ] $(, $rest )+) }; } diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs index 8d0b05c3f..03e461d01 100644 --- a/core/src/bytecode/ast/compat.rs +++ b/core/src/bytecode/ast/compat.rs @@ -277,8 +277,33 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> { } })) } - Term::Fun(id, body) => alloc.fun(Pattern::any(*id), body.to_ast(alloc)), - Term::FunPattern(pat, body) => alloc.fun(pat.to_ast(alloc), body.to_ast(alloc)), + t @ (Term::Fun(_, body) | Term::FunPattern(_, body)) => { + let fst_arg = match t { + Term::Fun(id, _) => Pattern::any(*id), + Term::FunPattern(pat, _) => pat.to_ast(alloc), + // unreachable!(): we are in a match arm that matches either Fun or FunPattern + _ => unreachable!(), + }; + + let mut args = vec![fst_arg]; + let mut maybe_next_fun = body; + + let final_body = loop { + match maybe_next_fun.as_ref() { + Term::Fun(next_id, next_body) => { + args.push(Pattern::any(*next_id)); + maybe_next_fun = next_body; + } + Term::FunPattern(next_pat, next_body) => { + args.push(next_pat.to_ast(alloc)); + maybe_next_fun = next_body; + } + _ => break maybe_next_fun, + } + }; + + alloc.fun(args, final_body.to_ast(alloc)) + } Term::Let(bindings, body, attrs) => alloc.let_block( bindings.iter().map(|(id, value)| LetBinding { pattern: Pattern::any(*id), @@ -327,7 +352,11 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> { maybe_next_app = next_fun.as_ref(); } - alloc.app(fun.to_ast(alloc), args.into_iter().rev()) + // Application is left-associative: `f x y` is parsed as `(f x) y`. So we see the + // outer argument `y` first, and `args` will be `[y, x]`. We need to reverse it to + // match the expected order of a flat application node. + args.reverse(); + alloc.app(fun.to_ast(alloc), args) } Term::Var(id) => Node::Var(*id), Term::Enum(id) => alloc.enum_variant(*id, None), @@ -1207,10 +1236,31 @@ impl<'ast> FromAst> for term::Term { Term::StrChunks(chunks) } - Node::Fun { arg, body } => match arg.data { - PatternData::Any(id) => Term::Fun(id, body.to_mainline()), - _ => Term::FunPattern((*arg).to_mainline(), body.to_mainline()), - }, + Node::Fun { args, body } => { + let body_pos = body.pos; + + // We transform a n-ary function representation to a chain of nested unary + // functions, the latter being the representation used in the mainline AST. + args.iter() + .rev() + .fold(term::RichTerm::from_ast(body), |acc, arg| { + let term = match arg.data { + PatternData::Any(id) => Term::Fun(id, acc), + _ => term::Term::FunPattern((*arg).to_mainline(), acc), + }; + + // [^nary-constructors-unrolling]: this case is a bit annoying: we need to + // extract the position of the intermediate created functions to satisfy + // the old AST structure, but this information isn't available directly. + // + // What we do here is to fuse the span of the term being built and the one + // of the current argument, which should be a reasonable approximation (if + // not exactly the same thing). + term::RichTerm::new(term, arg.pos.fuse(body_pos)) + }) + .term + .into_owned() + } Node::Let { bindings, body, @@ -1274,22 +1324,22 @@ impl<'ast> FromAst> for term::Term { Term::LetPattern(bindings, body, attrs) } } - Node::App { head: fun, args } => { - let fun_pos = fun.pos; - - let rterm = args.iter().fold(fun.to_mainline(), |result, arg| { - // This case is a bit annoying: we need to extract the position of the sub - // application to satisfy the old AST structure, but this information isn't - // available directly. - // - // What we do here is to fuse the span of the term being built and the one of - // the current argument, which should be a reasonable approximation (if not - // exactly the same thing). - let arg_pos = arg.pos; - term::RichTerm::new(Term::App(result, arg.to_mainline()), fun_pos.fuse(arg_pos)) - }); - - rterm.term.into_owned() + Node::App { head, args } => { + let head_pos = head.pos; + + // We transform a n-ary application representation to a chain of nested unary + // applications, the latter being the representation used in the mainline AST. + args.iter() + .fold(head.to_mainline(), |result, arg| { + // see [^nary-constructors-unrolling] + let arg_pos = arg.pos; + term::RichTerm::new( + Term::App(result, arg.to_mainline()), + head_pos.fuse(arg_pos), + ) + }) + .term + .into_owned() } Node::Var(loc_ident) => Term::Var(*loc_ident), Node::EnumVariant { tag, arg } => { diff --git a/core/src/bytecode/ast/mod.rs b/core/src/bytecode/ast/mod.rs index f05d225a4..092cb3fde 100644 --- a/core/src/bytecode/ast/mod.rs +++ b/core/src/bytecode/ast/mod.rs @@ -12,7 +12,7 @@ use std::{ ffi::{OsStr, OsString}, fmt::Debug, - rc, + iter, rc, }; use pattern::Pattern; @@ -79,7 +79,7 @@ pub enum Node<'ast> { /// A function. Fun { - arg: &'ast Pattern<'ast>, + args: &'ast [Pattern<'ast>], body: &'ast Ast<'ast>, }, @@ -385,8 +385,7 @@ impl AstAlloc { /// Allocates an array with exactly one element in the arena. pub fn alloc_singleton(&self, value: T) -> &[T] { - self.generic_arena - .alloc_slice_fill_iter(std::iter::once(value)) + self.generic_arena.alloc_slice_fill_iter(iter::once(value)) } /// Allocates a string in the arena. @@ -414,24 +413,22 @@ impl AstAlloc { Node::StringChunks(self.generic_arena.alloc_slice_fill_iter(chunks)) } - pub fn fun<'ast>(&'ast self, pat: Pattern<'ast>, body: Ast<'ast>) -> Node<'ast> { - let arg = self.generic_arena.alloc(pat); - let body = self.generic_arena.alloc(body); - Node::Fun { arg, body } - } - - pub fn nary_fun<'ast, I>(&'ast self, args: I, body: Ast<'ast>) -> Node<'ast> + pub fn fun<'ast, I>(&'ast self, args: I, body: Ast<'ast>) -> Node<'ast> where I: IntoIterator>, - I::IntoIter: DoubleEndedIterator, + I::IntoIter: ExactSizeIterator, { - args.into_iter() - .rev() - .fold(body, |body, arg| Ast { - node: self.fun(arg, body), - pos: TermPos::None, - }) - .node + Node::Fun { + args: self.generic_arena.alloc_slice_fill_iter(args), + body: self.generic_arena.alloc(body), + } + } + + pub fn unary_fun<'ast>(&'ast self, arg: Pattern<'ast>, body: Ast<'ast>) -> Node<'ast> { + Node::Fun { + args: self.generic_arena.alloc_slice_fill_iter(iter::once(arg)), + body: self.generic_arena.alloc(body), + } } pub fn let_block<'ast, I>(&'ast self, bindings: I, body: Ast<'ast>, rec: bool) -> Node<'ast> diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 4c72b3e6b..22c45db24 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -288,17 +288,8 @@ UniTerm: UniTerm<'ast> = { body, )?)) }, - "fun" "=>" => { - let pos = mk_pos(src_id, l, r); - - let expr = pats - .into_iter() - .rev() - .fold(body, |built, next_arg| - alloc.fun(next_arg, built).spanned(pos) - ); - - UniTerm::from(expr) + "fun" "=>" => { + UniTerm::from(alloc.fun(pats, body)) }, "if" "then" "else" => UniTerm::from(alloc.if_then_else(cond, e1, e2)), diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index 946f8029a..4d4f6be48 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -185,7 +185,7 @@ impl EtaExpand for InfixOp { let fun_args: Vec<_> = vars.iter().map(|arg| pattern::Pattern::any(*arg)).collect(); let args: Vec<_> = vars.into_iter().map(builder::var).collect(); - alloc.nary_fun(fun_args, alloc.prim_op(op, args).spanned(pos)) + alloc.fun(fun_args, alloc.prim_op(op, args).spanned(pos)) } } }