From 49fd0e9f0cf64527e8d96372bc6d091a8e9b5bf3 Mon Sep 17 00:00:00 2001 From: James Fennell Date: Mon, 12 Jun 2023 09:50:42 -0400 Subject: [PATCH] Reimplement dynamic memory allocation (#3) This is to make the dynamic memory allocation system work with serializable VMs. --- .vscode/settings.json | 2 + crates/texlang-core/src/command/mod.rs | 1 - crates/texlang-core/src/variable.rs | 15 -- crates/texlang-stdlib/src/alloc.rs | 270 +++++++------------------ crates/texlang-stdlib/src/lib.rs | 4 +- crates/texlang-stdlib/src/the.rs | 2 +- performance/benches/digits_of_pi.tex | 1 - playground/src/lib.rs | 4 +- playground/ui/src/Content.tsx | 34 ++-- 9 files changed, 95 insertions(+), 238 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 9dcbf9a8..8d6bf580 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -25,6 +25,8 @@ "muglue", "nevec", "newcommand", + "newint", + "newintarray", "noexpand", "PDFTEX", "randtex", diff --git a/crates/texlang-core/src/command/mod.rs b/crates/texlang-core/src/command/mod.rs index b2b8763d..426cdf30 100644 --- a/crates/texlang-core/src/command/mod.rs +++ b/crates/texlang-core/src/command/mod.rs @@ -326,7 +326,6 @@ impl PrimitiveKey { variable::IndexResolver::Dynamic(f) => Some( PrimitiveKey::VariableArrayDynamic(variable_key, *f as usize), ), - variable::IndexResolver::DynamicVirtual(_) => None, }, } } diff --git a/crates/texlang-core/src/variable.rs b/crates/texlang-core/src/variable.rs index f0add0f3..c45ad6f5 100644 --- a/crates/texlang-core/src/variable.rs +++ b/crates/texlang-core/src/variable.rs @@ -326,20 +326,6 @@ pub enum IndexResolver { /// For example, in `\count 4` the index of `4` is determined by parsing a number /// from the input token stream. Dynamic(fn(token::Token, &mut vm::ExpandedStream) -> Result>), - /// A dynamic index, but determined using virtual method dispatch. - /// - /// This method is more flexible than the Dynamic variant, but less performant. - DynamicVirtual(Box>), -} - -/// Trait used for dynamically determining an index using virtual method dispatch. -pub trait DynamicIndexResolver { - /// Determine the index of a variable. - fn resolve( - &self, - token: token::Token, - input: &mut vm::ExpandedStream, - ) -> Result>; } impl IndexResolver { @@ -352,7 +338,6 @@ impl IndexResolver { match self { IndexResolver::Static(addr) => Ok(*addr), IndexResolver::Dynamic(f) => f(token, input), - IndexResolver::DynamicVirtual(v) => v.resolve(token, input), } } } diff --git a/crates/texlang-stdlib/src/alloc.rs b/crates/texlang-stdlib/src/alloc.rs index d517a6aa..c4c9c4dc 100644 --- a/crates/texlang-stdlib/src/alloc.rs +++ b/crates/texlang-stdlib/src/alloc.rs @@ -1,62 +1,54 @@ //! Dynamic allocation of variables and arrays //! //! This module contains implementations of brand new Texcraft commands -//! `\newint` and `\newarray` which perform dynamic memory allocation. +//! `\newInt` and `\newIntArray` which perform dynamic memory allocation. -use std::collections::{BTreeMap, HashMap}; -use std::ops::Bound::Included; +use std::collections::HashMap; use texcraft_stdext::collections::groupingmap; -use texcraft_stdext::collections::nevec::Nevec; use texlang_core::parse::Command; use texlang_core::traits::*; use texlang_core::*; -pub const NEWINT_DOC: &str = r"Allocate a new integer variable +pub const NEWINT_DOC: &str = r"Allocate a new integer -Usage: `\newint ` +Usage: `\newInt ` -The `\newint` command allocates a new integer variable +The `\newInt` command allocates a new integer that is referenced using the provided control sequence. Simple example: ``` -\newint \myvariable +\newInt \myvariable \myvariable = 4 \advance \myvariable by 5 \asserteq{\the \myvariable}{9} ``` -You can think of `\newint` as being a replacement for +You can think of `\newInt` as being a replacement for Plain TeX's `\newcount` macro (TeXBook p346). -The benefit of `\newint` is that different callers of the command +The benefit of `\newInt` is that different callers of the command do not share the underlying memory; the allocated memory is unique to the caller. -Under the hood `\newint` works by allocating new memory on -the TeX engine's heap. This memory is allocated in a large Rust vector, -so for n invocations of `\newint` there are log(n) heap allocations. - -The variable is deallocated at the end of the current group. -If there is no current group (i.e., this is the global scope) -then the variable is never deallocated. +Under the hood `\newInt` works by allocating new memory on the TeX engine's heap. "; -pub const NEWARRAY_DOC: &str = r"Allocate a new integer array +pub const NEWINTARRAY_DOC: &str = r"Allocate a new integer array -Usage: `\newarray ` +Usage: `\newIntArray ` -The `\newarray` command allocates a new array of integers that +The `\newIntArray` command allocates a new array of integers that is referenced using the provided control sequence. -This new array works pretty much like `\count`, but you can create -as many arrays as you like and don't need to worry about other -TeX code reusing the memory. -Also, unlike `\count`, the size of the array is not fixed by +This new control sequence works pretty much like `\count`, but you can create + as many arrays as you like and don't need to worry about other + TeX code reusing the memory. +Unlike `\count`, the size of the array is not fixed by the engine. -The only constaint on the size is that you have enough RAM -on the machine to store it. +The only constraint on the size is that you have enough RAM + on the machine to store it. Simple example: ``` -\newarray \myarray 3 +\newIntArray \myarray 3 \myarray 0 = 4 \asserteq{\the \myarray 0}{4} \myarray 1 = 5 @@ -65,114 +57,18 @@ Simple example: \asserteq{\the \myarray 2}{6} ``` -The array is deallocated at the end of the current group. -If there is no current group (i.e., this is the global scope) -then the variable is never deallocated. +The new control sequence can *not* be aliased using \let. "; /// Component required for the alloc commands. #[derive(Default)] pub struct Component { - allocations: Nevec, - - // Map from addr to (group allocations index, allocations.singletons index) - singleton_addr_map: HashMap, - next_singleton_addr: variable::Index, - - // Map from addr to (group allocations index, gallocations.singletons index) - // - // Note that addr points to the first element of the array; when - // we add a new array, addr is incremented by the size of the array. - // addr + i then references the ith element of the array, for - // appropriate i. We use a BTreeMap to make it fast to obtain the - // indices from addr + i for any i. - array_addr_map: BTreeMap, - next_array_addr: variable::Index, -} - -/// Contains all the allocations for a single group. -#[derive(Default)] -struct GroupAllocations { - // TODO: rather than using a Vec of Vecs it may be more efficient - // to use a single global Vec and a single non-global Vec. - singletons: Vec, - arrays: Vec, -} - -struct Singleton { - addr: variable::Index, - value: i32, -} - -struct Array { - addr: variable::Index, - value: Vec, -} - -impl Component { - fn alloc_int(&mut self) -> variable::Index { - let i = ( - self.allocations.len() - 1, - self.allocations.last().singletons.len(), - ); - self.allocations.last_mut().singletons.push(Singleton { - addr: self.next_singleton_addr, - value: 0, - }); - self.singleton_addr_map.insert(self.next_singleton_addr, i); - self.next_singleton_addr.0 += 1; - variable::Index(self.next_singleton_addr.0 - 1) - } - - fn alloc_array(&mut self, len: usize) -> variable::Index { - let i = ( - self.allocations.len() - 1, - self.allocations.last().arrays.len(), - ); - self.allocations.last_mut().arrays.push(Array { - addr: self.next_array_addr, - value: vec![0; len], - }); - self.array_addr_map.insert(self.next_array_addr, i); - self.next_array_addr.0 += len; - variable::Index(self.next_array_addr.0 - len) - } - - fn find_array(&self, elem_addr: variable::Index) -> (variable::Index, (usize, usize)) { - let (a, (b, c)) = self - .array_addr_map - .range((Included(&variable::Index(0)), Included(&elem_addr))) - .rev() - .next() - .unwrap(); - (*a, (*b, *c)) - } - - // TODO: why isn't this used? - pub fn begin_group(&mut self) { - self.allocations.push(GroupAllocations { - singletons: vec![], - arrays: vec![], - }); - } - - // TODO: why isn't this used? - pub fn end_group(&mut self) -> bool { - let allocations = match self.allocations.pop_from_tail() { - None => return false, - Some(allocations) => allocations, - }; - for singleton in &allocations.singletons { - self.singleton_addr_map.remove(&singleton.addr); - } - for array in &allocations.arrays { - self.array_addr_map.remove(&array.addr); - } - true - } + singletons: Vec, + arrays: Vec, + array_refs: HashMap, } -/// Get the `\newint` exeuction command. +/// Get the `\newInt` execution command. pub fn get_newint>() -> command::BuiltIn { command::BuiltIn::new_execution(newint_primitive_fn) } @@ -182,64 +78,53 @@ fn newint_primitive_fn>( input: &mut vm::ExecutionInput, ) -> command::Result<()> { let Command::ControlSequence(name) = Command::parse(input)?; - let addr = input.state_mut().component_mut().alloc_int(); + let component = input.state_mut().component_mut(); + let index = component.singletons.len(); + component.singletons.push(Default::default()); input.commands_map_mut().insert_variable_command( name, variable::Command::new_array( singleton_ref_fn, singleton_mut_ref_fn, - variable::IndexResolver::DynamicVirtual(Box::new(SingletonIndexResolver(addr))), + variable::IndexResolver::Static(variable::Index(index)), ), groupingmap::Scope::Local, ); Ok(()) } -struct SingletonIndexResolver(variable::Index); - -impl variable::DynamicIndexResolver for SingletonIndexResolver { - fn resolve( - &self, - _: texlang_core::token::Token, - _: &mut vm::ExpandedStream, - ) -> command::Result { - Ok(self.0) - } -} - -fn singleton_ref_fn>(state: &S, addr: variable::Index) -> &i32 { - let a = state.component(); - let (allocations_i, inner_i) = a.singleton_addr_map[&addr]; - &a.allocations.get(allocations_i).unwrap().singletons[inner_i].value +fn singleton_ref_fn>(state: &S, index: variable::Index) -> &i32 { + &state.component().singletons[index.0] } fn singleton_mut_ref_fn>( state: &mut S, - addr: variable::Index, + index: variable::Index, ) -> &mut i32 { - let a = state.component_mut(); - let (allocations_i, inner_i) = a.singleton_addr_map[&addr]; - &mut a.allocations.get_mut(allocations_i).unwrap().singletons[inner_i].value + &mut state.component_mut().singletons[index.0] } -/// Get the `\newarray` execution command. -pub fn get_newarray>() -> command::BuiltIn { - command::BuiltIn::new_execution(newarray_primitive_fn) +/// Get the `\newIntArray` execution command. +pub fn get_newintarray>() -> command::BuiltIn { + command::BuiltIn::new_execution(newintarray_primitive_fn) } -fn newarray_primitive_fn>( +fn newintarray_primitive_fn>( _: token::Token, input: &mut vm::ExecutionInput, ) -> command::Result<()> { let Command::ControlSequence(name) = Command::parse(input)?; let len = usize::parse(input)?; - let addr = input.state_mut().component_mut().alloc_array(len); + let component = input.state_mut().component_mut(); + let start = component.arrays.len(); + component.arrays.resize(start + len, Default::default()); + component.array_refs.insert(name, (start, len)); input.commands_map_mut().insert_variable_command( name, variable::Command::new_array( array_element_ref_fn, array_element_mut_ref_fn, - variable::IndexResolver::DynamicVirtual(Box::new(ArrayIndexResolver(addr))), + variable::IndexResolver::Dynamic(resolve), ), groupingmap::Scope::Local, ); @@ -247,50 +132,37 @@ fn newarray_primitive_fn>( Ok(()) } -struct ArrayIndexResolver(variable::Index); - -impl> variable::DynamicIndexResolver for ArrayIndexResolver { - fn resolve( - &self, - token: texlang_core::token::Token, - input: &mut vm::ExpandedStream, - ) -> command::Result { - let array_addr = self.0; - let array_index = usize::parse(input)?; - let (allocations_i, inner_i) = input.state().component().array_addr_map[&array_addr]; - let array_len = input.state().component().allocations[allocations_i].arrays[inner_i] - .value - .len(); - if array_index >= array_len { - return Err(error::SimpleTokenError::new(input.vm(), +fn resolve>( + token: token::Token, + input: &mut vm::ExpandedStream, +) -> command::Result { + let name = match token.value() { + token::Value::ControlSequence(name) => name, + _ => todo!(), + }; + let (array_index, array_len) = *input.state().component().array_refs.get(&name).unwrap(); + let inner_index = usize::parse(input)?; + if inner_index >= array_len { + return Err(error::SimpleTokenError::new(input.vm(), token, format![ - "Array out of bounds: cannot access index {array_index} of array with length {array_len}" + "Array out of bounds: cannot access index {inner_index} of array with length {array_len}" ], ) .into()); - } - Ok(variable::Index(array_addr.0 + array_index)) } + Ok(variable::Index(array_index + inner_index)) } -fn array_element_ref_fn>(state: &S, addr: variable::Index) -> &i32 { - let (addr_0, (allocations_i, inner_i)) = state.component().find_array(addr); - &state.component().allocations[allocations_i].arrays[inner_i].value[addr.0 - addr_0.0] +fn array_element_ref_fn>(state: &S, index: variable::Index) -> &i32 { + &state.component().arrays[index.0] } fn array_element_mut_ref_fn>( state: &mut S, - addr: variable::Index, + index: variable::Index, ) -> &mut i32 { - let (addr_0, (allocations_i, inner_i)) = state.component().find_array(addr); - &mut state - .component_mut() - .allocations - .get_mut(allocations_i) - .unwrap() - .arrays[inner_i] - .value[addr.0 - addr_0.0] + &mut state.component_mut().arrays[index.0] } #[cfg(test)] @@ -312,35 +184,35 @@ mod test { fn initial_commands() -> HashMap<&'static str, command::BuiltIn> { HashMap::from([ - ("newint", get_newint()), - ("newarray", get_newarray()), + ("newInt", get_newint()), + ("newIntArray", get_newintarray()), ("the", get_the()), ]) } test_suite![ expansion_equality_tests( - (newint_base_case, r"\newint\a \a=3 \the\a", "3"), + (newint_base_case, r"\newInt\a \a=3 \the\a", "3"), ( - newarray_base_case_0, - r"\newarray \a 3 \a 0 = 2 \the\a 0", + newintarray_base_case_0, + r"\newIntArray \a 3 \a 0 = 2 \the\a 0", "2" ), ( - newarray_base_case_1, - r"\newarray \a 3 \a 1 = 2 \the\a 1", + newintarray_base_case_1, + r"\newIntArray \a 3 \a 1 = 2 \the\a 1", "2" ), ( - newarray_base_case_2, - r"\newarray \a 3 \a 2 = 2 \the\a 2", + newintarray_base_case_2, + r"\newIntArray \a 3 \a 2 = 2 \the\a 2", "2" ), ), failure_tests( - (newarray_out_of_bounds, r"\newarray \a 3 \a 3 = 2"), - (newarray_negative_index, r"\newarray \a 3 \a -3 = 2"), - (newarray_negative_length, r"\newarray \a -3"), + (newintarray_out_of_bounds, r"\newIntArray \a 3 \a 3 = 2"), + (newintarray_negative_index, r"\newIntArray \a 3 \a -3 = 2"), + (newintarray_negative_length, r"\newIntArray \a -3"), ), ]; } diff --git a/crates/texlang-stdlib/src/lib.rs b/crates/texlang-stdlib/src/lib.rs index d96752ea..cbd350c3 100644 --- a/crates/texlang-stdlib/src/lib.rs +++ b/crates/texlang-stdlib/src/lib.rs @@ -115,8 +115,8 @@ impl StdLibState { ("month", time::get_month()), ("multiply", math::get_multiply()), // - ("newint", alloc::get_newint()), - ("newarray", alloc::get_newarray()), + ("newInt", alloc::get_newint()), + ("newIntArray", alloc::get_newintarray()), ("noexpand", expansion::get_noexpand()), // ("or", conditional::get_or()), diff --git a/crates/texlang-stdlib/src/the.rs b/crates/texlang-stdlib/src/the.rs index 7a22b3d7..d0ca2e63 100644 --- a/crates/texlang-stdlib/src/the.rs +++ b/crates/texlang-stdlib/src/the.rs @@ -24,7 +24,7 @@ fn the_primitive_fn( Ok(match &token.value() { token::Value::ControlSequence(name) => { if let Some(command::Command::Variable(cmd)) = input.commands_map().get_command(name) { - match cmd.clone().value(the_token, input.as_mut())? { + match cmd.clone().value(token, input.as_mut())? { variable::ValueRef::Int(i) => int_to_tokens(the_token, *i), variable::ValueRef::CatCode(i) => int_to_tokens(the_token, (*i as u8).into()), } diff --git a/performance/benches/digits_of_pi.tex b/performance/benches/digits_of_pi.tex index f5a46c5c..b14cb602 100644 --- a/performance/benches/digits_of_pi.tex +++ b/performance/benches/digits_of_pi.tex @@ -46,7 +46,6 @@ \n = 1000 \def\result{\count} -% \newarray\result\n \countdef \resultIndex 32766 \resultIndex = 25196 diff --git a/playground/src/lib.rs b/playground/src/lib.rs index da0e79e9..4123a01f 100644 --- a/playground/src/lib.rs +++ b/playground/src/lib.rs @@ -122,8 +122,8 @@ fn new_vm(minutes_since_midnight: i32, day: i32, month: i32, year: i32) -> vm::V ("month", time::get_month()), ("multiply", math::get_multiply()), // - ("newarray", alloc::get_newarray()), - ("newint", alloc::get_newint()), + ("newInt", alloc::get_newint()), + ("newIntArray", alloc::get_newintarray()), ("newline", script::get_newline()), // ("or", conditional::get_or()), diff --git a/playground/ui/src/Content.tsx b/playground/ui/src/Content.tsx index 3c6d6347..f76127cc 100644 --- a/playground/ui/src/Content.tsx +++ b/playground/ui/src/Content.tsx @@ -39,7 +39,7 @@ export const DigitsOfPiDotTex = new TexFile("digits-of-pi.tex", String.raw % result will be incorrect. This n is at least 5000. Its exact value is not % known yet. % -% - This script uses Texcraft's allocation commands (\newint and \newarray) and +% - This script uses Texcraft's allocation commands (\newInt and \newIntArray) and % thus does not work on existing TeX engines like pdfTeX. There is a version % of this script that uses \count registers instead and that can run on both % pdfTeX and Texcraft [2]. The use of allocation commands allows for arbitrary @@ -57,7 +57,7 @@ export const DigitsOfPiDotTex = new TexFile("digits-of-pi.tex", String.raw } % Modulus: calculates \`#1 % #2\` and puts the result in #1 -\newint \modulusTemp +\newInt \modulusTemp \def\modulus#1#2{ \modulusTemp = #1 \divide \modulusTemp by #2 @@ -77,36 +77,36 @@ export const DigitsOfPiDotTex = new TexFile("digits-of-pi.tex", String.raw % The algorithm sometimes produces less than n digits. The tail of the array % may contain -1 values indicating those digits were no calculated. \def\computeDigitsOfPi#1#2{ - \newint \n + \newInt \n \n = #1 \let \result = #2 - \newint\resultIndex + \newInt\resultIndex % allocate an array of length (10n)/3 - \newint \len + \newInt \len \len = \n \multiply \len by 10 \divide \len by 3 - \newarray \r \len + \newIntArray \r \len % initialize each element of the array to 2 - \newint \i + \newInt \i \i = 0 \while{\i < \len}{ \r \i = 2 \advance \i by 1 } - \newint \j - \newint \k - \newint \carry - \newint \preDigit - \newint \firstPreDigit + \newInt \j + \newInt \k + \newInt \carry + \newInt \preDigit + \newInt \firstPreDigit \firstPreDigit = -1 - \newint \numTrailingPreDigits + \newInt \numTrailingPreDigits \numTrailingPreDigits = 0 - \newint \outerLoopIndex + \newInt \outerLoopIndex \outerLoopIndex = 0 \while{\outerLoopIndex < \n}{ \advance \outerLoopIndex by 1 @@ -172,13 +172,13 @@ export const DigitsOfPiDotTex = new TexFile("digits-of-pi.tex", String.raw \while{\resultIndex < \n}{\result\resultIndex-1\advance\resultIndex1} } -\newint \m +\newInt \m \m = 50 -\newarray \digits \m +\newIntArray \digits \m \computeDigitsOfPi \m \digits \i=0 -\newint \temp +\newInt \temp \while{\i<\m}{% \ifnum \result\i > -1 \the\digits\i \fi \ifnum \i=0.\fi