From 49fd0e9f0cf64527e8d96372bc6d091a8e9b5bf3 Mon Sep 17 00:00:00 2001
From: James Fennell <jamespfennell@gmail.com>
Date: Mon, 12 Jun 2023 09:50:42 -0400
Subject: [PATCH] Reimplement dynamic memory allocation (#3)

This is to make the dynamic memory allocation system work with serializable
VMs.
---
 .vscode/settings.json                  |   2 +
 crates/texlang-core/src/command/mod.rs |   1 -
 crates/texlang-core/src/variable.rs    |  15 --
 crates/texlang-stdlib/src/alloc.rs     | 270 +++++++------------------
 crates/texlang-stdlib/src/lib.rs       |   4 +-
 crates/texlang-stdlib/src/the.rs       |   2 +-
 performance/benches/digits_of_pi.tex   |   1 -
 playground/src/lib.rs                  |   4 +-
 playground/ui/src/Content.tsx          |  34 ++--
 9 files changed, 95 insertions(+), 238 deletions(-)
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 9dcbf9a8..8d6bf580 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -25,6 +25,8 @@
         "muglue",
         "nevec",
         "newcommand",
+        "newint",
+        "newintarray",
         "noexpand",
         "PDFTEX",
         "randtex",
diff --git a/crates/texlang-core/src/command/mod.rs b/crates/texlang-core/src/command/mod.rs
index b2b8763d..426cdf30 100644
--- a/crates/texlang-core/src/command/mod.rs
+++ b/crates/texlang-core/src/command/mod.rs
@@ -326,7 +326,6 @@ impl PrimitiveKey {
                         variable::IndexResolver::Dynamic(f) => Some(
                             PrimitiveKey::VariableArrayDynamic(variable_key, *f as usize),
                         ),
-                        variable::IndexResolver::DynamicVirtual(_) => None,
                     },
                 }
             }
diff --git a/crates/texlang-core/src/variable.rs b/crates/texlang-core/src/variable.rs
index f0add0f3..c45ad6f5 100644
--- a/crates/texlang-core/src/variable.rs
+++ b/crates/texlang-core/src/variable.rs
@@ -326,20 +326,6 @@ pub enum IndexResolver<S> {
     /// For example, in `\count 4` the index of `4` is determined by parsing a number
     /// from the input token stream.
     Dynamic(fn(token::Token, &mut vm::ExpandedStream<S>) -> Result<Index, Box<error::Error>>),
-    /// A dynamic index, but determined using virtual method dispatch.
-    ///
-    /// This method is more flexible than the Dynamic variant, but less performant.
-    DynamicVirtual(Box<dyn DynamicIndexResolver<S>>),
-}
-
-/// Trait used for dynamically determining an index using virtual method dispatch.
-pub trait DynamicIndexResolver<S> {
-    /// Determine the index of a variable.
-    fn resolve(
-        &self,
-        token: token::Token,
-        input: &mut vm::ExpandedStream<S>,
-    ) -> Result<Index, Box<error::Error>>;
 }
 
 impl<S> IndexResolver<S> {
@@ -352,7 +338,6 @@ impl<S> IndexResolver<S> {
         match self {
             IndexResolver::Static(addr) => Ok(*addr),
             IndexResolver::Dynamic(f) => f(token, input),
-            IndexResolver::DynamicVirtual(v) => v.resolve(token, input),
         }
     }
 }
diff --git a/crates/texlang-stdlib/src/alloc.rs b/crates/texlang-stdlib/src/alloc.rs
index d517a6aa..c4c9c4dc 100644
--- a/crates/texlang-stdlib/src/alloc.rs
+++ b/crates/texlang-stdlib/src/alloc.rs
@@ -1,62 +1,54 @@
 //! Dynamic allocation of variables and arrays
 //!
 //! This module contains implementations of brand new Texcraft commands
-//! `\newint` and `\newarray` which perform dynamic memory allocation.
+//! `\newInt` and `\newIntArray` which perform dynamic memory allocation.
 
-use std::collections::{BTreeMap, HashMap};
-use std::ops::Bound::Included;
+use std::collections::HashMap;
 use texcraft_stdext::collections::groupingmap;
-use texcraft_stdext::collections::nevec::Nevec;
 use texlang_core::parse::Command;
 use texlang_core::traits::*;
 use texlang_core::*;
 
-pub const NEWINT_DOC: &str = r"Allocate a new integer variable
+pub const NEWINT_DOC: &str = r"Allocate a new integer
 
-Usage: `\newint <control sequence>`
+Usage: `\newInt <control sequence>`
 
-The `\newint` command allocates a new integer variable
+The `\newInt` command allocates a new integer
 that is referenced using the provided control sequence.
 Simple example:
 
 ```
-\newint \myvariable
+\newInt \myvariable
 \myvariable = 4
 \advance \myvariable by 5
 \asserteq{\the \myvariable}{9}
 ```
 
-You can think of `\newint` as being a replacement for
+You can think of `\newInt` as being a replacement for
 Plain TeX's `\newcount` macro (TeXBook p346).
-The benefit of `\newint` is that different callers of the command
+The benefit of `\newInt` is that different callers of the command
 do not share the underlying memory; the allocated memory is unique
 to the caller.
-Under the hood `\newint` works by allocating new memory on
-the TeX engine's heap. This memory is allocated in a large Rust vector,
-so for n invocations of `\newint` there are log(n) heap allocations.
-
-The variable is deallocated at the end of the current group.
-If there is no current group (i.e., this is the global scope)
-then the variable is never deallocated.
+Under the hood `\newInt` works by allocating new memory on the TeX engine's heap.
 ";
 
-pub const NEWARRAY_DOC: &str = r"Allocate a new integer array
+pub const NEWINTARRAY_DOC: &str = r"Allocate a new integer array
 
-Usage: `\newarray <control sequence> <array length>`
+Usage: `\newIntArray <control sequence> <array length>`
 
-The `\newarray` command allocates a new array of integers that
+The `\newIntArray` command allocates a new array of integers that
 is referenced using the provided control sequence.
-This new array works pretty much like `\count`, but you can create
-as many arrays as you like and don't need to worry about other
-TeX code reusing the memory.
-Also, unlike `\count`, the size of the array is not fixed by
+This new control sequence works pretty much like `\count`, but you can create
+    as many arrays as you like and don't need to worry about other
+    TeX code reusing the memory.
+Unlike `\count`, the size of the array is not fixed by
 the engine.
-The only constaint on the size is that you have enough RAM
-on the machine to store it.
+The only constraint on the size is that you have enough RAM
+    on the machine to store it.
 Simple example:
 
 ```
-\newarray \myarray 3
+\newIntArray \myarray 3
 \myarray 0 = 4
 \asserteq{\the \myarray 0}{4}
 \myarray 1 = 5
@@ -65,114 +57,18 @@ Simple example:
 \asserteq{\the \myarray 2}{6}
 ```
 
-The array is deallocated at the end of the current group.
-If there is no current group (i.e., this is the global scope)
-then the variable is never deallocated.
+The new control sequence can *not* be aliased using \let.
 ";
 
 /// Component required for the alloc commands.
 #[derive(Default)]
 pub struct Component {
-    allocations: Nevec<GroupAllocations>,
-
-    // Map from addr to (group allocations index, allocations.singletons index)
-    singleton_addr_map: HashMap<variable::Index, (usize, usize)>,
-    next_singleton_addr: variable::Index,
-
-    // Map from addr to (group allocations index, gallocations.singletons index)
-    //
-    // Note that addr points to the first element of the array; when
-    // we add a new array, addr is incremented by the size of the array.
-    // addr + i then references the ith element of the array, for
-    // appropriate i. We use a BTreeMap to make it fast to obtain the
-    // indices from addr + i for any i.
-    array_addr_map: BTreeMap<variable::Index, (usize, usize)>,
-    next_array_addr: variable::Index,
-}
-
-/// Contains all the allocations for a single group.
-#[derive(Default)]
-struct GroupAllocations {
-    // TODO: rather than using a Vec of Vecs it may be more efficient
-    // to use a single global Vec and a single non-global Vec.
-    singletons: Vec<Singleton>,
-    arrays: Vec<Array>,
-}
-
-struct Singleton {
-    addr: variable::Index,
-    value: i32,
-}
-
-struct Array {
-    addr: variable::Index,
-    value: Vec<i32>,
-}
-
-impl Component {
-    fn alloc_int(&mut self) -> variable::Index {
-        let i = (
-            self.allocations.len() - 1,
-            self.allocations.last().singletons.len(),
-        );
-        self.allocations.last_mut().singletons.push(Singleton {
-            addr: self.next_singleton_addr,
-            value: 0,
-        });
-        self.singleton_addr_map.insert(self.next_singleton_addr, i);
-        self.next_singleton_addr.0 += 1;
-        variable::Index(self.next_singleton_addr.0 - 1)
-    }
-
-    fn alloc_array(&mut self, len: usize) -> variable::Index {
-        let i = (
-            self.allocations.len() - 1,
-            self.allocations.last().arrays.len(),
-        );
-        self.allocations.last_mut().arrays.push(Array {
-            addr: self.next_array_addr,
-            value: vec![0; len],
-        });
-        self.array_addr_map.insert(self.next_array_addr, i);
-        self.next_array_addr.0 += len;
-        variable::Index(self.next_array_addr.0 - len)
-    }
-
-    fn find_array(&self, elem_addr: variable::Index) -> (variable::Index, (usize, usize)) {
-        let (a, (b, c)) = self
-            .array_addr_map
-            .range((Included(&variable::Index(0)), Included(&elem_addr)))
-            .rev()
-            .next()
-            .unwrap();
-        (*a, (*b, *c))
-    }
-
-    // TODO: why isn't this used?
-    pub fn begin_group(&mut self) {
-        self.allocations.push(GroupAllocations {
-            singletons: vec![],
-            arrays: vec![],
-        });
-    }
-
-    // TODO: why isn't this used?
-    pub fn end_group(&mut self) -> bool {
-        let allocations = match self.allocations.pop_from_tail() {
-            None => return false,
-            Some(allocations) => allocations,
-        };
-        for singleton in &allocations.singletons {
-            self.singleton_addr_map.remove(&singleton.addr);
-        }
-        for array in &allocations.arrays {
-            self.array_addr_map.remove(&array.addr);
-        }
-        true
-    }
+    singletons: Vec<i32>,
+    arrays: Vec<i32>,
+    array_refs: HashMap<token::CsName, (usize, usize)>,
 }
 
-/// Get the `\newint` exeuction command.
+/// Get the `\newInt` execution command.
 pub fn get_newint<S: HasComponent<Component>>() -> command::BuiltIn<S> {
     command::BuiltIn::new_execution(newint_primitive_fn)
 }
@@ -182,64 +78,53 @@ fn newint_primitive_fn<S: HasComponent<Component>>(
     input: &mut vm::ExecutionInput<S>,
 ) -> command::Result<()> {
     let Command::ControlSequence(name) = Command::parse(input)?;
-    let addr = input.state_mut().component_mut().alloc_int();
+    let component = input.state_mut().component_mut();
+    let index = component.singletons.len();
+    component.singletons.push(Default::default());
     input.commands_map_mut().insert_variable_command(
         name,
         variable::Command::new_array(
             singleton_ref_fn,
             singleton_mut_ref_fn,
-            variable::IndexResolver::DynamicVirtual(Box::new(SingletonIndexResolver(addr))),
+            variable::IndexResolver::Static(variable::Index(index)),
         ),
         groupingmap::Scope::Local,
     );
     Ok(())
 }
 
-struct SingletonIndexResolver(variable::Index);
-
-impl<S> variable::DynamicIndexResolver<S> for SingletonIndexResolver {
-    fn resolve(
-        &self,
-        _: texlang_core::token::Token,
-        _: &mut vm::ExpandedStream<S>,
-    ) -> command::Result<variable::Index> {
-        Ok(self.0)
-    }
-}
-
-fn singleton_ref_fn<S: HasComponent<Component>>(state: &S, addr: variable::Index) -> &i32 {
-    let a = state.component();
-    let (allocations_i, inner_i) = a.singleton_addr_map[&addr];
-    &a.allocations.get(allocations_i).unwrap().singletons[inner_i].value
+fn singleton_ref_fn<S: HasComponent<Component>>(state: &S, index: variable::Index) -> &i32 {
+    &state.component().singletons[index.0]
 }
 
 fn singleton_mut_ref_fn<S: HasComponent<Component>>(
     state: &mut S,
-    addr: variable::Index,
+    index: variable::Index,
 ) -> &mut i32 {
-    let a = state.component_mut();
-    let (allocations_i, inner_i) = a.singleton_addr_map[&addr];
-    &mut a.allocations.get_mut(allocations_i).unwrap().singletons[inner_i].value
+    &mut state.component_mut().singletons[index.0]
 }
 
-/// Get the `\newarray` execution command.
-pub fn get_newarray<S: HasComponent<Component>>() -> command::BuiltIn<S> {
-    command::BuiltIn::new_execution(newarray_primitive_fn)
+/// Get the `\newIntArray` execution command.
+pub fn get_newintarray<S: HasComponent<Component>>() -> command::BuiltIn<S> {
+    command::BuiltIn::new_execution(newintarray_primitive_fn)
 }
 
-fn newarray_primitive_fn<S: HasComponent<Component>>(
+fn newintarray_primitive_fn<S: HasComponent<Component>>(
     _: token::Token,
     input: &mut vm::ExecutionInput<S>,
 ) -> command::Result<()> {
     let Command::ControlSequence(name) = Command::parse(input)?;
     let len = usize::parse(input)?;
-    let addr = input.state_mut().component_mut().alloc_array(len);
+    let component = input.state_mut().component_mut();
+    let start = component.arrays.len();
+    component.arrays.resize(start + len, Default::default());
+    component.array_refs.insert(name, (start, len));
     input.commands_map_mut().insert_variable_command(
         name,
         variable::Command::new_array(
             array_element_ref_fn,
             array_element_mut_ref_fn,
-            variable::IndexResolver::DynamicVirtual(Box::new(ArrayIndexResolver(addr))),
+            variable::IndexResolver::Dynamic(resolve),
         ),
         groupingmap::Scope::Local,
     );
@@ -247,50 +132,37 @@ fn newarray_primitive_fn<S: HasComponent<Component>>(
     Ok(())
 }
 
-struct ArrayIndexResolver(variable::Index);
-
-impl<S: HasComponent<Component>> variable::DynamicIndexResolver<S> for ArrayIndexResolver {
-    fn resolve(
-        &self,
-        token: texlang_core::token::Token,
-        input: &mut vm::ExpandedStream<S>,
-    ) -> command::Result<variable::Index> {
-        let array_addr = self.0;
-        let array_index = usize::parse(input)?;
-        let (allocations_i, inner_i) = input.state().component().array_addr_map[&array_addr];
-        let array_len = input.state().component().allocations[allocations_i].arrays[inner_i]
-            .value
-            .len();
-        if array_index >= array_len {
-            return Err(error::SimpleTokenError::new(input.vm(),
+fn resolve<S: HasComponent<Component>>(
+    token: token::Token,
+    input: &mut vm::ExpandedStream<S>,
+) -> command::Result<variable::Index> {
+    let name = match token.value() {
+        token::Value::ControlSequence(name) => name,
+        _ => todo!(),
+    };
+    let (array_index, array_len) = *input.state().component().array_refs.get(&name).unwrap();
+    let inner_index = usize::parse(input)?;
+    if inner_index >= array_len {
+        return Err(error::SimpleTokenError::new(input.vm(),
             token,
             format![
-                "Array out of bounds: cannot access index {array_index} of array with length {array_len}"
+                "Array out of bounds: cannot access index {inner_index} of array with length {array_len}"
             ],
         )
         .into());
-        }
-        Ok(variable::Index(array_addr.0 + array_index))
     }
+    Ok(variable::Index(array_index + inner_index))
 }
 
-fn array_element_ref_fn<S: HasComponent<Component>>(state: &S, addr: variable::Index) -> &i32 {
-    let (addr_0, (allocations_i, inner_i)) = state.component().find_array(addr);
-    &state.component().allocations[allocations_i].arrays[inner_i].value[addr.0 - addr_0.0]
+fn array_element_ref_fn<S: HasComponent<Component>>(state: &S, index: variable::Index) -> &i32 {
+    &state.component().arrays[index.0]
 }
 
 fn array_element_mut_ref_fn<S: HasComponent<Component>>(
     state: &mut S,
-    addr: variable::Index,
+    index: variable::Index,
 ) -> &mut i32 {
-    let (addr_0, (allocations_i, inner_i)) = state.component().find_array(addr);
-    &mut state
-        .component_mut()
-        .allocations
-        .get_mut(allocations_i)
-        .unwrap()
-        .arrays[inner_i]
-        .value[addr.0 - addr_0.0]
+    &mut state.component_mut().arrays[index.0]
 }
 
 #[cfg(test)]
@@ -312,35 +184,35 @@ mod test {
 
     fn initial_commands() -> HashMap<&'static str, command::BuiltIn<State>> {
         HashMap::from([
-            ("newint", get_newint()),
-            ("newarray", get_newarray()),
+            ("newInt", get_newint()),
+            ("newIntArray", get_newintarray()),
             ("the", get_the()),
         ])
     }
 
     test_suite![
         expansion_equality_tests(
-            (newint_base_case, r"\newint\a \a=3 \the\a", "3"),
+            (newint_base_case, r"\newInt\a \a=3 \the\a", "3"),
             (
-                newarray_base_case_0,
-                r"\newarray \a 3 \a 0 = 2 \the\a 0",
+                newintarray_base_case_0,
+                r"\newIntArray \a 3 \a 0 = 2 \the\a 0",
                 "2"
             ),
             (
-                newarray_base_case_1,
-                r"\newarray \a 3 \a 1 = 2 \the\a 1",
+                newintarray_base_case_1,
+                r"\newIntArray \a 3 \a 1 = 2 \the\a 1",
                 "2"
             ),
             (
-                newarray_base_case_2,
-                r"\newarray \a 3 \a 2 = 2 \the\a 2",
+                newintarray_base_case_2,
+                r"\newIntArray \a 3 \a 2 = 2 \the\a 2",
                 "2"
             ),
         ),
         failure_tests(
-            (newarray_out_of_bounds, r"\newarray \a 3 \a 3 = 2"),
-            (newarray_negative_index, r"\newarray \a 3 \a -3 = 2"),
-            (newarray_negative_length, r"\newarray \a -3"),
+            (newintarray_out_of_bounds, r"\newIntArray \a 3 \a 3 = 2"),
+            (newintarray_negative_index, r"\newIntArray \a 3 \a -3 = 2"),
+            (newintarray_negative_length, r"\newIntArray \a -3"),
         ),
     ];
 }
diff --git a/crates/texlang-stdlib/src/lib.rs b/crates/texlang-stdlib/src/lib.rs
index d96752ea..cbd350c3 100644
--- a/crates/texlang-stdlib/src/lib.rs
+++ b/crates/texlang-stdlib/src/lib.rs
@@ -115,8 +115,8 @@ impl StdLibState {
             ("month", time::get_month()),
             ("multiply", math::get_multiply()),
             //
-            ("newint", alloc::get_newint()),
-            ("newarray", alloc::get_newarray()),
+            ("newInt", alloc::get_newint()),
+            ("newIntArray", alloc::get_newintarray()),
             ("noexpand", expansion::get_noexpand()),
             //
             ("or", conditional::get_or()),
diff --git a/crates/texlang-stdlib/src/the.rs b/crates/texlang-stdlib/src/the.rs
index 7a22b3d7..d0ca2e63 100644
--- a/crates/texlang-stdlib/src/the.rs
+++ b/crates/texlang-stdlib/src/the.rs
@@ -24,7 +24,7 @@ fn the_primitive_fn<S: TexlangState>(
     Ok(match &token.value() {
         token::Value::ControlSequence(name) => {
             if let Some(command::Command::Variable(cmd)) = input.commands_map().get_command(name) {
-                match cmd.clone().value(the_token, input.as_mut())? {
+                match cmd.clone().value(token, input.as_mut())? {
                     variable::ValueRef::Int(i) => int_to_tokens(the_token, *i),
                     variable::ValueRef::CatCode(i) => int_to_tokens(the_token, (*i as u8).into()),
                 }
diff --git a/performance/benches/digits_of_pi.tex b/performance/benches/digits_of_pi.tex
index f5a46c5c..b14cb602 100644
--- a/performance/benches/digits_of_pi.tex
+++ b/performance/benches/digits_of_pi.tex
@@ -46,7 +46,6 @@
 \n = 1000
 
 \def\result{\count}
-% \newarray\result\n
 \countdef \resultIndex 32766
 \resultIndex = 25196
 
diff --git a/playground/src/lib.rs b/playground/src/lib.rs
index da0e79e9..4123a01f 100644
--- a/playground/src/lib.rs
+++ b/playground/src/lib.rs
@@ -122,8 +122,8 @@ fn new_vm(minutes_since_midnight: i32, day: i32, month: i32, year: i32) -> vm::V
         ("month", time::get_month()),
         ("multiply", math::get_multiply()),
         //
-        ("newarray", alloc::get_newarray()),
-        ("newint", alloc::get_newint()),
+        ("newInt", alloc::get_newint()),
+        ("newIntArray", alloc::get_newintarray()),
         ("newline", script::get_newline()),
         //
         ("or", conditional::get_or()),
diff --git a/playground/ui/src/Content.tsx b/playground/ui/src/Content.tsx
index 3c6d6347..f76127cc 100644
--- a/playground/ui/src/Content.tsx
+++ b/playground/ui/src/Content.tsx
@@ -39,7 +39,7 @@ export const DigitsOfPiDotTex = new TexFile("digits-of-pi.tex", String.raw
 %   result will be incorrect. This n is at least 5000. Its exact value is not
 %   known yet.
 %
-% - This script uses Texcraft's allocation commands (\newint and \newarray) and
+% - This script uses Texcraft's allocation commands (\newInt and \newIntArray) and
 %   thus does not work on existing TeX engines like pdfTeX. There is a version
 %   of this script that uses \count registers instead and that can run on both
 %   pdfTeX and Texcraft [2]. The use of allocation commands allows for arbitrary
@@ -57,7 +57,7 @@ export const DigitsOfPiDotTex = new TexFile("digits-of-pi.tex", String.raw
 }
 
 % Modulus: calculates \`#1 % #2\` and puts the result in #1
-\newint \modulusTemp
+\newInt \modulusTemp
 \def\modulus#1#2{
   \modulusTemp = #1
   \divide \modulusTemp by #2
@@ -77,36 +77,36 @@ export const DigitsOfPiDotTex = new TexFile("digits-of-pi.tex", String.raw
 % The algorithm sometimes produces less than n digits. The tail of the array
 % may contain -1 values indicating those digits were no calculated.
 \def\computeDigitsOfPi#1#2{
-  \newint \n
+  \newInt \n
   \n = #1
   \let \result = #2
-  \newint\resultIndex
+  \newInt\resultIndex
 
   % allocate an array of length (10n)/3
-  \newint \len
+  \newInt \len
   \len = \n
   \multiply \len by 10
   \divide \len by 3
-  \newarray \r \len
+  \newIntArray \r \len
 
   % initialize each element of the array to 2
-  \newint \i
+  \newInt \i
   \i = 0
   \while{\i < \len}{
     \r \i = 2
     \advance \i by 1
   }
 
-  \newint \j
-  \newint \k
-  \newint \carry
-  \newint \preDigit
-  \newint \firstPreDigit
+  \newInt \j
+  \newInt \k
+  \newInt \carry
+  \newInt \preDigit
+  \newInt \firstPreDigit
   \firstPreDigit = -1
-  \newint \numTrailingPreDigits
+  \newInt \numTrailingPreDigits
   \numTrailingPreDigits = 0
 
-  \newint \outerLoopIndex
+  \newInt \outerLoopIndex
   \outerLoopIndex = 0
   \while{\outerLoopIndex < \n}{
     \advance \outerLoopIndex by 1
@@ -172,13 +172,13 @@ export const DigitsOfPiDotTex = new TexFile("digits-of-pi.tex", String.raw
   \while{\resultIndex < \n}{\result\resultIndex-1\advance\resultIndex1}
 }
 
-\newint \m
+\newInt \m
 \m = 50
-\newarray \digits \m
+\newIntArray \digits \m
 \computeDigitsOfPi \m \digits
 
 \i=0
-\newint \temp
+\newInt \temp
 \while{\i<\m}{%
   \ifnum \result\i > -1 \the\digits\i \fi
   \ifnum \i=0.\fi