From 800a9499811cd92701b27c10b91c4b7f0fb3cc15 Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui <yann.hamdaoui@tweag.io>
Date: Wed, 4 Dec 2024 23:53:25 +0100
Subject: [PATCH 1/4] Stdlib: add `filter_map`, `dedup` and its variants

---
 ...ontract_nested_custom_diagnostics.ncl.snap |   4 +-
 core/stdlib/std.ncl                           | 177 +++++++++++++++++-
 2 files changed, 178 insertions(+), 3 deletions(-)

diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap
index 7b26d4abc6..7fda7501f1 100644
--- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap
+++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap
@@ -21,9 +21,9 @@ warning: plain functions as contracts are deprecated
    = wrap this function using one of the constructors in `std.contract` instead, like `std.contract.from_validator` or `std.contract.custom`
 
 warning: plain functions as contracts are deprecated
-     ┌─ <stdlib/std.ncl>:1549:9
+     ┌─ <stdlib/std.ncl>:1724:9
      │
-1549 │         %contract/apply% contract (%label/push_diag% label) value,
+1724 │         %contract/apply% contract (%label/push_diag% label) value,
      │         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ applied to this term
      │
      ┌─ [INPUTS_PATH]/errors/subcontract_nested_custom_diagnostics.ncl:3:21
diff --git a/core/stdlib/std.ncl b/core/stdlib/std.ncl
index 8b9eb3d016..a7367219a8 100644
--- a/core/stdlib/std.ncl
+++ b/core/stdlib/std.ncl
@@ -917,6 +917,181 @@
         xs
         |> std.array.length
         |> std.array.generate (fun i => f i (std.array.at i xs)),
+
+    filter_map
+      : forall a b. (a -> [| 'Some b, 'None |]) -> Array a -> Array b
+      | doc m%"
+        Applies a function to every element in the given array, filtering out
+        `'None` results. `filter_map` combines `std.array.map` and
+        `std.array.filter` in a single pass.
+
+        # Examples
+
+        ```nickel
+        ["1", "hello", "2", "world"]
+        |> std.array.filter_map (fun x =>
+          if std.string.is_match "^[+-]?\\d+$" x then
+            'Some (std.string.to_number x)
+          else
+            'None
+        )
+        # => [ 1, 2 ]
+        ```
+      "%
+      = fun f array =>
+        fold_left
+          (fun acc x =>
+            f x
+            |> match {
+              'Some y => acc @ [y],
+              'None => acc,
+            }
+          )
+          []
+          array,
+
+    dedup
+      : Array Dyn -> Array Dyn
+      | doc m%"
+        Removes duplicates from an array.
+
+        # Performance
+
+        This function relies on equality and has a quadratic complexity in the
+        length of the array. It might thus be slow for large arrays or if
+        called repeatedly. Prefer `std.array.sort_dedup` or
+        `std.array.hash_dedup` if you have efficiency concerns.
+
+        # Examples
+
+        ```nickel multiline
+        std.array.dedup [ 4, 2, 1, 3, 5, 2, 1, 4 ]
+        # => [ 4, 2, 1, 3, 5 ]
+
+        std.array.dedup [ "hello", "world", "hello" ]
+        # => [ "hello", "world" ]
+        ```
+      "%
+      = fun array =>
+        let length = %array/length% array in
+
+        fold_left
+          (fun acc x =>
+            if elem x acc then
+              acc
+            else
+              acc @ [x]
+          )
+          []
+          array,
+
+    sort_dedup
+      : forall a. (a -> a -> [| 'Lesser, 'Equal, 'Greater |]) -> Array a -> Array a
+      | doc m%"
+        Sorts an array based on the provided comparison operator and removes
+        duplicates.
+
+        # Performance
+
+        As opposed to `std.array.dedup`, this function has a better time
+        complexity (`O(n*log(n))` where `n` is the size of the array), which
+        should improve performance especially on large arrays. If you need to
+        preserve the original order of the array, see `std.array.hash_dedup`.
+
+        # Examples
+
+        ```nickel multiline
+        std.array.sort_dedup std.number.compare [ 4, 2, 1, 3, 5, 2, 1, 4 ]
+        # => [ 1, 2, 3, 4, 5 ]
+
+        std.array.sort_dedup std.string.compare [ "world", "hello", "world" ]
+        # => [ "hello", "world" ]
+        ```
+      "%
+      = fun cmp array =>
+        let sorted = sort cmp array in
+        let length = %array/length% sorted in
+
+        let rec go = fun acc n =>
+          if n == length then
+            acc
+          else
+            let x = %array/at% sorted n in
+
+            # we would prefer to use the primitive equality here instead of
+            # relying on `cmp` returning `'Equal`. The latter 1. requires that
+            # the user-provided function is correct and 2. is more costly.
+            #
+            # Unfortunately, that would break parametricity and require to
+            # substitute `a` for `Dyn` in the type of `sort_dedup`, which makes
+            # it more annoying to use with otherwise statically typed comparison
+            # function. So for now, we rely on the comparison function.
+            let acc =
+              if n == 0
+              || cmp (%array/at% sorted (n - 1)) x != 'Equal then
+                acc @ [x]
+              else
+                acc
+            in
+
+            go acc (n + 1)
+        in
+
+        go [] 0,
+
+    hash_dedup
+      : forall a. (a -> String) -> Array a -> Array a
+      | doc m%"
+        Removes duplicates from an array efficiently.
+
+        # Hash function
+
+        `hash_dedup` uses a "hash" function mapping array elements to strings.
+        This function is used to efficiently check if an element has already
+        been seen before (linear time amortized in the size of array versus
+        quadratic for `std.array.dedup`).
+
+        The hash function must separate distinct elements with very high
+        probability. If two elements have the same hash, they will be considered
+        equal and thus deduplicated.
+
+        Although we call it a hash function, it doesn't have to produce good
+        hash values: the output of this function is used internally to generate
+        record field names, which are themselves properly hashed again by the
+        Nickel runtime.
+
+        # Examples
+
+        ```nickel multiline
+        std.array.hash_dedup std.to_string [ 4, 2, 1, 3, 5, 2, 1, 4 ]
+        # => [ 4, 2, 1, 3, 5 ]
+
+        std.array.hash_dedup std.function.id [ "world", "hello", "world" ]
+        # => [ "world", "hello" ]
+        ```
+      "%
+      = fun hash array =>
+        let length = %array/length% array in
+
+        let rec go = fun acc n =>
+          if n == length then
+            acc
+          else
+            let x = %array/at% array n in
+            let hashed = hash x in
+
+            if %record/has_field% hashed acc.seen then
+              go acc (n + 1)
+            else
+              let acc = {
+                seen = %record/insert% hashed acc.seen null,
+                result = acc.result @ [x],
+              }
+              in
+              go acc (n + 1)
+        in
+
+        (go { seen = {}, result = [] } 0).result,
   },
 
   contract = {
@@ -4348,7 +4523,7 @@
       %contract/custom% (fun _label _value => 'Error { message = msg }),
 
   fail_with
-    | String -> Dyn
+    | String -> (forall a. a)
     | doc m%"
       Abort the evaluation with the given message. The error will be reported as
       a contract violation, as `fail_with` uses `std.FailWith` under the hood.

From d8e18b441eaf984146281263ddf47c27a4aa36ec Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui <yann.hamdaoui@tweag.io>
Date: Wed, 11 Dec 2024 15:15:32 +0100
Subject: [PATCH 2/4] Fix NLS stack overflow on Windows

---
 core/tests/integration/main.rs |  4 ++--
 lsp/nls/src/main.rs            | 15 ++++++++++++++-
 lsp/nls/src/server.rs          |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/core/tests/integration/main.rs b/core/tests/integration/main.rs
index 420afb81c0..fdc1b498a4 100644
--- a/core/tests/integration/main.rs
+++ b/core/tests/integration/main.rs
@@ -27,8 +27,8 @@ fn check_annotated_nickel_file(path: &str) {
     let test: TestCase<Test> =
         read_annotated_test_case(path).expect("Failed to parse annotated program");
 
-    // By default, cargo runs tests with a 2MB stack, which we can overflow in
-    // debug mode. To avoid this we run the tests with an increased stack size.
+    // By default, cargo runs tests with a 2MB stack, which is easy to overflow. To avoid this we
+    // run the tests with an increased stack size.
     const STACK_SIZE: usize = 4 * 1024 * 1024;
     let path = String::from(project_root().join(path).to_string_lossy());
 
diff --git a/lsp/nls/src/main.rs b/lsp/nls/src/main.rs
index 57f162daef..092203234a 100644
--- a/lsp/nls/src/main.rs
+++ b/lsp/nls/src/main.rs
@@ -1,4 +1,4 @@
-use std::{fs, io, path::PathBuf};
+use std::{fs, io, path::PathBuf, thread};
 
 use anyhow::Result;
 
@@ -29,6 +29,10 @@ mod usage;
 mod utils;
 mod world;
 
+// Default stack size is 1MB on Windows, which is too small. We make it 8MB, which is the default
+// size on Linux.
+const STACK_SIZE: usize = 8 * 1024 * 1024;
+
 use crate::{config::LspConfig, trace::Trace};
 
 #[derive(clap::Parser, Debug)]
@@ -59,6 +63,15 @@ struct Options {
 }
 
 fn main() -> Result<()> {
+    let handle = thread::Builder::new()
+        .stack_size(STACK_SIZE)
+        .spawn(run)
+        .unwrap();
+
+    handle.join().unwrap()
+}
+
+fn run() -> Result<()> {
     use clap::Parser;
 
     env_logger::init();
diff --git a/lsp/nls/src/server.rs b/lsp/nls/src/server.rs
index 04bd43c0fe..e6371b3c6d 100644
--- a/lsp/nls/src/server.rs
+++ b/lsp/nls/src/server.rs
@@ -96,6 +96,7 @@ impl Server {
             .send(Message::Response(response))
             .unwrap();
     }
+
     pub(crate) fn notify(&mut self, notification: Notification) {
         trace!("Sending notification: {:#?}", notification);
         self.connection

From 435f5e422efece321cfde5877fdbb319dd8f4e7c Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui <yann.hamdaoui@tweag.io>
Date: Thu, 19 Dec 2024 18:18:37 +0100
Subject: [PATCH 3/4] Get rid of hash_dedup for now, split dedup_sorted out of
 sort_dedup

---
 core/stdlib/std.ncl | 92 ++++++++++++++++++---------------------------
 1 file changed, 36 insertions(+), 56 deletions(-)

diff --git a/core/stdlib/std.ncl b/core/stdlib/std.ncl
index a7367219a8..f6b42e61ba 100644
--- a/core/stdlib/std.ncl
+++ b/core/stdlib/std.ncl
@@ -960,7 +960,7 @@
         This function relies on equality and has a quadratic complexity in the
         length of the array. It might thus be slow for large arrays or if
         called repeatedly. Prefer `std.array.sort_dedup` or
-        `std.array.hash_dedup` if you have efficiency concerns.
+        `std.array.dedup_sorted` if you have efficiency concerns.
 
         # Examples
 
@@ -985,38 +985,43 @@
           []
           array,
 
-    sort_dedup
+    dedup_sorted
       : forall a. (a -> a -> [| 'Lesser, 'Equal, 'Greater |]) -> Array a -> Array a
       | doc m%"
-        Sorts an array based on the provided comparison operator and removes
-        duplicates.
+        Given an array already sorted by the given ordering function, remove duplicates.
+
+        # Preconditions
+
+        The input array must be sorted using the same order as provided to this
+        function. Otherwise, some duplicates might not be removed.
 
         # Performance
 
-        As opposed to `std.array.dedup`, this function has a better time
-        complexity (`O(n*log(n))` where `n` is the size of the array), which
-        should improve performance especially on large arrays. If you need to
-        preserve the original order of the array, see `std.array.hash_dedup`.
+        As opposed to `std.array.dedup`, this function has a linear time
+        complexity, which should improve performance especially on large arrays.
+
+        If you sort the array only to remove duplicates, you can use
+        `std.array.sort_dedup` directly (which handles both sorting and
+        deduplication).
 
         # Examples
 
         ```nickel multiline
-        std.array.sort_dedup std.number.compare [ 4, 2, 1, 3, 5, 2, 1, 4 ]
+        std.array.dedup_sorted std.number.compare [ 1, 2, 2, 3, 4, 4, 4, 5 ]
         # => [ 1, 2, 3, 4, 5 ]
 
-        std.array.sort_dedup std.string.compare [ "world", "hello", "world" ]
+        std.array.dedup_sorted std.string.compare [ "hello", "world", "world" ]
         # => [ "hello", "world" ]
         ```
       "%
       = fun cmp array =>
-        let sorted = sort cmp array in
-        let length = %array/length% sorted in
+        let length = %array/length% array in
 
         let rec go = fun acc n =>
           if n == length then
             acc
           else
-            let x = %array/at% sorted n in
+            let x = %array/at% array n in
 
             # we would prefer to use the primitive equality here instead of
             # relying on `cmp` returning `'Equal`. The latter 1. requires that
@@ -1028,7 +1033,7 @@
             # function. So for now, we rely on the comparison function.
             let acc =
               if n == 0
-              || cmp (%array/at% sorted (n - 1)) x != 'Equal then
+              || cmp (%array/at% array (n - 1)) x != 'Equal then
                 acc @ [x]
               else
                 acc
@@ -1039,59 +1044,34 @@
 
         go [] 0,
 
-    hash_dedup
-      : forall a. (a -> String) -> Array a -> Array a
+    sort_dedup
+      : forall a. (a -> a -> [| 'Lesser, 'Equal, 'Greater |]) -> Array a -> Array a
       | doc m%"
-        Removes duplicates from an array efficiently.
-
-        # Hash function
+        Sorts an array based on the provided comparison operator and removes
+        duplicates.
 
-        `hash_dedup` uses a "hash" function mapping array elements to strings.
-        This function is used to efficiently check if an element has already
-        been seen before (linear time amortized in the size of array versus
-        quadratic for `std.array.dedup`).
+        This is a combination of `std.array.sort` and `std.array.dedup_sorted`.
 
-        The hash function must separate distinct elements with very high
-        probability. If two elements have the same hash, they will be considered
-        equal and thus deduplicated.
+        # Performance
 
-        Although we call it a hash function, it doesn't have to produce good
-        hash values: the output of this function is used internally to generate
-        record field names, which are themselves properly hashed again by the
-        Nickel runtime.
+        As opposed to `std.array.dedup`, this function has a better time
+        complexity (`O(n*log(n))` where `n` is the size of the array), which
+        should improve performance especially on large arrays.
 
         # Examples
 
         ```nickel multiline
-        std.array.hash_dedup std.to_string [ 4, 2, 1, 3, 5, 2, 1, 4 ]
-        # => [ 4, 2, 1, 3, 5 ]
+        std.array.sort_dedup std.number.compare [ 4, 2, 1, 3, 5, 2, 1, 4 ]
+        # => [ 1, 2, 3, 4, 5 ]
 
-        std.array.hash_dedup std.function.id [ "world", "hello", "world" ]
-        # => [ "world", "hello" ]
+        std.array.sort_dedup std.string.compare [ "world", "hello", "world" ]
+        # => [ "hello", "world" ]
         ```
       "%
-      = fun hash array =>
-        let length = %array/length% array in
-
-        let rec go = fun acc n =>
-          if n == length then
-            acc
-          else
-            let x = %array/at% array n in
-            let hashed = hash x in
-
-            if %record/has_field% hashed acc.seen then
-              go acc (n + 1)
-            else
-              let acc = {
-                seen = %record/insert% hashed acc.seen null,
-                result = acc.result @ [x],
-              }
-              in
-              go acc (n + 1)
-        in
-
-        (go { seen = {}, result = [] } 0).result,
+      = fun cmp array =>
+        array
+        |> sort cmp
+        |> dedup_sorted cmp,
   },
 
   contract = {

From a94604ffd9faaf1110c084b1a2791aa231cb4f50 Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui <yann.hamdaoui@tweag.io>
Date: Thu, 19 Dec 2024 18:41:10 +0100
Subject: [PATCH 4/4] Update snapshot tests

---
 ...eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap
index 7fda7501f1..d6bc75a1c8 100644
--- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap
+++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap
@@ -21,9 +21,9 @@ warning: plain functions as contracts are deprecated
    = wrap this function using one of the constructors in `std.contract` instead, like `std.contract.from_validator` or `std.contract.custom`
 
 warning: plain functions as contracts are deprecated
-     ┌─ <stdlib/std.ncl>:1724:9
+     ┌─ <stdlib/std.ncl>:1704:9
      │
-1724 │         %contract/apply% contract (%label/push_diag% label) value,
+1704 │         %contract/apply% contract (%label/push_diag% label) value,
      │         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ applied to this term
      │
      ┌─ [INPUTS_PATH]/errors/subcontract_nested_custom_diagnostics.ncl:3:21