From 800a9499811cd92701b27c10b91c4b7f0fb3cc15 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 4 Dec 2024 23:53:25 +0100 Subject: [PATCH 1/4] Stdlib: add `filter_map`, `dedup` and its variants --- ...ontract_nested_custom_diagnostics.ncl.snap | 4 +- core/stdlib/std.ncl | 177 +++++++++++++++++- 2 files changed, 178 insertions(+), 3 deletions(-) diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap index 7b26d4abc6..7fda7501f1 100644 --- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap +++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap @@ -21,9 +21,9 @@ warning: plain functions as contracts are deprecated = wrap this function using one of the constructors in `std.contract` instead, like `std.contract.from_validator` or `std.contract.custom` warning: plain functions as contracts are deprecated - ┌─ :1549:9 + ┌─ :1724:9 │ -1549 │ %contract/apply% contract (%label/push_diag% label) value, +1724 │ %contract/apply% contract (%label/push_diag% label) value, │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ applied to this term │ ┌─ [INPUTS_PATH]/errors/subcontract_nested_custom_diagnostics.ncl:3:21 diff --git a/core/stdlib/std.ncl b/core/stdlib/std.ncl index 8b9eb3d016..a7367219a8 100644 --- a/core/stdlib/std.ncl +++ b/core/stdlib/std.ncl @@ -917,6 +917,181 @@ xs |> std.array.length |> std.array.generate (fun i => f i (std.array.at i xs)), + + filter_map + : forall a b. (a -> [| 'Some b, 'None |]) -> Array a -> Array b + | doc m%" + Applies a function to every element in the given array, filtering out + `'None` results. `filter_map` combines `std.array.map` and + `std.array.filter` in a single pass. + + # Examples + + ```nickel + ["1", "hello", "2", "world"] + |> std.array.filter_map (fun x => + if std.string.is_match "^[+-]?\\d+$" x then + 'Some (std.string.to_number x) + else + 'None + ) + # => [ 1, 2 ] + ``` + "% + = fun f array => + fold_left + (fun acc x => + f x + |> match { + 'Some y => acc @ [y], + 'None => acc, + } + ) + [] + array, + + dedup + : Array Dyn -> Array Dyn + | doc m%" + Removes duplicates from an array. + + # Performance + + This function relies on equality and has a quadratic complexity in the + length of the array. It might thus be slow for large arrays or if + called repeatedly. Prefer `std.array.sort_dedup` or + `std.array.hash_dedup` if you have efficiency concerns. + + # Examples + + ```nickel multiline + std.array.dedup [ 4, 2, 1, 3, 5, 2, 1, 4 ] + # => [ 4, 2, 1, 3, 5 ] + + std.array.dedup [ "hello", "world", "hello" ] + # => [ "hello", "world" ] + ``` + "% + = fun array => + let length = %array/length% array in + + fold_left + (fun acc x => + if elem x acc then + acc + else + acc @ [x] + ) + [] + array, + + sort_dedup + : forall a. (a -> a -> [| 'Lesser, 'Equal, 'Greater |]) -> Array a -> Array a + | doc m%" + Sorts an array based on the provided comparison operator and removes + duplicates. + + # Performance + + As opposed to `std.array.dedup`, this function has a better time + complexity (`O(n*log(n))` where `n` is the size of the array), which + should improve performance especially on large arrays. If you need to + preserve the original order of the array, see `std.array.hash_dedup`. + + # Examples + + ```nickel multiline + std.array.sort_dedup std.number.compare [ 4, 2, 1, 3, 5, 2, 1, 4 ] + # => [ 1, 2, 3, 4, 5 ] + + std.array.sort_dedup std.string.compare [ "world", "hello", "world" ] + # => [ "hello", "world" ] + ``` + "% + = fun cmp array => + let sorted = sort cmp array in + let length = %array/length% sorted in + + let rec go = fun acc n => + if n == length then + acc + else + let x = %array/at% sorted n in + + # we would prefer to use the primitive equality here instead of + # relying on `cmp` returning `'Equal`. The latter 1. requires that + # the user-provided function is correct and 2. is more costly. + # + # Unfortunately, that would break parametricity and require to + # substitute `a` for `Dyn` in the type of `sort_dedup`, which makes + # it more annoying to use with otherwise statically typed comparison + # function. So for now, we rely on the comparison function. + let acc = + if n == 0 + || cmp (%array/at% sorted (n - 1)) x != 'Equal then + acc @ [x] + else + acc + in + + go acc (n + 1) + in + + go [] 0, + + hash_dedup + : forall a. (a -> String) -> Array a -> Array a + | doc m%" + Removes duplicates from an array efficiently. + + # Hash function + + `hash_dedup` uses a "hash" function mapping array elements to strings. + This function is used to efficiently check if an element has already + been seen before (linear time amortized in the size of array versus + quadratic for `std.array.dedup`). + + The hash function must separate distinct elements with very high + probability. If two elements have the same hash, they will be considered + equal and thus deduplicated. + + Although we call it a hash function, it doesn't have to produce good + hash values: the output of this function is used internally to generate + record field names, which are themselves properly hashed again by the + Nickel runtime. + + # Examples + + ```nickel multiline + std.array.hash_dedup std.to_string [ 4, 2, 1, 3, 5, 2, 1, 4 ] + # => [ 4, 2, 1, 3, 5 ] + + std.array.hash_dedup std.function.id [ "world", "hello", "world" ] + # => [ "world", "hello" ] + ``` + "% + = fun hash array => + let length = %array/length% array in + + let rec go = fun acc n => + if n == length then + acc + else + let x = %array/at% array n in + let hashed = hash x in + + if %record/has_field% hashed acc.seen then + go acc (n + 1) + else + let acc = { + seen = %record/insert% hashed acc.seen null, + result = acc.result @ [x], + } + in + go acc (n + 1) + in + + (go { seen = {}, result = [] } 0).result, }, contract = { @@ -4348,7 +4523,7 @@ %contract/custom% (fun _label _value => 'Error { message = msg }), fail_with - | String -> Dyn + | String -> (forall a. a) | doc m%" Abort the evaluation with the given message. The error will be reported as a contract violation, as `fail_with` uses `std.FailWith` under the hood. From d8e18b441eaf984146281263ddf47c27a4aa36ec Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 11 Dec 2024 15:15:32 +0100 Subject: [PATCH 2/4] Fix NLS stack overflow on Windows --- core/tests/integration/main.rs | 4 ++-- lsp/nls/src/main.rs | 15 ++++++++++++++- lsp/nls/src/server.rs | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/core/tests/integration/main.rs b/core/tests/integration/main.rs index 420afb81c0..fdc1b498a4 100644 --- a/core/tests/integration/main.rs +++ b/core/tests/integration/main.rs @@ -27,8 +27,8 @@ fn check_annotated_nickel_file(path: &str) { let test: TestCase = read_annotated_test_case(path).expect("Failed to parse annotated program"); - // By default, cargo runs tests with a 2MB stack, which we can overflow in - // debug mode. To avoid this we run the tests with an increased stack size. + // By default, cargo runs tests with a 2MB stack, which is easy to overflow. To avoid this we + // run the tests with an increased stack size. const STACK_SIZE: usize = 4 * 1024 * 1024; let path = String::from(project_root().join(path).to_string_lossy()); diff --git a/lsp/nls/src/main.rs b/lsp/nls/src/main.rs index 57f162daef..092203234a 100644 --- a/lsp/nls/src/main.rs +++ b/lsp/nls/src/main.rs @@ -1,4 +1,4 @@ -use std::{fs, io, path::PathBuf}; +use std::{fs, io, path::PathBuf, thread}; use anyhow::Result; @@ -29,6 +29,10 @@ mod usage; mod utils; mod world; +// Default stack size is 1MB on Windows, which is too small. We make it 8MB, which is the default +// size on Linux. +const STACK_SIZE: usize = 8 * 1024 * 1024; + use crate::{config::LspConfig, trace::Trace}; #[derive(clap::Parser, Debug)] @@ -59,6 +63,15 @@ struct Options { } fn main() -> Result<()> { + let handle = thread::Builder::new() + .stack_size(STACK_SIZE) + .spawn(run) + .unwrap(); + + handle.join().unwrap() +} + +fn run() -> Result<()> { use clap::Parser; env_logger::init(); diff --git a/lsp/nls/src/server.rs b/lsp/nls/src/server.rs index 04bd43c0fe..e6371b3c6d 100644 --- a/lsp/nls/src/server.rs +++ b/lsp/nls/src/server.rs @@ -96,6 +96,7 @@ impl Server { .send(Message::Response(response)) .unwrap(); } + pub(crate) fn notify(&mut self, notification: Notification) { trace!("Sending notification: {:#?}", notification); self.connection From 435f5e422efece321cfde5877fdbb319dd8f4e7c Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 19 Dec 2024 18:18:37 +0100 Subject: [PATCH 3/4] Get rid of hash_dedup for now, split dedup_sorted out of sort_dedup --- core/stdlib/std.ncl | 92 ++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 56 deletions(-) diff --git a/core/stdlib/std.ncl b/core/stdlib/std.ncl index a7367219a8..f6b42e61ba 100644 --- a/core/stdlib/std.ncl +++ b/core/stdlib/std.ncl @@ -960,7 +960,7 @@ This function relies on equality and has a quadratic complexity in the length of the array. It might thus be slow for large arrays or if called repeatedly. Prefer `std.array.sort_dedup` or - `std.array.hash_dedup` if you have efficiency concerns. + `std.array.dedup_sorted` if you have efficiency concerns. # Examples @@ -985,38 +985,43 @@ [] array, - sort_dedup + dedup_sorted : forall a. (a -> a -> [| 'Lesser, 'Equal, 'Greater |]) -> Array a -> Array a | doc m%" - Sorts an array based on the provided comparison operator and removes - duplicates. + Given an array already sorted by the given ordering function, remove duplicates. + + # Preconditions + + The input array must be sorted using the same order as provided to this + function. Otherwise, some duplicates might not be removed. # Performance - As opposed to `std.array.dedup`, this function has a better time - complexity (`O(n*log(n))` where `n` is the size of the array), which - should improve performance especially on large arrays. If you need to - preserve the original order of the array, see `std.array.hash_dedup`. + As opposed to `std.array.dedup`, this function has a linear time + complexity, which should improve performance especially on large arrays. + + If you sort the array only to remove duplicates, you can use + `std.array.sort_dedup` directly (which handles both sorting and + deduplication). # Examples ```nickel multiline - std.array.sort_dedup std.number.compare [ 4, 2, 1, 3, 5, 2, 1, 4 ] + std.array.dedup_sorted std.number.compare [ 1, 2, 2, 3, 4, 4, 4, 5 ] # => [ 1, 2, 3, 4, 5 ] - std.array.sort_dedup std.string.compare [ "world", "hello", "world" ] + std.array.dedup_sorted std.string.compare [ "hello", "world", "world" ] # => [ "hello", "world" ] ``` "% = fun cmp array => - let sorted = sort cmp array in - let length = %array/length% sorted in + let length = %array/length% array in let rec go = fun acc n => if n == length then acc else - let x = %array/at% sorted n in + let x = %array/at% array n in # we would prefer to use the primitive equality here instead of # relying on `cmp` returning `'Equal`. The latter 1. requires that @@ -1028,7 +1033,7 @@ # function. So for now, we rely on the comparison function. let acc = if n == 0 - || cmp (%array/at% sorted (n - 1)) x != 'Equal then + || cmp (%array/at% array (n - 1)) x != 'Equal then acc @ [x] else acc @@ -1039,59 +1044,34 @@ go [] 0, - hash_dedup - : forall a. (a -> String) -> Array a -> Array a + sort_dedup + : forall a. (a -> a -> [| 'Lesser, 'Equal, 'Greater |]) -> Array a -> Array a | doc m%" - Removes duplicates from an array efficiently. - - # Hash function + Sorts an array based on the provided comparison operator and removes + duplicates. - `hash_dedup` uses a "hash" function mapping array elements to strings. - This function is used to efficiently check if an element has already - been seen before (linear time amortized in the size of array versus - quadratic for `std.array.dedup`). + This is a combination of `std.array.sort` and `std.array.dedup_sorted`. - The hash function must separate distinct elements with very high - probability. If two elements have the same hash, they will be considered - equal and thus deduplicated. + # Performance - Although we call it a hash function, it doesn't have to produce good - hash values: the output of this function is used internally to generate - record field names, which are themselves properly hashed again by the - Nickel runtime. + As opposed to `std.array.dedup`, this function has a better time + complexity (`O(n*log(n))` where `n` is the size of the array), which + should improve performance especially on large arrays. # Examples ```nickel multiline - std.array.hash_dedup std.to_string [ 4, 2, 1, 3, 5, 2, 1, 4 ] - # => [ 4, 2, 1, 3, 5 ] + std.array.sort_dedup std.number.compare [ 4, 2, 1, 3, 5, 2, 1, 4 ] + # => [ 1, 2, 3, 4, 5 ] - std.array.hash_dedup std.function.id [ "world", "hello", "world" ] - # => [ "world", "hello" ] + std.array.sort_dedup std.string.compare [ "world", "hello", "world" ] + # => [ "hello", "world" ] ``` "% - = fun hash array => - let length = %array/length% array in - - let rec go = fun acc n => - if n == length then - acc - else - let x = %array/at% array n in - let hashed = hash x in - - if %record/has_field% hashed acc.seen then - go acc (n + 1) - else - let acc = { - seen = %record/insert% hashed acc.seen null, - result = acc.result @ [x], - } - in - go acc (n + 1) - in - - (go { seen = {}, result = [] } 0).result, + = fun cmp array => + array + |> sort cmp + |> dedup_sorted cmp, }, contract = { From a94604ffd9faaf1110c084b1a2791aa231cb4f50 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 19 Dec 2024 18:41:10 +0100 Subject: [PATCH 4/4] Update snapshot tests --- ...eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap index 7fda7501f1..d6bc75a1c8 100644 --- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap +++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_subcontract_nested_custom_diagnostics.ncl.snap @@ -21,9 +21,9 @@ warning: plain functions as contracts are deprecated = wrap this function using one of the constructors in `std.contract` instead, like `std.contract.from_validator` or `std.contract.custom` warning: plain functions as contracts are deprecated - ┌─ :1724:9 + ┌─ :1704:9 │ -1724 │ %contract/apply% contract (%label/push_diag% label) value, +1704 │ %contract/apply% contract (%label/push_diag% label) value, │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ applied to this term │ ┌─ [INPUTS_PATH]/errors/subcontract_nested_custom_diagnostics.ncl:3:21