diff --git a/cli/src/template_builder.rs b/cli/src/template_builder.rs index 844836bb63..6940c54ed2 100644 --- a/cli/src/template_builder.rs +++ b/cli/src/template_builder.rs @@ -488,6 +488,11 @@ fn builtin_string_methods<'a, L: TemplateLanguage<'a>>() -> TemplateBuildMethodF // Not using maplit::hashmap!{} or custom declarative macro here because // code completion inside macro is quite restricted. let mut map = TemplateBuildMethodFnMap::::new(); + map.insert("len", |language, _build_ctx, self_property, function| { + template_parser::expect_no_arguments(function)?; + let out_property = TemplateFunction::new(self_property, |s| Ok(s.len().try_into()?)); + Ok(language.wrap_integer(out_property)) + }); map.insert( "contains", |language, build_ctx, self_property, function| { @@ -557,11 +562,15 @@ fn builtin_string_methods<'a, L: TemplateLanguage<'a>>() -> TemplateBuildMethodF ); map.insert("substr", |language, build_ctx, self_property, function| { let [start_idx, end_idx] = template_parser::expect_exact_arguments(function)?; - let start_idx_property = expect_integer_expression(language, build_ctx, start_idx)?; - let end_idx_property = expect_integer_expression(language, build_ctx, end_idx)?; + let start_idx_property = expect_isize_expression(language, build_ctx, start_idx)?; + let end_idx_property = expect_isize_expression(language, build_ctx, end_idx)?; let out_property = TemplateFunction::new( (self_property, start_idx_property, end_idx_property), - |(s, start_idx, end_idx)| Ok(string_substr(&s, start_idx, end_idx)), + |(s, start_idx, end_idx)| { + let start_idx = string_index_to_char_boundary(&s, start_idx); + let end_idx = string_index_to_char_boundary(&s, end_idx); + Ok(s.get(start_idx..end_idx).unwrap_or_default().to_owned()) + }, ); Ok(language.wrap_string(out_property)) }); @@ -595,28 +604,19 @@ fn builtin_string_methods<'a, L: TemplateLanguage<'a>>() -> TemplateBuildMethodF map } -fn string_substr(s: &str, start_idx: i64, end_idx: i64) -> String { - // TODO: If we add .len() method, we'll expose bytes-based and char-based APIs. - // Having different index units would be confusing, so we might want to change - // .substr() to bytes-based and round up/down towards char or grapheme-cluster - // boundary. - let to_idx = |i: i64| -> usize { - let magnitude = usize::try_from(i.unsigned_abs()).unwrap_or(usize::MAX); - if i < 0 { - s.chars().count().saturating_sub(magnitude) - } else { - magnitude - } - }; - let start_idx = to_idx(start_idx); - let end_idx = to_idx(end_idx); - if start_idx >= end_idx { - String::new() +/// Clamps and aligns the given index `i` to char boundary. +/// +/// Negative index counts from the end. If the index isn't at a char boundary, +/// it will be rounded towards 0 (left or right depending on the sign.) +fn string_index_to_char_boundary(s: &str, i: isize) -> usize { + // TODO: use floor/ceil_char_boundary() if get stabilized + let magnitude = i.unsigned_abs(); + if i < 0 { + let p = s.len().saturating_sub(magnitude); + (p..=s.len()).find(|&p| s.is_char_boundary(p)).unwrap() } else { - s.chars() - .skip(start_idx) - .take(end_idx - start_idx) - .collect() + let p = magnitude.min(s.len()); + (0..=p).rev().find(|&p| s.is_char_boundary(p)).unwrap() } } @@ -767,6 +767,12 @@ where O: Template<()> + Clone + 'a, { let property = match function.name { + "len" => { + template_parser::expect_no_arguments(function)?; + let out_property = + TemplateFunction::new(self_property, |items| Ok(items.len().try_into()?)); + language.wrap_integer(out_property) + } "join" => { let [separator_node] = template_parser::expect_exact_arguments(function)?; let separator = expect_template_expression(language, build_ctx, separator_node)?; @@ -794,6 +800,12 @@ where O: Clone + 'a, { let property = match function.name { + "len" => { + template_parser::expect_no_arguments(function)?; + let out_property = + TemplateFunction::new(self_property, |items| Ok(items.len().try_into()?)); + language.wrap_integer(out_property) + } // No "join" "map" => build_map_operation(language, build_ctx, self_property, function, wrap_item)?, _ => return Err(TemplateParseError::no_such_method("List", function)), @@ -1032,6 +1044,17 @@ pub fn expect_integer_expression<'a, L: TemplateLanguage<'a>>( .ok_or_else(|| TemplateParseError::expected_type("Integer", node.span)) } +/// If the given expression `node` is of `Integer` type, converts it to `isize`. +pub fn expect_isize_expression<'a, L: TemplateLanguage<'a>>( + language: &L, + build_ctx: &BuildContext, + node: &ExpressionNode, +) -> TemplateParseResult + 'a>> { + let i64_property = expect_integer_expression(language, build_ctx, node)?; + let isize_property = TemplateFunction::new(i64_property, |v| Ok(isize::try_from(v)?)); + Ok(Box::new(isize_property)) +} + /// If the given expression `node` is of `Integer` type, converts it to `usize`. pub fn expect_usize_expression<'a, L: TemplateLanguage<'a>>( language: &L, @@ -1530,6 +1553,9 @@ mod tests { language.wrap_string(Literal("sep".to_owned())) }); + insta::assert_snapshot!(env.render_ok(r#""".lines().len()"#), @"0"); + insta::assert_snapshot!(env.render_ok(r#""a\nb\nc".lines().len()"#), @"3"); + insta::assert_snapshot!(env.render_ok(r#""".lines().join("|")"#), @""); insta::assert_snapshot!(env.render_ok(r#""a\nb\nc".lines().join("|")"#), @"a|b|c"); // Null separator @@ -1627,6 +1653,10 @@ mod tests { language.wrap_string(Literal("description 1".to_owned())) }); + insta::assert_snapshot!(env.render_ok(r#""".len()"#), @"0"); + insta::assert_snapshot!(env.render_ok(r#""foo".len()"#), @"3"); + insta::assert_snapshot!(env.render_ok(r#""💩".len()"#), @"4"); + insta::assert_snapshot!(env.render_ok(r#""fooo".contains("foo")"#), @"true"); insta::assert_snapshot!(env.render_ok(r#""foo".contains("fooo")"#), @"false"); insta::assert_snapshot!(env.render_ok(r#"description.contains("description")"#), @"true"); @@ -1668,11 +1698,25 @@ mod tests { insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 0)"#), @""); insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 1)"#), @"f"); - insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 99)"#), @"foo"); + insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 3)"#), @"foo"); + insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 4)"#), @"foo"); insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(2, -1)"#), @"cde"); insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(-3, 99)"#), @"def"); - insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(2, -1)"#), @"c"); - insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(-1, 99)"#), @"💩"); + insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(-6, 99)"#), @"abcdef"); + insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(-7, 1)"#), @"a"); + + // non-ascii characters + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(2, -1)"#), @"c💩"); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, -3)"#), @"💩"); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, -4)"#), @""); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(6, -3)"#), @"💩"); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(7, -3)"#), @""); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, 4)"#), @""); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, 6)"#), @""); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, 7)"#), @"💩"); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(-1, 7)"#), @""); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(-3, 7)"#), @""); + insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(-4, 7)"#), @"💩"); // ranges with end > start are empty insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(4, 2)"#), @""); diff --git a/cli/tests/test_commit_template.rs b/cli/tests/test_commit_template.rs index 13eda0c4fa..6f424cf3bc 100644 --- a/cli/tests/test_commit_template.rs +++ b/cli/tests/test_commit_template.rs @@ -26,17 +26,18 @@ fn test_log_parents() { test_env.jj_cmd_ok(&repo_path, &["new", "@-"]); test_env.jj_cmd_ok(&repo_path, &["new", "@", "@-"]); - let template = r#"commit_id ++ "\nP: " ++ parents.map(|c| c.commit_id()) ++ "\n""#; + let template = + r#"commit_id ++ "\nP: " ++ parents.len() ++ " " ++ parents.map(|c| c.commit_id()) ++ "\n""#; let stdout = test_env.jj_cmd_success(&repo_path, &["log", "-T", template]); insta::assert_snapshot!(stdout, @r###" @ c067170d4ca1bc6162b64f7550617ec809647f84 - ├─╮ P: 4db490c88528133d579540b6900b8098f0c17701 230dd059e1b059aefc0da06a2e5a7dbf22362f22 + ├─╮ P: 2 4db490c88528133d579540b6900b8098f0c17701 230dd059e1b059aefc0da06a2e5a7dbf22362f22 ◉ │ 4db490c88528133d579540b6900b8098f0c17701 - ├─╯ P: 230dd059e1b059aefc0da06a2e5a7dbf22362f22 + ├─╯ P: 1 230dd059e1b059aefc0da06a2e5a7dbf22362f22 ◉ 230dd059e1b059aefc0da06a2e5a7dbf22362f22 - │ P: 0000000000000000000000000000000000000000 + │ P: 1 0000000000000000000000000000000000000000 ◉ 0000000000000000000000000000000000000000 - P: + P: 0 "###); let template = r#"parents.map(|c| c.commit_id().shortest(4))"#; diff --git a/docs/templates.md b/docs/templates.md index 0825fc03a0..c9424f6033 100644 --- a/docs/templates.md +++ b/docs/templates.md @@ -105,6 +105,7 @@ No methods are defined. A list can be implicitly converted to `Boolean`. The following methods are defined. +* `.len() -> Integer`: Number of elements in the list. * `.join(separator: Template) -> Template`: Concatenate elements with the given `separator`. * `.map(|item| expression) -> ListTemplate`: Apply template `expression` @@ -164,6 +165,7 @@ The following methods are defined. A string can be implicitly converted to `Boolean`. The following methods are defined. +* `.len() -> Integer`: Length in UTF-8 bytes. * `.contains(needle: Template) -> Boolean` * `.first_line() -> String` * `.lines() -> List`: Split into lines excluding newline characters. @@ -173,7 +175,9 @@ defined. * `.ends_with(needle: Template) -> Boolean` * `.remove_prefix(needle: Template) -> String`: Removes the passed prefix, if present * `.remove_suffix(needle: Template) -> String`: Removes the passed suffix, if present -* `.substr(start: Integer, end: Integer) -> String`: Extract substring. Negative values count from the end. +* `.substr(start: Integer, end: Integer) -> String`: Extract substring. The + `start`/`end` indices should be specified in UTF-8 bytes. Negative values + count from the end of the string. #### String literals