Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

templater: add .len() method, make .substr() byte-index based #3168

Merged
merged 3 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 71 additions & 27 deletions cli/src/template_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,11 @@ fn builtin_string_methods<'a, L: TemplateLanguage<'a>>() -> TemplateBuildMethodF
// Not using maplit::hashmap!{} or custom declarative macro here because
// code completion inside macro is quite restricted.
let mut map = TemplateBuildMethodFnMap::<L, String>::new();
map.insert("len", |language, _build_ctx, self_property, function| {
template_parser::expect_no_arguments(function)?;
let out_property = TemplateFunction::new(self_property, |s| Ok(s.len().try_into()?));
Ok(language.wrap_integer(out_property))
});
map.insert(
"contains",
|language, build_ctx, self_property, function| {
Expand Down Expand Up @@ -557,11 +562,15 @@ fn builtin_string_methods<'a, L: TemplateLanguage<'a>>() -> TemplateBuildMethodF
);
map.insert("substr", |language, build_ctx, self_property, function| {
let [start_idx, end_idx] = template_parser::expect_exact_arguments(function)?;
let start_idx_property = expect_integer_expression(language, build_ctx, start_idx)?;
let end_idx_property = expect_integer_expression(language, build_ctx, end_idx)?;
let start_idx_property = expect_isize_expression(language, build_ctx, start_idx)?;
let end_idx_property = expect_isize_expression(language, build_ctx, end_idx)?;
let out_property = TemplateFunction::new(
(self_property, start_idx_property, end_idx_property),
|(s, start_idx, end_idx)| Ok(string_substr(&s, start_idx, end_idx)),
|(s, start_idx, end_idx)| {
let start_idx = string_index_to_char_boundary(&s, start_idx);
let end_idx = string_index_to_char_boundary(&s, end_idx);
Ok(s.get(start_idx..end_idx).unwrap_or_default().to_owned())
},
);
Ok(language.wrap_string(out_property))
});
Expand Down Expand Up @@ -595,28 +604,19 @@ fn builtin_string_methods<'a, L: TemplateLanguage<'a>>() -> TemplateBuildMethodF
map
}

fn string_substr(s: &str, start_idx: i64, end_idx: i64) -> String {
// TODO: If we add .len() method, we'll expose bytes-based and char-based APIs.
// Having different index units would be confusing, so we might want to change
// .substr() to bytes-based and round up/down towards char or grapheme-cluster
// boundary.
let to_idx = |i: i64| -> usize {
let magnitude = usize::try_from(i.unsigned_abs()).unwrap_or(usize::MAX);
if i < 0 {
s.chars().count().saturating_sub(magnitude)
} else {
magnitude
}
};
let start_idx = to_idx(start_idx);
let end_idx = to_idx(end_idx);
if start_idx >= end_idx {
String::new()
/// Clamps and aligns the given index `i` to char boundary.
///
/// Negative index counts from the end. If the index isn't at a char boundary,
/// it will be rounded towards 0 (left or right depending on the sign.)
fn string_index_to_char_boundary(s: &str, i: isize) -> usize {
// TODO: use floor/ceil_char_boundary() if get stabilized
let magnitude = i.unsigned_abs();
if i < 0 {
let p = s.len().saturating_sub(magnitude);
(p..=s.len()).find(|&p| s.is_char_boundary(p)).unwrap()
} else {
s.chars()
.skip(start_idx)
.take(end_idx - start_idx)
.collect()
let p = magnitude.min(s.len());
(0..=p).rev().find(|&p| s.is_char_boundary(p)).unwrap()
}
}

Expand Down Expand Up @@ -767,6 +767,12 @@ where
O: Template<()> + Clone + 'a,
{
let property = match function.name {
"len" => {
template_parser::expect_no_arguments(function)?;
let out_property =
TemplateFunction::new(self_property, |items| Ok(items.len().try_into()?));
language.wrap_integer(out_property)
}
"join" => {
let [separator_node] = template_parser::expect_exact_arguments(function)?;
let separator = expect_template_expression(language, build_ctx, separator_node)?;
Expand Down Expand Up @@ -794,6 +800,12 @@ where
O: Clone + 'a,
{
let property = match function.name {
"len" => {
template_parser::expect_no_arguments(function)?;
let out_property =
TemplateFunction::new(self_property, |items| Ok(items.len().try_into()?));
language.wrap_integer(out_property)
}
// No "join"
"map" => build_map_operation(language, build_ctx, self_property, function, wrap_item)?,
_ => return Err(TemplateParseError::no_such_method("List", function)),
Expand Down Expand Up @@ -1032,6 +1044,17 @@ pub fn expect_integer_expression<'a, L: TemplateLanguage<'a>>(
.ok_or_else(|| TemplateParseError::expected_type("Integer", node.span))
}

/// If the given expression `node` is of `Integer` type, converts it to `isize`.
pub fn expect_isize_expression<'a, L: TemplateLanguage<'a>>(
language: &L,
build_ctx: &BuildContext<L::Property>,
node: &ExpressionNode,
) -> TemplateParseResult<Box<dyn TemplateProperty<L::Context, Output = isize> + 'a>> {
let i64_property = expect_integer_expression(language, build_ctx, node)?;
let isize_property = TemplateFunction::new(i64_property, |v| Ok(isize::try_from(v)?));
Ok(Box::new(isize_property))
}

/// If the given expression `node` is of `Integer` type, converts it to `usize`.
pub fn expect_usize_expression<'a, L: TemplateLanguage<'a>>(
language: &L,
Expand Down Expand Up @@ -1530,6 +1553,9 @@ mod tests {
language.wrap_string(Literal("sep".to_owned()))
});

insta::assert_snapshot!(env.render_ok(r#""".lines().len()"#), @"0");
insta::assert_snapshot!(env.render_ok(r#""a\nb\nc".lines().len()"#), @"3");

insta::assert_snapshot!(env.render_ok(r#""".lines().join("|")"#), @"");
insta::assert_snapshot!(env.render_ok(r#""a\nb\nc".lines().join("|")"#), @"a|b|c");
// Null separator
Expand Down Expand Up @@ -1627,6 +1653,10 @@ mod tests {
language.wrap_string(Literal("description 1".to_owned()))
});

insta::assert_snapshot!(env.render_ok(r#""".len()"#), @"0");
insta::assert_snapshot!(env.render_ok(r#""foo".len()"#), @"3");
insta::assert_snapshot!(env.render_ok(r#""💩".len()"#), @"4");

insta::assert_snapshot!(env.render_ok(r#""fooo".contains("foo")"#), @"true");
insta::assert_snapshot!(env.render_ok(r#""foo".contains("fooo")"#), @"false");
insta::assert_snapshot!(env.render_ok(r#"description.contains("description")"#), @"true");
Expand Down Expand Up @@ -1668,11 +1698,25 @@ mod tests {

insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 0)"#), @"");
insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 1)"#), @"f");
insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 99)"#), @"foo");
insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 3)"#), @"foo");
insta::assert_snapshot!(env.render_ok(r#""foo".substr(0, 4)"#), @"foo");
insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(2, -1)"#), @"cde");
insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(-3, 99)"#), @"def");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(2, -1)"#), @"c");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(-1, 99)"#), @"💩");
insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(-6, 99)"#), @"abcdef");
insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(-7, 1)"#), @"a");

// non-ascii characters
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(2, -1)"#), @"c💩");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, -3)"#), @"💩");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, -4)"#), @"");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(6, -3)"#), @"💩");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(7, -3)"#), @"");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, 4)"#), @"");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, 6)"#), @"");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(3, 7)"#), @"💩");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(-1, 7)"#), @"");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(-3, 7)"#), @"");
insta::assert_snapshot!(env.render_ok(r#""abc💩".substr(-4, 7)"#), @"💩");

// ranges with end > start are empty
insta::assert_snapshot!(env.render_ok(r#""abcdef".substr(4, 2)"#), @"");
Expand Down
11 changes: 6 additions & 5 deletions cli/tests/test_commit_template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,18 @@ fn test_log_parents() {
test_env.jj_cmd_ok(&repo_path, &["new", "@-"]);
test_env.jj_cmd_ok(&repo_path, &["new", "@", "@-"]);

let template = r#"commit_id ++ "\nP: " ++ parents.map(|c| c.commit_id()) ++ "\n""#;
let template =
r#"commit_id ++ "\nP: " ++ parents.len() ++ " " ++ parents.map(|c| c.commit_id()) ++ "\n""#;
let stdout = test_env.jj_cmd_success(&repo_path, &["log", "-T", template]);
insta::assert_snapshot!(stdout, @r###"
@ c067170d4ca1bc6162b64f7550617ec809647f84
├─╮ P: 4db490c88528133d579540b6900b8098f0c17701 230dd059e1b059aefc0da06a2e5a7dbf22362f22
├─╮ P: 2 4db490c88528133d579540b6900b8098f0c17701 230dd059e1b059aefc0da06a2e5a7dbf22362f22
◉ │ 4db490c88528133d579540b6900b8098f0c17701
├─╯ P: 230dd059e1b059aefc0da06a2e5a7dbf22362f22
├─╯ P: 1 230dd059e1b059aefc0da06a2e5a7dbf22362f22
◉ 230dd059e1b059aefc0da06a2e5a7dbf22362f22
│ P: 0000000000000000000000000000000000000000
│ P: 1 0000000000000000000000000000000000000000
◉ 0000000000000000000000000000000000000000
P:
P: 0
"###);

let template = r#"parents.map(|c| c.commit_id().shortest(4))"#;
Expand Down
6 changes: 5 additions & 1 deletion docs/templates.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ No methods are defined.
A list can be implicitly converted to `Boolean`. The following methods are
defined.

* `.len() -> Integer`: Number of elements in the list.
* `.join(separator: Template) -> Template`: Concatenate elements with
the given `separator`.
* `.map(|item| expression) -> ListTemplate`: Apply template `expression`
Expand Down Expand Up @@ -164,6 +165,7 @@ The following methods are defined.
A string can be implicitly converted to `Boolean`. The following methods are
defined.

* `.len() -> Integer`: Length in UTF-8 bytes.
* `.contains(needle: Template) -> Boolean`
* `.first_line() -> String`
* `.lines() -> List<String>`: Split into lines excluding newline characters.
Expand All @@ -173,7 +175,9 @@ defined.
* `.ends_with(needle: Template) -> Boolean`
* `.remove_prefix(needle: Template) -> String`: Removes the passed prefix, if present
* `.remove_suffix(needle: Template) -> String`: Removes the passed suffix, if present
* `.substr(start: Integer, end: Integer) -> String`: Extract substring. Negative values count from the end.
* `.substr(start: Integer, end: Integer) -> String`: Extract substring. The
`start`/`end` indices should be specified in UTF-8 bytes. Negative values
count from the end of the string.

#### String literals

Expand Down