Skip to content

Commit

Permalink
更强大的字符串转义系统, 更好的编写多行字符串
Browse files Browse the repository at this point in the history
- 新增加了内置颜色高亮和错误转义高亮
- 将大部分高亮组换成非捕获组以提速
  • Loading branch information
A4-Tacks committed Dec 18, 2023
1 parent e4d3173 commit dda8a28
Show file tree
Hide file tree
Showing 12 changed files with 228 additions and 15 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "mindustry_logic_bang_lang"
version = "0.13.8"
version = "0.13.9"
edition = "2021"

authors = ["A4-Tacks <[email protected]>"]
Expand Down
1 change: 1 addition & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

## 以下是推荐的阅读顺序 The following is the recommended reading order
> [`value.mdtlbl`](./value.mdtlbl)<br/>
> [`mult_line_string.mdtlbl`](./mult_line_string.mdtlbl)<br/>
> [`dexp.mdtlbl`](./dexp.mdtlbl)<br/>
> [`print.mdtlbl`](./print.mdtlbl)<br/>
> [`sets.mdtlbl`](./sets.mdtlbl)<br/>
Expand Down
38 changes: 38 additions & 0 deletions examples/mult_line_string.mdtlbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#**
* 这是0.13.9的字符串新语法, 可以使多行字符串更加好用
*
* 可以使用反斜杠转义行尾的换行符,
* 然后字符串将会由这个换行符忽略至下一行第一个非空白字符或者一个反斜杠转义的空格
*
* 会很好的处理对左方括号的转义和对反斜杠自身的转义
*
* 注意: `"[]"`作用为清除颜色,
* 本编译器为了当反斜杠和n紧贴时不产生新行将在中间插入此符号,
* 所以如 `"[red]foo\\nbar"` 这样的字符串, `bar`将会被清除颜色
*#

{
set x "\
This is a mult line string!\
";
set y "\
foo\
\ bar\
";
set z "\
line1\n\
line2\n\
";
set a "back slash: \\\nnormal: \\n";
set b "\
[red]\[red]\n\
[yellow]\[yellow]\n\
";
}
#* >>>
set x "This is a mult line string!"
set y "foo bar"
set z "line1\nline2\n"
set a "back slash: \\nnormal: \[]n"
set b "[red][[red]\n[yellow][[yellow]\n"
*#
3 changes: 2 additions & 1 deletion src/syntax/def.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use crate::syntax::{
ZERO_VAR,
FALSE_VAR,
};
use ::var_utils::string_escape;

grammar(meta: &mut Meta);

Expand Down Expand Up @@ -85,7 +86,7 @@ pub TopLevel: Expand = CtrlStart <mut lines:Expand> <ctrl:CtrlStop> => {
lines
};

pub String: Var = r#""[^"]*""# => <>.lines().collect::<Vec<_>>().join("\\n");
pub String: Var = r#""(?:\\\r?\n\s*(?:\\ )?|\r?\n|\\[n\\\[]|[^"\r\n\\])*""# => string_escape(<>);
pub Ident: Var = r"[_\p{XID_Start}]\p{XID_Continue}*" => <>.into();
pub OIdent: Var = r"@[_\p{XID_Start}][\p{XID_Continue}\-]*" => <>.into(); // `@abc-def`这种
pub Number: Var = r"(?:0(?:x-?[\da-fA-F][_\da-fA-F]*|b-?[01][_01]*)|-?\d[_\d]*(?:\.\d[\d_]*|e[+\-]?\d[\d_]*)?)"
Expand Down
8 changes: 6 additions & 2 deletions src/syntax/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use display_source::{
DisplaySource,
DisplaySourceMeta,
};
use var_utils::AsVarType;
use var_utils::{AsVarType,string_unescape};
pub use crate::tag_code::mdt_logic_split;
use utils::counter::Counter;

Expand Down Expand Up @@ -301,7 +301,11 @@ impl Value {
/// 返回被规范化的标识符
pub fn replace_ident(s: &str) -> String {
if Self::no_use_repr_var(s) {
s.into()
if Self::is_string(s) {
string_unescape(s)
} else {
s.into()
}
} else {
let var = s.replace('\'', "\"");
format!("'{}'", var)
Expand Down
55 changes: 55 additions & 0 deletions src/syntax/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2059,6 +2059,12 @@ fn display_source_test() {
.display_source_and_get(&mut meta),
"({\n take X = A;\n take Y = B;\n} => ((X > 10 && Y > 20) && X < Y))"
);
assert_eq!(
parse!(line_parser, r#"set a "\n\\\[hi]\\n";"#)
.unwrap()
.display_source_and_get(&mut meta),
r#"`'set'` a "\n\\[[hi]\\n";"#
);
}

#[test]
Expand Down Expand Up @@ -3886,3 +3892,52 @@ fn const_expr_eval_test() {
"#).unwrap()).compile().unwrap(),
);
}

/// Parser-level check of string literals: every body in `true_case` must
/// parse to the paired escaped body, and every body in `false_case` must be
/// rejected by the parser.
#[test]
fn string_escape_test() {
    let parser = VarParser::new();

    // (literal body as written in source, body expected after parsing)
    let true_case = [
        ("\n", r"\n"),
        ("\r\n", r"\n"),
        ("\r\nab", r"\nab"),
        ("ab\ncd", r"ab\ncd"),
        ("ab \ncd", r"ab \ncd"),
        ("ab \n cd", r"ab \n cd"),
        ("ab \\\ncd", r"ab cd"),
        ("ab\\\n cd", r"abcd"),
        ("ab\\\n \\ cd", r"ab cd"),
        ("ab\\\r\n \\ cd", r"ab cd"),
        ("ab\\\n \\ cd", r"ab cd"),
        ("ab\\\n\\ cd", r"ab cd"),
        ("ab\\\n\n\\ cd", r"ab\n cd"),
        ("ab\\\n\\\n\\ cd", r"ab cd"),
        ("\nab", r"\nab"),
        ("\\\nab", r"ab"),
        ("a\\\\b", r"a\b"),
        ("m\\\\n", r"m\[]n"),
        ("[red]\\[red]", r"[red][[red]"),
        ("你好", r"你好"),
    ];
    // Literal bodies the parser is expected to reject.
    let false_case = [
        "a\rb",
        "a\n\rb",
        "a\n\\ b",
        r"ab\r",
        r"ab\t",
        r"ab\\\",
        r"\ ab",
        r" \ ab",
        r" \ ab",
        r"a\bb",
    ];
    // Wraps a body in double quotes so it forms a complete string literal.
    let quoted = |s| format!("\"{s}\"");
    for (src, dst) in true_case {
        // Include the source body so a failing row can be identified.
        assert_eq!(parse!(parser, &quoted(src)), Ok(quoted(dst)), "src: {:?}", src);
    }
    for src in false_case {
        let src = quoted(src);
        let res = parse!(parser, &src);
        // Report as "source -> result", matching the direction of the parse.
        assert!(res.is_err(), "fail: {:?} -> {:?}", src, res)
    }
}
20 changes: 13 additions & 7 deletions syntax/vim/mdtlbl.vim
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
" Vim syntax file
" Language: mindustry_logic_bang_lang (mdtlbl)
" Maintainer: A4-Tacks <[email protected]>
" Last Change: 2023-12-18
" Last Change: 2023-12-19
" URL: https://github.com/A4-Tacks/mindustry_logic_bang_lang

" 已加载高亮时就退出
Expand Down Expand Up @@ -54,19 +54,25 @@ setlocal formatoptions+=rq


" 值(Var) {{{1
syn match mdtlblSpecialChar /\\n/ contained
syn match mdtlblStringFailedEscape /\\./ contained
hi link mdtlblStringFailedEscape Error

syn match mdtlblStringColor /\[\v%(#\x{6,8}|%(c%(lear|yan|oral)|b%(l%(ack|ue)|r%(own|ick))|white|li%(ghtgray|me)|g%(r%(ay|een)|old%(enrod)?)|darkgray|navy|r%(oyal|ed)|s%(late|ky|carlet|almon)|t%(eal|an)|acid|forest|o%(live|range)|yellow|p%(ink|urple)|ma%(genta|roon)|violet))\]/ contained
hi link mdtlblStringColor Include

syn match mdtlblSpecialChar /^ *\\ \|\\\%([n\\[]\|$\)/ contained
hi link mdtlblSpecialChar SpecialChar

syn region mdtlblString start=/"/ end=/"/ contains=mdtlblSpecialChar
syn region mdtlblString start=/"/ end=/"/ contains=mdtlblSpecialChar,mdtlblStringFailedEscape,mdtlblStringColor
hi link mdtlblString String

syn match mdtlblOIdent /@\I\i*\(-\i*\)*/
syn match mdtlblOIdent /@\I\i*\%(-\i*\)*/
hi link mdtlblOIdent Identifier

syn match mdtlblOtherVar /'[^' \t]\+'/
hi link mdtlblOtherVar Identifier

syn match mdtlblNumber /\v(<0(x\-?[0-9a-fA-F][0-9a-fA-F_]*|b\-?[01][_01]*)|\-?<\d[0-9_]*(\.\d[0-9_]*|e[+\-]?\d[0-9_]*)?)>/
syn match mdtlblNumber /\v(<0%(x\-?[0-9a-fA-F][0-9a-fA-F_]*|b\-?[01][_01]*)|\-?<\d[0-9_]*%(\.\d[0-9_]*|e[+\-]?\d[0-9_]*)?)>/
hi link mdtlblNumber Number

syn match mdtlblBoolean /\v<true|false>/
Expand All @@ -80,7 +86,7 @@ hi link mdtlblResultHandle Identifier


" Label {{{1
syn match mdtlblDefineResultHandle /\((\(\s\|\n\)*\)\@<=\I\i*:/
syn match mdtlblDefineResultHandle /\%((\%(\s\|#\*.*\*#\|\%(#[^*].*\|#\)\=\n\)*\)\@<=\I\i*:/
hi link mdtlblDefineResultHandle Identifier

syn match mdtlblIdentLabel /:\I\i*/
Expand Down Expand Up @@ -117,7 +123,7 @@ function! GetMdtlblIndent()

let diff = 0

if preline =~# '\([({\[:]\|\<\(else\)\>\)$'
if preline =~# '\([({[:]\|\<\(else\)\>\)$'
let diff += 1
endif

Expand Down
2 changes: 1 addition & 1 deletion tools/var_utils/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tools/var_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "var_utils"
version = "0.3.0"
version = "0.4.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
66 changes: 66 additions & 0 deletions tools/var_utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,5 +154,71 @@ impl AsVarType for str {
}
}

/// Processes the escape sequences in the raw text of a string literal.
///
/// Rules applied, in terms of the input characters:
/// - every `\r` is dropped, e.g. `"\r"` -> `""`
/// - a raw line break (`"\n"` or `"\r\n"`) becomes the two characters `\n`,
///   i.e. `"\\n"`
/// - backslash + line break is a line continuation: the break and any spaces
///   or tabs that follow it are dropped
/// - backslash + space yields a single space, e.g. `"a\\\n \ b"` -> `"a b"`
/// - `\\` yields a single backslash, e.g. `"a\\\\b"` -> `"a\\b"`; when the
///   next emitted character is `n`, `[]` is inserted between the two so the
///   result does not read as the `\n` escape
/// - `\[` yields `[[`
/// - anything else is copied unchanged
///
/// The `[]` guard is also applied when a line continuation separates the
/// escaped backslash from the `n`, because the continuation emits nothing
/// and would otherwise leave `\` and `n` adjacent in the output.
pub fn string_escape(s: &str) -> String {
    let mut iter = s.chars()
        .filter(|&ch| ch != '\r')
        .peekable();
    let mut res = String::with_capacity(s.len() + (s.len() >> 4));
    // True while the last emitted character is a backslash produced by `\\`.
    // Line continuations emit nothing, so they must not clear it.
    let mut after_escaped_backslash = false;
    while let Some(ch) = iter.next() {
        match (ch, iter.peek().copied()) {
            ('\\', Some('\n')) => {
                iter.next();
                // Swallow the indentation of the continued line.
                while iter.next_if(|&c| matches!(c, ' ' | '\t')).is_some() {}
                continue;
            },
            ('\\', Some('\\')) => {
                iter.next();
                res.push('\\');
                after_escaped_backslash = true;
                continue;
            },
            ('\\', Some(' ')) => {
                iter.next();
                res.push(' ');
            },
            ('\\', Some('[')) => {
                iter.next();
                res.push_str("[[");
            },
            ('\n', _) => res.push_str("\\n"),
            // Keep an escaped backslash from fusing with `n` into `\n`.
            ('n', _) if after_escaped_backslash => res.push_str("[]n"),
            (other, _) => res.push(other),
        }
        after_escaped_backslash = false;
    }
    res.shrink_to_fit();
    res
}

/// Rewrites the backslash sequences of an already-escaped string body into
/// source-level escape form.
///
/// Mapping applied to each backslash, based on the character after it:
/// - `\n` is kept as `\n`
/// - `\[]` becomes `\\`
/// - `\[` (not followed by `]`) becomes `\\[`
/// - any other backslash, including a trailing one, becomes `\\`
///
/// All non-backslash characters are copied unchanged.
pub fn string_unescape(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + (s.len() >> 4));
    let mut chars = s.chars().peekable();
    while let Some(cur) = chars.next() {
        if cur != '\\' {
            out.push(cur);
            continue;
        }
        match chars.peek().copied() {
            // The `n` itself is emitted by the next iteration.
            Some('n') => out.push('\\'),
            Some('[') => {
                chars.next();
                out.push_str(r"\\");
                // `\[]` collapses to the doubled backslash alone;
                // otherwise the bracket is kept after it.
                if chars.next_if_eq(&']').is_none() {
                    out.push('[');
                }
            },
            _ => out.push_str(r"\\"),
        }
    }
    out.shrink_to_fit();
    out
}

#[cfg(test)]
mod tests;
42 changes: 42 additions & 0 deletions tools/var_utils/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,45 @@ fn mod_op_test() {
assert_eq!(-0.2 % 1.0, -0.2); // 这是必要的防御, 在python, 它为0.8
assert_eq!(-0.2 % 2.0, -0.2);
}

/// Table-driven check of `string_escape`: each row pairs a raw input with the
/// exact escaped text expected back.
#[test]
fn string_escape_test() {
    // (input, expected output)
    let cases: &[(&str, &str)] = &[
        ("a", r"a"),
        ("a\nb", r"a\nb"),
        ("a\r\nb", r"a\nb"),
        ("a\r\r\nb\r", r"a\nb"),
        ("a\\\n b", r"ab"),
        ("a\\\n \\ b", r"a b"),
        ("a\\\n \\ b", r"a b"),
        ("a\\\n\\\n \\ b", r"a b"),
        ("a\\\n\\\r\n \\ b", r"a b"),
        ("a\\\n\\\r\n \\ \\\\ b", r"a \ b"),
        ("a\\\n\n b", r"a\n b"),
        ("a\\\\b", r"a\b"),
        ("a\\\\n", r"a\[]n"),
        ("a\\[red]b", r"a[[red]b"),
        ("你好", r"你好"),
    ];
    cases.iter().for_each(|&(input, expected)| {
        assert_eq!(string_escape(input), expected);
    });
}

/// Table-driven check of `string_unescape`: each row pairs an escaped body
/// with the source-form text expected back.
#[test]
fn string_unescape_test() {
    // (input, expected output)
    let cases: &[(&str, &str)] = &[
        (r"a", r"a"),
        (r"a\nb", r"a\nb"),
        (r"a\r\nb", r"a\\r\nb"),
        (r"a \ b", r"a \\ b"),
        (r"a \[] b", r"a \\ b"),
        (r"a \[red] b", r"a \\[red] b"),
        (r"a \\[red] b", r"a \\\\[red] b"),
        (r"a [[red] b", r"a [[red] b"),
        (r"你好", r"你好"),
    ];
    cases.iter().for_each(|&(input, expected)| {
        assert_eq!(string_unescape(input), expected);
    });
}

0 comments on commit dda8a28

Please sign in to comment.