diff --git a/.github/workflows/keyboard.yml b/.github/workflows/keyboard.yml index 7bfc1135d15..a8e527b9810 100644 --- a/.github/workflows/keyboard.yml +++ b/.github/workflows/keyboard.yml @@ -37,5 +37,7 @@ jobs: run: npm install -g @keymanapp/kmc - name: Compile Keyboards run: kmc --error-reporting build keyboards/3.0/*.xml + - name: Check ABNF + run: bash tools/scripts/keyboard-abnf-tests/check-keyboard-abnf.sh - name: Run Kbd Charts run: 'cd docs/charts/keyboards && npm ci && npm run build' diff --git a/docs/ldml/tr35-keyboards.md b/docs/ldml/tr35-keyboards.md index f1c493c3a5e..0af8252f54b 100644 --- a/docs/ldml/tr35-keyboards.md +++ b/docs/ldml/tr35-keyboards.md @@ -121,6 +121,7 @@ The LDML specification is divided into the following parts: * [Additional Features](#additional-features) * [Disallowed Regex Features](#disallowed-regex-features) * [Replacement syntax](#replacement-syntax) + * [Transform ABNF](#transform-abnf) * [Element: reorder](#element-reorder) * [Using `<import>` with `<reorder>` elements](#using-import-with-reorder-elements) * [Example Post-reorder transforms](#example-post-reorder-transforms) @@ -2412,6 +2413,31 @@ Used in the `to=` Emits the named mark. Also see [Markers](#markers). +#### Transform ABNF + +The grammar for the transform rules is formally defined +using the ABNF notation [[STD68](https://www.rfc-editor.org/info/std68)], +including the modifications found in [RFC 7405](https://www.rfc-editor.org/rfc/rfc7405). + +RFC 7405 extends ABNF with case-sensitive string literals. +Some ABNF tools are only compatible with the specification found in +[RFC 5234](https://www.rfc-editor.org/rfc/rfc5234). + +The ABNF files are located in the `keyboards/abnf` directory in the CLDR source directory: + + * `transform-from-required.abnf` + This is a partial ABNF for the `<transform from="...">` attribute. The `from=` attribute MUST match this ABNF. Not all strings which match this ABNF are valid; see the next file. 
+ + * `transform-from-invalid.abnf` + This is an additional ABNF for the `<transform from="...">` attribute showing patterns that are not valid. The `from=` attribute MUST NOT match this ABNF. + + * `transform-to-required.abnf` + This is a partial ABNF for the `<transform to="...">` attribute. The `to=` attribute MUST match this ABNF. Not all strings which match this ABNF are valid; see the next file. + + * `transform-to-invalid.abnf` + This is an additional ABNF for the `<transform to="...">` attribute showing patterns that are not valid. The `to=` attribute MUST NOT match this ABNF. + + * * * ### Element: reorder @@ -2873,6 +2899,7 @@ The following are the design principles for the IDs. * * * + © 2024–2024 Unicode, Inc. This publication is protected by copyright, and permission must be obtained from Unicode, Inc. prior to any reproduction, modification, or other use not permitted by the [Terms of Use](https://www.unicode.org/copyright.html). @@ -2885,4 +2912,4 @@ The authors, contributors, and publishers have taken care in the preparation of but make no express or implied representation or warranty of any kind and assume no responsibility or liability for errors or omissions or for consequential or incidental damages that may arise therefrom. This publication is provided “AS-IS” without charge as a convenience to users. -Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries. \ No newline at end of file +Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries. diff --git a/keyboards/abnf/transform-from-required.abnf b/keyboards/abnf/transform-from-required.abnf new file mode 100644 index 00000000000..029359b2049 --- /dev/null +++ b/keyboards/abnf/transform-from-required.abnf @@ -0,0 +1,124 @@ +from-match = atoms + +; an empty match is not allowed. 
+atoms = 1*atom ; one or more atoms
+
+atom = simple-matcher / codepoint
+
+codepoint = backslash %s"u" "{" cphex "}" ; \u{...}; %s (RFC 7405) makes the "u" case-sensitive
+
+cphex = 1*6LHEXDIG ; one to six hex digits
+
+; TODO
+simple-matcher = DIGIT / ALPHA / SP
+
+; message = simple-message / complex-message
+
+; simple-message = o [simple-start pattern]
+; simple-start = simple-start-char / escaped-char / placeholder
+; pattern = *(text-char / escaped-char / placeholder)
+; placeholder = expression / markup
+
+; complex-message = o *(declaration o) complex-body o
+; declaration = input-declaration / local-declaration
+; complex-body = quoted-pattern / matcher
+
+; input-declaration = input o variable-expression
+; local-declaration = local s variable o "=" o expression
+
+; quoted-pattern = "{{" pattern "}}"
+
+; matcher = match-statement s variant *(o variant)
+; match-statement = match 1*(s selector)
+; selector = variable
+; variant = key *(s key) o quoted-pattern
+; key = literal / "*"
+
+; ; Expressions
+; expression = literal-expression
+; / variable-expression
+; / function-expression
+; literal-expression = "{" o literal [s function] *(s attribute) o "}"
+; variable-expression = "{" o variable [s function] *(s attribute) o "}"
+; function-expression = "{" o function *(s attribute) o "}"
+
+; markup = "{" o "#" identifier *(s option) *(s attribute) o ["/"] "}" ; open and standalone
+; / "{" o "/" identifier *(s option) *(s attribute) o "}" ; close
+
+; ; Expression and literal parts
+; function = ":" identifier *(s option)
+; option = identifier o "=" o (literal / variable)
+
+; attribute = "@" identifier [o "=" o literal]
+
+; variable = "$" name
+
+; literal = quoted-literal / unquoted-literal
+; quoted-literal = "|" *(quoted-char / escaped-char) "|"
+; unquoted-literal = name / number-literal
+; ; number-literal matches JSON number (https://www.rfc-editor.org/rfc/rfc8259#section-6)
+; number-literal = ["-"] (%x30 / (%x31-39 *DIGIT)) ["." 
1*DIGIT] [%i"e" ["-" / "+"] 1*DIGIT] + +; ; Keywords; Note that these are case-sensitive +; input = %s".input" +; local = %s".local" +; match = %s".match" + +; ; Names and identifiers +; ; identifier matches https://www.w3.org/TR/REC-xml-names/#NT-QName +; ; name matches https://www.w3.org/TR/REC-xml-names/#NT-NCName but excludes U+FFFD and U+061C +; identifier = [namespace ":"] name +; namespace = name +; name = [bidi] name-start *name-char [bidi] +; name-start = ALPHA / "_" +; / %xC0-D6 / %xD8-F6 / %xF8-2FF +; / %x370-37D / %x37F-61B / %x61D-1FFF / %x200C-200D +; / %x2070-218F / %x2C00-2FEF / %x3001-D7FF +; / %xF900-FDCF / %xFDF0-FFFC / %x10000-EFFFF +; name-char = name-start / DIGIT / "-" / "." +; / %xB7 / %x300-36F / %x203F-2040 + +; ; Restrictions on characters in various contexts +; simple-start-char = content-char / "@" / "|" +; text-char = content-char / ws / "." / "@" / "|" +; quoted-char = content-char / ws / "." / "@" / "{" / "}" +; content-char = %x01-08 ; omit NULL (%x00), HTAB (%x09) and LF (%x0A) +; / %x0B-0C ; omit CR (%x0D) +; / %x0E-1F ; omit SP (%x20) +; / %x21-2D ; omit . 
(%x2E)
+; / %x2F-3F ; omit @ (%x40)
+; / %x41-5B ; omit \ (%x5C)
+; / %x5D-7A ; omit { | } (%x7B-7D)
+; / %x7E-2FFF ; omit IDEOGRAPHIC SPACE (%x3000)
+; / %x3001-10FFFF ; allowing surrogates is intentional
+
+; ; Character escapes
+; escaped-char = backslash ( backslash / "{" / "|" / "}" )
+backslash = %x5C ; U+005C REVERSE SOLIDUS "\"
+
+
+; ; Required whitespace
+; s = *bidi ws o
+
+; ; Optional whitespace
+; o = *(ws / bidi)
+
+; ; Bidirectional marks and isolates
+; ; ALM / LRM / RLM / LRI, RLI, FSI & PDI
+; bidi = %x061C / %x200E / %x200F / %x2066-2069
+
+; Whitespace characters
+; ws = SP / HTAB / CR / LF / %x3000
+
+; uppercase rules are omitted from the AST by default
+
+; from STD-68
+DIGIT = %x30-39 ; 0-9
+ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
+SP = %x20
+; HTAB = %x09 ; horizontal tab
+; LF = %x0A ; linefeed
+; CR = %x0D ; carriage return
+HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
+; like HEXDIG but lowercase also
+LHEXDIG = HEXDIG / "a" / "b" / "c" / "d" / "e" / "f"
diff --git a/tools/scripts/keyboard-abnf-tests/check-keyboard-abnf.sh b/tools/scripts/keyboard-abnf-tests/check-keyboard-abnf.sh
new file mode 100755
index 00000000000..31b55e80fda
--- /dev/null
+++ b/tools/scripts/keyboard-abnf-tests/check-keyboard-abnf.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Validates every ABNF grammar in keyboards/abnf, then runs each grammar
+# against the strings in its test suite (one string per line; lines that
+# start with '#' are comments).
+#
+# Run from the root of the CLDR checkout: all paths below are relative.
+# Exits 1 on the first grammar or test string that fails, 0 otherwise.
+
+ABNF_DIR=keyboards/abnf
+TEST_DIR=tools/scripts/keyboard-abnf-tests
+# Commands are arrays so that every argument can be quoted safely.
+abnf_check=(npx --package=abnf abnf_check)
+abnf_test=(npx --package=abnf abnf_test)
+
+# A glob that matches nothing expands to nothing rather than to itself.
+shopt -s nullglob
+
+grammars=("${ABNF_DIR}"/*.abnf)
+if (( ${#grammars[@]} == 0 )); then
+    echo "Error: no .abnf files found in ${ABNF_DIR}" >&2
+    exit 1
+fi
+
+echo "-- checking ABNF --"
+
+for abnf in "${grammars[@]}"; do
+    echo "Validating ${abnf}"
+    "${abnf_check[@]}" "${abnf}" || exit 1
+done
+
+echo "-- running test suites --"
+
+for abnf in "${grammars[@]}"; do
+    echo "Testing ${abnf}"
+    base=$(basename "${abnf}" .abnf)
+    SUITEDIR="${TEST_DIR}/${base}.d"
+    if [[ ! -d "${SUITEDIR}" ]]; then
+        echo " Warning: ${SUITEDIR} did not exist"
+        continue
+    fi
+    echo " Test suite ${SUITEDIR}"
+    suites=("${SUITEDIR}"/*.pass.txt)
+    if (( ${#suites[@]} == 0 )); then
+        echo " Warning: no *.pass.txt files in ${SUITEDIR}"
+    fi
+    for testf in "${suites[@]}"; do
+        # NOTE(review): the rule name taken from the file name is only printed;
+        # it is not passed to abnf_test. Confirm whether it should select the
+        # start rule.
+        start=$(basename "${testf}" .pass.txt)
+        echo " Testing ${testf} for ${start}"
+        # Read the suite on fd 3 so that npx cannot consume the remaining
+        # test strings from stdin. The "|| [[ -n ... ]]" keeps a final line
+        # that has no trailing newline.
+        while IFS= read -r str <&3 || [[ -n "${str}" ]]; do
+            [[ "${str}" == '#'* ]] && continue
+            echo "# '${str}'"
+            "${abnf_test[@]}" "${abnf}" -t "${str}" || exit 1
+        done 3<"${testf}"
+    done
+done
+
+echo "All OK"
+exit 0
diff --git a/tools/scripts/keyboard-abnf-tests/transform-from-required.d/from-match.pass.txt b/tools/scripts/keyboard-abnf-tests/transform-from-required.d/from-match.pass.txt
new file mode 100644
index 00000000000..7418fdeab44
--- /dev/null
+++ b/tools/scripts/keyboard-abnf-tests/transform-from-required.d/from-match.pass.txt
@@ -0,0 +1,13 @@
+abc
+#abc 𐒵
+def
+\u{1234} \u{012A} \u{22} \u{012a} \u{1234A}
+#\m{q}:
+#\m{q}L
+#\m{q}।
+#\m{q}ড
+#\m{q}ঢ
+#\m{q}ত
+#\m{q}য
+#\m{q}র
+#\m{q}ল