From 534a9278894440befa214917c6a413475d237632 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Wed, 10 Apr 2024 15:32:47 -0500 Subject: [PATCH] CLDR-17424 v45: mfwg2 spec pour (#3626) --- common/dtd/messageFormat/message.dtd | 58 + common/dtd/messageFormat/message.json | 242 + common/testData/messageFormat/README.md | 41 + .../messageFormat/data-model-errors.json | 32 + .../testData/messageFormat/syntax-errors.json | 56 + common/testData/messageFormat/test-core.json | 212 + .../messageFormat/test-core.json.d.ts | 25 + .../messageFormat/test-functions.json | 322 ++ .../messageFormat/test-functions.json.d.ts | 8 + .../images/messageFormatCurrencyExample.png | Bin 0 -> 3895 bytes docs/ldml/tr35-messageFormat.md | 3919 ++++++++++++++++- tools/scripts/tr-archive/archive.js | 6 +- tools/scripts/tr-archive/make-tr-archive.sh | 1 + tools/scripts/tr-archive/package-lock.json | 31 +- tools/scripts/tr-archive/package.json | 3 +- tools/scripts/tr-archive/tr35.css | 2 + 16 files changed, 4937 insertions(+), 21 deletions(-) create mode 100644 common/dtd/messageFormat/message.dtd create mode 100644 common/dtd/messageFormat/message.json create mode 100644 common/testData/messageFormat/README.md create mode 100644 common/testData/messageFormat/data-model-errors.json create mode 100644 common/testData/messageFormat/syntax-errors.json create mode 100644 common/testData/messageFormat/test-core.json create mode 100644 common/testData/messageFormat/test-core.json.d.ts create mode 100644 common/testData/messageFormat/test-functions.json create mode 100644 common/testData/messageFormat/test-functions.json.d.ts create mode 100644 docs/ldml/images/messageFormatCurrencyExample.png create mode 100644 tools/scripts/tr-archive/tr35.css diff --git a/common/dtd/messageFormat/message.dtd b/common/dtd/messageFormat/message.dtd new file mode 100644 index 00000000000..c89c9e39872 --- /dev/null +++ b/common/dtd/messageFormat/message.dtd @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + diff --git a/common/dtd/messageFormat/message.json b/common/dtd/messageFormat/message.json new file mode 100644 index 00000000000..6d4036887a4 --- /dev/null +++ b/common/dtd/messageFormat/message.json @@ -0,0 +1,242 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://github.com/unicode-org/cldr/blob/maint/maint-45/common/dtd/messageFormat/message.json", + + "oneOf": [{ "$ref": "#/$defs/message" }, { "$ref": "#/$defs/select" }], + + "$defs": { + "literal": { + "type": "object", + "properties": { + "type": { "const": "literal" }, + "value": { "type": "string" } + }, + "required": ["type", "value"] + }, + "variable": { + "type": "object", + "properties": { + "type": { "const": "variable" }, + "name": { "type": "string" } + }, + "required": ["type", "name"] + }, + "options": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "value": { + "oneOf": [ + { "$ref": "#/$defs/literal" }, + { "$ref": "#/$defs/variable" } + ] + } + }, + "required": ["name", "value"] + } + }, + "attributes": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "value": { + "oneOf": [ + { "$ref": "#/$defs/literal" }, + { "$ref": "#/$defs/variable" } + ] + } + }, + "required": ["name"] + } + }, + + "function-annotation": { + "type": "object", + "properties": { + "type": { "const": "function" }, + "name": { "type": "string" }, + "options": { "$ref": "#/$defs/options" } + }, + "required": ["type", "name"] + }, + "unsupported-annotation": { + "type": "object", + "properties": { + "type": { "const": "unsupported-annotation" }, + "source": { "type": "string" } + }, + "required": ["type", "source"] + }, + "annotation": { + "oneOf": [ + { "$ref": "#/$defs/function-annotation" }, + { "$ref": "#/$defs/unsupported-annotation" } + ] + }, + + "literal-expression": { + "type": "object", + "properties": { + "type": { "const": "expression" }, + 
"arg": { "$ref": "#/$defs/literal" }, + "annotation": { "$ref": "#/$defs/annotation" }, + "attributes": { "$ref": "#/$defs/attributes" } + }, + "required": ["type", "arg"] + }, + "variable-expression": { + "type": "object", + "properties": { + "type": { "const": "expression" }, + "arg": { "$ref": "#/$defs/variable" }, + "annotation": { "$ref": "#/$defs/annotation" }, + "attributes": { "$ref": "#/$defs/attributes" } + }, + "required": ["type", "arg"] + }, + "function-expression": { + "type": "object", + "properties": { + "type": { "const": "expression" }, + "annotation": { "$ref": "#/$defs/function-annotation" }, + "attributes": { "$ref": "#/$defs/attributes" } + }, + "required": ["type", "annotation"] + }, + "unsupported-expression": { + "type": "object", + "properties": { + "type": { "const": "expression" }, + "annotation": { "$ref": "#/$defs/unsupported-annotation" }, + "attributes": { "$ref": "#/$defs/attributes" } + }, + "required": ["type", "annotation"] + }, + "expression": { + "oneOf": [ + { "$ref": "#/$defs/literal-expression" }, + { "$ref": "#/$defs/variable-expression" }, + { "$ref": "#/$defs/function-expression" }, + { "$ref": "#/$defs/unsupported-expression" } + ] + }, + + "markup": { + "type": "object", + "properties": { + "type": { "const": "markup" }, + "kind": { "oneOf": [ "open", "standalone", "close" ] }, + "name": { "type": "string" }, + "options": { "$ref": "#/$defs/options" }, + "attributes": { "$ref": "#/$defs/attributes" } + }, + "required": ["type", "kind", "name"] + }, + + "pattern": { + "type": "array", + "items": { + "oneOf": [ + { "type": "string" }, + { "$ref": "#/$defs/expression" }, + { "$ref": "#/$defs/markup" } + ] + } + }, + + "input-declaration": { + "type": "object", + "properties": { + "type": { "const": "input" }, + "name": { "type": "string" }, + "value": { "$ref": "#/$defs/variable-expression" } + }, + "required": ["type", "name", "value"] + }, + "local-declaration": { + "type": "object", + "properties": { + "type": { 
"const": "local" }, + "name": { "type": "string" }, + "value": { "$ref": "#/$defs/expression" } + }, + "required": ["type", "name", "value"] + }, + "unsupported-statement": { + "type": "object", + "properties": { + "type": { "const": "unsupported-statement" }, + "keyword": { "type": "string" }, + "body": { "type": "string" }, + "expressions": { + "type": "array", + "items": { "$ref": "#/$defs/expression" } + } + }, + "required": ["type", "keyword", "expressions"] + }, + "declarations": { + "type": "array", + "items": { + "oneOf": [ + { "$ref": "#/$defs/input-declaration" }, + { "$ref": "#/$defs/local-declaration" }, + { "$ref": "#/$defs/unsupported-statement" } + ] + } + }, + + "variant-key": { + "oneOf": [ + { "$ref": "#/$defs/literal" }, + { + "type": "object", + "properties": { + "type": { "const": "*" }, + "value": { "type": "string" } + }, + "required": ["type"] + } + ] + }, + "message": { + "type": "object", + "properties": { + "type": { "const": "message" }, + "declarations": { "$ref": "#/$defs/declarations" }, + "pattern": { "$ref": "#/$defs/pattern" } + }, + "required": ["type", "declarations", "pattern"] + }, + "select": { + "type": "object", + "properties": { + "type": { "const": "select" }, + "declarations": { "$ref": "#/$defs/declarations" }, + "selectors": { + "type": "array", + "items": { "$ref": "#/$defs/expression" } + }, + "variants": { + "type": "array", + "items": { + "type": "object", + "properties": { + "keys": { + "type": "array", + "items": { "$ref": "#/$defs/variant-key" } + }, + "value": { "$ref": "#/$defs/pattern" } + }, + "required": ["keys", "value"] + } + } + }, + "required": ["type", "declarations", "selectors", "variants"] + } + } +} diff --git a/common/testData/messageFormat/README.md b/common/testData/messageFormat/README.md new file mode 100644 index 00000000000..6fa17197bc4 --- /dev/null +++ b/common/testData/messageFormat/README.md @@ -0,0 +1,41 @@ +# Test Data for CLDR MessageFormat 2.0 Tech Preview + +For information about 
MessageFormat 2.0, see [Unicode Locale Data Markup Language (LDML): Part 9: Message Format](../../../docs/ldml/tr35-messageFormat.md) + +The files in this directory were originally copied from the [messageformat project](https://github.com/messageformat/messageformat/tree/11c95dab2b25db8454e49ff4daadb817e1d5b770/packages/mf2-messageformat/src/__fixtures) +and are here relicensed by their original author (Eemeli Aro) under the Unicode License. + +These test files are intended to be useful for testing multiple different message processors in different ways: + +- `syntax-errors.json` — An array of strings that should produce a Syntax Error when parsed. + +- `data-model-errors.json` — An object with string keys and arrays of strings as values, + where each key is the name of an error and its value is an array of strings that + should produce that error when processed. + Error names are defined in ["MessageFormat 2.0 Errors"](../../../docs/ldml/tr35-messageFormat.md#errors) in the spec. + +- `test-core.json` — An array of test cases that do not depend on any registry definitions. + Each test may include some of the following fields: + - `src: string` (required) — The MF2 syntax source. + - `exp: string` (required) — The expected result of formatting the message to a string. + - `locale: string` — The locale to use for formatting. Defaults to 'en-US'. + - `params: Record<string, unknown>` — Parameters to pass in to the formatter for resolving external variables. + - `parts: object[]` — The expected result of formatting the message to parts. + - `cleanSrc: string` — A normalized form of `src`, for testing stringifiers. + - `errors: { type: string }[]` — The runtime errors expected to be emitted when formatting the message. + If `errors` is either absent or empty, the message must be formatted without errors. + - `only: boolean` — Normally not set. A flag to use during development to only run one or more specific tests. 
+ +- `test-functions.json` — An object with string keys and arrays of test cases as values, + using the same definition as for `test-core.json`. + The keys each correspond to a function that is used in the tests. + Since the behavior of built-in formatters is implementation-specific, + the `exp` field should generally be omitted, + except for error cases. + +TypeScript `.d.ts` files are included for `test-core.json` and `test-functions.json` with the above definition. + +Some examples of test harnesses using these tests, from the source repository: +- [CST parse/stringify tests](https://github.com/messageformat/messageformat/blob/11c95dab2b25db8454e49ff4daadb817e1d5b770/packages/mf2-messageformat/src/cst/cst.test.ts) +- [Data model stringify tests](https://github.com/messageformat/messageformat/blob/11c95dab2b25db8454e49ff4daadb817e1d5b770/packages/mf2-messageformat/src/data-model/stringify.test.ts) +- [Formatting tests](https://github.com/messageformat/messageformat/blob/11c95dab2b25db8454e49ff4daadb817e1d5b770/packages/mf2-messageformat/src/messageformat.test.ts) diff --git a/common/testData/messageFormat/data-model-errors.json b/common/testData/messageFormat/data-model-errors.json new file mode 100644 index 00000000000..0a6bd67641b --- /dev/null +++ b/common/testData/messageFormat/data-model-errors.json @@ -0,0 +1,32 @@ +{ + "Variant Key Mismatch": [ + ".match {$foo :x} * * {{foo}}", + ".match {$foo :x} {$bar :x} * {{foo}}" + ], + "Missing Fallback Variant": [ + ".match {:foo} 1 {{_}}", + ".match {:foo} other {{_}}", + ".match {:foo} {:bar} * 1 {{_}} 1 * {{_}}" + ], + "Missing Selector Annotation": [ + ".match {$foo} one {{one}} * {{other}}", + ".input {$foo} .match {$foo} one {{one}} * {{other}}", + ".local $foo = {$bar} .match {$foo} one {{one}} * {{other}}" + ], + "Duplicate Declaration": [ + ".input {$foo} .input {$foo} {{_}}", + ".input {$foo} .local $foo = {42} {{_}}", + ".local $foo = {42} .input {$foo} {{_}}", + ".local $foo = {:unknown} .local $foo = {42} 
{{_}}", + ".local $foo = {$bar} .local $bar = {42} {{_}}", + ".local $foo = {$foo} {{_}}", + ".local $foo = {$bar} .local $bar = {$baz} {{_}}", + ".local $foo = {$bar :func} .local $bar = {$baz} {{_}}", + ".local $foo = {42 :func opt=$foo} {{_}}", + ".local $foo = {42 :func opt=$bar} .local $bar = {42} {{_}}" + ], + "Duplicate Option Name": [ + "bad {:placeholder option=x option=x}", + "bad {:placeholder ns:option=x ns:option=y}" + ] +} diff --git a/common/testData/messageFormat/syntax-errors.json b/common/testData/messageFormat/syntax-errors.json new file mode 100644 index 00000000000..fc4537131c8 --- /dev/null +++ b/common/testData/messageFormat/syntax-errors.json @@ -0,0 +1,56 @@ +[ + ".", + "{", + "}", + "{}", + "{{", + "{{}", + "{{}}}", + "{|foo| #markup}", + "{{missing end brace}", + "{{missing end braces", + "{{missing end {$braces", + "{{extra}} content", + "empty { } placeholder", + "missing space {42:func}", + "missing space {|foo|:func}", + "missing space {|foo|@bar}", + "missing space {:func@bar}", + "missing space {:func @bar@baz}", + "missing space {:func @bar=42@baz}", + "missing space {+reserved@bar}", + "missing space {&private@bar}", + "bad {:} placeholder", + "bad {\\u0000placeholder}", + "no-equal {|42| :number minimumFractionDigits 2}", + "bad {:placeholder option=}", + "bad {:placeholder option value}", + "bad {:placeholder option:value}", + "bad {:placeholder option}", + "bad {:placeholder:}", + "bad {::placeholder}", + "bad {:placeholder::foo}", + "bad {:placeholder option:=x}", + "bad {:placeholder :option=x}", + "bad {:placeholder option::x=y}", + "bad {$placeholder option}", + "bad {:placeholder @attribute=}", + "bad {:placeholder @attribute=@foo}", + "no {placeholder end", + "no {$placeholder end", + "no {:placeholder end", + "no {|placeholder| end", + "no {|literal} end", + "no {|literal or placeholder end", + ".local bar = {|foo|} {{_}}", + ".local #bar = {|foo|} {{_}}", + ".local $bar {|foo|} {{_}}", + ".local $bar = |foo| {{_}}", + 
".match {#foo} * {{foo}}", + ".match {} * {{foo}}", + ".match {|foo| :x} {|bar| :x} ** {{foo}}", + ".match * {{foo}}", + ".match {|x| :x} * foo", + ".match {|x| :x} * {{foo}} extra", + ".match |x| * {{foo}}" +] diff --git a/common/testData/messageFormat/test-core.json b/common/testData/messageFormat/test-core.json new file mode 100644 index 00000000000..0e7049fdb1a --- /dev/null +++ b/common/testData/messageFormat/test-core.json @@ -0,0 +1,212 @@ +[ + { "src": "hello", "exp": "hello" }, + { "src": "hello {world}", "exp": "hello world" }, + { + "src": "hello { world\t\n}", + "exp": "hello world", + "cleanSrc": "hello {world}" + }, + { + "src": "hello {\u3000world\r}", + "exp": "hello world", + "cleanSrc": "hello {world}" + }, + { "src": "hello {|world|}", "exp": "hello world" }, + { "src": "hello {||}", "exp": "hello " }, + { + "src": "hello {$place}", + "params": { "place": "world" }, + "exp": "hello world" + }, + { + "src": "hello {$place-.}", + "params": { "place-.": "world" }, + "exp": "hello world" + }, + { + "src": "hello {$place}", + "errors": [{ "type": "unresolved-var" }], + "exp": "hello {$place}" + }, + { + "src": "{$one} and {$two}", + "params": { "one": 1.3, "two": 4.2 }, + "exp": "1.3 and 4.2" + }, + { + "src": "{$one} et {$two}", + "locale": "fr", + "params": { "one": 1.3, "two": 4.2 }, + "exp": "1,3 et 4,2" + }, + { "src": ".local $foo = {bar} {{bar {$foo}}}", "exp": "bar bar" }, + { "src": ".local $foo = {|bar|} {{bar {$foo}}}", "exp": "bar bar" }, + { + "src": ".local $foo = {|bar|} {{bar {$foo}}}", + "params": { "foo": "foo" }, + "exp": "bar bar" + }, + { + "src": ".local $foo = {$bar} {{bar {$foo}}}", + "params": { "bar": "foo" }, + "exp": "bar foo" + }, + { + "src": ".local $foo = {$baz} .local $bar = {$foo} {{bar {$bar}}}", + "params": { "baz": "foo" }, + "exp": "bar foo" + }, + { + "src": ".input {$foo} {{bar {$foo}}}", + "params": { "foo": "foo" }, + "exp": "bar foo" + }, + { + "src": ".input {$foo} .local $bar = {$foo} {{bar {$bar}}}", + 
"params": { "foo": "foo" }, + "exp": "bar foo" + }, + { + "src": ".local $foo = {$baz} .local $bar = {$foo} {{bar {$bar}}}", + "params": { "baz": "foo" }, + "exp": "bar foo" + }, + { "src": ".local $x = {42} .local $y = {$x} {{{$x} {$y}}}", "exp": "42 42" }, + { + "src": "{#tag}", + "exp": "", + "parts": [{ "type": "markup", "kind": "open", "name": "tag" }] + }, + { + "src": "{#tag}content", + "exp": "content", + "parts": [ + { "type": "markup", "kind": "open", "name": "tag" }, + { "type": "literal", "value": "content" } + ] + }, + { + "src": "{#ns:tag}content{/ns:tag}", + "exp": "content", + "parts": [ + { "type": "markup", "kind": "open", "name": "ns:tag" }, + { "type": "literal", "value": "content" }, + { "type": "markup", "kind": "close", "name": "ns:tag" } + ] + }, + { + "src": "{/tag}content", + "exp": "content", + "parts": [ + { "type": "markup", "kind": "close", "name": "tag" }, + { "type": "literal", "value": "content" } + ] + }, + { + "src": "{#tag foo=bar}", + "exp": "", + "parts": [ + { + "type": "markup", + "kind": "open", + "name": "tag", + "options": { "foo": "bar" } + } + ] + }, + { + "src": "{#tag foo=bar/}", + "cleanSrc": "{#tag foo=bar /}", + "exp": "", + "parts": [ + { + "type": "markup", + "kind": "standalone", + "name": "tag", + "options": { "foo": "bar" } + } + ] + }, + { + "src": "{#tag a:foo=|foo| b:bar=$bar}", + "params": { "bar": "b a r" }, + "exp": "", + "parts": [ + { + "type": "markup", + "kind": "open", + "name": "tag", + "options": { "a:foo": "foo", "b:bar": "b a r" } + } + ] + }, + { + "src": "{/tag foo=bar}", + "exp": "", + "parts": [ + { + "type": "markup", + "kind": "close", + "name": "tag", + "options": { "foo": "bar" } + } + ] + }, + { + "src": "{42 @foo @bar=13}", + "exp": "42", + "parts": [{ "type": "string", "value": "42" }] + }, + { + "src": "{42 @foo=$bar}", + "exp": "42", + "parts": [{ "type": "string", "value": "42" }] + }, + { + "src": "foo {+reserved}", + "exp": "foo {+}", + "parts": [ + { "type": "literal", "value": 
"foo " }, + { "type": "fallback", "source": "+" } + ], + "errors": [{ "type": "unsupported-annotation" }] + }, + { + "src": "foo {&private}", + "exp": "foo {&}", + "parts": [ + { "type": "literal", "value": "foo " }, + { "type": "fallback", "source": "&" } + ], + "errors": [{ "type": "unsupported-annotation" }] + }, + { + "src": "foo {?reserved @a @b=$c}", + "exp": "foo {?}", + "parts": [ + { "type": "literal", "value": "foo " }, + { "type": "fallback", "source": "?" } + ], + "errors": [{ "type": "unsupported-annotation" }] + }, + { + "src": ".foo {42} {{bar}}", + "exp": "bar", + "parts": [{ "type": "literal", "value": "bar" }], + "errors": [{ "type": "unsupported-statement" }] + }, + { + "src": ".foo{42}{{bar}}", + "cleanSrc": ".foo {42} {{bar}}", + "exp": "bar", + "parts": [{ "type": "literal", "value": "bar" }], + "errors": [{ "type": "unsupported-statement" }] + }, + { + "src": ".foo |}lit{| {42}{{bar}}", + "cleanSrc": ".foo |}lit{| {42} {{bar}}", + "exp": "bar", + "parts": [{ "type": "literal", "value": "bar" }], + "errors": [{ "type": "unsupported-statement" }] + } +] diff --git a/common/testData/messageFormat/test-core.json.d.ts b/common/testData/messageFormat/test-core.json.d.ts new file mode 100644 index 00000000000..495fbf7b4ba --- /dev/null +++ b/common/testData/messageFormat/test-core.json.d.ts @@ -0,0 +1,25 @@ +// Copyright © 1991-2024 Unicode, Inc. +// For terms of use, see http://www.unicode.org/copyright.html +// SPDX-License-Identifier: Unicode-3.0 + +export type TestMessage = { + /** The MF2 message to be tested. */ + src: string; + /** The locale to use for formatting. Defaults to 'en-US'. */ + locale?: string; + /** Parameters to pass in to the formatter for resolving external variables. */ + params?: Record; + /** The expected result of formatting the message to a string. */ + exp: string; + /** The expected result of formatting the message to parts. */ + parts?: Array; + /** A normalixed form of `src`, for testing stringifiers. 
*/ + cleanSrc?: string; + /** The runtime errors expected to be emitted when formatting the message. */ + errors?: Array<{ type: string }>; + /** Normally not set. A flag to use during development to only run one or more specific tests. */ + only?: boolean; +}; + +declare const data: TestMessage[]; +export default data; diff --git a/common/testData/messageFormat/test-functions.json b/common/testData/messageFormat/test-functions.json new file mode 100644 index 00000000000..03080a2b6cd --- /dev/null +++ b/common/testData/messageFormat/test-functions.json @@ -0,0 +1,322 @@ +{ + "date": [ + { "src": "{:date}", "exp": "{:date}", "errors": [{ "type": "bad-input" }] }, + { + "src": "{horse :date}", + "exp": "{|horse|}", + "errors": [{ "type": "bad-input" }] + }, + { "src": "{|2006-01-02| :date}" }, + { "src": "{|2006-01-02T15:04:06| :date}" }, + { "src": "{|2006-01-02| :date style=long}" }, + { + "src": ".local $d = {|2006-01-02| :date style=long} {{{$d :date}}}" + }, + { + "src": ".local $t = {|2006-01-02T15:04:06| :time} {{{$t :date}}}" + } + ], + "time": [ + { "src": "{:time}", "exp": "{:time}", "errors": [{ "type": "bad-input" }] }, + { + "src": "{horse :time}", + "exp": "{|horse|}", + "errors": [{ "type": "bad-input" }] + }, + { "src": "{|2006-01-02T15:04:06| :time}" }, + { + "src": "{|2006-01-02T15:04:06| :time style=medium}" + }, + { + "src": ".local $t = {|2006-01-02T15:04:06| :time style=medium} {{{$t :time}}}" + }, + { + "src": ".local $d = {|2006-01-02T15:04:06| :date} {{{$d :time}}}" + } + ], + "datetime": [ + { + "src": "{:datetime}", + "exp": "{:datetime}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": "{$x :datetime}", + "exp": "{$x}", + "params": { "x": true }, + "errors": [{ "type": "bad-input" }] + }, + { + "src": "{horse :datetime}", + "exp": "{|horse|}", + "errors": [{ "name": "RangeError" }] + }, + { "src": "{|2006-01-02T15:04:06| :datetime}" }, + { + "src": "{|2006-01-02T15:04:06| :datetime year=numeric month=|2-digit|}" + }, + { + "src": 
"{|2006-01-02T15:04:06| :datetime dateStyle=long}" + }, + { + "src": "{|2006-01-02T15:04:06| :datetime timeStyle=medium}" + }, + { + "src": "{$dt :datetime}", + "params": { "dt": "2006-01-02T15:04:06" } + } + ], + "integer": [ + { "src": "hello {4.2 :integer}", "exp": "hello 4" }, + { "src": "hello {-4.20 :integer}", "exp": "hello -4" }, + { "src": "hello {0.42e+1 :integer}", "exp": "hello 4" }, + { + "src": ".match {$foo :integer} one {{one}} * {{other}}", + "params": { "foo": 1.2 }, + "exp": "one" + } + ], + "number": [ + { "src": "hello {4.2 :number}", "exp": "hello 4.2" }, + { "src": "hello {-4.20 :number}", "exp": "hello -4.2" }, + { "src": "hello {0.42e+1 :number}", "exp": "hello 4.2" }, + { + "src": "hello {foo :number}", + "exp": "hello {|foo|}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": "invalid number literal {.1 :number}", + "exp": "invalid number literal {|.1|}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": "invalid number literal {1. :number}", + "exp": "invalid number literal {|1.|}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": "invalid number literal {01 :number}", + "exp": "invalid number literal {|01|}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": "invalid number literal {|+1| :number}", + "exp": "invalid number literal {|+1|}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": "invalid number literal {0x1 :number}", + "exp": "invalid number literal {|0x1|}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": "hello {:number}", + "exp": "hello {:number}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": "hello {4.2 :number minimumFractionDigits=2}", + "exp": "hello 4.20" + }, + { + "src": "hello {|4.2| :number minimumFractionDigits=|2|}", + "exp": "hello 4.20" + }, + { + "src": "hello {4.2 :number minimumFractionDigits=$foo}", + "params": { "foo": 2 }, + "exp": "hello 4.20" + }, + { + "src": "hello {|4.2| :number minimumFractionDigits=$foo}", + "params": { "foo": "2" }, + "exp": 
"hello 4.20" + }, + { + "src": ".local $foo = {$bar :number} {{bar {$foo}}}", + "params": { "bar": 4.2 }, + "exp": "bar 4.2" + }, + { + "src": ".local $foo = {$bar :number minimumFractionDigits=2} {{bar {$foo}}}", + "params": { "bar": 4.2 }, + "exp": "bar 4.20" + }, + { + "src": ".local $foo = {$bar :number minimumFractionDigits=foo} {{bar {$foo}}}", + "params": { "bar": 4.2 }, + "exp": "bar {$bar}", + "errors": [{ "type": "bad-option" }] + }, + { + "src": ".local $foo = {$bar :number} {{bar {$foo}}}", + "params": { "bar": "foo" }, + "exp": "bar {$bar}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": ".input {$foo :number} {{bar {$foo}}}", + "params": { "foo": 4.2 }, + "exp": "bar 4.2" + }, + { + "src": ".input {$foo :number minimumFractionDigits=2} {{bar {$foo}}}", + "params": { "foo": 4.2 }, + "exp": "bar 4.20" + }, + { + "src": ".input {$foo :number minimumFractionDigits=foo} {{bar {$foo}}}", + "params": { "foo": 4.2 }, + "exp": "bar {$foo}", + "errors": [{ "type": "bad-option" }] + }, + { + "src": ".input {$foo :number} {{bar {$foo}}}", + "params": { "foo": "foo" }, + "exp": "bar {$foo}", + "errors": [{ "type": "bad-input" }] + }, + { + "src": ".match {$foo :number} one {{one}} * {{other}}", + "params": { "foo": 1 }, + "exp": "one" + }, + { + "src": ".match {$foo :number} 1 {{=1}} one {{one}} * {{other}}", + "params": { "foo": 1 }, + "exp": "=1" + }, + { + "src": ".match {$foo :number} one {{one}} 1 {{=1}} * {{other}}", + "params": { "foo": 1 }, + "exp": "=1" + }, + { + "src": ".match {$foo :number} {$bar :number} one one {{one one}} one * {{one other}} * * {{other}}", + "params": { "foo": 1, "bar": 1 }, + "exp": "one one" + }, + { + "src": ".match {$foo :number} {$bar :number} one one {{one one}} one * {{one other}} * * {{other}}", + "params": { "foo": 1, "bar": 2 }, + "exp": "one other" + }, + { + "src": ".match {$foo :number} {$bar :number} one one {{one one}} one * {{one other}} * * {{other}}", + "params": { "foo": 2, "bar": 2 }, + "exp": "other" + 
}, + { + "src": ".input {$foo :number} .match {$foo} one {{one}} * {{other}}", + "params": { "foo": 1 }, + "exp": "one" + }, + { + "src": ".local $foo = {$bar :number} .match {$foo} one {{one}} * {{other}}", + "params": { "bar": 1 }, + "exp": "one" + }, + { + "src": ".input {$foo :number} .local $bar = {$foo} .match {$bar} one {{one}} * {{other}}", + "params": { "foo": 1 }, + "exp": "one" + }, + { + "src": ".input {$bar :number} .match {$bar} one {{one}} * {{other}}", + "params": { "bar": 2 }, + "exp": "other" + }, + { + "src": ".input {$bar} .match {$bar :number} one {{one}} * {{other}}", + "params": { "bar": 1 }, + "exp": "one" + }, + { + "src": ".input {$bar} .match {$bar :number} one {{one}} * {{other}}", + "params": { "bar": 2 }, + "exp": "other" + }, + { + "src": ".input {$bar} .match {$bar :number} one {{one}} * {{other}}", + "params": { "bar": 1 }, + "exp": "one" + }, + { + "src": ".input {$bar} .match {$bar :number} one {{one}} * {{other}}", + "params": { "bar": 2 }, + "exp": "other" + }, + { + "src": ".input {$none} .match {$foo :number} one {{one}} * {{{$none}}}", + "params": { "foo": 1 }, + "exp": "one" + }, + { + "src": ".local $bar = {$none} .match {$foo :number} one {{one}} * {{{$bar}}}", + "params": { "foo": 1 }, + "exp": "one" + }, + { + "src": ".local $bar = {$none} .match {$foo :number} one {{one}} * {{{$bar}}}", + "params": { "foo": 2 }, + "exp": "{$none}", + "errors": [{ "type": "unresolved-var" }] + }, + { + "src": "{42 :number @foo @bar=13}", + "exp": "42", + "parts": [ + { "type": "number", "parts": [{ "type": "integer", "value": "42" }] } + ] + } + ], + "ordinal": [ + { + "src": ".match {$foo :ordinal} one {{st}} two {{nd}} few {{rd}} * {{th}}", + "params": { "foo": 1 }, + "exp": "th", + "errors": [{ "type": "missing-func" }, { "type": "not-selectable" }] + }, + { + "src": "hello {42 :ordinal}", + "exp": "hello {|42|}", + "errors": [{ "type": "missing-func" }] + } + ], + "plural": [ + { + "src": ".match {$foo :plural} one {{one}} * 
{{other}}", + "params": { "foo": 1 }, + "exp": "other", + "errors": [{ "type": "missing-func" }, { "type": "not-selectable" }] + }, + { + "src": "hello {42 :plural}", + "exp": "hello {|42|}", + "errors": [{ "type": "missing-func" }] + } + ], + "string": [ + { + "src": ".match {$foo :string} |1| {{one}} * {{other}}", + "params": { "foo": "1" }, + "exp": "one" + }, + { + "src": ".match {$foo :string} 1 {{one}} * {{other}}", + "params": { "foo": 1 }, + "exp": "one" + }, + { + "src": ".match {$foo :string} 1 {{one}} * {{other}}", + "params": { "foo": null }, + "exp": "other" + }, + { + "src": ".match {$foo :string} 1 {{one}} * {{other}}", + "exp": "other", + "errors": [{ "type": "unresolved-var" }] + } + ] +} diff --git a/common/testData/messageFormat/test-functions.json.d.ts b/common/testData/messageFormat/test-functions.json.d.ts new file mode 100644 index 00000000000..360c76a4196 --- /dev/null +++ b/common/testData/messageFormat/test-functions.json.d.ts @@ -0,0 +1,8 @@ +// Copyright © 1991-2024 Unicode, Inc. 
+// For terms of use, see http://www.unicode.org/copyright.html +// SPDX-License-Identifier: Unicode-3.0 + +import type { TestMessage } from './test-core.json'; + +declare const data: Record; +export default data; diff --git a/docs/ldml/images/messageFormatCurrencyExample.png b/docs/ldml/images/messageFormatCurrencyExample.png new file mode 100644 index 0000000000000000000000000000000000000000..879917472af4dc00ee6b2ea0c2f8ff19fe1f5a51 GIT binary patch literal 3895 zcmbVP*CQK__q8dNs$F~UO>G}55w&YX%@TW6DXOR>2ucL8)$E9oSW#+LY0Vmy)+nX+ z9z|mo-~RrE-^0E4;hghu?|HfBg_Q-Ek)Dg5jEs!&o-xq+-`xIJDs(jedH{4xo{S7| zbq}a-8|k|10<*QXE~V{8wjbwC>5p(R`~pQ^Iu|Or3%U0S7OrT1>)Ah3ECra7JKJUg~&697w>%T z3+w>#`iHAf$B=TU@Vsj~+eP(u$i%sD!ZFR-?7sf~*ZcXZy40yyz$aesU0x=~>W1ee z6TI|)sB;|T@3fq4w#MTP+m4nPMT`+rFUfc!kD;GqsXJ5S6f(2)gMv;g&2WcUy~x?} z#!-uGSGH7^sjP~-=I(UXB_H8^P=D1CeMd>M;m{!{Uvmdw_?x+X4Mu>7z7F=}i>g&m zf1L_@%{>i|)RQoDHdblA5^#FDr9lSc}bfutIcjKBa10(%VuQ z91`V~`&Un@~7?ApyxT`6|mSAz9k?ndl=AkY@wNKBQR5;AL;isLa@4X#@7|m54-c@->8TOyJ zt=QsKWsE|lf4#SC59<@{z8;sIq zi{~5hKXPg_??NR6RVtcjkM{ zPKfZ^+(N~K3#-a|X9t z80uChrGRd;f*{uu)12j<#QdcIi73~kHn%ng%mHVW!=N}owDj~9?H?;)oE3Xg#zqbO zm%fCI!R-cNUBF5Svoq2-xg?b+*6SRes(;WVw1F`F5q^y!5v6YOw@?KM!rdQ)TuGoJ zoMz*0`Y<&wFj;t37sQ==ziSfX4;fPw1B&(O5r!TUx;_K@j2X|)M%zXTwb-Ap^IHcB z-Hdtr@~unYK2i=ma-JIk?j+rUv$j%qx#-xbh-@C*{W8O2OPw7jP7w|co*6~RJT zzWBgeR=gZ}?rbWFgYE$K(Q~z3RBj+Nc=L1f^Tru#D+lLo&8$*=rMm(#BnxqD=5r2(xp)B+Y z4beh@pK>MTK-ovL@uy{f$N;>xV2|DP${_!4`GmdU#K zES5OtnS;3(=9Gq}v&lZTNFmPfDq$zTR@+TH7swh+c&V1NNV7Z;zhQspJ z`61(iu5A5rev(I1f!p?rZ^pcTBnYhIkYILcX+pI_qvMVC$A|o)nTlfGiJ2s3ziz}E zYOg3Z2J}RaP4AEyCF$V;6!5NAdUyA)8Dt@BOF(<(L-n481UM^m5F)NTmN4+NwoH3G zYc350QSR}{a~u1#{|fd>DchaLp~g%# ztg*%2LopR|pUbpy&@6U$Mf1KjZqa-!zu7Dceq>1HnewCTNU8fU@LvnaP_(*PM`hY+ zmCSs-$YEFWb*5Qe5Wy|=MZ`Y;K>yd8=<`tOiDhG=_@L-_uaP1(N*y1ld@!u7?} z1{9>>#2^M`0u{{Q)%k(Nl!@LOya*m_nTjhD5|C}<6IvuFUO zc9QauKlf43dBWrRu#C>8C+7nORjFboF7F&fJ_zHqeXEeK398uf2UC$ugy(4mn%~5K z7_dc|XTKlckJXr8hLy5e!^*nsZ)@5Az5$eA_!0ifZzZG*v`ZZ;j!U%$+HkV-Xd=v9 
znjY=pV8e@&mD-rmN%l!+)UFnPl1M~lXu~-gPYf&b=o!8gTOc(BdyJl1Y5o)_dwZoo z?<(F!teTF;B(qNWmbDmxIWaludrY!4cixw&yU3V@+-D`fnQ{jZu|PdO)T;pWEW|F}9%hxPb8rSa~_ zM#{5+28ZLw6k@y?>?-cw4=)7PoTBl+mKL0TM4s4E%9Y^zko#3CzpJ!ugHwI-_x!_$ z-ni6dMl>}}P&-_R+b`Z+ee1=Qbw`J`VSOB)(5dvFyXY53Mfg{!iY~`w z`BY{BZ=3v*2hnU6>jJpcZ~UI;VmtpOJJ*diNZt-B%D`L+eYsnL%^qCNR?5up>XdwX z#ke|}^TF|B1{tQus0N5}yrjJudgGZz1A~OYp8mew}v*s!PC(1y_oI)qdXr z*i7E6SIqGNSYrM;{V$ig-u0%bgConS1HCo2p?<;_#W7j#xbmTIKgo?Izm(SWZISwa zyKIAs@E=gzP+pQDE@amYBittK!J}1y$J_Wk0@l(8Zv1>iS3H};``qgLZQuEy6yrCK zu^C-5dls6pZ+Gd!>-{0?M4ga{0;XJN#OnZ~mf>hkX_MwUtSfqOQjQ3X{lArSPlj)L z7j9vVJD$Ad8OA)`*3^FXONb|vRHf!^uPWs8X2q2KjFVjTf@L^1vK$3$C0}fu`8z@w z--7!WIB&}hrxYSBM>XVb*+k1o49@8Jz&>s6)E$~BPuNUZ>okq7pcuJSThE1Cw{A{K z91?c8`Bugk^aP~y!9U>h@;kk)C;gJDDr|Xie)AB))?krra@VySS&Fk3loqx_%W|8A z&?Xh-&AZ`0yVyw6D5d>ph}h%L|L&kg!eINfP?AFj?jPRd04P+G%wdGrrhMFy?KS3L zp!*uP)k6gwlSLjLQg)L1h_RdLG>yW9QSQ2-8ZJ*nvYKWLh9h7fIXzjOyVkk6C(ow)rabM& z*`uPjJL)H5+!!%B)u_+`60ZX&t6Ky4#vRx!wqtPX;+s^G$WMB4dr^gbHzkal(NJh= z^#Sv(j0VPCV+llY;I~yE|PMIU50l6dm77A<8je*V{$3m-6|Dj2@_@&o`>SF#Jhp zdZfu_8t-%-R_(OEZ*0u-t;^-A*t4R@@w$$E-%6Xl;Gmb5c_;JP~C*{ed zOG1~%F9rGefzo3i5%gw-^49R)+;u!YDEJX*q$IE-uaj}svN&ntN{td*+p_(kJGtms zX-#rWJb>!^+DWZ(cOs!KbY0kM+_vrD>&qvqhv-u{o(n9c+E~2tFl}4- zVQ&&B7#eqBh|x@-b+CCMxtnXNuc|Wd;=%M=-obAQ7r9D~x?V^LQo-`s+&`U|bUM6M z&2ydeEy-G(l&z`g9oL#Y8V~3H_vwdCiga|yQ(0P;q)0Y+1PPLy4uv}}>rMh~RoC=tRu|7~0IRYHGI5-4MQcGEm~um}qh_*uj^FsK z@%7(11pFUT>QgT0!CEq-%xS7t1CGnvWYWEb?3(ZYm+j6u-Q-Gy-B>A#@J51ztBContents of Part 9, Message Format -* [CLDR Message Format](#cldr-message-format) - * [Introduction](#introduction) - * [Status](#status) +* [Introduction](#introduction) + * [Conformance](#conformance) + * [Terminology and Conventions](#terminology-and-conventions) + * [Stability Policy](#stability-policy) +* [Syntax](#syntax) + * [Design Goals](#design-goals) + * [Design Restrictions](#design-restrictions) + * [Messages and their Syntax](#messages-and-their-syntax) + * [Well-formed vs. 
Valid Messages](#well-formed-vs.-valid-messages) + * [The Message](#the-message) + * [Declarations](#declarations) + * [Reserved Statements](#reserved-statements) + * [Complex Body](#complex-body) + * [Pattern](#pattern) + * [Quoted Pattern](#quoted-pattern) + * [Text](#text) + * [Placeholder](#placeholder) + * [Matcher](#matcher) + * [Selector](#selector) + * [Variant](#variant) + * [Key](#key) + * [Expressions](#expressions) + * [Annotation](#annotation) + * [Function](#function) + * [Options](#options) + * [Private-Use Annotations](#private-use-annotations) + * [Reserved Annotations](#reserved-annotations) + * [Markup](#markup) + * [Attributes](#attributes) + * [Other Syntax Elements](#other-syntax-elements) + * [Keywords](#keywords) + * [Literals](#literals) + * [Names and Identifiers](#names-and-identifiers) + * [Escape Sequences](#escape-sequences) + * [Whitespace](#whitespace) +* [Complete ABNF](#complete-abnf) + * [`message.abnf`](#message.abnf) +* [Errors](#errors) + * [Error Handling](#error-handling) + * [Syntax Errors](#syntax-errors) + * [Data Model Errors](#data-model-errors) + * [Variant Key Mismatch](#variant-key-mismatch) + * [Missing Fallback Variant](#missing-fallback-variant) + * [Missing Selector Annotation](#missing-selector-annotation) + * [Duplicate Declaration](#duplicate-declaration) + * [Duplicate Option Name](#duplicate-option-name) + * [Resolution Errors](#resolution-errors) + * [Unresolved Variable](#unresolved-variable) + * [Unknown Function](#unknown-function) + * [Unsupported Expression](#unsupported-expression) + * [Invalid Expression](#invalid-expression) + * [Unsupported Statement](#unsupported-statement) + * [Selection Errors](#selection-errors) + * [Formatting Errors](#formatting-errors) +* [Function Registry](#function-registry) + * [Goals](#goals) + * [Conformance and Use](#conformance-and-use) + * [Registry Data Model](#registry-data-model) + * [Example](#example) + * [Default Registry](#default-registry) + * [String Value 
Selection and Formatting](#string-value-selection-and-formatting) + * [The `:string` function](#the-string-function) + * [Operands](#operands) + * [Options](#options) + * [Selection](#selection) + * [Formatting](#formatting) + * [Numeric Value Selection and Formatting](#numeric-value-selection-and-formatting) + * [The `:number` function](#the-number-function) + * [Operands](#operands) + * [Options](#options) + * [Default Value of `select` Option](#default-value-of-select-option) + * [Percent Style](#percent-style) + * [Selection](#selection) + * [The `:integer` function](#the-integer-function) + * [Operands](#operands) + * [Options](#options) + * [Default Value of `select` Option](#default-value-of-select-option) + * [Percent Style](#percent-style) + * [Selection](#selection) + * [Number Operands](#number-operands) + * [Digit Size Options](#digit-size-options) + * [Number Selection](#number-selection) + * [Rule Selection](#rule-selection) + * [Determining Exact Literal Match](#determining-exact-literal-match) + * [Date and Time Value Formatting](#date-and-time-value-formatting) + * [The `:datetime` function](#the-datetime-function) + * [Operands](#operands) + * [Options](#options) + * [Style Options](#style-options) + * [Field Options](#field-options) + * [The `:date` function](#the-date-function) + * [Operands](#operands) + * [Options](#options) + * [The `:time` function](#the-time-function) + * [Operands](#operands) + * [Options](#options) + * [Date and Time Operands](#date-and-time-operands) +* [Formatting](#formatting) + * [Formatting Context](#formatting-context) + * [Expression and Markup Resolution](#expression-and-markup-resolution) + * [Literal Resolution](#literal-resolution) + * [Variable Resolution](#variable-resolution) + * [Function Resolution](#function-resolution) + * [Option Resolution](#option-resolution) + * [Markup Resolution](#markup-resolution) + * [Fallback Resolution](#fallback-resolution) + * [Pattern Selection](#pattern-selection) + * 
[Resolve Selectors](#resolve-selectors) + * [Resolve Preferences](#resolve-preferences) + * [Filter Variants](#filter-variants) + * [Sort Variants](#sort-variants) + * [Examples](#examples) + * [Example 1](#example-1) + * [Example 2](#example-2) + * [Example 3](#example-3) + * [Formatting](#formatting) + * [Examples](#examples) + * [Formatting Fallback Values](#formatting-fallback-values) + * [Handling Bidirectional Text](#handling-bidirectional-text) +* [Interchange Data Model](#interchange-data-model) + * [Messages](#messages) + * [Patterns](#patterns) + * [Expressions](#expressions) + * [Markup](#markup) + * [Extensions](#extensions) +* [Appendices](#appendices) + * [Security Considerations](#security-considerations) + * [Acknowledgements](#acknowledgements) -## CLDR Message Format +## Introduction -### Introduction +One of the challenges in adapting software to work for +users with different languages and cultures is the need for **_dynamic messages_**. +Whenever a user interface needs to present data as part of a larger string, +that data needs to be formatted (and the message may need to be altered) +to make it culturally accepted and grammatically correct. -This specification defines the data model, syntax, processing, and conformance requirements for the next generation of dynamic messages. It is intended for adoption by programming languages and APIs. This will enable the integration of existing internationalization APIs (such as the date and number formats shown above), grammatical matching (such as plurals or genders), as well as user-defined formats and message selectors. 
+> For example, if your US English (`en-US`) interface has a message like: +> +> > Your item had 1,023 views on April 3, 2023 +> +> You want the translated message to be appropriately formatted into French: +> +> > Votre article a eu 1 023 vues le 3 avril 2023 +> +> Or Japanese: +> +> > あなたのアイテムは 2023 年 4 月 3 日に 1,023 回閲覧されました。 -### Status +This specification defines the +data model, syntax, processing, and conformance requirements +for the next generation of _dynamic messages_. +It is intended for adoption by programming languages and APIs. +This will enable the integration of +existing internationalization APIs (such as the date and number formats shown above), +grammatical matching (such as plurals or genders), +as well as user-defined formats and message selectors. -The Message Format 2.0 Specification has been approved by the CLDR-TC for inclusion in CLDR version 45. -The specification will be included in this page prior to release. +The document is the successor to ICU MessageFormat, +henceforth called ICU MessageFormat 1.0. -In the interim, access the -[current draft specification](https://github.com/unicode-org/message-format-wg/blob/LDML45-alpha/spec/#readme). +### Conformance + +Everything in this specification is normative except for: +sections marked as non-normative, +all authoring guidelines, diagrams, examples, and notes. + +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL +NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", +"MAY", and "OPTIONAL" in this document are to be interpreted as +described in BCP 14 \[[RFC2119](https://www.rfc-editor.org/rfc/rfc2119)\] +\[[RFC8174](https://www.rfc-editor.org/rfc/rfc8174)\] when, and only when, they +appear in all capitals, as shown here. + +### Terminology and Conventions + +A **_term_** looks like this when it is defined in this specification. + +A reference to a _term_ looks like this. + +> Examples are non-normative and styled like this. 
+ +### Stability Policy + +> [!IMPORTANT] +> The provisions of the stability policy are not in effect until +> the conclusion of the technical preview and adoption of this specification. + +Updates to this specification will not change +the syntactical meaning, the runtime output, or other behaviour +of valid messages written for earlier versions of this specification +that only use functions defined in this specification. +Updates to this specification will not remove any syntax provided in this version. +Future versions MAY add additional structure or meaning to existing syntax. + +Updates to this specification will not remove any reserved keywords or sigils. + +> [!NOTE] +> Future versions may define new keywords. + +Updates to this specification will not reserve or assign meaning to +any character "sigils" except for those in the `reserved` production. + +Updates to this specification +will not remove any functions defined in the default registry nor +will they remove any options or option values. +Additional options or option values MAY be defined. + +> [!NOTE] +> This does not guarantee that the results of formatting will never change. +> Even when the specification doesn't change, +> the functions for date formatting, number formatting and so on +> will change their results over time. + +Later specification versions MAY make previously invalid messages valid. + +Updates to this specification will not introduce message syntax that, +when parsed according to earlier versions of this specification, +would produce syntax or data model errors. +Such messages MAY produce errors when formatted +according to an earlier version of this specification. + +From version 2.0, MessageFormat will only reserve, define, or require +function names or function option names +consisting of characters in the ranges a-z, A-Z, and 0-9. +All other names in these categories are reserved for the use of implementations or users. 
+ +> [!NOTE] +> Users defining custom names SHOULD include at least one character outside these ranges +> to ensure that they will be compatible with future versions of this specification. + +Later versions of this specification will not introduce changes +to the data model that would result in a data model representation +based on this version being invalid. + +> For example, existing interfaces or fields will not be removed. + +Later versions of this specification MAY introduce changes +to the data model that would result in future data model representations +not being valid for implementations of this version of the data model. + +> For example, a future version could introduce a new keyword, +> whose data model representation would be a new interface +> that is not recognized by this version's data model. + +Later specification versions will not introduce syntax that cannot be +represented by this version of the data model. + +> For example, a future version could introduce a new keyword. +> The future version's data model would provide an interface for that keyword +> while this version of the data model would parse the value into +> the interface `UnsupportedStatement`. +> Both data models would be "valid" in their context, +> but this version's would be missing any functionality for the new statement type. + +## Syntax + +This section defines the formal grammar describing the syntax of a single message. + +### Design Goals + +_This section is non-normative._ + +The design goals of the syntax specification are as follows: + +1. The syntax should leverage the familiarity with ICU MessageFormat 1.0 + in order to lower the barrier to entry and increase the chance of adoption. + At the same time, + the syntax should fix the [pain points of ICU MessageFormat 1.0](../docs/why_mf_next.md). + + - _Non-Goal_: Be backwards-compatible with the ICU MessageFormat 1.0 syntax. + +1. The syntax inside translatable content should be easy to understand for humans. 
+ This includes making it clear which parts of the message body _are_ translatable content, + which parts inside it are placeholders for expressions, + as well as making the selection logic predictable and easy to reason about. + + - _Non-Goal_: Make the syntax intuitive enough for non-technical translators to hand-edit. + Instead, we assume that most translators will work with MessageFormat 2 + by means of GUI tooling, CAT workbenches etc. + +1. The syntax surrounding translatable content should be easy to write and edit + for developers, localization engineers, and easy to parse by machines. + +1. The syntax should make a single message easily embeddable inside many container formats: + `.properties`, YAML, XML, inlined as string literals in programming languages, etc. + This includes a future _MessageResource_ specification. + + - _Non-Goal_: Support unnecessary escape sequences, which would themselves require + additional escaping when embedded. Instead, we tolerate direct use of nearly all + characters (including line breaks, control characters, etc.) and rely upon escaping + in those outer formats to aid human comprehension (e.g., depending upon container + format, a U+000A LINE FEED might be represented as `\n`, `\012`, `\x0A`, `\u000A`, + `\U0000000A`, `&#xA;`, `&NewLine;`, `%0A`, `<LF>`, or something else entirely). + +### Design Restrictions + +_This section is non-normative._ + +The syntax specification takes into account the following design restrictions: + +1. Whitespace outside the translatable content should be insignificant. + It should be possible to define a message entirely on a single line with no ambiguity, + as well as to format it over multiple lines for clarity. + +1. The syntax should define as few special characters and sigils as possible.
+ Note that this necessitates extra care when presenting messages for human consumption, + because they may contain invisible characters such as U+200B ZERO WIDTH SPACE, + control characters such as U+0000 NULL and U+0009 TAB, permanently reserved noncharacters + (U+FDD0 through U+FDEF and U+nFFFE and U+nFFFF where n is 0x0 through 0x10), + private-use code points (U+E000 through U+F8FF, U+F0000 through U+FFFFD, and + U+100000 through U+10FFFD), unassigned code points, and other potentially confusing content. + +### Messages and their Syntax + +The purpose of MessageFormat is to allow content to vary at runtime. +This variation might be due to placing a value into the content +or it might be due to selecting a different bit of content based on some data value +or it might be due to a combination of the two. + +MessageFormat calls the template for a given formatting operation a _message_. + +The values passed in at runtime (which are to be placed into the content or used +to select between different content items) are called _external variables_. +The author of a _message_ can also assign _local variables_, including +variables that modify _external variables_. + +This part of the MessageFormat specification defines the syntax for a _message_, +along with the concepts and terminology needed when processing a _message_ +during the [formatting](./formatting.md) of a _message_ at runtime. + +The complete formal syntax of a _message_ is described by the [ABNF](./message.abnf). + +#### Well-formed vs. Valid Messages + +A _message_ is **_well-formed_** if it satisfies all the rules of the grammar. +Attempting to parse a _message_ that is not _well-formed_ will result in a _Syntax Error_. + +A _message_ is **_valid_** if it is _well-formed_ and +**also** meets the additional content restrictions +and semantic requirements about its structure defined below for +_declarations_, _matcher_ and _options_. 
+Attempting to parse a _message_ that is not _valid_ will result in a _Data Model Error_. + +### The Message + +A **_message_** is the complete template for a specific message formatting request. + +> [!NOTE] +> This syntax is designed to be embeddable into many different programming languages and formats. +> As such, it avoids constructs, such as character escapes, that are specific to any given file +> format or processor. +> In particular, it avoids using quote characters common to many file formats and formal languages +> so that these do not need to be escaped in the body of a _message_. + +> [!NOTE] +> In general (and except where required by the syntax), whitespace carries no meaning in the structure +> of a _message_. While many of the examples in this spec are written on multiple lines, the formatting +> shown is primarily for readability. +> +> > **Example** This _message_: +> > +> > ``` +> > .local $foo = { |horse| } +> > {{You have a {$foo}!}} +> > ``` +> > +> > Can also be written as: +> > +> > ``` +> > .local $foo={|horse|}{{You have a {$foo}!}} +> > ``` +> > +> > An exception to this is: whitespace inside a _pattern_ is **always** significant. + +> [!NOTE] +> The syntax assumes that each _message_ will be displayed with a left-to-right display order +> and be processed in the logical character order. +> The syntax also permits the use of right-to-left characters in _identifiers_, +> _literals_, and other values. +> This can result in confusion when viewing the _message_. +> +> Additional restrictions or requirements, +> such as permitting the use of certain bidirectional control characters in the syntax, +> might be added during the Tech Preview to better manage bidirectional text. +> Feedback on the creation and management of _messages_ +> containing bidirectional tokens is strongly desired. + +A _message_ can be a _simple message_ or it can be a _complex message_. 
+ +```abnf +message = simple-message / complex-message +``` + +A **_simple message_** contains a single _pattern_, +with restrictions on its first character. +An empty string is a valid _simple message_. + +```abnf +simple-message = [simple-start pattern] +simple-start = simple-start-char / text-escape / placeholder +``` + +A **_complex message_** is any _message_ that contains _declarations_, +a _matcher_, or both. +A _complex message_ always begins with either a keyword that has a `.` prefix or a _quoted pattern_ +and consists of: + +1. an optional list of _declarations_, followed by +2. a _complex body_ + +```abnf +complex-message = *(declaration [s]) complex-body +``` + +#### Declarations + +A **_declaration_** binds a _variable_ identifier to a value within the scope of a _message_. +This _variable_ can then be used in other _expressions_ within the same _message_. +_Declarations_ are optional: many messages will not contain any _declarations_. + +An **_input-declaration_** binds a _variable_ to an external input value. +The _variable-expression_ of an _input-declaration_ +MAY include an _annotation_ that is applied to the external value. + +A **_local-declaration_** binds a _variable_ to the resolved value of an _expression_. + +For compatibility with later MessageFormat 2 specification versions, +_declarations_ MAY also include _reserved statements_. + +```abnf +declaration = input-declaration / local-declaration / reserved-statement +input-declaration = input [s] variable-expression +local-declaration = local s variable [s] "=" [s] expression +``` + +_Variables_, once declared, MUST NOT be redeclared. +A _message_ that does any of the following is not _valid_ and will produce a +_Duplicate Declaration_ error during processing: +- A _declaration_ MUST NOT bind a _variable_ + that appears as a _variable_ anywhere within a previous _declaration_. 
+- An _input-declaration_ MUST NOT bind a _variable_ + that appears anywhere within the _annotation_ of its _variable-expression_. +- A _local-declaration_ MUST NOT bind a _variable_ that appears in its _expression_. + +A _local-declaration_ MAY overwrite an external input value as long as the +external input value does not appear in a previous _declaration_. + +> [!NOTE] +> These restrictions only apply to _declarations_. +> A _placeholder_ or _selector_ can apply a different annotation to a _variable_ +> than one applied to the same _variable_ named in a _declaration_. +> For example, this message is _valid_: +> ``` +> .input {$var :number maximumFractionDigits=0} +> .match {$var :number maximumFractionDigits=2} +> 0 {{The selector can apply a different annotation to {$var} for the purposes of selection}} +> * {{A placeholder in a pattern can apply a different annotation to {$var :number maximumFractionDigits=3}}} +> ``` +> (See the [Errors](./errors.md) section for examples of invalid messages) + +##### Reserved Statements + +A **_reserved statement_** reserves additional `.keywords` +for use by future versions of this specification. +Any such future keyword must start with `.`, +followed by two or more lower-case ASCII characters. + +The rest of the statement supports +a similarly wide range of content as _reserved annotations_, +but it MUST end with one or more _expressions_. + +```abnf +reserved-statement = reserved-keyword [s reserved-body] 1*([s] expression) +reserved-keyword = "." name +``` + +> [!NOTE] +> The `reserved-keyword` ABNF rule is a simplification, +> as it MUST NOT be considered to match any of the existing keywords +> `.input`, `.local`, or `.match`. + +This allows flexibility in future standardization, +as future definitions MAY define additional semantics and constraints +on the contents of these _reserved statements_. 
+ +Implementations MUST NOT assign meaning or semantics to a _reserved statement_: +these are reserved for future standardization. +Implementations MUST NOT remove or alter the contents of a _reserved statement_. + +#### Complex Body + +The **_complex body_** of a _complex message_ is the part that will be formatted. +The _complex body_ consists of either a _quoted pattern_ or a _matcher_. + +```abnf +complex-body = quoted-pattern / matcher +``` + +### Pattern + +A **_pattern_** contains a sequence of _text_ and _placeholders_ to be formatted as a unit. +Unless there is an error, resolving a _message_ always results in the formatting +of a single _pattern_. + +```abnf +pattern = *(text-char / text-escape / placeholder) +``` +A _pattern_ MAY be empty. + +A _pattern_ MAY contain an arbitrary number of _placeholders_ to be evaluated +during the formatting process. + +#### Quoted Pattern + +A **_quoted pattern_** is a _pattern_ that is "quoted" to prevent +interference with other parts of the _message_. +A _quoted pattern_ starts with a sequence of two U+007B LEFT CURLY BRACKET `{{` +and ends with a sequence of two U+007D RIGHT CURLY BRACKET `}}`. + +```abnf +quoted-pattern = "{{" pattern "}}" +``` + +A _quoted pattern_ MAY be empty. + +> An empty _quoted pattern_: +> +> ``` +> {{}} +> ``` + +#### Text + +**_text_** is the translatable content of a _pattern_. +Any Unicode code point is allowed, except for U+0000 NULL +and the surrogate code points U+D800 through U+DFFF inclusive. +The characters U+005C REVERSE SOLIDUS `\`, +U+007B LEFT CURLY BRACKET `{`, and U+007D RIGHT CURLY BRACKET `}` +MUST be escaped as `\\`, `\{`, and `\}` respectively. + +In the ABNF, _text_ is represented by non-empty sequences of +`simple-start-char`, `text-char`, and `text-escape`. +The first of these is used at the start of a _simple message_, +and matches `text-char` except for not allowing U+002E FULL STOP `.`.
+The ABNF uses `content-char` as a shared base for _text_ and _quoted literal_ characters. + +Whitespace in _text_, including tabs, spaces, and newlines is significant and MUST +be preserved during formatting. + +```abnf +simple-start-char = content-char / s / "@" / "|" +text-char = content-char / s / "." / "@" / "|" +quoted-char = content-char / s / "." / "@" / "{" / "}" +reserved-char = content-char / "." +content-char = %x01-08 ; omit NULL (%x00), HTAB (%x09) and LF (%x0A) + / %x0B-0C ; omit CR (%x0D) + / %x0E-1F ; omit SP (%x20) + / %x21-2D ; omit . (%x2E) + / %x2F-3F ; omit @ (%x40) + / %x41-5B ; omit \ (%x5C) + / %x5D-7A ; omit { | } (%x7B-7D) + / %x7E-2FFF ; omit IDEOGRAPHIC SPACE (%x3000) + / %x3001-D7FF ; omit surrogates + / %xE000-10FFFF +``` + +When a _pattern_ is quoted by embedding the _pattern_ in curly brackets, the +resulting _message_ can be embedded into +various formats regardless of the container's whitespace trimming rules. +Otherwise, care must be taken to ensure that pattern-significant whitespace is preserved. + +> **Example** +> In a Java `.properties` file, the values `hello` and `hello2` both contain +> an identical _message_ which consists of a single _pattern_. +> This _pattern_ consists of _text_ with exactly three spaces before and after the word "Hello": +> +> ```properties +> hello = {{ Hello }} +> hello2=\ Hello \ +> ``` + +#### Placeholder + +A **_placeholder_** is an _expression_ or _markup_ that appears inside of a _pattern_ +and which will be replaced during the formatting of a _message_. + +```abnf +placeholder = expression / markup +``` + +### Matcher + +A **_matcher_** is the _complex body_ of a _message_ that allows runtime selection +of the _pattern_ to use for formatting. +This allows the form or content of a _message_ to vary based on values +determined at runtime. + +A _matcher_ consists of the keyword `.match` followed by at least one _selector_ +and at least one _variant_. 
+ +When the _matcher_ is processed, the result will be a single _pattern_ that serves +as the template for the formatting process. + +A _message_ can only be considered _valid_ if the following requirements are +satisfied: + +- The number of _keys_ on each _variant_ MUST be equal to the number of _selectors_. +- At least one _variant_ MUST exist whose _keys_ are all equal to the "catch-all" key `*`. +- Each _selector_ MUST have an _annotation_, + or contain a _variable_ that directly or indirectly references a _declaration_ with an _annotation_. + +```abnf +matcher = match-statement 1*([s] variant) +match-statement = match 1*([s] selector) +``` + +> A _message_ with a _matcher_: +> +> ``` +> .input {$count :number} +> .match {$count} +> one {{You have {$count} notification.}} +> * {{You have {$count} notifications.}} +> ``` + +> A _message_ containing a _matcher_ formatted on a single line: +> +> ``` +> .match {:platform} windows {{Settings}} * {{Preferences}} +> ``` + +#### Selector + +A **_selector_** is an _expression_ that ranks or excludes the +_variants_ based on the value of the corresponding _key_ in each _variant_. +The combination of _selectors_ in a _matcher_ thus determines +which _pattern_ will be used during formatting. + +```abnf +selector = expression +``` + +There MUST be at least one _selector_ in a _matcher_. +There MAY be any number of additional _selectors_. 
+ +> A _message_ with a single _selector_ that uses a custom _function_ +> `:hasCase` which is a _selector_ that allows the _message_ to choose a _pattern_ +> based on grammatical case: +> +> ``` +> .match {$userName :hasCase} +> vocative {{Hello, {$userName :person case=vocative}!}} +> accusative {{Please welcome {$userName :person case=accusative}!}} +> * {{Hello!}} +> ``` + +> A message with two _selectors_: +> +> ``` +> .input {$numLikes :integer} +> .input {$numShares :integer} +> .match {$numLikes} {$numShares} +> 0 0 {{Your item has no likes and has not been shared.}} +> 0 one {{Your item has no likes and has been shared {$numShares} time.}} +> 0 * {{Your item has no likes and has been shared {$numShares} times.}} +> one 0 {{Your item has {$numLikes} like and has not been shared.}} +> one one {{Your item has {$numLikes} like and has been shared {$numShares} time.}} +> one * {{Your item has {$numLikes} like and has been shared {$numShares} times.}} +> * 0 {{Your item has {$numLikes} likes and has not been shared.}} +> * one {{Your item has {$numLikes} likes and has been shared {$numShares} time.}} +> * * {{Your item has {$numLikes} likes and has been shared {$numShares} times.}} +> ``` + +#### Variant + +A **_variant_** is a _quoted pattern_ associated with a set of _keys_ in a _matcher_. +Each _variant_ MUST begin with a sequence of _keys_, +and terminate with a valid _quoted pattern_. +The number of _keys_ in each _variant_ MUST match the number of _selectors_ in the _matcher_. + +Each _key_ is separated from each other by whitespace. +Whitespace is permitted but not required between the last _key_ and the _quoted pattern_. + +```abnf +variant = key *(s key) [s] quoted-pattern +key = literal / "*" +``` + +##### Key + +A **_key_** is a value in a _variant_ for use by a _selector_ when ranking +or excluding _variants_ during the _matcher_ process. +A _key_ can be either a _literal_ value or the "catch-all" key `*`. 
+ +The **_catch-all key_** is a special key, represented by `*`, +that matches all values for a given _selector_. + +### Expressions + +An **_expression_** is a part of a _message_ that will be determined +during the _message_'s formatting. + +An _expression_ MUST begin with U+007B LEFT CURLY BRACKET `{` +and end with U+007D RIGHT CURLY BRACKET `}`. +An _expression_ MUST NOT be empty. +An _expression_ cannot contain another _expression_. +An _expression_ MAY contain one or more _attributes_. + +A **_literal-expression_** contains a _literal_, +optionally followed by an _annotation_. + +A **_variable-expression_** contains a _variable_, +optionally followed by an _annotation_. + +An **_annotation-expression_** contains an _annotation_ without an _operand_. + +```abnf +expression = literal-expression + / variable-expression + / annotation-expression +literal-expression = "{" [s] literal [s annotation] *(s attribute) [s] "}" +variable-expression = "{" [s] variable [s annotation] *(s attribute) [s] "}" +annotation-expression = "{" [s] annotation *(s attribute) [s] "}" +``` + +There are several types of _expression_ that can appear in a _message_. +All _expressions_ share a common syntax. The types of _expression_ are: + +1. The value of a _local-declaration_ +2. A _selector_ +3. A kind of _placeholder_ in a _pattern_ + +Additionally, an _input-declaration_ can contain a _variable-expression_.
+ +> Examples of different types of _expression_ +> +> Declarations: +> +> ``` +> .input {$x :function option=value} +> .local $y = {|This is an expression|} +> ``` +> +> Selectors: +> +> ``` +> .match {$selector :functionRequired} +> ``` +> +> Placeholders: +> +> ``` +> This placeholder contains a literal expression: {|literal|} +> This placeholder contains a variable expression: {$variable} +> This placeholder references a function on a variable: {$variable :function with=options} +> This placeholder contains a function expression with a variable-valued option: {:function option=$variable} +> ``` + +#### Annotation + +An **_annotation_** is part of an _expression_ containing either +a _function_ together with its associated _options_, or +a _private-use annotation_ or a _reserved annotation_. + +```abnf +annotation = function + / private-use-annotation + / reserved-annotation +``` + +An **_operand_** is the _literal_ of a _literal-expression_ or +the _variable_ of a _variable-expression_. + +An _annotation_ can appear in an _expression_ by itself or following a single _operand_. +When following an _operand_, the _operand_ serves as input to the _annotation_. + +##### Function + +A **_function_** is named functionality in an _annotation_. +_Functions_ are used to evaluate, format, select, or otherwise process data +values during formatting. + +Each _function_ is defined by the runtime's _function registry_. +A _function_'s entry in the _function registry_ will define +whether the _function_ is a _selector_ or formatter (or both), +whether an _operand_ is required, +what form the values of an _operand_ can take, +what _options_ and _option_ values are valid, +and what outputs might result. +See [function registry](./registry.md) for more information. + +A _function_ starts with a prefix sigil `:` followed by an _identifier_. +The _identifier_ MAY be followed by one or more _options_. +_Options_ are not required. 
+ +```abnf +function = ":" identifier *(s option) +``` + +> A _message_ with a _function_ operating on the _variable_ `$now`: +> +> ``` +> It is now {$now :datetime}. +> ``` + +###### Options + +An **_option_** is a key-value pair +containing a named argument that is passed to a _function_. + +An _option_ has an _identifier_ and a _value_. +The _identifier_ is separated from the _value_ by an U+003D EQUALS SIGN `=` along with +optional whitespace. +The value of an _option_ can be either a _literal_ or a _variable_. + +Multiple _options_ are permitted in an _annotation_. +_Options_ are separated from the preceding _function_ _identifier_ +and from each other by whitespace. +Each _option_'s _identifier_ MUST be unique within the _annotation_: +an _annotation_ with duplicate _option_ _identifiers_ is not valid. + +The order of _options_ is not significant. + +```abnf +option = identifier [s] "=" [s] (literal / variable) +``` + +> Examples of _functions_ with _options_ +> +> A _message_ using the `:datetime` function. +> The _option_ `weekday` has the literal `long` as its value: +> +> ``` +> Today is {$date :datetime weekday=long}! +> ``` + +> A _message_ using the `:datetime` function. +> The _option_ `weekday` has a variable `$dateStyle` as its value: +> +> ``` +> Today is {$date :datetime weekday=$dateStyle}! +> ``` + +##### Private-Use Annotations + +A **_private-use annotation_** is an _annotation_ whose syntax is reserved +for use by a specific implementation or by private agreement between multiple implementations. +Implementations MAY define their own meaning and semantics for _private-use annotations_. + +A _private-use annotation_ starts with either U+0026 AMPERSAND `&` or U+005E CIRCUMFLEX ACCENT `^`. + +Characters, including whitespace, are assigned meaning by the implementation. 
+The definition of escapes in the `reserved-body` production, used for the body of +a _private-use annotation_ is an affordance to implementations that +wish to use a syntax exactly like other functions. Specifically: + +- The characters `\`, `{`, and `}` MUST be escaped as `\\`, `\{`, and `\}` respectively + when they appear in the body of a _private-use annotation_. +- The character `|` is special: it SHOULD be escaped as `\|` in a _private-use annotation_, + but can appear unescaped as long as it is paired with another `|`. + This is an affordance to allow _literals_ to appear in the private use syntax. + +A _private-use annotation_ MAY be empty after its introducing sigil. + +```abnf +private-use-annotation = private-start [[s] reserved-body] +private-start = "^" / "&" +``` + +> [!NOTE] +> Users are cautioned that _private-use annotations_ cannot be reliably exchanged +> and can result in errors during formatting. +> It is generally a better idea to use the function registry +> to define additional formatting or annotation options. + +> Here are some examples of what _private-use_ sequences might look like: +> +> ``` +> Here's private use with an operand: {$foo &bar} +> Here's a placeholder that is entirely private-use: {&anything here} +> Here's a private-use function that uses normal function syntax: {$operand ^foo option=|literal|} +> The character \| has to be paired or escaped: {&private || |something between| or isolated: \| } +> Stop {& "translate 'stop' as a verb" might be a translator instruction or comment } +> Protect stuff in {^ph}{^/ph}private use{^ph}{^/ph} +> ``` + +##### Reserved Annotations + +A **_reserved annotation_** is an _annotation_ whose syntax is reserved +for future standardization. + +A _reserved annotation_ starts with a reserved character. +The remaining part of a _reserved annotation_, called a _reserved body_, +MAY be empty or contain arbitrary text that starts and ends with +a non-whitespace character. 
+ +This allows maximum flexibility in future standardization, +as future definitions MAY define additional semantics and constraints +on the contents of these _annotations_. + +Implementations MUST NOT assign meaning or semantics to +an _annotation_ starting with `reserved-annotation-start`: +these are reserved for future standardization. +Whitespace before or after a _reserved body_ is not part of the _reserved body_. +Implementations MUST NOT remove or alter the contents of a _reserved body_, +including any interior whitespace, +but MAY remove or alter whitespace before or after the _reserved body_. + +While a reserved sequence is technically "well-formed", +unrecognized _reserved-annotations_ or _private-use-annotations_ have no meaning. + +```abnf +reserved-annotation = reserved-annotation-start [[s] reserved-body] +reserved-annotation-start = "!" / "%" / "*" / "+" / "<" / ">" / "?" / "~" + +reserved-body = reserved-body-part *([s] reserved-body-part) +reserved-body-part = reserved-char / reserved-escape / quoted +``` + +### Markup + +**_Markup_** _placeholders_ are _pattern_ parts +that can be used to represent non-language parts of a _message_, +such as inline elements or styling that should apply to a span of parts. + +_Markup_ MUST begin with U+007B LEFT CURLY BRACKET `{` +and end with U+007D RIGHT CURLY BRACKET `}`. +_Markup_ MAY contain one or more _attributes_. + +_Markup_ comes in three forms: + +**_Markup-open_** starts with U+0023 NUMBER SIGN `#` and +represents an opening element within the _message_, +such as markup used to start a span. +It MAY include _options_. + +**_Markup-standalone_** starts with U+0023 NUMBER SIGN `#` +and has a U+002F SOLIDUS `/` immediately before its closing `}` +representing a self-closing or standalone element within the _message_. +It MAY include _options_. + +**_Markup-close_** starts with U+002F SOLIDUS `/` and +is a _pattern_ part ending a span.
+ +```abnf +markup = "{" [s] "#" identifier *(s option) *(s attribute) [s] ["/"] "}" ; open and standalone + / "{" [s] "/" identifier *(s option) *(s attribute) [s] "}" ; close +``` + +> A _message_ with one `button` markup span and a standalone `img` markup element: +> +> ``` +> {#button}Submit{/button} or {#img alt=|Cancel| /}. +> ``` + +> A _message_ with attributes in the closing tag: +> +> ``` +> {#ansi attr=|bold,italic|}Bold and italic{/ansi attr=|bold|} italic only {/ansi attr=|italic|} no formatting. +> ``` + +A _markup-open_ can appear without a corresponding _markup-close_. +A _markup-close_ can appear without a corresponding _markup-open_. +_Markup_ _placeholders_ can appear in any order without making the _message_ invalid. +However, specifications or implementations defining _markup_ might impose requirements +on the pairing, ordering, or contents of _markup_ during _formatting_. + +### Attributes + +**_Attributes_ are reserved for standardization by future versions of this specification.** +Examples in this section are meant to be illustrative and +might not match future requirements or usage. + +> [!NOTE] +> The Tech Preview does not provide a built-in mechanism for overriding +> values in the _formatting context_ (most notably the locale). +> Nor does it provide a mechanism for identifying specific expressions +> such as by assigning a name or id. +> The utility of these types of mechanisms has been debated. +> There are at least two proposed mechanisms for implementing support for +> these. +> Specifically, one mechanism would be to reserve specifically-named options, +> possibly using a Unicode namespace (i.e. `locale=xxx` or `u:locale=xxx`). +> Such options would be reserved for use in any and all functions or markup. +> The other mechanism would be to use the reserved "expression attribute" syntax +> for this purpose (i.e. `@locale=xxx` or `@id=foo`). +> Neither mechanism was included in this Tech Preview.
+> Feedback on the preferred mechanism for managing these features +> is strongly desired. +> +> In the meantime, function authors and other implementers are cautioned to avoid creating +> function-specific or implementation-specific option values for this purpose. +> One workaround would be to use the implementation's namespace for these +> features to ensure later interoperability when such a mechanism is finalized +> during the Tech Preview period. +> Specifically: +> - Avoid specifying an option for setting the locale of an expression as different from +> that of the overall _message_ locale, or use a namespace that later maps to the final +> mechanism. +> - Avoid specifying options for the purpose of linking placeholders +> (such as to pair opening markup to closing markup). +> If such an option is created, the implementer should use an +> implementation-specific namespace. +> Users and implementers are cautioned that such options might be +> replaced with a standard mechanism in a future version. +> - Avoid specifying generic options to communicate with translators and +> translation tooling (i.e. implementation-specific options that apply to all +> functions). +> The above are all desirable features. +> We welcome contributions to and proposals for such features during the +> Technical Preview. + +An **_attribute_** is an _identifier_ with an optional value +that appears in an _expression_ or in _markup_. + +_Attributes_ are prefixed by a U+0040 COMMERCIAL AT `@` sign, +followed by an _identifier_. +An _attribute_ MAY have a _value_ which is separated from the _identifier_ +by a U+003D EQUALS SIGN `=` along with optional whitespace. +The _value_ of an _attribute_ can be either a _literal_ or a _variable_. + +Multiple _attributes_ are permitted in an _expression_ or _markup_. +Each _attribute_ is separated by whitespace. + +The order of _attributes_ is not significant.
+ + +```abnf +attribute = "@" identifier [[s] "=" [s] (literal / variable)] +``` + +> Examples of _expressions_ and _markup_ with _attributes_: +> +> A _message_ including a _literal_ that should not be translated: +> +> ``` +> In French, "{|bonjour| @translate=no}" is a greeting +> ``` +> +> A _message_ with _markup_ that should not be copied: +> +> ``` +> Have a {#span @can-copy}great and wonderful{/span @can-copy} birthday! +> ``` + +### Other Syntax Elements + +This section defines common elements used to construct _messages_. + +#### Keywords + +A **_keyword_** is a reserved token that has a unique meaning in the _message_ syntax. + +The following three keywords are defined: `.input`, `.local`, and `.match`. +Keywords are always lowercase and start with U+002E FULL STOP `.`. + +```abnf +input = %s".input" +local = %s".local" +match = %s".match" +``` + +#### Literals + +A **_literal_** is a character sequence that appears outside +of _text_ in various parts of a _message_. +A _literal_ can appear +as a _key_ value, +as the _operand_ of a _literal-expression_, +or in the value of an _option_. +A _literal_ MAY include any Unicode code point +except for U+0000 NULL or the surrogate code points U+D800 through U+DFFF. + +All code points are preserved. + +A **_quoted_** literal begins and ends with U+007C VERTICAL LINE `|`. +The characters `\` and `|` within a _quoted_ literal MUST be +escaped as `\\` and `\|`. + +An **_unquoted_** literal is a _literal_ that does not require the `|` +quotes around it to be distinct from the rest of the _message_ syntax. +An _unquoted_ literal MAY be used when the content of the _literal_ +contains no whitespace and otherwise matches the `unquoted` production. +Any _unquoted_ literal MAY be _quoted_. +Implementations MUST NOT distinguish between _quoted_ and _unquoted_ literals +that have the same sequence of code points. + +_Unquoted_ literals can contain a _name_ or consist of a _number-literal_.
+A _number-literal_ uses the same syntax as JSON and is intended for the encoding +of number values in _operands_ or _options_, or as _keys_ for _variants_. + +```abnf +literal = quoted / unquoted +quoted = "|" *(quoted-char / quoted-escape) "|" +unquoted = name / number-literal +number-literal = ["-"] (%x30 / (%x31-39 *DIGIT)) ["." 1*DIGIT] [%i"e" ["-" / "+"] 1*DIGIT] +``` + +#### Names and Identifiers + +An **_identifier_** is a character sequence that +identifies a _function_, _markup_, or _option_. +Each _identifier_ consists of a _name_ optionally preceded by +a _namespace_. +When present, the _namespace_ is separated from the _name_ by a +U+003A COLON `:`. +Built-in _functions_ and their _options_ do not have a _namespace_ identifier. + +The _namespace_ `u` (U+0075 LATIN SMALL LETTER U) +is reserved for future standardization. + +_Function_ _identifiers_ are prefixed with `:`. +_Markup_ _identifiers_ are prefixed with `#` or `/`. +_Option_ _identifiers_ have no prefix. + +A **_name_** is a character sequence used in an _identifier_ +or as the name for a _variable_ +or the value of an _unquoted_ _literal_. + +_Variable_ names are prefixed with `$`. + +Valid content for _names_ is based on Namespaces in XML 1.0's +[NCName](https://www.w3.org/TR/xml-names/#NT-NCName). +This is different from XML's [Name](https://www.w3.org/TR/xml/#NT-Name) +in that it MUST NOT contain a U+003A COLON `:`. +Otherwise, the set of characters allowed in a _name_ is large. + +> [!NOTE] +> _External variables_ can be passed in that are not valid _names_. +> Such variables cannot be referenced in a _message_, +> but are not otherwise errors.
+ +Examples: +> A variable: +>``` +> This has a {$variable} +>``` +> A function: +> ``` +> This has a {:function} +> ``` +> An add-on function from the `icu` namespace: +> ``` +> This has a {:icu:function} +> ``` +> An option and an add-on option: +> ``` +> This has {:options option=value icu:option=add_on} +> ``` + +Support for _namespaces_ and their interpretation is implementation-defined +in this release. + +```abnf +variable = "$" name +option = identifier [s] "=" [s] (literal / variable) + +identifier = [namespace ":"] name +namespace = name +name = name-start *name-char +name-start = ALPHA / "_" + / %xC0-D6 / %xD8-F6 / %xF8-2FF + / %x370-37D / %x37F-1FFF / %x200C-200D + / %x2070-218F / %x2C00-2FEF / %x3001-D7FF + / %xF900-FDCF / %xFDF0-FFFC / %x10000-EFFFF +name-char = name-start / DIGIT / "-" / "." + / %xB7 / %x300-36F / %x203F-2040 +``` + +#### Escape Sequences + +An **_escape sequence_** is a two-character sequence starting with +U+005C REVERSE SOLIDUS `\`. + +An _escape sequence_ allows the appearance of lexically meaningful characters +in the body of _text_, _quoted_, or _reserved_ (which includes, in this case, +_private-use_) sequences respectively: + +```abnf +text-escape = backslash ( backslash / "{" / "}" ) +quoted-escape = backslash ( backslash / "|" ) +reserved-escape = backslash ( backslash / "{" / "|" / "}" ) +backslash = %x5C ; U+005C REVERSE SOLIDUS "\" +``` + +#### Whitespace + +**_Whitespace_** is defined as one or more of +U+0009 CHARACTER TABULATION (tab), +U+000A LINE FEED (new line), +U+000D CARRIAGE RETURN, +U+3000 IDEOGRAPHIC SPACE, +or U+0020 SPACE. + +Inside _patterns_ and _quoted literals_, +whitespace is part of the content and is recorded and stored verbatim. +Whitespace is not significant outside translatable text, except where required by the syntax. 
+ +> [!NOTE] +> The character U+3000 IDEOGRAPHIC SPACE is included in whitespace for +> compatibility with certain East Asian keyboards and input methods, +> in which users might accidentally create these characters in a _message_. + +```abnf +s = 1*( SP / HTAB / CR / LF / %x3000 ) +``` + +## Complete ABNF + +The grammar below uses the ABNF notation [[STD68](https://www.rfc-editor.org/info/std68)], +including the modifications found in [RFC 7405](https://www.rfc-editor.org/rfc/rfc7405). + +RFC7405 defines a variation of ABNF that is case-sensitive. +Some ABNF tools are only compatible with the specification found in +[RFC 5234](https://www.rfc-editor.org/rfc/rfc5234). +To make `message.abnf` compatible with that version of ABNF, replace +the rules of the same name with this block: + +```abnf +input = %x2E.69.6E.70.75.74 ; ".input" +local = %x2E.6C.6F.63.61.6C ; ".local" +match = %x2E.6D.61.74.63.68 ; ".match" +``` + +### `message.abnf` + +```abnf +message = simple-message / complex-message + +simple-message = [simple-start pattern] +simple-start = simple-start-char / text-escape / placeholder +pattern = *(text-char / text-escape / placeholder) +placeholder = expression / markup + +complex-message = *(declaration [s]) complex-body +declaration = input-declaration / local-declaration / reserved-statement +complex-body = quoted-pattern / matcher + +input-declaration = input [s] variable-expression +local-declaration = local s variable [s] "=" [s] expression + +quoted-pattern = "{{" pattern "}}" + +matcher = match-statement 1*([s] variant) +match-statement = match 1*([s] selector) +selector = expression +variant = key *(s key) [s] quoted-pattern +key = literal / "*" + +; Expressions +expression = literal-expression + / variable-expression + / annotation-expression +literal-expression = "{" [s] literal [s annotation] *(s attribute) [s] "}" +variable-expression = "{" [s] variable [s annotation] *(s attribute) [s] "}" +annotation-expression = "{" [s] annotation *(s 
attribute) [s] "}" + +annotation = function + / private-use-annotation + / reserved-annotation + +markup = "{" [s] "#" identifier *(s option) *(s attribute) [s] ["/"] "}" ; open and standalone + / "{" [s] "/" identifier *(s option) *(s attribute) [s] "}" ; close + +; Expression and literal parts +function = ":" identifier *(s option) +option = identifier [s] "=" [s] (literal / variable) +; Attributes are reserved for future standardization +attribute = "@" identifier [[s] "=" [s] (literal / variable)] + +variable = "$" name +literal = quoted / unquoted +quoted = "|" *(quoted-char / quoted-escape) "|" +unquoted = name / number-literal +; number-literal matches JSON number (https://www.rfc-editor.org/rfc/rfc8259#section-6) +number-literal = ["-"] (%x30 / (%x31-39 *DIGIT)) ["." 1*DIGIT] [%i"e" ["-" / "+"] 1*DIGIT] + +; Keywords; Note that these are case-sensitive +input = %s".input" +local = %s".local" +match = %s".match" + +; Reserve additional .keywords for use by future versions of this specification. +reserved-statement = reserved-keyword [s reserved-body] 1*([s] expression) +; Note that the following production is a simplification, +; as this rule MUST NOT be considered to match existing keywords +; (`.input`, `.local`, and `.match`). +reserved-keyword = "." name + +; Reserve additional sigils for use by future versions of this specification. +reserved-annotation = reserved-annotation-start [[s] reserved-body] +reserved-annotation-start = "!" / "%" / "*" / "+" / "<" / ">" / "?" / "~" + +; Reserve sigils for private-use by implementations. 
+private-use-annotation = private-start [[s] reserved-body] +private-start = "^" / "&" +reserved-body = reserved-body-part *([s] reserved-body-part) +reserved-body-part = reserved-char / reserved-escape / quoted + +; Names and identifiers +; identifier matches https://www.w3.org/TR/REC-xml-names/#NT-QName +; name matches https://www.w3.org/TR/REC-xml-names/#NT-NCName +identifier = [namespace ":"] name +namespace = name +name = name-start *name-char +name-start = ALPHA / "_" + / %xC0-D6 / %xD8-F6 / %xF8-2FF + / %x370-37D / %x37F-1FFF / %x200C-200D + / %x2070-218F / %x2C00-2FEF / %x3001-D7FF + / %xF900-FDCF / %xFDF0-FFFC / %x10000-EFFFF +name-char = name-start / DIGIT / "-" / "." + / %xB7 / %x300-36F / %x203F-2040 + +; Restrictions on characters in various contexts +simple-start-char = content-char / s / "@" / "|" +text-char = content-char / s / "." / "@" / "|" +quoted-char = content-char / s / "." / "@" / "{" / "}" +reserved-char = content-char / "." +content-char = %x01-08 ; omit NULL (%x00), HTAB (%x09) and LF (%x0A) + / %x0B-0C ; omit CR (%x0D) + / %x0E-1F ; omit SP (%x20) + / %x21-2D ; omit . (%x2E) + / %x2F-3F ; omit @ (%x40) + / %x41-5B ; omit \ (%x5C) + / %x5D-7A ; omit { | } (%x7B-7D) + / %x7E-2FFF ; omit IDEOGRAPHIC SPACE (%x3000) + / %x3001-D7FF ; omit surrogates + / %xE000-10FFFF + +; Character escapes +text-escape = backslash ( backslash / "{" / "}" ) +quoted-escape = backslash ( backslash / "|" ) +reserved-escape = backslash ( backslash / "{" / "|" / "}" ) +backslash = %x5C ; U+005C REVERSE SOLIDUS "\" + +; Whitespace +s = 1*( SP / HTAB / CR / LF / %x3000 ) +``` + +## Errors + +Errors in messages and their formatting MAY occur and be detected +at different stages of processing. +Where available, +the use of validation tools is recommended, +as early detection of errors makes their correction easier. + +### Error Handling + +_Syntax Errors_ and _Data Model Errors_ apply to all message processors, +and MUST be emitted as soon as possible. 
+The other error categories are only emitted during formatting, +but it might be possible to detect them with validation tools. + +During selection, an _expression_ handler MUST only emit _Resolution Errors_ and _Selection Errors_. +During formatting, an _expression_ handler MUST only emit _Resolution Errors_ and _Formatting Errors_. + +_Resolution Errors_ and _Formatting Errors_ in _expressions_ that are not used +in _pattern selection_ or _formatting_ MAY be ignored, +as they do not affect the output of the formatter. + +In all cases, when encountering a runtime error, +a message formatter MUST provide some representation of the message. +An informative error or errors MUST also be separately provided. + +When a message contains more than one error, +or contains some error which leads to further errors, +an implementation which does not emit all of the errors +SHOULD prioritise _Syntax Errors_ and _Data Model Errors_ over others. + +When an error occurs within a _selector_, +the _selector_ MUST NOT match any _variant_ _key_ other than the catch-all `*` +and a _Resolution Error_ or a _Selection Error_ MUST be emitted. + +### Syntax Errors + +**_Syntax Errors_** occur when the syntax representation of a message is not well-formed. + +> Example invalid messages resulting in a _Syntax Error_: +> +> ``` +> {{Missing end braces +> ``` +> +> ``` +> {{Missing one end brace} +> ``` +> +> ``` +> Unknown {{expression}} +> ``` +> +> ``` +> .local $var = {|no message body|} +> ``` + +### Data Model Errors + +**_Data Model Errors_** occur when a message is invalid due to +violating one of the semantic requirements on its structure. + +#### Variant Key Mismatch + +A **_Variant Key Mismatch_** occurs when the number of keys on a _variant_ +does not equal the number of _selectors_. 
+ +> Example invalid messages resulting in a _Variant Key Mismatch_ error: +> +> ``` +> .match {$one :func} +> 1 2 {{Too many}} +> * {{Otherwise}} +> ``` +> +> ``` +> .match {$one :func} {$two :func} +> 1 2 {{Two keys}} +> * {{Missing a key}} +> * * {{Otherwise}} +> ``` + +#### Missing Fallback Variant + +A **_Missing Fallback Variant_** error occurs when the message +does not include a _variant_ with only catch-all keys. + +> Example invalid messages resulting in a _Missing Fallback Variant_ error: +> +> ``` +> .match {$one :func} +> 1 {{Value is one}} +> 2 {{Value is two}} +> ``` +> +> ``` +> .match {$one :func} {$two :func} +> 1 * {{First is one}} +> * 1 {{Second is one}} +> ``` + +#### Missing Selector Annotation + +A **_Missing Selector Annotation_** error occurs when the _message_ +contains a _selector_ that does not have an _annotation_, +or contains a _variable_ that does not directly or indirectly reference a _declaration_ with an _annotation_. + +> Examples of invalid messages resulting in a _Missing Selector Annotation_ error: +> +> ``` +> .match {$one} +> 1 {{Value is one}} +> * {{Value is not one}} +> ``` +> +> ``` +> .local $one = {|The one|} +> .match {$one} +> 1 {{Value is one}} +> * {{Value is not one}} +> ``` +> +> ``` +> .input {$one} +> .match {$one} +> 1 {{Value is one}} +> * {{Value is not one}} +> ``` + +#### Duplicate Declaration + +A **_Duplicate Declaration_** error occurs when a _variable_ is declared more than once. +Note that an input _variable_ is implicitly declared when it is first used, +so explicitly declaring it after such use is also an error. 
+ +> Examples of invalid messages resulting in a _Duplicate Declaration_ error: +> +> ``` +> .input {$var :number maximumFractionDigits=0} +> .input {$var :number minimumFractionDigits=0} +> {{Redeclaration of the same variable}} +> +> .local $var = {$ext :number maximumFractionDigits=0} +> .input {$var :number minimumFractionDigits=0} +> {{Redeclaration of a local variable}} +> +> .input {$var :number minimumFractionDigits=0} +> .local $var = {$ext :number maximumFractionDigits=0} +> {{Redeclaration of an input variable}} +> +> .input {$var :number minimumFractionDigits=$var2} +> .input {$var2 :number} +> {{Redeclaration of the implicit input variable $var2}} +> +> .local $var = {$ext :someFunction} +> .local $var = {$error} +> .local $var2 = {$var2 :error} +> {{{$var} cannot be redefined. {$var2} cannot refer to itself}} +> ``` + +#### Duplicate Option Name + +A **_Duplicate Option Name_** error occurs when the same _identifier_ +appears on the left-hand side of more than one _option_ in the same _expression_. + +> Examples of invalid messages resulting in a _Duplicate Option Name_ error: +> +> ``` +> Value is {42 :number style=percent style=decimal} +> ``` +> +> ``` +> .local $foo = {horse :func one=1 two=2 one=1} +> {{This is {$foo}}} +> ``` + +### Resolution Errors + +**_Resolution Errors_** occur when the runtime value of a part of a message +cannot be determined. + +#### Unresolved Variable + +An **_Unresolved Variable_** error occurs when a variable reference cannot be resolved. + +> For example, attempting to format either of the following messages +> would result in an _Unresolved Variable_ error if done within a context that +> does not provide for the variable reference `$var` to be successfully resolved: +> +> ``` +> The value is {$var}. 
+> ``` +> +> ``` +> .match {$var :func} +> 1 {{The value is one.}} +> * {{The value is not one.}} +> ``` + +#### Unknown Function + +An **_Unknown Function_** error occurs when an _expression_ includes +a reference to a function which cannot be resolved. + +> For example, attempting to format either of the following messages +> would result in an _Unknown Function_ error if done within a context that +> does not provide for the function `:func` to be successfully resolved: +> +> ``` +> The value is {horse :func}. +> ``` +> +> ``` +> .match {|horse| :func} +> 1 {{The value is one.}} +> * {{The value is not one.}} +> ``` + +#### Unsupported Expression + +An **_Unsupported Expression_** error occurs when an expression uses +syntax reserved for future standardization, +or for private implementation use that is not supported by the current implementation. + +> For example, attempting to format this message +> would always result in an _Unsupported Expression_ error: +> +> ``` +> The value is {!horse}. +> ``` +> +> Attempting to format this message would result in an _Unsupported Expression_ error +> if done within a context that does not support the `^` private use sigil: +> +> ``` +> .match {|horse| ^private} +> 1 {{The value is one.}} +> * {{The value is not one.}} +> ``` + +#### Invalid Expression + +An **_Invalid Expression_** error occurs when a _message_ includes an _expression_ +whose implementation-defined internal requirements produce an error during _function resolution_ +or when a _function_ returns a value (such as `null`) that the implementation does not support. 
+ +An **_Operand Mismatch Error_** is an _Invalid Expression_ error that occurs when +an _operand_ provided to a _function_ during _function resolution_ does not match one of the +expected implementation-defined types for that function; +or in which a literal _operand_ value does not have the required format +and thus cannot be processed into one of the expected implementation-defined types +for that specific _function_. + +> For example, the following _message_ produces an _Operand Mismatch Error_ +> (a type of _Invalid Expression_ error) +> because the literal `|horse|` does not match the production `number-literal`, +> which is a requirement of the function `:number` for its operand: +> ``` +> .local $horse = {horse :number} +> {{You have a {$horse}.}} +> ``` +> The following _message_ might produce an _Invalid Expression_ error if the +> function `:function` threw an exception or otherwise emitted an error +> rather than returning a valid value: +>``` +> {{This has an invalid expression {$var :function} because it has a bug in it.}} +>``` + +#### Unsupported Statement + +An **_Unsupported Statement_** error occurs when a message includes a _reserved statement_. + +> For example, attempting to format this message +> would always result in an _Unsupported Statement_ error: +> +> ``` +> .some {|horse|} +> {{The message body}} +> ``` + +### Selection Errors + +**_Selection Errors_** occur when message selection fails.
+ +> For example, attempting to format either of the following messages +> might result in a _Selection Error_ if done within a context that +> uses a `:number` selector function which requires its input to be numeric: +> +> ``` +> .match {|horse| :number} +> 1 {{The value is one.}} +> * {{The value is not one.}} +> ``` +> +> ``` +> .local $sel = {|horse| :number} +> .match {$sel} +> 1 {{The value is one.}} +> * {{The value is not one.}} +> ``` + +### Formatting Errors + +**_Formatting Errors_** occur during the formatting of a resolved value, +for example when encountering a value with an unsupported type +or an internally inconsistent set of options. + +> For example, attempting to format any of the following messages +> might result in a _Formatting Error_ if done within a context that +> +> 1. provides for the variable reference `$user` to resolve to +> an object `{ name: 'Kat', id: 1234 }`, +> 2. provides for the variable reference `$field` to resolve to +> a string `'address'`, and +> 3. uses a `:get` formatting function which requires its argument to be an object and +> an option `field` to be provided with a string value, +> +> ``` +> Hello, {horse :get field=name}! +> ``` +> +> ``` +> Hello, {$user :get}! +> ``` +> +> ``` +> .local $id = {$user :get field=id} +> {{Hello, {$id :get field=name}!}} +> ``` +> +> ``` +> Your {$field} is {$id :get field=$field} +> ``` + +## Function Registry + +Implementations and tooling can greatly benefit from a +structured definition of formatting and matching functions available to messages at runtime. +This specification is intended to provide a mechanism for storing such declarations in a portable manner. + +### Goals + +_This section is non-normative._ + +The registry provides a machine-readable description of MessageFormat 2 extensions (custom functions), +in order to support the following goals and use-cases: + +- Validate semantic properties of messages. For example: + - Type-check values passed into functions. 
+ - Validate that matching functions are only called in selectors. + - Validate that formatting functions are only called in placeholders. + - Verify the exhaustiveness of variant keys given a selector. +- Support the localization roundtrip. For example: + - Generate variant keys for a given locale during XLIFF extraction. +- Improve the authoring experience. For example: + - Forbid edits to certain function options (e.g. currency options). + - Autocomplete function and option names. + - Display on-hover tooltips for function signatures with documentation. + - Display/edit known message metadata. + - Restrict input in GUI by providing a dropdown with all viable option values. + +### Conformance and Use + +_This section is normative._ + +To be conformant with MessageFormat 2.0, an implementation MUST implement +the _functions_, _options_ and _option_ values, _operands_ and outputs +described in the section [Default Registry](#default-registry) below. + +Implementations MAY implement additional _functions_ or additional _options_. +In particular, implementations are encouraged to provide feedback on proposed +_options_ and their values. + +> [!IMPORTANT] +> In the Tech Preview, the [registry data model](#registry-data-model) should +> be regarded as experimental. +> Changes to the format are expected during this period. +> Feedback on the registry's format and implementation is encouraged! + +Implementations are not required to provide a machine-readable registry +nor to read or interpret the registry data model in order to be conformant. + +The MessageFormat 2.0 Registry was created to describe +the core set of formatting and selection _functions_, +including _operands_, _options_, and _option_ values. +This is the minimum set of functionality needed for conformance. +By using the same names and values, _messages_ can be used interchangeably +by different implementations, +regardless of programming language or runtime environment. 
+This ensures that developers do not have to relearn core MessageFormat syntax +and functionality when moving between platforms +and that translators do not need to know about the runtime environment for most +selection or formatting operations. + +The registry provides a machine-readable description of _functions_ +suitable for tools, such as those used in translation automation, so that +variant expansion and information about available _options_ and their effects +are available in the translation ecosystem. +To that end, implementations are strongly encouraged to provide appropriately +tailored versions of the registry for consumption by tools +(even if not included in software distributions) +and to encourage any add-on or plug-in functionality to provide +a registry to support localization tooling. + +### Registry Data Model + +_This section is non-normative._ + +> [!IMPORTANT] +> This part of the specification is not part of the Tech Preview. + +The registry contains descriptions of function signatures. + +The main building block of the registry is the `<function>` element. +It represents an implementation of a custom function available to translation at runtime. +A function defines a human-readable `<description>` of its behavior +and one or more machine-readable _signatures_ of how to call it. +Named `<validationRule>` elements can optionally define regex validation rules for +literals, option values, and variant keys. + +MessageFormat 2 functions can be invoked in two contexts: + +- inside placeholders, to produce a part of the message's formatted output; + for example, a raw value of `|1.5|` may be formatted to `1,5` in a language which uses commas as decimal separators, +- inside selectors, to contribute to selecting the appropriate variant among all given variants. + +A single _function name_ may be used in both contexts, +regardless of whether it's implemented as one or multiple functions.
+ +A _signature_ defines one particular set of at most one argument and any number of named options +that can be used together in a single call to the function. +`` corresponds to a function call inside a placeholder inside translatable text. +`` corresponds to a function call inside a selector. + +A signature may define the positional argument of the function with the `` element. +If the `` element is not present, the function is defined as a nullary function. +A signature may also define one or more `