Date: Thu, 5 Oct 2023 17:57:26 +0200
Subject: [PATCH 07/12] CLDR-16937 Minor clarification for dx (#3320)
---
docs/ldml/tr35.md | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/docs/ldml/tr35.md b/docs/ldml/tr35.md
index 5b192781633..0946fa589ff 100644
--- a/docs/ldml/tr35.md
+++ b/docs/ldml/tr35.md
@@ -789,8 +789,9 @@ The BCP 47 form for keys and types is the canonical form, and recommended. Other
"dx" |
Dictionary break script exclusions |
unicode_script_subtag values |
- One or more items of type SCRIPT_CODE, which are valid unicode_script_subtag values.
- The code Zyyy (Common) can be specified to exclude all scripts, in which case it should be the only SCRIPT_CODE value specified. |
+ One or more items of type SCRIPT_CODE (as usual, separated by hyphens), which are valid unicode_script_subtag values.
+ The code Zyyy (Common) can be specified to exclude all scripts, in which case it should be the only SCRIPT_CODE value specified.
+ If others are included mistakenly, they are ignored. |
A Unicode Emoji Presentation Style Identifier specifies a request for the preferred emoji presentation style. This can be used as part of the value for an HTML lang attribute, for example <html lang="sr-Latn-u-em-emoji"> . The valid values are those name attribute values in the type elements of key name="em" in bcp47/variant.xml. |
"em" |
From 609ed4215705670b3806ecb2a929e2d6c0fb1cf7 Mon Sep 17 00:00:00 2001
From: Mark Davis
Date: Thu, 5 Oct 2023 17:57:49 +0200
Subject: [PATCH 08/12] CLDR-16038 fix spec constraints using unit id component
(#3321)
---
docs/ldml/tr35-general.md | 49 +++++++++++++++++++++++++--------------
1 file changed, 31 insertions(+), 18 deletions(-)
diff --git a/docs/ldml/tr35-general.md b/docs/ldml/tr35-general.md
index 39b8e7d14fe..a0bc167cbee 100644
--- a/docs/ldml/tr35-general.md
+++ b/docs/ldml/tr35-general.md
@@ -913,16 +913,21 @@ Some of the constraints reference data from the unitIdComponents in [Unit_Conver
| long_unit_identifier
core_unit_identifier | := |
- product_unit ("-per-" product_unit)*
- | "per-" product_unit ("-per-" product_unit)*
+ | product_unit ("-" per "-" product_unit)*
+ | per "-" product_unit ("-" per "-" product_unit)*
- Examples:
- foot-per-second-per-second
- per-second
- Note: The normalized form will have only one "per"
- - Note: The token 'per' is the single value in <unitIdComponent type=”per”>
|
+per | := |
+ "per"
+
+ - Constraint: The token 'per' is the single value in <unitIdComponent type="per">
+ |
+
product_unit | := |
single_unit ("-" single_unit)* ("-" pu_single_unit)*
| pu_single_unit ("-" pu_single_unit)*
@@ -935,9 +940,9 @@ Some of the constraints reference data from the unitIdComponents in [Unit_Conver
- Examples: square-meter, or 100-square-meter
|
pu_single_unit | := |
- “xxx-” single_unit | “x-” single_unit
+ | "xxx-" single_unit | "x-" single_unit
- Example: xxx-square-knuts (a Harry Potter unit)
- - Note: “x-” is only for backwards compatibility
+ - Note: "x-" is only for backwards compatibility
- See Private-Use Units
|
@@ -954,18 +959,19 @@ Some of the constraints reference data from the unitIdComponents in [Unit_Conver
dimensionality_prefix | := |
"square-" | "cubic-" | "pow" ([2-9]|1[0-5]) "-"
+ - Constraint: must be value in: <unitIdComponent type="power">.
- Note: "pow2-" and "pow3-" canonicalize to "square-" and "cubic-"
- - Note: These are values in <unitIdComponent type=”power”>
+ - Note: These are values in <unitIdComponent type="power">
|
simple_unit | := |
(prefix_component "-")* (prefixed_unit | base_component) ("-" suffix_component)*
| currency_unit
- | “em” | “g” | “us” | “hg” | "of"
+ | "em" | "g" | "us" | "hg" | "of"
- Examples: kilometer, meter, cup-metric, fluid-ounce, curr-chf, em
- - Note: Three simple units are currently allowed as legacy usage, for tokens that wouldn’t otherwise be a base_component due to length (eg, “g-force”).
- We will likely deprecate those and add conformant aliases in the future: the “hg” and “of” are already only in deprecated simple_units.
+ - Note: Three simple units are currently allowed as legacy usage, for tokens that wouldn’t otherwise be a base_component due to length (eg, "g-force").
+ We will likely deprecate those and add conformant aliases in the future: the "hg" and "of" are already only in deprecated simple_units.
|
prefixed_unit | |
@@ -984,16 +990,16 @@ Some of the constraints reference data from the unitIdComponents in [Unit_Conver
prefix_component | := |
[a-z]{3,∞}
- - Constraint: must be value in: <unitIdComponent type=”prefix_component”>.
|
+ - Constraint: must be value in: <unitIdComponent type="prefix">.
base_component | := |
[a-z]{3,∞}
- Constraint: must not be a value in any of the following:
- <unitIdComponent type=”prefix_component”>
- or <unitIdComponent type=”suffix_component”>
- or <unitIdComponent type=”power”>
- or <unitIdComponent type=”and”>
- or <unitIdComponent type=”per”>.
+ <unitIdComponent type="prefix">
+ or <unitIdComponent type="suffix">
+ or <unitIdComponent type="power">
+ or <unitIdComponent type="and">
+ or <unitIdComponent type="per">.
- Constraint: must not have a prefix as an initial segment.
- Constraint: no two different base_components will share the first 8 letters.
@@ -1004,12 +1010,19 @@ Some of the constraints reference data from the unitIdComponents in [Unit_Conver
suffix_component | := |
[a-z]{3,∞}
- - Constraint: must be value in: <unitIdComponent type=”suffix_component”>
|
+
+ - Constraint: must be value in: <unitIdComponent type="suffix">
+
|
mixed_unit_identifier | := |
- (single_unit | pu_single_unit) ("-and-" (single_unit | pu_single_unit ))*
+ | (single_unit | pu_single_unit) ("-" and "-" (single_unit | pu_single_unit ))*
- Example: foot-and-inch
- - Note: The token 'and' is the single value in <unitIdComponent type=”and”>
+
|
+
+and | := |
+ "and"
+
+ - Constraint: The token 'and' is the single value in <unitIdComponent type="and">
|
long_unit_identifier | := |
From 2e03d3c30a3205729ced20294e3eee472b14b118 Mon Sep 17 00:00:00 2001
From: Mark Davis
Date: Thu, 5 Oct 2023 17:58:43 +0200
Subject: [PATCH 09/12] CLDR-16249 Describe EBNF syntax more clearly (#3322)
---
docs/ldml/tr35.md | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/docs/ldml/tr35.md b/docs/ldml/tr35.md
index 0946fa589ff..943c86997c0 100644
--- a/docs/ldml/tr35.md
+++ b/docs/ldml/tr35.md
@@ -246,7 +246,13 @@ External specifications may also reference particular components of Unicode loca
> _Field X can contain any Unicode region subtag values as given in Unicode Technical Standard #35: Unicode Locale Data Markup Language (LDML), excluding grouping codes._
+### EBNF
+The BNF syntax used in LDML is a variant of the Extended Backus-Naur Form (EBNF) notation used in [W3C XML Notation](https://www.w3.org/TR/REC-xml/#sec-notation). The main differences are:
+1. Bounded repetition following Perl regex syntax is allowed, such as alphanum{3,8}
+2. Constraints (well-formedness or validity) use separate notes
+
+In the text, this is sometimes referred to as "EBNF (Perl-based)".
## What is a Locale?
From 873ab680b1736e80cc1b54dbf403ee70d6e7fdf1 Mon Sep 17 00:00:00 2001
From: Mark Davis
Date: Thu, 5 Oct 2023 17:59:04 +0200
Subject: [PATCH 10/12] CLDR-16251 Add constraint on duplicate variant tags
(#3323)
* CLDR-16251 Add constraint on duplicate variant tags
* CLDR-16251 Add note about tlang.
---
docs/ldml/tr35.md | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/docs/ldml/tr35.md b/docs/ldml/tr35.md
index 943c86997c0..7c295ab99af 100644
--- a/docs/ldml/tr35.md
+++ b/docs/ldml/tr35.md
@@ -323,6 +323,9 @@ A _Unicode language identifier_ has the following structure (provided in EBNF (P
alphanum | = [0-9 A-Z a-z] ; |
+> As is often the case, the complete syntactic constraints are not easily captured by ABNF, so there is a further condition:
+> The sequence of variant subtags must not have any duplicates (eg, de-1996-fonipa-1996 is not syntactically well-formed).
+
The semantics of the various subtags is explained in _[Language Identifier Field Definitions](#Field_Definitions)_ ; there are also direct links from [`unicode_language_subtag`](#unicode_language_subtag) , etc. While theoretically the [`unicode_language_subtag`](#unicode_language_subtag) may have more than 3 letters through the IANA registration process, in practice that has not occurred. The [`unicode_language_subtag`](#unicode_language_subtag) "und" may be omitted when there is a [`unicode_script_subtag`](#unicode_script_subtag) ; for that reason [`unicode_language_subtag`](#unicode_language_subtag) values with 4 letters are not permitted. However, such [`unicode_language_id`](#unicode_language_id) values are not intended for general interchange, because they are not valid BCP 47 tags. Instead, they are intended for certain protocols such as the identification of transliterators or font ScriptLangTag values. For more information on language subtags with 4 letters, see [BCP 47 Language Tag to Unicode BCP 47 Locale Identifier](#Language_Tag_to_Locale_Identifier).
For example, "en-US" (American English), "en_GB" (British English), "es-419" (Latin American Spanish), and "uz-Cyrl" (Uzbek in Cyrillic) are all valid Unicode language identifiers.
@@ -353,6 +356,9 @@ As is often the case, the complete syntactic constraints are not easily captured
| `tkey` | `= alpha digit ;` |
| `tvalue` | `= (sep alphanum{3,8})+ ;` |
+> As is often the case, the complete syntactic constraints are not easily captured by ABNF, so there is a further condition:
+> The sequence of variant subtags in a tlang must not have any duplicates.
+
For historical reasons, this is called a Unicode locale identifier. However, it really functions (with few exceptions) as a language identifier, and accesses language-based data. Except where it would be unclear, this document uses the term "locale" data loosely to encompass both types of data: for more information, see _[Language and Locale IDs](#Language_and_Locale_IDs)_.
As of the release of this specification, there were no other_extensions defined. The other_extensions are present in the syntax to allow implementations to preserve that information.
From ba1c4f0cb14e669d6ffdc14bf48b6a18fabdff73 Mon Sep 17 00:00:00 2001
From: "Steven R. Loomis"
Date: Thu, 5 Oct 2023 12:50:30 -0500
Subject: [PATCH 11/12] CLDR-17145 BRS/kbd: update Modifications section about
the keyboard spec (#3325)
---
docs/ldml/tr35.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/ldml/tr35.md b/docs/ldml/tr35.md
index 7c295ab99af..729734cd6ec 100644
--- a/docs/ldml/tr35.md
+++ b/docs/ldml/tr35.md
@@ -4058,7 +4058,7 @@ Other contributors to CLDR are listed on the [CLDR Project Page](https://www.uni
* Rewrote and clarified the material in [Unit Preferences Overrides](tr35-info.md#Unit_Preferences_Data)
* [Keyboards](tr35-keyboards.md#Contents)
- * Complete revision, description TBS
+ * Complete rewrite of the specification by the Keyboard Subcommittee. Available as a technical preview in CLDR version 44. See [Part 7: Status](tr35-keyboards.md#status).
* [Person Names](tr35-personNames.md#Contents)
* Added material in [API Implementation](tr35-personNames.md#api-implementation) on recommended implementation API options.
From 29af7f29f53512584821850e230bb30934cbba66 Mon Sep 17 00:00:00 2001
From: "Steven R. Loomis"
Date: Tue, 3 Oct 2023 17:07:09 -0500
Subject: [PATCH 12/12] =?UTF-8?q?CLDR-17091=20kbd:=20move=20names=20into?=
=?UTF-8?q?=20info=20name=3D"=E2=80=A6"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- drop the <names> element
- make <info> required
- add <info name="…"> as a required attribute
- update docs and dtd
- test fix for XML validity
- update the info name= attribute
---
.../supplemental/attributeValueValidity.xml | 2 +-
docs/ldml/tr35-keyboards.md | 90 +++++--------------
keyboards/3.0/fr-t-k0-azerty.xml | 6 +-
keyboards/3.0/ja-Latn.xml | 5 +-
keyboards/3.0/mt-t-k0-47key.xml | 8 +-
keyboards/3.0/mt.xml | 8 +-
keyboards/3.0/pcm.xml | 5 +-
keyboards/3.0/pt-t-k0-abnt2.xml | 5 +-
keyboards/dtd/ldmlKeyboard3.dtd | 22 ++---
keyboards/dtd/ldmlKeyboard3.xsd | 25 +-----
.../KeyboardFlatten/broken-import-missing.xml | 7 +-
.../broken-import-unknownbase.xml | 7 +-
.../broken-import-unknownver.xml | 7 +-
.../broken-import-wrongparent.xml | 12 +--
14 files changed, 49 insertions(+), 160 deletions(-)
diff --git a/common/supplemental/attributeValueValidity.xml b/common/supplemental/attributeValueValidity.xml
index 33e9ee45a3c..6ce3c371322 100644
--- a/common/supplemental/attributeValueValidity.xml
+++ b/common/supplemental/attributeValueValidity.xml
@@ -452,7 +452,7 @@
-
+
simple
diff --git a/docs/ldml/tr35-keyboards.md b/docs/ldml/tr35-keyboards.md
index d2be0295a26..557a19c6796 100644
--- a/docs/ldml/tr35-keyboards.md
+++ b/docs/ldml/tr35-keyboards.md
@@ -74,8 +74,6 @@ The LDML specification is divided into the following parts:
* [Element: locale](#Element_locale)
* [Element: version](#Element_version)
* [Element: info](#Element_info)
- * [Element: names](#Element_names)
- * [Element: name](#Element_name)
* [Element: settings](#Element_settings)
* [Element: keys](#Element_keys)
* [Element: key](#Element_key)
@@ -334,7 +332,7 @@ This is the top level element. All other elements defined below are under this e
>
> Parents: _none_
>
-> Children: [displays](#Element_displays), [import](#Element_import), [info](#Element_info), [keys](#Element_keys), [layers](#Element_layers), [locales](#Element_locales), [names](#Element_names), [settings](#Element_settings), [_special_](tr35.md#special), [transforms](#Element_transforms), [variables](#Element_variables), [version](#Element_version)
+> Children: [displays](#Element_displays), [import](#Element_import), [info](#Element_info), [keys](#Element_keys), [layers](#Element_layers), [locales](#Element_locales), [settings](#Element_settings), [_special_](tr35.md#special), [transforms](#Element_transforms), [variables](#Element_variables), [version](#Element_version)
>
> Occurrence: required, single
>
@@ -485,7 +483,9 @@ Element containing informative properties about the layout, for displaying in us
**Syntax**
```xml
-
```
@@ -496,10 +496,27 @@ Element containing informative properties about the layout, for displaying in us
>
> Children: _none_
>
-> Occurrence: optional, single
+> Occurrence: required, single
>
>
+_Attribute:_ `name` (required)
+
+> Note that this is the only required attribute for the `<info>` element.
+>
+> This attribute is an informative name for the keyboard.
+
+```xml
+
+ …
+
+ …
+
+```
+
+* * *
+
+
_Attribute:_ `author`
> The `author` attribute contains the name of the author of the layout file.
@@ -519,69 +536,6 @@ _Attribute:_ `indicator`
* * *
-### Element: names
-
-Element used to store any names given to the layout.
-
-These names are not localized but are informative names for the keyboard. Localization of these names would be done as separate data items elsewhere in CLDR.
-
-**Syntax**
-
-```xml
-
- {set of name elements}
-
-```
-
->
->
-> Parents: [keyboard3](#Element_keyboard)
->
-> Children: [name](#Element_name), [_special_](tr35.md#special)
->
-> Occurrence: required, single
->
->
-
-### Element: name
-
-A single name given to the layout.
-
-**Syntax**
-
-```xml
-
-```
-
->
->
-> Parents: [names](#Element_names)
->
-> Children: _none_
->
-> Occurrence: required, multiple
->
->
-
-_Attribute:_ `value` (required)
-
-> The name of the layout.
-
-
-**Example**
-
-```xml
-
- …
-
-
-
- …
-
-```
-
-* * *
-
### Element: settings
An element used to keep track of layout-specific settings by implementations. This element may or may not show up on a layout. These settings reflect the normal practice by the implementation. However, an implementation using the data may customize the behavior.
diff --git a/keyboards/3.0/fr-t-k0-azerty.xml b/keyboards/3.0/fr-t-k0-azerty.xml
index 106a3970d2c..f9a39ab80fd 100644
--- a/keyboards/3.0/fr-t-k0-azerty.xml
+++ b/keyboards/3.0/fr-t-k0-azerty.xml
@@ -16,11 +16,7 @@
keyboard formats -->
-
-
-
-
-
+
diff --git a/keyboards/3.0/ja-Latn.xml b/keyboards/3.0/ja-Latn.xml
index 8b5b845bf25..0e7be47e8d6 100644
--- a/keyboards/3.0/ja-Latn.xml
+++ b/keyboards/3.0/ja-Latn.xml
@@ -5,10 +5,7 @@
-
-
-
-
+
diff --git a/keyboards/3.0/mt-t-k0-47key.xml b/keyboards/3.0/mt-t-k0-47key.xml
index 11ca8cda824..56361ae242c 100644
--- a/keyboards/3.0/mt-t-k0-47key.xml
+++ b/keyboards/3.0/mt-t-k0-47key.xml
@@ -5,12 +5,8 @@
-
-
-
-
-
-
+
+
diff --git a/keyboards/3.0/mt.xml b/keyboards/3.0/mt.xml
index 4e5fbfca37e..84985d1c0ad 100644
--- a/keyboards/3.0/mt.xml
+++ b/keyboards/3.0/mt.xml
@@ -11,12 +11,8 @@
-
-
-
-
-
-
+
+
diff --git a/keyboards/3.0/pcm.xml b/keyboards/3.0/pcm.xml
index 15242ed8121..4bc34be49ac 100644
--- a/keyboards/3.0/pcm.xml
+++ b/keyboards/3.0/pcm.xml
@@ -2,10 +2,7 @@
-
-
-
-
+
diff --git a/keyboards/3.0/pt-t-k0-abnt2.xml b/keyboards/3.0/pt-t-k0-abnt2.xml
index 6de82a8f9d3..8d056de3a1c 100644
--- a/keyboards/3.0/pt-t-k0-abnt2.xml
+++ b/keyboards/3.0/pt-t-k0-abnt2.xml
@@ -5,10 +5,7 @@
-
-
-
-
+
diff --git a/keyboards/dtd/ldmlKeyboard3.dtd b/keyboards/dtd/ldmlKeyboard3.dtd
index 044112a4099..5e0d7e4c458 100644
--- a/keyboards/dtd/ldmlKeyboard3.dtd
+++ b/keyboards/dtd/ldmlKeyboard3.dtd
@@ -10,7 +10,7 @@ The CLDR Keyboard Subcommittee is currently developing major changes to the CLDR
Please view the subcommittee page for the most recent information.
-->
-
+
@@ -45,6 +45,9 @@ Please view the subcommittee page for the most recent information.
+
+
+
@@ -55,26 +58,12 @@ Please view the subcommittee page for the most recent information.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -97,6 +86,9 @@ Please view the subcommittee page for the most recent information.
+
+
+
diff --git a/keyboards/dtd/ldmlKeyboard3.xsd b/keyboards/dtd/ldmlKeyboard3.xsd
index 0009638d985..ee10f8040ef 100644
--- a/keyboards/dtd/ldmlKeyboard3.xsd
+++ b/keyboards/dtd/ldmlKeyboard3.xsd
@@ -22,8 +22,7 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
-
-
+
@@ -97,6 +96,7 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
+
@@ -109,27 +109,8 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -178,6 +159,8 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
+
+
diff --git a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-missing.xml b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-missing.xml
index f706ab3fcd1..73088028861 100644
--- a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-missing.xml
+++ b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-missing.xml
@@ -8,12 +8,7 @@
-
-
-
-
-
-
+
diff --git a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownbase.xml b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownbase.xml
index 65e0307e8d9..a1e902f480e 100644
--- a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownbase.xml
+++ b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownbase.xml
@@ -8,12 +8,7 @@
-
-
-
-
-
-
+
diff --git a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownver.xml b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownver.xml
index 4b9bae247b2..1ff9b4fa415 100644
--- a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownver.xml
+++ b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownver.xml
@@ -8,12 +8,9 @@
-
+
-
-
-
-
+
diff --git a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-wrongparent.xml b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-wrongparent.xml
index 730ff719c96..5ee1baa544d 100644
--- a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-wrongparent.xml
+++ b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-wrongparent.xml
@@ -5,18 +5,12 @@
-->
+
+
-
-
-
-
-
-
-
-
-
+