From 2d4ced12e26847950cb3728919e659bb348df256 Mon Sep 17 00:00:00 2001
From: "Steven R. Loomis" <srl295@gmail.com>
Date: Thu, 8 Feb 2024 00:54:32 +0000
Subject: [PATCH] CLDR-17366 kbd: rename `unicodeSet` to `uset`

See #3496

- keyboard 3 needs a start version of 45.0; otherwise the tests incorrectly attempt to check stability against v44.
---
 docs/ldml/tr35-keyboards.md                   |  46 ++--
 keyboards/dtd/ldmlKeyboard3.dtd               |   8 +-
 keyboards/dtd/ldmlKeyboard3.xsd               | 212 +++++++++---------
 .../java/org/unicode/cldr/util/DtdType.java   |   4 +-
 .../unicode/cldr/unittest/TestDtdData.java    |   2 +-
 5 files changed, 136 insertions(+), 136 deletions(-)
diff --git a/docs/ldml/tr35-keyboards.md b/docs/ldml/tr35-keyboards.md
index 8dabaedf22c..bcadfcb1b4a 100644
--- a/docs/ldml/tr35-keyboards.md
+++ b/docs/ldml/tr35-keyboards.md
@@ -94,7 +94,7 @@ The LDML specification is divided into the following parts:
   * [Element: variables](#element-variables)
   * [Element: string](#element-string)
   * [Element: set](#element-set)
-  * [Element: unicodeSet](#element-unicodeset)
+  * [Element: uset](#element-uset)
   * [Element: transforms](#element-transforms)
     * [Markers](#markers)
   * [Element: transformGroup](#element-transformgroup)
@@ -1435,7 +1435,7 @@ Here is an example of a `row` element:
 >
 > Parents: [keyboard3](#element-keyboard3)
 >
-> Children: [import](#element-import), [_special_](tr35.md#special), [string](#element-string), [set](#element-set), [unicodeSet](#element-unicodeset)
+> Children: [import](#element-import), [_special_](tr35.md#special), [string](#element-string), [set](#element-set), [uset](#element-uset)
 >
 > Occurrence: optional, single
 > </small>
@@ -1450,7 +1450,7 @@ Note that the `id=` attribute must be unique across all children of the `variabl
 <variables>
     <string id="y" value="yes" /> <!-- a simple string-->
     <set id="upper" value="A B C D E FF" /> <!-- a set with 6 items -->
-    <unicodeSet id="consonants" value="[कसतनमह]" /> <!-- a UnicodeSet -->
+    <uset id="consonants" value="[कसतनमह]" /> <!-- a UnicodeSet -->
 </variables>
 ```
 
@@ -1571,7 +1571,7 @@ See [transform](#element-transform) for further details and syntax.
 
 * * *
 
-### Element: unicodeSet
+### Element: uset
 
 > <small>
 >
@@ -1587,38 +1587,39 @@ See [transform](#element-transform) for further details and syntax.
 
 _Attribute:_ `id` (required)
 
-> Specifies the identifier (name) of this unicodeSet.
+> Specifies the identifier (name) of this uset.
 > All ids must be unique across all types of variables.
 >
 > `id` must match `[0-9A-Za-z_]{1,32}`
 
 _Attribute:_ `value` (required)
 
-> String value in [UnicodeSet](tr35.md#Unicode_Sets) format.
+> String value in a subset of [UnicodeSet](tr35.md#Unicode_Sets) format.
 > Leading and trailing whitespace is ignored.
-> Variables may refer to other string variables if they have been previously defined, using `${string}` syntax, or to other previously-defined UnicodeSets (not sets) using `$[unicodeSet]` syntax.
+> Variables may refer to other string variables if they have been previously defined, using `${string}` syntax, or to other previously-defined `uset` elements (not `set` elements) using `$[...usetId]` syntax.
 
-**Syntax Note**
 
-- Warning: UnicodeSets look superficially similar to regex character classes as used in [`transform`](#element-transform) elements, but they are different. UnicodeSets must be defined with a `unicodeSet` element, and referenced with the `$[unicodeSet]` notation in transforms. UnicodeSets cannot be specified inline in a transform, and can only be used indirectly by reference to the corresponding `unicodeSet` element.
+- Warning: `uset` elements look superficially similar to regex character classes as used in [`transform`](#element-transform) elements, but they are different. `uset`s must be defined with a `uset` element, and referenced with the `$[...usetId]` notation in transforms. `uset`s cannot be specified inline in a transform, and can only be used indirectly by reference to the corresponding `uset` element.
 - Multi-character strings (`{}`) are not supported, such as `[żġħ{ie}{għ}]`.
-- UnicodeSet property notation (`\p{…}` or `[:…:]`) may **NOT** be used, because that would make implementations dependent on a particular version of Unicode. However, implementations and tools may wish to pre-calculate the value of a particular UnicodeSet, and "freeze" it as explicit code points.  The example below of `$[KhmrMn]` matches all nonspacing marks in the `Khmr` script.
-- UnicodeSets may represent a very large number of codepoints. A limit may be set on how many unique range entries may be matched.
+- UnicodeSet property notation (`\p{…}` or `[:…:]`) may **NOT** be used.
+
+> **Rationale**: allowing property notation would make keyboard implementations dependent on a particular version of Unicode. However, implementations and tools may wish to pre-calculate the value of a particular `uset` and "freeze" it as explicit code points. The example below of `$[KhmrMn]` matches nonspacing marks in the `Khmr` script.
+
+- `uset` elements may represent a very large number of codepoints. Keyboard implementations may set a limit on how many unique range entries may be matched.
+- The `uset` element may not be used as the source or target for mapping operations (`$[1:variable]` syntax).
+- The `uset` element may not be referenced by [`key`](#element-key) or [`display`](#element-display) elements.
 
 **Examples**
 
 ```xml
 <variables>
-  <unicodeSet id="consonants" value="[कसतनमह]" /> <!-- unicode set range -->
-  <unicodeSet id="range" value="[a-z D E F G \u{200A}]" /> <!-- a through z, plus a few others -->
-  <unicodeSet id="newrange" value="[$[range]-[G]]" /> <!-- The above range, but not including G -->
-  <unicodeSet id="KhmrMn" value="[\u{17B4}\u{17B5}\u{17B7}-\u{17BD}\u{17C6}\u{17C9}-\u{17D3}\u{17DD}]"> <!--  [[:Khmr:][:Mn:]] as of Unicode 15.0-->
+  <uset id="consonants" value="[कसतनमह]" /> <!-- unicode set range -->
+  <uset id="range" value="[a-z D E F G \u{200A}]" /> <!-- a through z, plus a few others -->
+  <uset id="newrange" value="[$[range]-[G]]" /> <!-- The above range, but not including G -->
+  <uset id="KhmrMn" value="[\u{17B4}\u{17B5}\u{17B7}-\u{17BD}\u{17C6}\u{17C9}-\u{17D3}\u{17DD}]" /> <!--  [[:Khmr:][:Mn:]] as of Unicode 15.0-->
 </variables>
 ```
 
-The `unicodeSet` element may not be used as the source or target for mapping operations (`$[1:variable]` syntax).
-The `unicodeSet` element may not be referenced by [`key`](#element-key) and [`display`](#element-display) elements.
-
 * * *
 
 ### Element: transforms
@@ -1879,7 +1880,7 @@ _Attribute:_ `from` (required)
 
     - supported
     - no Unicode properties such as `\p{…}`
-    - Warning: Character classes look superficially similar to UnicodeSets as defined in [`unicodeSet`](#element-unicodeset) elements, but they are different. UnicodeSets must be defined with a `unicodeSet` element, and referenced with the `$[unicodeSet]` notation in transforms. UnicodeSets cannot be used directly in a transform.
+    - Warning: Character classes look superficially similar to [`uset`](#element-uset) elements, but they are distinct. `uset` elements are referenced with the `$[...usetId]` notation in transforms. The `uset` notation cannot be embedded directly in a transform.
 
 - **Bounded quantifier**
 
@@ -1953,11 +1954,11 @@ The following are additions to standard Regex syntax.
 
     In this usage, the variable with `id="zwnj"` will be substituted in at this point in the expression. The variable can contain a range, a character, or any other portion of a pattern. If `zwnj` is a simple string, the pattern will match that string at this point.
 
-- **Set and UnicodeSet variables**
+- **`set` or `uset` variables**
 
     `$[upper]`
 
-    Given a space-separated variable, this syntax will match _any_ of the substrings. This expression may be thought of  (and implemented) as if it were a _non-capturing group_. It may, however, be enclosed within a capturing group. For example, the following definition of `$[upper]` will match as if it were written `(?:A|B|CC|D|E|FF)`.
+    Given a space-separated `set` variable or a `uset` variable, this syntax will match _any_ of the substrings. This expression may be thought of (and implemented) as if it were a _non-capturing group_. It may, however, be enclosed within a capturing group. For example, the following definition of `$[upper]` will match as if it were written `(?:A|B|CC|D|E|FF)`.
 
     ```xml
     <variables>
@@ -1978,7 +1979,7 @@ The following are additions to standard Regex syntax.
     Tooling may choose to suggest an expansion of properties, such as `\p{Mn}` to all non spacing marks for a certain Unicode version.  As well, a set of variables could be constructed in an `import`-able file matching particularly useful Unicode properties.
 
     ```xml
-    <unicodeSet id="Mn" value="[\u{034F}\u{0591}-\u{05AF}\u{05BD}\u{05C4}\u{05C5}\…]" /> <!-- 1,985 code points -->
+    <uset id="Mn" value="[\u{034F}\u{0591}-\u{05AF}\u{05BD}\u{05C4}\u{05C5}\…]" /> <!-- 1,985 code points -->
     ```
 
 - **Backreferences**
@@ -2066,7 +2067,6 @@ Used in the `to=`
     - The capture group on the `from=` side **must** contain exactly one set variable.  `from="Q($[upper])X"` can be used (other context before or after the capture group), but `from="(Q$[upper])"` may not be used with a mapped variable and is flagged as an error.
 
     - The `from=` and `to=` sides of the pattern must both be using `set` variables. There is no way to insert a set literal on either side and avoid using a variable.
-    A UnicodeSet may not be used directly, but must be defined as a `unicodeSet` variable.
 
     - The two variables (here `upper` and `lower`) must have exactly the same number of whitespace-separated items. Leading and trailing space (such as at the end of `lower`) is ignored. A variable without any spaces is considered to be a set variable of exactly one item.
 
diff --git a/keyboards/dtd/ldmlKeyboard3.dtd b/keyboards/dtd/ldmlKeyboard3.dtd
index e35c1853059..633aece6729 100644
--- a/keyboards/dtd/ldmlKeyboard3.dtd
+++ b/keyboards/dtd/ldmlKeyboard3.dtd
@@ -171,7 +171,7 @@ Please view the subcommittee page for the most recent information.
     <!--@MATCH:any-->
     <!--@VALUE-->
 
-<!ELEMENT variables ( import*, string*, set*, unicodeSet*, special* ) >
+<!ELEMENT variables ( import*, string*, set*, uset*, special* ) >
     <!--@TECHPREVIEW-->
 
 <!ELEMENT string EMPTY >
@@ -190,10 +190,10 @@ Please view the subcommittee page for the most recent information.
     <!--@VALUE-->
     <!--@ALLOWS_UESC-->
 
-<!ELEMENT unicodeSet EMPTY >
-<!ATTLIST unicodeSet id NMTOKEN #REQUIRED >
+<!ELEMENT uset EMPTY >
+<!ATTLIST uset id NMTOKEN #REQUIRED >
     <!--@MATCH:regex/[0-9A-Za-z_]{1,32}-->
-<!ATTLIST unicodeSet value CDATA #REQUIRED >
+<!ATTLIST uset value CDATA #REQUIRED >
     <!--@MATCH:any-->
     <!--@VALUE-->
 
diff --git a/keyboards/dtd/ldmlKeyboard3.xsd b/keyboards/dtd/ldmlKeyboard3.xsd
index 79d4ac77359..9a445135fc8 100644
--- a/keyboards/dtd/ldmlKeyboard3.xsd
+++ b/keyboards/dtd/ldmlKeyboard3.xsd
@@ -11,7 +11,7 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
   CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
 --><!--
   Important Note:
-  
+
   The CLDR Keyboard Subcommittee is currently developing major changes to the CLDR keyboard specification.
   Please view the subcommittee page for the most recent information.
   <https://cldr.unicode.org/index/keyboard-workgroup>
@@ -43,10 +43,10 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:attribute>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
+
+
+
+
   <xs:element name="import">
     <xs:complexType>
       <xs:attribute name="path" use="required"/>
@@ -59,8 +59,8 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:attribute>
     </xs:complexType>
   </xs:element>
-  
-  
+
+
   <xs:element name="locales">
     <xs:complexType>
       <xs:sequence>
@@ -68,14 +68,14 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:sequence>
     </xs:complexType>
   </xs:element>
-  
+
   <xs:element name="locale">
     <xs:complexType>
       <xs:attribute name="id" use="required"/>
     </xs:complexType>
   </xs:element>
-  
-  
+
+
   <xs:element name="version">
     <xs:complexType>
       <xs:attribute name="number"/>
@@ -88,12 +88,12 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:attribute>
     </xs:complexType>
   </xs:element>
-  
-  
-  
+
+
+
   <!-- Note: post techpreview, change cldrVersion to MATCH:version -->
-  
-  
+
+
   <xs:element name="info">
     <xs:complexType>
       <xs:attribute name="name" use="required"/>
@@ -102,15 +102,15 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       <xs:attribute name="indicator"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
-  
-  
-  
-  
-  
+
+
+
+
+
+
+
+
+
   <xs:element name="settings">
     <xs:complexType>
       <xs:attribute name="normalization">
@@ -122,9 +122,9 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:attribute>
     </xs:complexType>
   </xs:element>
-  
-  
-  
+
+
+
   <xs:element name="displays">
     <xs:complexType>
       <xs:sequence>
@@ -135,7 +135,7 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:sequence>
     </xs:complexType>
   </xs:element>
-  
+
   <xs:element name="display">
     <xs:complexType>
       <xs:attribute name="keyId" type="xs:NMTOKEN"/>
@@ -143,24 +143,24 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       <xs:attribute name="display" use="required"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
-  
-  
-  
+
+
+
+
+
+
+
   <xs:element name="displayOptions">
     <xs:complexType>
       <xs:attribute name="baseCharacter"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
+
+
+
+
   <xs:element name="special" type="any"/>
-  
+
   <xs:element name="keys">
     <xs:complexType>
       <xs:sequence>
@@ -170,7 +170,7 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:sequence>
     </xs:complexType>
   </xs:element>
-  
+
   <xs:element name="key">
     <xs:complexType>
       <xs:attribute name="id" type="xs:NMTOKEN" use="required"/>
@@ -197,24 +197,24 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       <xs:attribute name="width"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
-  
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
   <xs:element name="flicks">
     <xs:complexType>
       <xs:sequence>
@@ -233,18 +233,18 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       <xs:attribute name="id" type="xs:NMTOKEN" use="required"/>
     </xs:complexType>
   </xs:element>
-  
-  
+
+
   <xs:element name="flickSegment">
     <xs:complexType>
       <xs:attribute name="directions" type="xs:NMTOKENS" use="required"/>
       <xs:attribute name="keyId" type="xs:NMTOKEN" use="required"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
+
+
+
+
   <xs:element name="forms">
     <xs:complexType>
       <xs:sequence>
@@ -254,7 +254,7 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:sequence>
     </xs:complexType>
   </xs:element>
-  
+
   <xs:element name="form">
     <xs:complexType>
       <xs:sequence>
@@ -264,16 +264,16 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       <xs:attribute name="id" type="xs:NMTOKEN"/>
     </xs:complexType>
   </xs:element>
-  
-  
+
+
   <xs:element name="scanCodes">
     <xs:complexType>
       <xs:attribute name="codes" type="xs:NMTOKENS" use="required"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
+
+
+
   <xs:element name="layers">
     <xs:complexType>
       <xs:sequence>
@@ -285,9 +285,9 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       <xs:attribute name="minDeviceWidth"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
+
+
+
   <xs:element name="layer">
     <xs:complexType>
       <xs:sequence>
@@ -298,59 +298,59 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       <xs:attribute name="modifiers" type="xs:NMTOKENS"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
+
+
+
   <xs:element name="row">
     <xs:complexType>
       <xs:attribute name="keys" type="xs:NMTOKENS" use="required"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
+
+
+
+
   <xs:element name="variables">
     <xs:complexType>
       <xs:sequence>
         <xs:element maxOccurs="unbounded" minOccurs="0" ref="import"/>
         <xs:element maxOccurs="unbounded" minOccurs="0" ref="string"/>
         <xs:element maxOccurs="unbounded" minOccurs="0" ref="set"/>
-        <xs:element maxOccurs="unbounded" minOccurs="0" ref="unicodeSet"/>
+        <xs:element maxOccurs="unbounded" minOccurs="0" ref="uset"/>
         <xs:element maxOccurs="unbounded" minOccurs="0" ref="special"/>
       </xs:sequence>
     </xs:complexType>
   </xs:element>
-  
+
   <xs:element name="string">
     <xs:complexType>
       <xs:attribute name="id" type="xs:NMTOKEN" use="required"/>
       <xs:attribute name="value" use="required"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
+
+
+
+
   <xs:element name="set">
     <xs:complexType>
       <xs:attribute name="id" type="xs:NMTOKEN" use="required"/>
       <xs:attribute name="value" use="required"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
-  <xs:element name="unicodeSet">
+
+
+
+
+  <xs:element name="uset">
     <xs:complexType>
       <xs:attribute name="id" type="xs:NMTOKEN" use="required"/>
       <xs:attribute name="value" use="required"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
+
+
+
   <xs:element name="transforms">
     <xs:complexType>
       <xs:sequence>
@@ -368,8 +368,8 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:attribute>
     </xs:complexType>
   </xs:element>
-  
-  
+
+
   <xs:element name="transformGroup">
     <xs:complexType>
       <xs:sequence>
@@ -382,20 +382,20 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       </xs:sequence>
     </xs:complexType>
   </xs:element>
-  
+
   <xs:element name="transform">
     <xs:complexType>
       <xs:attribute name="from" use="required"/>
       <xs:attribute name="to"/>
     </xs:complexType>
   </xs:element>
-  
-  
-  
-  
-  
-  
-  
+
+
+
+
+
+
+
   <xs:element name="reorder">
     <xs:complexType>
       <xs:attribute name="before"/>
@@ -411,4 +411,4 @@ Note: DTD @-annotations are not currently converted to .xsd. For full CLDR file
       <xs:any maxOccurs="unbounded" minOccurs="0" processContents="strict"/>
     </xs:sequence>
   </xs:complexType>
-</xs:schema>
\ No newline at end of file
+</xs:schema>
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdType.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdType.java
index 5bc4812820a..70d5e41c95c 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdType.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdType.java
@@ -31,8 +31,8 @@ public enum DtdType {
             "validity"),
     ldmlBCP47("common/dtd/ldmlBCP47.dtd", "1.7.2", null, "bcp47"),
     // keyboard 3.0
-    keyboard3("keyboards/dtd/ldmlKeyboard3.dtd", "44.0", null, "../keyboards/3.0"),
-    keyboardTest3("keyboards/dtd/ldmlKeyboardTest3.dtd", "44.0", null, "../keyboards/test");
+    keyboard3("keyboards/dtd/ldmlKeyboard3.dtd", "45.0", null, "../keyboards/3.0"),
+    keyboardTest3("keyboards/dtd/ldmlKeyboardTest3.dtd", "45.0", null, "../keyboards/test");
 
     public static final Set<DtdType> STANDARD_SET =
             ImmutableSet.of(ldmlBCP47, supplementalData, ldml, keyboard3);
diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java
index 2d3215d6f4d..b2f54ba9a0d 100644
--- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java
+++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDtdData.java
@@ -777,7 +777,7 @@ public boolean isDistinguishingOld(DtdType dtdType, String elementName, String a
                         || elementName.equals("layer") && attribute.equals("id")
                         || elementName.equals("string") && attribute.equals("id")
                         || elementName.equals("set") && attribute.equals("id")
-                        || elementName.equals("unicodeSet") && attribute.equals("id")) {
+                        || elementName.equals("uset") && attribute.equals("id")) {
                     return true;
                 }
                 // fall through to old keyboard