CLDR-17587 merge from main

conradarcturus · Oct 25, 2024 · 24f25d4 · 24f25d4
2 parents a0a084f + bca324f
commit 24f25d4
Show file tree

Hide file tree

Showing 175 changed files with 4,220 additions and 1,914 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -10,7 +10,7 @@ updates:
       interval: "monthly"
     commit-message:
       include: scope
-      prefix: "CLDR-17492 gh:"
+      prefix: "CLDR-17993 gh:"
     assignees:
       - btangmu
       - srl295
@@ -25,7 +25,7 @@ updates:
       interval: "monthly"
     commit-message:
       include: scope
-      prefix: "CLDR-17492 js:"
+      prefix: "CLDR-17993 js:"
     assignees:
       - btangmu
       - srl295
@@ -39,7 +39,7 @@ updates:
       interval: "monthly"
     commit-message:
       include: scope
-      prefix: "CLDR-17492 tr:"
+      prefix: "CLDR-17993 tr:"
     assignees:
       - btangmu
       - srl295
@@ -54,7 +54,7 @@ updates:
       interval: "monthly"
     commit-message:
       include: scope
-      prefix: "CLDR-17492 j:"
+      prefix: "CLDR-17993 j:"
     assignees:
       - btangmu
       - srl295

diff --git a/.github/workflows/commit-checker.yml b/.github/workflows/commit-checker.yml
@@ -25,7 +25,7 @@ on:
 jobs:
   build:
     name: Run Checker
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - name: clone CLDR
         uses: actions/checkout@v4

diff --git a/common/supplemental/likelySubtags.xml b/common/supplemental/likelySubtags.xml
@@ -36,7 +36,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
 		<likelySubtag from="ann" to="ann_Latn_NG"/>		<!--Obolo‧?‧?	➡ Obolo‧Latin‧Nigeria-->
 		<likelySubtag from="aoz" to="aoz_Latn_ID"/>		<!--Uab Meto‧?‧?	➡ Uab Meto‧Latin‧Indonesia-->
 		<likelySubtag from="apc" to="apc_Arab_SY"/>		<!--Levantine Arabic‧?‧?	➡ Levantine Arabic‧Arabic‧Syria-->
-		<likelySubtag from="apd" to="apd_Arab_TG"/>		<!--Sudanese Arabic‧?‧?	➡ Sudanese Arabic‧Arabic‧Togo-->
+		<likelySubtag from="apd" to="apd_Arab_SD"/>		<!--Sudanese Arabic‧?‧?	➡ Sudanese Arabic‧Arabic‧Sudan-->
 		<likelySubtag from="ar" to="ar_Arab_EG"/>		<!--Arabic‧?‧?	➡ Arabic‧Arabic‧Egypt-->
 		<likelySubtag from="arc" to="arc_Armi_IR"/>		<!--Aramaic‧?‧?	➡ Aramaic‧Imperial Aramaic‧Iran-->
 		<likelySubtag from="arc_Hatr" to="arc_Hatr_IQ"/>		<!--Aramaic‧Hatran‧?	➡ Aramaic‧Hatran‧Iraq-->
@@ -128,6 +128,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
 		<likelySubtag from="cad" to="cad_Latn_US"/>		<!--Caddo‧?‧?	➡ Caddo‧Latin‧United States-->
 		<likelySubtag from="cch" to="cch_Latn_NG"/>		<!--Atsam‧?‧?	➡ Atsam‧Latin‧Nigeria-->
 		<likelySubtag from="ccp" to="ccp_Cakm_BD"/>		<!--Chakma‧?‧?	➡ Chakma‧Chakma‧Bangladesh-->
+		<likelySubtag from="ccr" to="ccr_Latn_SV"/>		<!--Cacaopera‧?‧?	➡ Cacaopera‧Latin‧El Salvador-->
 		<likelySubtag from="ce" to="ce_Cyrl_RU"/>		<!--Chechen‧?‧?	➡ Chechen‧Cyrillic‧Russia-->
 		<likelySubtag from="ceb" to="ceb_Latn_PH"/>		<!--Cebuano‧?‧?	➡ Cebuano‧Latin‧Philippines-->
 		<likelySubtag from="cgg" to="cgg_Latn_UG"/>		<!--Chiga‧?‧?	➡ Chiga‧Latin‧Uganda-->
@@ -405,6 +406,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
 		<likelySubtag from="lbe" to="lbe_Cyrl_RU"/>		<!--Lak‧?‧?	➡ Lak‧Cyrillic‧Russia-->
 		<likelySubtag from="lbw" to="lbw_Latn_ID"/>		<!--Tolaki‧?‧?	➡ Tolaki‧Latin‧Indonesia-->
 		<likelySubtag from="lcp" to="lcp_Thai_CN"/>		<!--Western Lawa‧?‧?	➡ Western Lawa‧Thai‧China-->
+		<likelySubtag from="len" to="len_Latn_SV"/>		<!--Lenca‧?‧?	➡ Lenca‧Latin‧El Salvador-->
 		<likelySubtag from="lep" to="lep_Lepc_IN"/>		<!--Lepcha‧?‧?	➡ Lepcha‧Lepcha‧India-->
 		<likelySubtag from="lez" to="lez_Cyrl_RU"/>		<!--Lezghian‧?‧?	➡ Lezghian‧Cyrillic‧Russia-->
 		<likelySubtag from="lg" to="lg_Latn_UG"/>		<!--Ganda‧?‧?	➡ Ganda‧Latin‧Uganda-->
@@ -578,6 +580,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
 		<likelySubtag from="pnt_Cyrl" to="pnt_Cyrl_RU"/>		<!--Pontic‧Cyrillic‧?	➡ Pontic‧Cyrillic‧Russia-->
 		<likelySubtag from="pnt_Latn" to="pnt_Latn_TR"/>		<!--Pontic‧Latin‧?	➡ Pontic‧Latin‧Türkiye-->
 		<likelySubtag from="pon" to="pon_Latn_FM"/>		<!--Pohnpeian‧?‧?	➡ Pohnpeian‧Latin‧Micronesia-->
+		<likelySubtag from="ppl" to="ppl_Latn_SV"/>		<!--Pipil‧?‧?	➡ Pipil‧Latin‧El Salvador-->
 		<likelySubtag from="pqm" to="pqm_Latn_CA"/>		<!--Maliseet-Passamaquoddy‧?‧?	➡ Maliseet-Passamaquoddy‧Latin‧Canada-->
 		<likelySubtag from="pra" to="pra_Khar_PK"/>		<!--Prakrit languages‧?‧?	➡ Prakrit languages‧Kharoshthi‧Pakistan-->
 		<likelySubtag from="prd" to="prd_Arab_IR"/>		<!--Parsi-Dari‧?‧?	➡ Parsi-Dari‧Arabic‧Iran-->
@@ -841,7 +844,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
 		<likelySubtag from="zh_Hant" to="zh_Hant_TW"/>		<!--Chinese‧Traditional‧?	➡ Chinese‧Traditional‧Taiwan-->
 		<likelySubtag from="zhx" to="zhx_Nshu_CN"/>		<!--Chinese (family)‧?‧?	➡ Chinese (family)‧Nüshu‧China-->
 		<likelySubtag from="zkt" to="zkt_Kits_CN"/>		<!--Kitan‧?‧?	➡ Kitan‧Khitan small script‧China-->
-		<likelySubtag from="zlm" to="zlm_Latn_TG"/>		<!--Malay (individual language)‧?‧?	➡ Malay (individual language)‧Latin‧Togo-->
+		<likelySubtag from="zlm" to="zlm_Latn_MY"/>		<!--Malay (individual language)‧?‧?	➡ Malay (individual language)‧Latin‧Malaysia-->
 		<likelySubtag from="zmi" to="zmi_Latn_MY"/>		<!--Negeri Sembilan Malay‧?‧?	➡ Negeri Sembilan Malay‧Latin‧Malaysia-->
 		<likelySubtag from="zu" to="zu_Latn_ZA"/>		<!--Zulu‧?‧?	➡ Zulu‧Latin‧South Africa-->
 		<likelySubtag from="zza" to="zza_Latn_TR"/>		<!--Zaza‧?‧?	➡ Zaza‧Latin‧Türkiye-->
@@ -1060,7 +1063,6 @@ not be patched by hand, as any changes made in that fashion may be lost.
 		<likelySubtag from="und_Arab_MU" to="ur_Arab_MU"/>		<!--?‧Arabic‧Mauritius	➡ Urdu‧Arabic‧Mauritius-->
 		<likelySubtag from="und_Arab_NG" to="ha_Arab_NG"/>		<!--?‧Arabic‧Nigeria	➡ Hausa‧Arabic‧Nigeria-->
 		<likelySubtag from="und_Arab_PK" to="ur_Arab_PK"/>		<!--?‧Arabic‧Pakistan	➡ Urdu‧Arabic‧Pakistan-->
-		<likelySubtag from="und_Arab_TG" to="apd_Arab_TG"/>		<!--?‧Arabic‧Togo	➡ Sudanese Arabic‧Arabic‧Togo-->
 		<likelySubtag from="und_Arab_TH" to="mfa_Arab_TH"/>		<!--?‧Arabic‧Thailand	➡ Pattani Malay‧Arabic‧Thailand-->
 		<likelySubtag from="und_Arab_TJ" to="fa_Arab_TJ"/>		<!--?‧Arabic‧Tajikistan	➡ Persian‧Arabic‧Tajikistan-->
 		<likelySubtag from="und_Arab_TR" to="apc_Arab_TR"/>		<!--?‧Arabic‧Türkiye	➡ Levantine Arabic‧Arabic‧Türkiye-->
@@ -2340,7 +2342,6 @@ not be patched by hand, as any changes made in that fashion may be lost.
 		<likelySubtag from="ccl" to="ccl_Latn_TZ" origin="sil1"/>		<!--Cutchi-Swahili‧?‧?	➡ Cutchi-Swahili‧Latin‧Tanzania-->
 		<likelySubtag from="ccm" to="ccm_Latn_MY" origin="sil1"/>		<!--Malaccan Creole Malay‧?‧?	➡ Malaccan Creole Malay‧Latin‧Malaysia-->
 		<likelySubtag from="cco" to="cco_Latn_MX" origin="sil1"/>		<!--Comaltepec Chinantec‧?‧?	➡ Comaltepec Chinantec‧Latin‧Mexico-->
-		<likelySubtag from="ccr" to="ccr_Latn_SV" origin="sil1"/>		<!--Cacaopera‧?‧?	➡ Cacaopera‧Latin‧El Salvador-->
 		<likelySubtag from="cde" to="cde_Telu_IN" origin="sil1"/>		<!--Chenchu‧?‧?	➡ Chenchu‧Telugu‧India-->
 		<likelySubtag from="cdf" to="cdf_Latn_IN" origin="sil1"/>		<!--Chiru‧?‧?	➡ Chiru‧Latin‧India-->
 		<likelySubtag from="cdh" to="cdh_Deva_IN" origin="sil1"/>		<!--Chambeali‧?‧?	➡ Chambeali‧Devanagari‧India-->
@@ -4258,7 +4259,6 @@ not be patched by hand, as any changes made in that fashion may be lost.
 		<likelySubtag from="lek" to="lek_Latn_PG" origin="sil1"/>		<!--Leipon‧?‧?	➡ Leipon‧Latin‧Papua New Guinea-->
 		<likelySubtag from="lel" to="lel_Latn_CD" origin="sil1"/>		<!--Lele (Democratic Republic of Congo)‧?‧?	➡ Lele (Democratic Republic of Congo)‧Latin‧Congo - Kinshasa-->
 		<likelySubtag from="lem" to="lem_Latn_CM" origin="sil1"/>		<!--Nomaande‧?‧?	➡ Nomaande‧Latin‧Cameroon-->
-		<likelySubtag from="len" to="len_Latn_HN" origin="sil1"/>		<!--Lenca‧?‧?	➡ Lenca‧Latin‧Honduras-->
 		<likelySubtag from="leo" to="leo_Latn_CM" origin="sil1"/>		<!--Leti (Cameroon)‧?‧?	➡ Leti (Cameroon)‧Latin‧Cameroon-->
 		<likelySubtag from="leq" to="leq_Latn_PG" origin="sil1"/>		<!--Lembena‧?‧?	➡ Lembena‧Latin‧Papua New Guinea-->
 		<likelySubtag from="ler" to="ler_Latn_PG" origin="sil1"/>		<!--Lenkau‧?‧?	➡ Lenkau‧Latin‧Papua New Guinea-->
@@ -5781,7 +5781,6 @@ not be patched by hand, as any changes made in that fashion may be lost.
 		<likelySubtag from="ppe" to="ppe_Latn_PG" origin="sil1"/>		<!--Papi‧?‧?	➡ Papi‧Latin‧Papua New Guinea-->
 		<likelySubtag from="ppi" to="ppi_Latn_MX" origin="sil1"/>		<!--Paipai‧?‧?	➡ Paipai‧Latin‧Mexico-->
 		<likelySubtag from="ppk" to="ppk_Latn_ID" origin="sil1"/>		<!--Uma‧?‧?	➡ Uma‧Latin‧Indonesia-->
-		<likelySubtag from="ppl" to="ppl_Latn_SV" origin="sil1"/>		<!--Pipil‧?‧?	➡ Pipil‧Latin‧El Salvador-->
 		<likelySubtag from="ppm" to="ppm_Latn_ID" origin="sil1"/>		<!--Papuma‧?‧?	➡ Papuma‧Latin‧Indonesia-->
 		<likelySubtag from="ppn" to="ppn_Latn_PG" origin="sil1"/>		<!--Papapana‧?‧?	➡ Papapana‧Latin‧Papua New Guinea-->
 		<likelySubtag from="ppo" to="ppo_Latn_PG" origin="sil1"/>		<!--Folopa‧?‧?	➡ Folopa‧Latin‧Papua New Guinea-->

diff --git a/common/supplemental/supplementalData.xml b/common/supplemental/supplementalData.xml
@@ -1316,6 +1316,7 @@ XXX Code for transations where no currency is involved
 		<language type="anp" scripts="Deva"/>
 		<language type="aoz" scripts="Latn"/>
 		<language type="apc" territories="IL JO LB PS SY TR" alt="secondary"/>
+		<language type="apd" territories="SD" alt="secondary"/>
 		<language type="ar" scripts="Arab" territories="AE BH DJ DZ EG EH ER IL IQ JO KM KW LB LY MA MR OM PS QA SA SD SO SY TD TN YE"/>
 		<language type="ar" scripts="Syrc" territories="IR SS" alt="secondary"/>
 		<language type="arc" scripts="Armi Nbat Palm" alt="secondary"/>
@@ -4035,6 +4036,7 @@ XXX Code for transations where no currency is involved
 			<languagePopulation type="en" populationPercent="38" officialStatus="official"/>	<!--English-->
 		</territory>
 		<territory type="SD" gdp="136000000000" literacyPercent="71.9" population="50467300">	<!--Sudan-->
+			<languagePopulation type="apd" populationPercent="61"/>	<!--Sudanese Arabic-->
 			<languagePopulation type="ar" populationPercent="61" officialStatus="official" references="R1234"/>	<!--Arabic-->
 			<languagePopulation type="en" populationPercent="61" officialStatus="official" references="R1235"/>	<!--English-->
 			<languagePopulation type="bej" populationPercent="5.4" references="R1314"/>	<!--Beja-->
@@ -4139,6 +4141,9 @@ XXX Code for transations where no currency is involved
 		</territory>
 		<territory type="SV" gdp="71960000000" literacyPercent="84.5" population="6628700">	<!--El Salvador-->
 			<languagePopulation type="es" populationPercent="89" officialStatus="official"/>	<!--Spanish-->
+			<languagePopulation type="ccr" populationPercent="0.056"/>	<!--Cacaopera-->
+			<languagePopulation type="ppl" populationPercent="0.041"/>	<!--Pipil-->
+			<languagePopulation type="len" populationPercent="0.02"/>	<!--Lenca-->
 		</territory>
 		<territory type="SX" gdp="1912000000" literacyPercent="99" population="46215">	<!--Sint Maarten-->
 			<languagePopulation type="en" populationPercent="68" officialStatus="official" references="R1238"/>	<!--English-->

diff --git a/docs/ldml/tr35-dates.md b/docs/ldml/tr35-dates.md
@@ -91,6 +91,25 @@ The LDML specification is divided into the following parts:
   * [Week of Year](#Date_Patterns_Week_Of_Year)
   * [Week Elements](#Date_Patterns_Week_Elements)
 * [Parsing Dates and Times](#Parsing_Dates_Times)
+* [Semantic Skeletons](#Semantic_Skeletons)
+  * [Parts of a Semantic Skeleton](#Parts_of_a_Semantic_Skeleton)
+    * [Semantic Field Sets](#Semantic_Field_Sets)
+      * [Date Field Sets](#Semantic_Date_Field_Sets)
+      * [Calendar Period Field Sets](#Semantic_Calendar_Period_Field_Sets)
+      * [Time Field Sets](#Semantic_Time_Field_Sets)
+      * [Time Zone Field Sets](#Semantic_Time_Zone_Field_Sets)
+      * [Composite Field Sets](#Semantic_Composite_Field_Sets)
+    * [Semantic Skeleton Options](#Semantic_Skeleton_Options)
+      * [Length](#Semantic_Skeleton_Length)
+      * [Alignment](#Semantic_Skeleton_Alignment)
+      * [Year Style](#Semantic_Skeleton_Year_Style)
+      * [Hour Cycle](#Semantic_Skeleton_Hour_Cycle)
+      * [Fractional Second Digits](#Semantic_Skeleton_Fractional_Second_Digits)
+      * [Time Zone Style](#Semantic_Skeleton_Time_Zone_Style)
+  * [Generating Patterns for Semantic Skeletons](#Generating_Patterns_for_Semantic_Skeletons)
+    * [Mapping to Standard Skeletons](#mapping-to-standard-skeletons)
+      * [Year Style Skeleton Variations](#Semantic_Year_Style_Skeleton_Variations)
+  * [Semantic Skeleton Conformance](#Semantic_Skeleton_Conformance)
 
 ## <a name="Overview_Dates_Element_Supplemental" href="#Overview_Dates_Element_Supplemental">Overview: Dates Element, Supplemental Date and Calendar Information</a>
 

diff --git a/docs/ldml/tr35-info.md b/docs/ldml/tr35-info.md
@@ -1264,11 +1264,12 @@ If there is no valid -mu value, the following steps are used to determine a regi
 1. If there is a valid -ms value then let USM  be the corresponding value in column 2 of the table below.
 Otherwise FR is not used. In either case continue with step 2.
 2. If there is a valid -rg region portion of the rg value, let R be that region, and go to Compute the category.
-	* See the table above for the examples `usut`, `usabc`, and `abcdef`
-4. If there is a valid region in the locale, let R be that region, and go to Compute the category.
-5. Otherwise, compute the likely subtags for the locale.
-     1. If there is a likely region, then let R be that region, and go to Compute the category.
-	 2. Otherwise, let R be 001, and go to Compute the category
+    * In the table above, this would handle the examples `usut`, `uszzzz`, and `usabc`, resulting in R = US.
+    * Because the example `abzzzz` has an invalid region portion, no region is found and processing continues with step 3.
+3. If there is a valid region in the locale, let R be that region, and go to Compute the category.
+4. Otherwise, compute the likely subtags for the locale.
+    1. If there is a likely region, then let R be that region, and go to Compute the category.
+    2. Otherwise, let R be 001, and go to Compute the category.
 
 | Key-Value   | Unit Systems Match          | Fallback Region for Unit Preferences |
 |-------------|-----------------------------|--------------------------------------|

diff --git a/docs/ldml/tr35-keyboards.md b/docs/ldml/tr35-keyboards.md
@@ -512,7 +512,7 @@ See [UAX #15](https://www.unicode.org/reports/tr15/#Description_Norm) for an ove
 
 #### Example Normalization with Markers
 
-**Example 1**
+**Example 1a**
 
 Consider this example, without markers:
 
@@ -521,7 +521,7 @@ Consider this example, without markers:
 
 The combining marks are reordered.
 
-**Example 2**
+**Example 1b**
 
 If we add markers:
 
@@ -1806,10 +1806,10 @@ _Attribute:_ `value` (required)
 <variables>
     <string id="cluster_hi" value="हि" /> <!-- a string -->
     <string id="zwnj" value="\u{200C}"/> <!-- single codepoint -->
-    <string id="acute" value="\m{acute}"/> <!-- refer to a marker -->
+    <string id="grave" value="\m{grave}"/> <!-- refer to a marker -->
     <string id="backquote" value="`"/>
-    <string id="zwnj_acute" value="${zwnj}${acute}"  /> <!-- Combine two variables -->
-    <string id="zwnj_sp_acute" value="${zwnj}\u{0020}${acute}"  /> <!-- Combine two variables -->
+    <string id="zwnj_grave" value="${zwnj}${grave}"  /> <!-- Combine two variables -->
+    <string id="zwnj_sp_grave" value="${zwnj}\u{0020}${grave}"  /> <!-- Combine two variables with a literal space (U+0020) between them -->
 </variables>
 ```
 
@@ -1822,10 +1822,10 @@ These may be then used in multiple contexts:
 …
 <!-- as part of a key bag  -->
 <key id="hi_key" output="${cluster_hi}" />
-<key id="acute_key" output="${acute}" />
+<key id="grave_key" output="${grave}" />
 …
-<!-- Display ´ instead of the non-displayable marker -->
-<display output="${acute}" display="${backquote}" />
+<!-- Display ` instead of the non-displayable marker -->
+<display output="${grave}" display="${backquote}" />
 ```
 
 * * *
@@ -2393,7 +2393,7 @@ Used in the `to=`
 
     - The `from=` and `to=` sides of the pattern must both be using `set` variables. There is no way to insert a set literal on either side and avoid using a variable.
 
-    - The two variables (here `upper` and `lower`) must have exactly the same number of whitespace-separated items. Leading and trailing space (such as at the end of `lower`) is ignored. A variable without any spaces is considered to be a set variable of exactly one item.
+    - The two variables (here `upper` and `lower`) must have exactly the same number of whitespace-separated items. Leading and trailing space is ignored. A variable without any spaces is considered to be a set variable of exactly one item.
 
     - As described in [Additional Features](#additional-features), the `upper` set variable as used here matches as if it is `((?:A|B|CC|D|E|FF|G))`, showing the enclosing capturing group. When text from the input context matches this expression, and all above conditions are met, the mapping proceeds as follows:
 
@@ -2704,7 +2704,7 @@ Keyboarding applications typically work, but are not required to, in one of two
 
 **_text editing_**
 
-> text editing happens when a user moves the cursor into some previously entered text which may have been entered by someone else. As such, there is no way to know in which order things were typed, but a user will still want appropriate behaviour when they press backspace. This may involve deleting more than one character or replacing a sequence of characters with a different sequence.
+> text editing happens when a user moves the cursor into some previously entered text which may have been entered by someone else. As such, there is no way to know in which order things were typed, but a user will still want appropriate behavior when they press backspace. This may involve deleting more than one character or replacing a sequence of characters with a different sequence.
 
 In text editing mode, different keyboard layouts may behave differently in the same textual context. The backspace transform allows the keyboard layout to specify the effect of pressing backspace in a particular textual context. This is done by specifying a set of backspace rules that match a string before the cursor and replace it with another string. The rules are expressed within a `transforms type="backspace"` element.