CLDR-13980 v46 ddl: Seed data for Kazakh (Arabic) (#589)

unicode-org · May 11, 2024 · 1ccf85c · 1ccf85c
1 parent 82dca18
commit 1ccf85c
Show file tree

Hide file tree

Showing 10 changed files with 139 additions and 31 deletions.
diff --git a/common/casing/kk_Arab.xml b/common/casing/kk_Arab.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
+<!-- Copyright © 1991-2020 Unicode, Inc.
+CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
+For terms of use, see http://www.unicode.org/copyright.html
+Identity-only casing file for kk_Arab (language kk, script Arab); it declares no casing data of its own. -->
+<ldml>
+	<identity>
+		<version number="$Revision$"/>
+		<language type="kk"/>
+		<script type="Arab"/>
+	</identity>
+</ldml>
diff --git a/common/collation/kk_Arab.xml b/common/collation/kk_Arab.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
+<!--
+Copyright © 1991-2020 Unicode, Inc.
+CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
+For terms of use, see http://www.unicode.org/copyright.html
+Originally prepared by Murat Karibay <[email protected]>
+Standard collation for kk_Arab: applies [reorder Arab] and tailors the relative order of the letters listed in the rules below. -->
+<ldml>
+	<identity>
+		<version number="$Revision$"/>
+		<language type="kk"/>
+		<script type="Arab"/>
+	</identity>
+	<collations>
+		<collation type="standard">
+			<cr><![CDATA[
+				[reorder Arab]
+				&ا<ە<ب
+				&ك<گ<ڭ<ل
+				&ھ<و<ۇ<ۆ<ۋ<ى<ي
+			]]></cr>
+		</collation>
+	</collations>
+</ldml>
diff --git a/common/main/kk_Arab.xml b/common/main/kk_Arab.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
+<!-- Copyright © 1991-2020 Unicode, Inc.
+For terms of use, see http://www.unicode.org/copyright.html
+Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
+Seed data for kk_Arab: language display name, right-to-left layout and character exemplars. NOTE(review): the trailing numbers/minimalPairs block is commented out; confirm whether it should be enabled. -->
+<ldml>
+	<identity>
+		<version number="$Revision$"/>
+		<language type="kk"/>
+		<script type="Arab"/>
+	</identity>
+	<localeDisplayNames>
+		<languages>
+			<language type="kk_Arab">قازاق ٴتىلى</language>
+		</languages>
+	</localeDisplayNames>
+	<layout>
+		<orientation>
+			<characterOrder>right-to-left</characterOrder>
+			<lineOrder>top-to-bottom</lineOrder>
+		</orientation>
+	</layout>
+	<characters>
+		<exemplarCharacters draft="contributed">[ٴ ا ب پ ت ج چ ح د ر ز س ش ع ف ق ك ڭ گ ل م ن ھ ە و ۆ ۇ ۋ ى ي]</exemplarCharacters>
+		<exemplarCharacters type="index" draft="contributed">[ٴ ا ب پ ت ج چ ح د ر ز س ش ع ف ق ك ڭ گ ل م ن ھ ە و ۆ ۇ ۋ ى ي]</exemplarCharacters>
+		<exemplarCharacters type="punctuation">[\- ‐ ‑ – — ، ؛ \: ! ؟ . … ' ‘ ’ &quot; “ ” « » ( ) \[ \] \{ \} § @ * / \&amp; #]</exemplarCharacters>
+		<exemplarCharacters type="numbers">[\- ‑ , . % ‰ + 0 1 2 3 4 5 6 7 8 9]</exemplarCharacters>
+		<ellipsis type="final">{0}…</ellipsis>
+		<ellipsis type="initial">…{0}</ellipsis>
+		<ellipsis type="medial">{0}…{1}</ellipsis>
+		<ellipsis type="word-final">{0} …</ellipsis>
+		<ellipsis type="word-initial">… {0}</ellipsis>
+		<ellipsis type="word-medial">{0} … {1}</ellipsis>
+		<moreInformation>؟</moreInformation>
+		<parseLenients scope="date" level="lenient">
+			<parseLenient sample="-" draft="contributed">↑↑↑</parseLenient>
+			<parseLenient sample=":" draft="contributed">↑↑↑</parseLenient>
+		</parseLenients>
+	</characters>
+	<!-- <numbers>
+		<minimalPairs>
+			<pluralMinimalPairs count="other">سەبەتتە {0} الما بار. ولار سىزدىكى مە؟</pluralMinimalPairs>
+			<ordinalMinimalPairs ordinal="other">{0}- بۇرىلىستان وڭعا بۇرىلىڭىز.</ordinalMinimalPairs>
+		</minimalPairs>
+	</numbers> -->
+</ldml>
diff --git a/common/main/kk_Arab_CN.xml b/common/main/kk_Arab_CN.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
+<!-- Copyright © 1991-2020 Unicode, Inc.
+For terms of use, see http://www.unicode.org/copyright.html
+Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
+Identity-only file for kk_Arab_CN (language kk, script Arab, territory CN); it declares no locale data of its own. -->
+<ldml>
+	<identity>
+		<version number="$Revision$"/>
+		<language type="kk"/>
+		<script type="Arab"/>
+		<territory type="CN"/>
+	</identity>
+</ldml>
diff --git a/common/main/kk_Cyrl.xml b/common/main/kk_Cyrl.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
+<!-- Copyright © 1991-2020 Unicode, Inc.
+For terms of use, see http://www.unicode.org/copyright.html
+Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
+Identity-only file for kk_Cyrl (language kk, script Cyrl); it declares no locale data of its own. -->
+<ldml>
+	<identity>
+		<version number="$Revision$"/>
+		<language type="kk"/>
+		<script type="Cyrl"/>
+	</identity>
+</ldml>
diff --git a/common/main/kk_Cyrl_KZ.xml b/common/main/kk_Cyrl_KZ.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
+<!-- Copyright © 1991-2020 Unicode, Inc.
+For terms of use, see http://www.unicode.org/copyright.html
+Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
+Identity-only file for kk_Cyrl_KZ (language kk, script Cyrl, territory KZ); it declares no locale data of its own. -->
+<ldml>
+	<identity>
+		<version number="$Revision$"/>
+		<language type="kk"/>
+		<script type="Cyrl"/>
+		<territory type="KZ"/>
+	</identity>
+</ldml>
diff --git a/common/supplemental/supplementalMetadata.xml b/common/supplemental/supplementalMetadata.xml
@@ -1837,8 +1837,10 @@ For terms of use, see http://www.unicode.org/copyright.html
 			hsb_DE hu_HU hy_AM
 			ia_001 id_ID ie_EE ife_TG ig_NG ii_CN io_001 is_IS it_IT iu_CA iu_Latn_CA
 			ja_JP jbo_001 jgo_CM jmc_TZ jv_ID
-			ka_GE kaa_Cyrl kaa_Cyrl_UZ kaa_Latn_UZ kab_DZ kaj_NG kam_KE kcg_NG kde_TZ kea_CV ken_CM kgp_BR 
-			khq_ML ki_KE kk_KZ kkj_CM kl_GL kln_KE km_KH kn_IN ko_KR kok_Deva kok_Deva_IN kok_Latn_IN kpe_LR ks_Arab 
+			ka_GE kaa_Cyrl kaa_Cyrl_UZ kaa_Latn_UZ kab_DZ kaj_NG kam_KE kcg_NG kde_TZ kea_CV ken_CM kgp_BR
+			khq_ML ki_KE
+			kk_Arab_CN kk_Cyrl kk_Cyrl_KZ
+			kkj_CM kl_GL kln_KE km_KH kn_IN ko_KR kok_Deva kok_Deva_IN kok_Latn_IN kpe_LR ks_Arab 
 			ks_Arab_IN ks_Deva_IN ksb_TZ ksf_CM ksh_DE ku_TR kw_GB kxv_Deva_IN kxv_Latn 
 			kxv_Latn_IN	kxv_Orya_IN kxv_Telu_IN ky_KG
 			la_VA lag_TZ lb_LU lg_UG lij_IT lkt_US lld_IT lmo_IT ln_CD lo_LA lrc_IR

diff --git a/exemplars/main/kk_Arab.xml b/exemplars/main/kk_Arab.xml
diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/locales/scriptToExemplars.txt b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/locales/scriptToExemplars.txt
@@ -4,13 +4,13 @@
 #   Eg, if locale1 and locale2 use the same script, and locale1 has exemplars [abcd] and locale2 has [cdef],
 #   then the resulting set will be [af]
 #   If the set is [], it is omitted here.
-# It is cached here to avoid the cost of doing the derivation at runtime; possible because the CLDR exemplars don't change much in the ST, 
+# It is cached here to avoid the cost of doing the derivation at runtime; possible because the CLDR exemplars don't change much in the ST,
 #   and these are only hints.
 # LikelySubtagsTest.java
 # - Contains a test that verifies that this data correspond to the derivation from the CLDR data.
 # - Also has the details of the derivation from CLDR data, and shows how to refresh this data, if CLDR data changes.
 # - NOTE: We may tweak the derivation over time.
-Arab ;	105 ;	[ؠ-ئةثحخذص-غؽف-كه-ْٰٔٙٛٲٶٹ-ځڃ-ڊڌڍڏڑړڕږژ-ښڠڢڤڦک-ګڭگڱڳڵں-ھہۃۄۆ-ېےەݙݜݢݨݲ]
+Arab ;	106 ;	[ؠ-ئةثحخذص-غؽف-كه-ْٰٔٙٛٲٴٶٹ-ځڃ-ڊڌڍڏڑړڕږژ-ښڠڢڤڦک-ګڭگڱڳڵں-ھہۃۄۆ-ېےەݙݜݢݨݲ]
 Beng ;	12 ;	[ঌরঽৄৎৗৠ-ৣৰৱ]
 Cans ;	133 ;	[ᐁᐍᐏᐑᐓᐕᐘᐚᐟᐠᐢ-ᐤᐦᐨᐩᐯᐻᐽᐿᑁᑃᑅᑇᑉᑊᑌᑘᑚᑜᑞᑠᑢᑤᑦᑫᑵᑷᑹᑻᑽᑿᒁᒃᒉᒓᒕᒗᒙᒛᒝᒟᒡᒣᒭᒯᒱᒳᒵᒷᒹᒻᒼᓀᓊᓌᓎᓐᓕ-ᓘᓚᓛᓪᓭᓷᓹᓻᓽᓿᔁᔃᔅᔦᔰᔲᔴᔶᔸᔺᔼᔾᕆ-ᕉᕋᕌᕐᕕ-ᕚᕝᕿ-ᖃᖅᖏᖑ-ᖖᖠ-ᖦᙱ-ᙶ]
 Cyrl ;	109 ;	[ʼве-йфцш-яё-ќўџѡѣѧѫѯѱѳѵѷѻѽѿ҃҇ґғҕҗҙқҝҟҡңҥҩҫҭүұҳҵҷҹһҽҿӑӕӗәӡӣөӯӳӷԥⷠ-ⷪⷬⷭⷯⷱⷴⸯꙁꙋꙍꙗ꙽ꙿ{гӏ}{кӏ}{пӏ}{тӏ}{хӏ}{цӏ}{чӏ}]

diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestLocale.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestLocale.java
@@ -86,12 +86,12 @@ public static void main(String[] args) {
 
     public void TestLanguageRegions() {
         Set<String> missingLanguageRegion = new LinkedHashSet<>();
-        // TODO This should be derived from metadata: https://unicode.org/cldr/trac/ticket/11224
+        // TODO This should be derived from metadata: CLDR-11224
         Set<String> knownMultiScriptLanguages =
                 new HashSet<>(
                         Arrays.asList(
-                                "az", "ff", "bs", "hi", "ks", "mni", "ms", "pa", "sat", "sd", "shi",
-                                "sr", "su", "vai", "uz", "yue", "zh"));
+                                "az", "ff", "bs", "hi", "kk", "ks", "mni", "ms", "pa", "sat", "sd",
+                                "shi", "sr", "su", "vai", "uz", "yue", "zh"));
         Set<String> available = testInfo.getCldrFactory().getAvailable();
         LanguageTagParser ltp = new LanguageTagParser();
         Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();