Skip to content

Commit

Permalink
CLDR-13980 v46 ddl: Seed data for Kazakh (Arabic) (#589)
Browse files Browse the repository at this point in the history
  • Loading branch information
MuratKaribay authored May 11, 2024
1 parent 82dca18 commit 1ccf85c
Show file tree
Hide file tree
Showing 10 changed files with 139 additions and 31 deletions.
13 changes: 13 additions & 0 deletions common/casing/kk_Arab.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!-- Copyright © 1991-2020 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<ldml>
	<!-- Identity-only file for Kazakh in Arabic script: no casing data is defined here. -->
	<identity>
		<version number="$Revision$"/>
		<language type="kk"/>
		<script type="Arab"/>
	</identity>
</ldml>
25 changes: 25 additions & 0 deletions common/collation/kk_Arab.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!--
Copyright © 1991-2020 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
Originally prepared by Murat Karibay <[email protected]>
-->
<ldml>
	<identity>
		<version number="$Revision$"/>
		<language type="kk"/>
		<script type="Arab"/>
	</identity>
	<collations>
		<collation type="standard">
			<!--
			Standard collation for Kazakh written in Arabic script.
			The rule block first reorders the Arabic script, then gives three
			tailoring chains. In each chain "&" names the reset letter and every
			"<" places the following letter at a primary difference directly
			after the one before it.
			-->
			<cr><![CDATA[
				[reorder Arab]
				&ا<ە<ب
				&ك<گ<ڭ<ل
				&ھ<و<ۇ<ۆ<ۋ<ى<ي
			]]></cr>
		</collation>
	</collations>
</ldml>
48 changes: 48 additions & 0 deletions common/main/kk_Arab.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!-- Copyright © 1991-2020 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
-->
<ldml>
	<!-- Seed locale data for Kazakh written in Arabic script. -->
	<identity>
		<version number="$Revision$"/>
		<language type="kk"/>
		<script type="Arab"/>
	</identity>
	<localeDisplayNames>
		<languages>
			<language type="kk_Arab">قازاق ٴتىلى</language>
		</languages>
	</localeDisplayNames>
	<layout>
		<orientation>
			<characterOrder>right-to-left</characterOrder>
			<lineOrder>top-to-bottom</lineOrder>
		</orientation>
	</layout>
	<characters>
		<exemplarCharacters draft="contributed">[ٴ ا ب پ ت ج چ ح د ر ز س ش ع ف ق ك ڭ گ ل م ن ھ ە و ۆ ۇ ۋ ى ي]</exemplarCharacters>
		<exemplarCharacters type="index" draft="contributed">[ٴ ا ب پ ت ج چ ح د ر ز س ش ع ف ق ك ڭ گ ل م ن ھ ە و ۆ ۇ ۋ ى ي]</exemplarCharacters>
		<exemplarCharacters type="punctuation">[\- ‐ ‑ – — ، ؛ \: ! ؟ . … ' ‘ ’ &quot; “ ” « » ( ) \[ \] \{ \} § @ * / \&amp; #]</exemplarCharacters>
		<exemplarCharacters type="numbers">[\- ‑ , . % ‰ + 0 1 2 3 4 5 6 7 8 9]</exemplarCharacters>
		<ellipsis type="final">{0}…</ellipsis>
		<ellipsis type="initial">…{0}</ellipsis>
		<ellipsis type="medial">{0}…{1}</ellipsis>
		<ellipsis type="word-final">{0} …</ellipsis>
		<ellipsis type="word-initial">… {0}</ellipsis>
		<ellipsis type="word-medial">{0} … {1}</ellipsis>
		<!-- Uses the Arabic question mark so that it matches the punctuation
		     exemplars above, which list the Arabic question mark and not the ASCII one. -->
		<moreInformation>؟</moreInformation>
		<parseLenients scope="date" level="lenient">
			<parseLenient sample="-" draft="contributed">↑↑↑</parseLenient>
			<parseLenient sample=":" draft="contributed">↑↑↑</parseLenient>
		</parseLenients>
	</characters>
	<!-- <numbers>
		<minimalPairs>
			<pluralMinimalPairs count="other">سەبەتتە {0} الما بار. ولار سىزدىكى مە؟</pluralMinimalPairs>
			<ordinalMinimalPairs ordinal="other">{0}- بۇرىلىستان وڭعا بۇرىلىڭىز.</ordinalMinimalPairs>
		</minimalPairs>
	</numbers> -->
</ldml>
15 changes: 15 additions & 0 deletions common/main/kk_Arab_CN.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!-- Copyright © 1991-2020 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
-->
<ldml>
	<!-- Identity-only file: language kk, script Arab, territory CN. No locale data is defined here. -->
	<identity>
		<version number="$Revision$"/>
		<language type="kk"/>
		<script type="Arab"/>
		<territory type="CN"/>
	</identity>
</ldml>
14 changes: 14 additions & 0 deletions common/main/kk_Cyrl.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!-- Copyright © 1991-2020 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
-->
<ldml>
	<!-- Identity-only file: language kk, script Cyrl. No locale data is defined here. -->
	<identity>
		<version number="$Revision$"/>
		<language type="kk"/>
		<script type="Cyrl"/>
	</identity>
</ldml>
15 changes: 15 additions & 0 deletions common/main/kk_Cyrl_KZ.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!-- Copyright © 1991-2020 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
-->
<ldml>
	<!-- Identity-only file: language kk, script Cyrl, territory KZ. No locale data is defined here. -->
	<identity>
		<version number="$Revision$"/>
		<language type="kk"/>
		<script type="Cyrl"/>
		<territory type="KZ"/>
	</identity>
</ldml>
6 changes: 4 additions & 2 deletions common/supplemental/supplementalMetadata.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1837,8 +1837,10 @@ For terms of use, see http://www.unicode.org/copyright.html
hsb_DE hu_HU hy_AM
ia_001 id_ID ie_EE ife_TG ig_NG ii_CN io_001 is_IS it_IT iu_CA iu_Latn_CA
ja_JP jbo_001 jgo_CM jmc_TZ jv_ID
ka_GE kaa_Cyrl kaa_Cyrl_UZ kaa_Latn_UZ kab_DZ kaj_NG kam_KE kcg_NG kde_TZ kea_CV ken_CM kgp_BR
khq_ML ki_KE kk_KZ kkj_CM kl_GL kln_KE km_KH kn_IN ko_KR kok_Deva kok_Deva_IN kok_Latn_IN kpe_LR ks_Arab
ka_GE kaa_Cyrl kaa_Cyrl_UZ kaa_Latn_UZ kab_DZ kaj_NG kam_KE kcg_NG kde_TZ kea_CV ken_CM kgp_BR
khq_ML ki_KE
kk_Arab_CN kk_Cyrl kk_Cyrl_KZ
kkj_CM kl_GL kln_KE km_KH kn_IN ko_KR kok_Deva kok_Deva_IN kok_Latn_IN kpe_LR ks_Arab
ks_Arab_IN ks_Deva_IN ksb_TZ ksf_CM ksh_DE ku_TR kw_GB kxv_Deva_IN kxv_Latn
kxv_Latn_IN kxv_Orya_IN kxv_Telu_IN ky_KG
la_VA lag_TZ lb_LU lg_UG lij_IT lkt_US lld_IT lmo_IT ln_CD lo_LA lrc_IR
Expand Down
24 changes: 0 additions & 24 deletions exemplars/main/kk_Arab.xml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
# Eg, if locale1 and locale2 use the same script, and locale1 has exemplars [abcd] and locale2 has [cdef],
# then the resulting set will be [af]
# If the set is [], it is omitted here.
# It is cached here to avoid the cost of doing the derivation at runtime; possible because the CLDR exemplars don't change much in the ST,
# It is cached here to avoid the cost of doing the derivation at runtime; possible because the CLDR exemplars don't change much in the ST,
# and these are only hints.
# LikelySubtagsTest.java
# - Contains a test that verifies that this data corresponds to the derivation from the CLDR data.
# - Also has the details of the derivation from CLDR data, and shows how to refresh this data, if CLDR data changes.
# - NOTE: We may tweak the derivation over time.
Arab ; 105 ; [ؠ-ئةثحخذص-غؽف-كه-ْٰٔٙٛٲٶٹ-ځڃ-ڊڌڍڏڑړڕږژ-ښڠڢڤڦک-ګڭگڱڳڵں-ھہۃۄۆ-ېےەݙݜݢݨݲ]
Arab ; 106 ; [ؠ-ئةثحخذص-غؽف-كه-ْٰٔٙٛٲٴٶٹ-ځڃ-ڊڌڍڏڑړڕږژ-ښڠڢڤڦک-ګڭگڱڳڵں-ھہۃۄۆ-ېےەݙݜݢݨݲ]
Beng ; 12 ; [ঌরঽৄৎৗৠ-ৣৰৱ]
Cans ; 133 ; [ᐁᐍᐏᐑᐓᐕᐘᐚᐟᐠᐢ-ᐤᐦᐨᐩᐯᐻᐽᐿᑁᑃᑅᑇᑉᑊᑌᑘᑚᑜᑞᑠᑢᑤᑦᑫᑵᑷᑹᑻᑽᑿᒁᒃᒉᒓᒕᒗᒙᒛᒝᒟᒡᒣᒭᒯᒱᒳᒵᒷᒹᒻᒼᓀᓊᓌᓎᓐᓕ-ᓘᓚᓛᓪᓭᓷᓹᓻᓽᓿᔁᔃᔅᔦᔰᔲᔴᔶᔸᔺᔼᔾᕆ-ᕉᕋᕌᕐᕕ-ᕚᕝᕿ-ᖃᖅᖏᖑ-ᖖᖠ-ᖦᙱ-ᙶ]
Cyrl ; 109 ; [ʼве-йфцш-яё-ќўџѡѣѧѫѯѱѳѵѷѻѽѿ҃҇ґғҕҗҙқҝҟҡңҥҩҫҭүұҳҵҷҹһҽҿӑӕӗәӡӣөӯӳӷԥⷠ-ⷪⷬⷭⷯⷱⷴⸯꙁꙋꙍꙗ꙽ꙿ{гӏ}{кӏ}{пӏ}{тӏ}{хӏ}{цӏ}{чӏ}]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,12 @@ public static void main(String[] args) {

public void TestLanguageRegions() {
Set<String> missingLanguageRegion = new LinkedHashSet<>();
// TODO This should be derived from metadata: https://unicode.org/cldr/trac/ticket/11224
// TODO This should be derived from metadata: CLDR-11224
Set<String> knownMultiScriptLanguages =
new HashSet<>(
Arrays.asList(
"az", "ff", "bs", "hi", "ks", "mni", "ms", "pa", "sat", "sd", "shi",
"sr", "su", "vai", "uz", "yue", "zh"));
"az", "ff", "bs", "hi", "kk", "ks", "mni", "ms", "pa", "sat", "sd",
"shi", "sr", "su", "vai", "uz", "yue", "zh"));
Set<String> available = testInfo.getCldrFactory().getAvailable();
LanguageTagParser ltp = new LanguageTagParser();
Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();
Expand Down

0 comments on commit 1ccf85c

Please sign in to comment.