From 16ec47c180d541ea582b6d4d83fe915e1389e348 Mon Sep 17 00:00:00 2001 From: Conrad Nied Date: Tue, 20 Aug 2024 09:47:04 -0700 Subject: [PATCH 01/52] CLDR-11567 Add population counts for El Salvadoran Languages See https://unicode-org.atlassian.net/browse/CLDR-11567 and https://translatorswithoutborders.org/language-data-for-el-salvador Re-generated the xml with mvn package -DskipTests=true && java -jar tools/cldr-code/target/cldr-code.jar ConvertLanguageData --- common/supplemental/likelySubtags.xml | 6 +++--- common/supplemental/supplementalData.xml | 3 +++ .../unicode/cldr/util/data/country_language_population.tsv | 3 +++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/common/supplemental/likelySubtags.xml b/common/supplemental/likelySubtags.xml index ce4e105a831..e81f6d9b341 100644 --- a/common/supplemental/likelySubtags.xml +++ b/common/supplemental/likelySubtags.xml @@ -128,6 +128,7 @@ not be patched by hand, as any changes made in that fashion may be lost. + @@ -405,6 +406,7 @@ not be patched by hand, as any changes made in that fashion may be lost. + @@ -578,6 +580,7 @@ not be patched by hand, as any changes made in that fashion may be lost. + @@ -2340,7 +2343,6 @@ not be patched by hand, as any changes made in that fashion may be lost. - @@ -4258,7 +4260,6 @@ not be patched by hand, as any changes made in that fashion may be lost. - @@ -5781,7 +5782,6 @@ not be patched by hand, as any changes made in that fashion may be lost. - diff --git a/common/supplemental/supplementalData.xml b/common/supplemental/supplementalData.xml index 5d09af73a59..b344266964f 100644 --- a/common/supplemental/supplementalData.xml +++ b/common/supplemental/supplementalData.xml @@ -4139,6 +4139,9 @@ XXX Code for transations where no currency is involved + + + diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv index 1c423b54848..e2b1238c10d 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv @@ -349,6 +349,9 @@ Egypt EG "99,413,317" 74% "1,204,000,000,000" Egyptian Arabic arz 64% Egypt EG "99,413,317" 74% "1,204,000,000,000" English en 35% Egypt EG "99,413,317" 74% "1,204,000,000,000" Greek el "60,900" El Salvador SV "6,187,271" 85% "51,170,000,000" official Spanish es 89% +El Salvador SV "6,187,271" 85% "51,170,000,000" Nahaut Pipil ppl 2730 https://translatorswithoutborders.org/language-data-for-el-salvador +El Salvador SV "6,187,271" 85% "51,170,000,000" Cacaopera ccr 3700 https://translatorswithoutborders.org/language-data-for-el-salvador +El Salvador SV "6,187,271" 85% "51,170,000,000" Lenca len 1340 https://translatorswithoutborders.org/language-data-for-el-salvador Equatorial Guinea GQ "797,457" 94% "31,520,000,000" Bube bvb 7.9% Equatorial Guinea GQ "797,457" 94% "31,520,000,000" Fang fan 51% Equatorial Guinea GQ "797,457" 94% "31,520,000,000" official French fr 8.8% http://www.nationsonline.org/oneworld/equatorial_guinea.htm French is a minority official language. Crude estimate of usage based on import partner data. 
From bbfa120e2a399816b1f822889f9f1ed28f4d9982 Mon Sep 17 00:00:00 2001 From: Fredrik Date: Thu, 29 Aug 2024 20:12:15 -0700 Subject: [PATCH 02/52] CLDR-17382 languagematch Ukrainian should not fall back to Russian (#3993) Co-authored-by: Markus Scherer --- common/supplemental/languageInfo.xml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/common/supplemental/languageInfo.xml b/common/supplemental/languageInfo.xml index e36aefdc9a4..69206ac3569 100644 --- a/common/supplemental/languageInfo.xml +++ b/common/supplemental/languageInfo.xml @@ -144,7 +144,10 @@ For terms of use, see http://www.unicode.org/copyright.html - + + + + From 7c35c37726b092fcce07bf4e1860c7c52fe47361 Mon Sep 17 00:00:00 2001 From: Chris Pyle <118906070+chpy04@users.noreply.github.com> Date: Tue, 3 Sep 2024 11:42:03 -0400 Subject: [PATCH 03/52] CLDR-17566 Converting Cldr Spec (#4009) --- .../cldr-spec/core-data-for-new-locales.md | 33 ++ docs/site/index/cldr-spec/coverage-levels.md | 108 ++++++ .../picking-the-right-language-code.md | 93 +++++ docs/site/index/cldr-spec/plural-rules.md | 335 +++++++++++++++++ .../cldr-spec/transliteration-guidelines.md | 354 ++++++++++++++++++ 5 files changed, 923 insertions(+) create mode 100644 docs/site/index/cldr-spec/core-data-for-new-locales.md create mode 100644 docs/site/index/cldr-spec/coverage-levels.md create mode 100644 docs/site/index/cldr-spec/picking-the-right-language-code.md create mode 100644 docs/site/index/cldr-spec/plural-rules.md create mode 100644 docs/site/index/cldr-spec/transliteration-guidelines.md diff --git a/docs/site/index/cldr-spec/core-data-for-new-locales.md b/docs/site/index/cldr-spec/core-data-for-new-locales.md new file mode 100644 index 00000000000..b2d1c813989 --- /dev/null +++ b/docs/site/index/cldr-spec/core-data-for-new-locales.md @@ -0,0 +1,33 @@ +--- +title: Core Data for New Locales +--- + +# Core Data for New Locales + +This document describes the minimal data needed for a new locale. There are two kinds of data that are relevant for new locales: + +1. **Core Data** \- This is data that the CLDR committee needs from the proposer ***before*** a new locale is added. The proposer is expected to also get a Survey Tool account, and contribute towards the Basic Data. +2. **Basic Data** \- The Core data is just the first step. It is only created under the expectation that people will engage in suppling data, at a [Basic Coverage Level](https://cldr.unicode.org/index/cldr-spec/coverage-levels#h.yi1eiryx7yl4). **If the locale does not meet the [Basic Coverage Level](https://cldr.unicode.org/index/cldr-spec/coverage-levels#h.yi1eiryx7yl4) in the next Survey Tool cycle, the committee may remove the locale.** + +## Core Data + +Collect and submit the following data, using the [Core Data Submission Form](https://docs.google.com/forms/d/e/1FAIpQLSfSyz0VUSXD93IJQQdjzUCnbQwC2nwz6eiLjTaFjASQZzpoSg/viewform). *Note to translators: If you are having difficulties or questions about the following data, please contact us: [file a new bug](https://cldr.unicode.org/index/bug-reports#TOC-Filing-a-Ticket), or post a follow\-up to comment to your existing bug.* + +1. The correct language code according to [Picking the Right Language Identifier](https://cldr.unicode.org/index/cldr-spec/picking-the-right-language-code). +2. The four exemplar sets: main, auxiliary, numbers, punctuation.  + - These must reflect the Unicode model. 
For more information, see [tr35\-general.html\#Character\_Elements](http://www.unicode.org/reports/tr35/tr35-general.html#Character_Elements). +3. Verified country data ( i.e. the population of speakers in the regions (countries) in which the language is commonly used)  + - There must be at least one country, but should include enough others that they cover approximately 75% or more of the users of the language. + - "Users of the language" includes as either a 1st or 2nd language. The main focus is on written language. +4. Default content script and region (normally the region is the country with largest population using that language, and the customary script used for that language in that country).  + - **\[[supplemental/supplementalMetadata.xml](https://github.com/unicode-org/cldr/blob/main/common/supplemental/supplementalMetadata.xml#LC1654:~:text=%3CdefaultContent)]** + - *See*: [http://cldr.unicode.org/translation/translation\-guide\-general/default\-content](https://cldr.unicode.org/translation/translation-guide-general/default-content) +5. The correct time cycle used with the language in the default content region + - In common/supplemental/supplementalData.xml, this is the "timeData" element + - The value should be h (1\-12\), H (0\-23\), k (1\-24\), or K (0\-11\); as defined in [https://www.unicode.org/reports/tr35/tr35\-dates.html\#Date\_Field\_Symbol\_Table](https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table) + +***You must commit to supplying [the data required for the new locale to reach Basic level](https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales#h.yaraq3qjxnns) during the next open CLDR submission when requesting a new locale to be added.*** + +For more information on the other coverage levels refer to [Coverage Levels](https://cldr.unicode.org/index/cldr-spec/coverage-levels)  + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/index/cldr-spec/coverage-levels.md b/docs/site/index/cldr-spec/coverage-levels.md new file mode 100644 index 00000000000..4b70ea1228f --- /dev/null +++ b/docs/site/index/cldr-spec/coverage-levels.md @@ -0,0 +1,108 @@ +--- +title: Coverage Levels +--- + +# Coverage Levels + +There are four main coverage levels as defined in the [UTS \#35: Unicode Locale Data Markup Language (LDML) Part 6: Supplemental: 8 Coverage Levels](https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels). They are described more fully below. + +## Usage + +You can use the file **common/properties/coverageLevels.txt** (added in v41\) for a given release to filter the locales that they support. For example, see [coverageLevels.txt](https://github.com/unicode-org/cldr/blob/main/common/properties/coverageLevels.txt). (This and other links to data files are to the development versions; see the specific version for the release you are working with.) For a detailed chart of the coverage levels, see the [locale\_coverage.html](https://unicode-org.github.io/cldr-staging/charts/43/supplemental/locale_coverage.html) file for the respective release. + +The file format is semicolon delimited, with 3 fields per line. + + +```Locale ID ; Coverage Level ; Name``` + +Each locale ID also covers all the locales that inherit from it. So to get locales at a desired coverage level or above, the following process is used. + +1. Always include the root locale file, **root.xml** +2. Include all of the locale files listed in **coverageLevels.txt** at that level or above. +3. 
Recursively include all other files that inherit from the files in \#2\. + - **Warning**: Inheritance is not simple truncation; the **parentLocale** information in [supplementalData.xml](https://github.com/unicode-org/cldr/blob/main/common/supplemental/supplementalData.xml) needs to be applied also. See [Parent\_Locales](https://www.unicode.org/reports/tr35/tr35.html#Parent_Locales). + - For example, if you include fr.xml in \#2, you would also include fr\_CA.xml; if you include no.xml in \#2 you would also include nn.xml. + +### Filtering + +To filter "at that level or above", you use the fact that basic ⊂ moderate ⊂ modern, so  + +1. to filter for basic and above, filter for basic\|moderate\|modern +2. to filter for moderate and above, filter for moderate\|modern + +### Migration + +As of v43, the files in **/seed/** have been moved to **/common/**. Older versions of CLDR separated some locale files into a 'seed' directory. Some implementations used for filtering, but the criteria for moving from seed to common were not rigorous. To maintain compatibility with the set of locales used from previous versions, an implementation may use the above process for Basic and above, but then also add locales that were previously included. For more information, see [CLDR 43 Release Note](https://cldr.unicode.org/index/downloads/cldr-43).  + +## Core Data + +**The data needed for a new locale to be added. See [Core Data for New Locales](https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales) for details on Core Data and how to submit for new locales.** + +**It is expected that during the next Survey Tool cycle after a new locale is added, the data for the Basic Coverage Level will be supplied.** + +## Basic Data + +**Suitable for locale selection and minimal support, eg. choice of language on mobile phone** + +This includes very minimal data for support of the language: basic dates, times, autonyms: + +1. Delimiter Data —Quotation start/end, including alternates +2. Numbering system — default numbering system \+ native numbering system (if default \= Latin and native ≠ Latin) +3. Locale Pattern Info — Locale pattern and separator, and code pattern +4. Language Names — in the native language for the native language and for English +5. Script Name(s) — Scripts customarily used to write the language +6. Country Name(s) — For countries where commonly used (see "Core XML Data") +7. Measurement System — metric vs UK vs US +8. Full Month and Day of Week names +9. AM/PM period names +10. Date and Time formats +11. Date/Time interval patterns — fallback +12. Timezone baseline formats — region, gmt, gmt\-zero, hour, fallback +13. Number symbols — decimal and grouping separators; plus, minus, percent sign (for Latin number system, plus native if different) +14. Number patterns — decimal, currency, percent, scientific + +## Moderate Data + +**Suitable for “document content” internationalization, eg. content in a spreadsheet** + +Before submitting data above the Basic Level, the following must be in place: + +1. Plural and Ordinal rules + - As in \[supplemental/plurals.xml] and \[supplemental/ordinals.xml] + - Must also include minimal pairs + - For more information, see [cldr\-spec/plural\-rules](https://cldr.unicode.org/index/cldr-spec/plural-rules). +2. 
Casing information (only where the language uses a cased scripts according to [ScriptMetadata.txt](https://github.com/unicode-org/cldr/blob/main/common/properties/scriptMetadata.txt)) + - This will go into [common/casing](https://home.unicode.org/basic-info/projects/#!/repos/cldr/trunk/common/casing/) +3. Collation rules \[non\-Survey Tool] + - This can be supplied as a list of characters, or as rule file. + - The list is a space\-delimited list of the characters used by the language (in the given script). The list may include multiple\-character strings, where those are treated specially. For example, if "ch" is sorted after "h" one might see "a b c d .. g h ch i j ..." + - More sophisticated users can do a better job, supplying a file of rules as in [cldr\-spec/collation\-guidelines](https://cldr.unicode.org/index/cldr-spec/collation-guidelines). +4. The result will be a file like: [common/collation/ar.xml](https://home.unicode.org/basic-info/projects/#!/repos/cldr/trunk/common/collation/ar.xml) or [common/collation/da.xml](https://home.unicode.org/basic-info/projects/#!/repos/cldr/trunk/common/collation/da.xml). + +The data for the Moderate Level includes subsets of the Modern data, both in depth and breadth. + +## Modern Data + +**Suitable for full UI internationalization** + +Before submitting data at the Moderate Level, the following must be in place: + +1. Grammatical Features + 1. The grammatical cases and other information, as in [supplemental/grammaticalFeatures.xml](https://github.com/unicode-org/cldr/blob/main/common/supplemental/grammaticalFeatures.xml) + 2. Must include minimal pair values. +2. Romanization table (non\-Latin scripts only) + 1. This can be supplied as a spreadsheet or as a rule file. + 2. If a spreadsheet, for each letter (or sequence) in the exemplars, what is the corresponding Latin letter (or sequence). + 3. More sophisticated users can do a better job, supplying a file of rules like [transforms/Arabic\-Latin\-BGN.xml](https://home.unicode.org/basic-info/projects/#!/repos/cldr/trunk/common/transforms/Arabic-Latin-BGN.xml). + +The data for the Modern Level includes: + +**\#\#\# TBD** + +## References + +For the coverage in the latest released version of CLDR, see [Locale Coverage Chart](https://unicode-org.github.io/cldr-staging/charts/latest/supplemental/locale_coverage.html). + +To see the development version of the rules used to determine coverage, see [coverageLevels.xml](https://github.com/unicode-org/cldr/blob/main/common/supplemental/coverageLevels.xml). For a list of the locales at a given level, see [coverageLevels.txt](https://github.com/unicode-org/cldr/blob/main/common/properties/coverageLevels.txt).  + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/index/cldr-spec/picking-the-right-language-code.md b/docs/site/index/cldr-spec/picking-the-right-language-code.md new file mode 100644 index 00000000000..3bb83669538 --- /dev/null +++ b/docs/site/index/cldr-spec/picking-the-right-language-code.md @@ -0,0 +1,93 @@ +--- +title: Picking the Right Language Identifier +--- + +# Picking the Right Language Identifier + +Within programs and structured data, languages are indicated with stable identifiers of the form [en](http://unicode.org/cldr/utility/languageid.jsp?a=en), [fr\-CA](http://unicode.org/cldr/utility/languageid.jsp?a=fr-CA), or [zh\-Hant](http://unicode.org/cldr/utility/languageid.jsp?a=zh-Hant&l=en). 
The standard Unicode language identifiers follow IETF BCP 47, with some small differences defined in [UTS \#35: Locale Data Markup Language (LDML)](http://www.unicode.org/reports/tr35/). Locale identifiers use the same format, with certain possible extensions. + +Often it is not clear which language identifier to use. For example, what most people call Punjabi in Pakistan actually has the code '[lah](http://unicode.org/cldr/utility/languageid.jsp?a=lah)', and formal name "Lahnda". There are many other cases where the same name is used for different languages, or where the name that people search for is not listed in the IANA registry. Moreover, a language identifier uses not only the 'base' language code, like '[en](http://unicode.org/cldr/utility/languageid.jsp?a=en)' for English or '[ku](http://unicode.org/cldr/utility/languageid.jsp?a=ku)' for Kurdish, but also certain modifiers such as [en\-CA](http://unicode.org/cldr/utility/languageid.jsp?a=en-CA) for *Canadian English*, or [ku\-Latn](http://ku-Latn) for *Kurdish written in Latin script*. Each of these modifiers are called *subtags* (or sometimes *codes*), and are separated by "\-" or "\_". The language identifier itself is also called a *language tag*, and sometimes a *language code*. + +Here is an example of the steps to take to find the right language identifier to use. Let's say you to find the identifier for a language called "Ganda" which you know is spoken in Uganda. You'll first pick the base language subtag as described below, then add any necessary script/territory subtags, and then verify. If you can't find the name after following these steps or have other questions, ask on the [Unicode CLDR Mailing List](http://www.unicode.org/consortium/distlist.html#cldr_list). + +If you are looking at a prospective language code, like "swh", the process is similar; follow the steps below, starting with the verification. + +## Choosing the Base Language Code + +1. Go to [iso639\-3](http://www-01.sil.org/iso639-3/codes.asp) to find the language. Typically you'll look under **Name** starting with **G** for Ganda. +2. There may be multiple entries for the item you want, so you'll need to look at all of them. For example, on the page for names starting with “P”, there are three records: “Panjabi”, “Mirpur Panjabi” and “Western Panjabi” (it is the last of these that corresponds to Lahnda). You can also try a search, but be [careful](https://cldr.unicode.org/index/cldr-spec/picking-the-right-language-code). +3. You'll find an entry like: + + lug  lug  **lg**  Ganda  Individual  Living  more ... + +While you may think that you are done, you have to verify that the three\-letter code is correct. + +1. Click on the "more..." in this case and you'll find [id\=lug](http://www.sil.org/iso639-3/documentation.asp?id=lug). You can also use the URL http://www.sil.org/iso639\-3/documentation.asp?id\=XXX, where you replace XXX by the three\-letter code. +2. Click on "See corresponding entry in [Ethnologue](http://www.ethnologue.com/show_language.asp?code=lug)." and you get to [code\=lug](http://www.ethnologue.com/show_language.asp?code=lug) +3. Verify that is indeed the language: + 1. Look at the information on the ethnologue page + 2. Check Wikipedia and other web sources +4. ***AND IMPORTANTLY: Review [Caution!](https://cldr.unicode.org/index/cldr-spec/picking-the-right-language-code) below*** + +Once you have the right three\-letter code, you are still not done. Unicode (and BCP 47\) uses the 2 letter ISO code if it exists. 
Unicode also uses the "macro language" where suitable. *So* + +1. Use the two\-letter code if there is one. In the example above, the highlighted "lg" from the first table. +2. Verify that the code is in http://www.iana.org/assignments/language-subtag-registry +3. If the code occurs in http://unicode.org/repos/cldr/trunk/common/supplemental/supplementalMetadata.xml in the type attribute of a languageAlias element, then use the replacement instead. + - For example, because "swh" occurs in \, "sw" must be used instead of "swh". + +## Choosing Script/Territory Subtags + +If you need a particular variant of a language, then you'll add additional subtags, typically script or territory. Consult [Sample Subtags](http://unicode.org/cldr/utility/sample_subtags.html) for the most common choices. ***Again, review*** [***Caution!***](https://cldr.unicode.org/index/cldr-spec/picking-the-right-language-code) ***below.*** + +## Verifying Your Choice + +1. Verify your choice by using the [online language identifier](http://unicode.org/cldr/utility/languageid.jsp) demo. +2. You need to fix the identifier and try again in *any* if the demo shows any of the following: + 1. the language identifer is illegal, or + 2. one of the subtags is invalid, or + 3. there are any replacement values. [\*\*](https://cldr.unicode.org/index/cldr-spec/picking-the-right-language-code) + +## Documenting Your Choice + +If you are requesting a new locale / language in CLDR, please include the links to the particular pages above so that we can process your request more quickly, as we have to double check before any addition. The links will be of the form: + +- http://www.sil.org/iso639-3/documentation.asp?id=xxx +- http://www.ethnologue.com/show_language.asp?code=xxx +- http://en.wikipedia.org/wiki/Western_Punjabi +- and so on + +## Caution! + +### Canonical Form + +Unicode language and locale IDs are based on BCP 47, but differ in a few ways. The canonical form is produced by using the canonicalization based on BCP47 (thus changing iw → he, and zh\-yue → yue), plus a few other steps: + +1. Replacing the most prominent encompassed subtag by the macrolanguage (cmn → zh) +2. Canonicalizing overlong 3 letter codes (eng\-840 → en\-US) +3. Minimizing according to the likely subtag data (ru\-Cyrl → ru, en\-US → en). +4. BCP 47 also provides for "variant subtags", such as [zh\-Latn\-pinyin](http://unicode.org/cldr/utility/languageid.jsp?a=zh-Latn-pinyin). When there are multiple variant subtags, the canonical format for Unicode language identifiers puts them in alphabetical order. + +Note that the CLDR likely subtag data is used to minimize scripts and regions, *not* the IANA Suppress\-Script. The latter had a much more constrained design goal, and is more limited. + +In some cases, systems (or companies) may have different conventions than the Preferred\-Values in BCP 47 \-\- such as those in the Replacement column in the the [online language identifier](http://unicode.org/cldr/utility/languageid.jsp) demo. For example, for backwards compatibility, "iw" is used with Java instead of "he" (Hebrew). When picking the right subtags, be aware of these compatibility issues. *If a target system uses a different canonical form for locale IDs than CLDR, the CLDR data needs to be processed by remapping its IDs to the target system's.* + +For compatibility, it is strongly recommended that all implementations accept both the preferred values and their alternates: for example, both "iw" and "he". 
Although BCP 47 itself only allows "\-" as a separator; for compatibility, Unicode language identifiers allows both "\-" and "\_". Implementations should also accept both. + +### Macrolanguages + +ISO (and hence BCP 47\) has the notion of an individual language (like en \= English) versus a Collection or Macrolanguage. For compatibility, Unicode language and locale identifiers always use the Macrolanguage to identify the predominant form. Thus the Macrolanguage subtag "zh" (Chinese) is used instead of "cmn" (Mandarin). Similarly, suppose that you are looking for Kurdish written in Latin letters, as in Turkey. It is a mistake to think that because that is in the north, that you should use the subtag 'kmr' for Northern Kurdish. You should instead use [ku\-Latn\-TR](http://ku-latn/). See also: [ISO 636 Deprecation Requests](https://cldr.unicode.org/development/development-process/design-proposals/iso-636-deprecation-requests-draft). + +Unicode language identifiers do not allow the "extlang" form defined in BCP 47\. For example, use "yue" instead of "zh\-yue" for Cantonese. + +### Ethnologue + +*When searching, such as* [*site:ethnologue.com ganda*](http://www.google.com/search?q=site%3Aethnologue.com+ganda)*, be sure to completely disregard matches in* [*Ethnologue 14*](http://www.ethnologue.com/14/) *\-\- these are out of date, and do not have the right codes!* + +The Ethnologue is a great source of information, but it must be approached with a certain degree of caution. Many of the population figures are far out of date, or not well substantiated. The Ethnologue also focus on native, spoken languages, whereas CLDR and many other systems are focused on written language, for computer UI and document translation, and on fluent speakers (not necessarily native speakers). So, for example, it would be a mistake to look at http://www.ethnologue.com/show_country.asp?name=EG and conclude that the right language subtag for the Arabic used in Egypt was "arz", which has the largest population. Instead, the right code is "ar", Standard Arabic, which would be the one used for document and UI translation. + +### Wikipedia + +Wikipedia is also a great source of information, but it must be approached with a certain degree of caution as well. Be sure to follow up on references, not just look at articles. + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/index/cldr-spec/plural-rules.md b/docs/site/index/cldr-spec/plural-rules.md new file mode 100644 index 00000000000..6e1b96d7eaf --- /dev/null +++ b/docs/site/index/cldr-spec/plural-rules.md @@ -0,0 +1,335 @@ +--- +title: Plural Rules +--- + +# Plural Rules + +Languages vary in how they handle plurals of nouns or unit expressions ("hour" vs "hours", and so on). Some languages have two forms, like English; some languages have only a single form; and some languages have multiple forms. CLDR uses short, mnemonic tags for these plural categories: + +- zero +- one (singular) +- two (dual) +- few (paucal) +- many (also used for fractions if they have a separate class) +- other (required—general plural form—also used if the language only has a single form) + +*See [Language Plural Rules](https://www.unicode.org/cldr/charts/45/supplemental/language_plural_rules.html) for the categories for each language in CLDR.* + +These categories are used to provide localized units, with a more natural ways of expressing phrases that vary in plural form, such as "1 hour" vs "2 hours". 
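As a rough sketch of how the categories are typically consumed, an implementation such as ICU4J selects a category keyword for a number, and the caller then picks the matching localized pattern (results shown assume the CLDR rules for English and Russian):

```java
import com.ibm.icu.text.PluralRules;
import com.ibm.icu.util.ULocale;

public class PluralCategoryDemo {
    public static void main(String[] args) {
        PluralRules en = PluralRules.forLocale(ULocale.ENGLISH);
        System.out.println(en.select(1));   // "one"   -> "1 hour"
        System.out.println(en.select(2));   // "other" -> "2 hours"

        PluralRules ru = PluralRules.forLocale(new ULocale("ru"));
        System.out.println(ru.select(1));   // "one"
        System.out.println(ru.select(3));   // "few"
        System.out.println(ru.select(5));   // "many"
        System.out.println(ru.select(1.5)); // "other" (fractions)
    }
}
```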
While they cannot express all the intricacies of natural languages, they allow for more natural phrasing than constructions like "1 hour(s)". + +## Reporting Defects + +When you find errors or omissions in this data, please report the information with a [bug report](https://cldr.unicode.org/index/bug-reports#TOC-Filing-a-Ticket). Please give examples of how the forms may differ. You don't have to give the exact rules, but it is extremely helpful! Here's an example:   + +**Sample Bug Report** + +The draft Ukrainian (uk) plural rules are: + +one: 1, 21, 31, 41, 51, 61\... + +few: 2\-4, 22\-24, 32\-34\... + +other: 0, 5\-20, 25\-30, 35\-40\...; 1\.31, 2\.31, 5\.31\... + +Although rules for integer values are correct, there needs to be four categories, + +with an extra one for fractions. For example: + +1 день
+2 дні
+5 днів
+1\.31 дня
+2\.31 дня
+5\.31 дня + +## Determining Plural Categories + +The CLDR plural categories do not necessarily match the traditional grammatical categories. Instead, the categories are determined by changes required in a phrase or sentence if a numeric placeholder changes value.  + +### Minimal pairs + +The categories are verified by looking a minimal pairs: where a change in numeric value (expressed in digits) forces a change in the other words. For example, the following is a minimal pair for English, establishing a difference in category between "1" and "2". + +| Category | Resolved String | Minimal Pair Template | +|---|---|---| +| one | 1 day | {NUMBER} day | +| other | 2 day s | {NUMBER} day s | + +Warning for Vetters + +The Category (Code) values indicate a certain range of numbers that differ between languages. To see the meaning of each Code value for your language see [Language Plural Rules](https://www.unicode.org/cldr/charts/45/supplemental/language_plural_rules.html) chart. + +*The minimal pairs in the Survey Tool are not direct translations of English*. They *may* be translations of English, such as in [German](https://st.unicode.org/cldr-apps/v#/de/MinimalPairs/), but must be different if those words or terms do not show the right plural differences for your language. For example, if we look at [Belarusian](https://st.unicode.org/cldr-apps/v#/be/MinimalPairs/), they are quite different, corresponding to “{0} books in {0} days”, while [Welsh](https://st.unicode.org/cldr-apps/v#/cy/MinimalPairs/43b7793f1f673abe) has the equivalent of “{0} dog, {0} cat”. *Be sure to read the following examples carefully and pay attention to error messages.* + +For example, English has no separate plural form for "sheep". It would be wrong for the two phrases to be:  + +- one: {0} sheep +- other: {0} sheep + +You have to pick a different phrase if that is the case in your language. Do not change the sentence in other ways, such as an "unforced change". For example, don't have the 'one' phrase be "{0} sheep" and the 'other' be "{0} deer". + +The {0} will always have just a number composed of pure digits in it, such as 0, 1, 2, 3, … 11, 12, … 21, 22, .… 99, 100, …. For example, “1 dog, 1 cat” or “21 dog, 21 cat”. If there are multiple instances of {0}, they will always have the same number. The sentences must be parallel, with exactly the same construction except for what is forced by a change in digits. That is, for a language that has "one" and "other" categories:  + +- take the phrase for "other" +- change the {0} to "1" +- make only the other changes to the phrase that are grammatically necessary because of that change +- change the "1" back to "{0}" +- you should then have the phrase for "one" + +Gender is irrelevant. Do not contort your phrasing so that it could cover some (unspecified) item of a different gender. (Eg, don't have “Prenez la {0}re à droite; Prenez le {0}er à droite.”) The exception to that is where two nouns of different genders to cover all plural categories, such as Russian “из {0} книг за {0} дня”. + +Non\-inflecting Nouns—Verbs + +Some languages, like Bengali, do not change the form of the following noun when the numeric value changes. Even where nouns are invariant, other parts of a sentence might change. That is sufficient to establish a minimal pair. 
For example, even if all nouns in English were invariant (like 'fish' or 'sheep'), the verb changes are sufficient to establish a minimal pair: + +| Category | Resolved String | Minimal Pair Template | +|---|---|---| +| one | 1 fish is swimming | {NUMBER} fish is swimming | +| other | 2 fish **are** swimming | {NUMBER} fish **are** swimming | + +Non\-inflecting Nouns—Pronouns + +In other cases, even the verb doesn't change, but *referents* (such as pronouns) change. So a minimal pair in such a language might look something like: + +| Category | Resolved String | Minimal Pair Template | +|---|---|---| +| one | You have 1 fish in your cart; do you want to buy **it**? | You have {NUMBER} fish in your cart; do you want to buy **it**? | +| other | You have 2 fish in your cart; do you want to buy **them**? | You have {NUMBER} fish in your cart; do you want to buy **them**? | + +Multiple Nouns + +In many cases, a single noun doesn't exhibit all the numeric forms. For example, in Welsh the following is a minimal pair that separates 1 and 2: + +| **Category** | **Resolved String** | +|---|---| +| one | 1 ci | +| two | 2 **g**i | + +But the form of this word is the same for 1 and 4\. We need a separate word to get a minimal pair that separates 1 and 4: + +| **Category** | **Resolved String** | +|---|---| +| one | 1 gath | +| two | 1 cath | + +These combine into a single Minimal Pair Template that can be used to separate all 6 forms in Welsh. + +| Category | Resolved String | Minimal Pair Template | +|---|---|---| +| zero | 0 cŵn, 0 cathod | {NUMBER} cŵn, {NUMBER} cathod | +| one | 1 ci, 1 gath | {NUMBER} ci, {NUMBER} gath | +| two | 2 gi, 2 gath | {NUMBER} gi, {NUMBER} gath | +| few | 3 chi, 3 cath | {NUMBER} chi, {NUMBER} cath | +| many | 6 chi, 6 chath | {NUMBER} chi, {NUMBER} chath | +| other | 4 ci, 4 cath | {NUMBER} ci, {NUMBER} cath | + +Russian is similar, needing two different nouns: + +| Category | Resolved String | Minimal Pair Template | +|---|---|---| +| one | из 1 книги за 1 день | из {NUMBER} книги за {NUMBER} день | +| few | из 2 книг за 2 дня | из {NUMBER} книг за {NUMBER} дня | +| many | из 5 книг за 5 дней | из {NUMBER} книг за {NUMBER} дней | +| other | из 1,5 книги за 1,5 дня | из {NUMBER} книги за {NUMBER} дня | + +The minimal pairs are those that are required for correct grammar. So because 0 and 1 don't have to form a minimal pair (it is ok—even though often not optimal—to say "0 people") , 0 doesn't establish a separate category. However, implementations are encouraged to provide the ability to have special plural messages for 0 in particular, so that more natural language can be used: + +- None of your friends are online. +- *rather than* +- You have 0 friends online. + +Fractions + +In some languages, fractions require a separate category. For example, Russian 'other' in the example above. In some languages, they all in a single category with some integers, and in some languages they are in multiple categories. In any case, they also need to be examined to make sure that there are sufficial minimal pairs. + +### Rules + +The next step is to determine the rules: which numbers go into which categories. + +Integers + +Test a variety of integers. Look for cases where the 'teens' (11\-19\) behave differently. Many languages only care about the last 2 digits only, or the last digit only. + +Fractions + +Fractions are often a bit tricky to determine: languages have very different behavior for them. 
In some languages the fraction is ignored (when selecting the category), in some languages the final digits of the fraction are important, in some languages a number changes category just if there are visible trailing zeros. Make sure to try out a range of fractions to make sure how the numbers behave: values like 1 vs 1\.0 may behave differently, as may numbers like 1\.1 vs 1\.2 vs 1\.21, and so on. + +### Choosing Plural Category Names + +In some sense, the names for the categories are somewhat arbitrary. Yet for consistency across languages, the following guidelines should be used when selecting the plural category names. + +1. If no forms change, then stop (there are no plural rules — everything gets '**other**') +2. '**one**': Use the category '**one**' for the form used with 1\. +3. '**other**': Use the category '**other**' for the form used with the most integers. +4. '**two**': Use the category '**two**' for the form used with 2, *if it is limited to numbers whose integer values end with '2'.* + - If everything else has the same form, stop (everything else gets '**other**') +5. '**zero**': Use the category '**zero**' for the form used with 0, *if it is limited to numbers whose integer values end with '0'.* + - If everything else has the same form, stop (everything else gets '**other**') +6. '**few**': Use the category '**few**' for the form used with the least remaining number (such as '4') + - If everything else has the same form, stop (everything else gets '**other**') +7. '**many**': Use the category '**many**' for the form used with the least remaining number (such as '10') + - If everything else has the same form, stop (everything else gets '**other**') + - If there needs to be a category for items only have fractional values, use '**many**' +8. If there are more categories needed for the language, describe what those categories need to cover in the bug report. + +See [*Language Plural Rules*](http://www.unicode.org/cldr/data/charts/supplemental/language_plural_rules.html) for examples of rules, such as for [Czech](https://www.unicode.org/cldr/charts/45/supplemental/language_plural_rules.html#cs), and for [comparisons of values](https://www.unicode.org/cldr/charts/45/supplemental/language_plural_rules.html#cs-comp). Note that in the integer comparison chart, most languages have 'x' (other—gray) for most integers. There are some exceptions (Russian and Arabic, for example), where the categories of 'many' and 'other' should have been swapped when they were defined, but are too late now to change. + +## Important Notes + +*These categories are only mnemonics \-\- the names don't necessarily imply the exact contents of the category.* For example, for both English and French the number 1 has the category one (singular). In English, every other number has a plural form, and is given the category other. French is similar, except that the number 0 also has the category one and not other or zero, because the form of units qualified by 0 is also singular. + +*This is worth emphasizing:* A common mistake is to think that "one" is only for only the number 1\. Instead, "one" is a category for any number that behaves like 1\. So in some languages, for example, one → numbers that end in "1" (like 1, 21, 151\) but that don't end in 11 (like "11, 111, 10311\). + +Note that these categories may be different from the forms used for pronouns or other parts of speech. 
*In particular, they are solely concerned with changes that would need to be made if different numbers, expressed with decimal digits,* are used with a sentence. If there is a dual form in the language, but it isn't used with decimal numbers, it should not be reflected in the categories. That is, the key feature to look for is:  + +If you were to substitute a different number for "1" in a sentence or phrase, would the rest of the text be required to change? For example, in a caption for a video: + + "Duration: 1 hour" → "Duration: 3\.2 hours" + +## Plural Rule Syntax + +See [LDML Language Plural Rules](http://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules). + +## Plural Message Migration + +The plural categories are used not only within CLDR, but also for localizing messages for different products. When the plural rules change (such as in [CLDR 24](https://cldr.unicode.org/index/downloads/cldr-24-release-note)), the following issues should be considered. Fractional support in plurals is new in CLDR 24\. Because the fractions didn't work before, the changes in categories from 23 to 24 should not cause an issue for implementations. The other changes can be categorized as Splitting or Merging categories. + +There are some more complicated cases, but the following outlines the main issues to watch for, using examples. For illustration, assume a language uses "" for singular, "u" for dual, and "s" for other.​ ​ + +- **OLD Rules \& OLD Messages** marks the situation before the change, +- **NEW Rules \& OLD Messages** marks the situation after the change (but before any fixes to messages), and +- **NEW Rules \& NEW Messages** shows the changes to the messages + +### Merging + +The language really doesn't need 3 cases, because the dual is always identical to one of the other forms.  + +**OLD Rules \& OLD Messages** + +one: book + +two: books + +other: books + +1  ➞ book, 2 ➞ books, 3 ➞ ​ books​ + +**NEW Rules \& OLD or NEW Messages** + +one: book + +other: books + +1  ➞ book, 2 ➞ books, 3  ➞​ books​ + +This is fairly harmless; merging two of the categories shouldn't affect anyone because the messages for the merged category should not have material differences. The old messages for 'two' are ignored in processing. They could be deleted if desired. + +This was done in CLDR 24 for Russian, for example. + +### Splitting Other + +In this case, the 'other' needs to be fixed by moving some numbers to a 'two' category. The way plurals are defined in CLDR, when a message (eg for 'two') is missing, it always falls back to 'other'. So the translation is no worse than before. There are two subcases. + +Specific Other Message + +In this case, the *other* message is appropriate for the other case, and not for the new 'two' case. + +**OLD Rules \& OLD Messages** + +one: book + +other: books + +1  ➞ book, 2 ➞ books, 3  ➞​ books​ + +**NEW Rules \& OLD Messages** + +one: book + +two: **books** + +other: books + +1  ➞ book, 2 ➞ **books**, 3  ➞​ books​ + +The quality is no different than previously. 
The message can be improved by adding the correct message for 'two', so that the result is: + +**NEW Rules \& NEW Messages** + +one: book + +two: booku + +other: books + +1  ➞ book, 2 ➞ **booku**, 3  ➞​ books​ + +***However, if the translated message is not missing, but has some special text like "UNUSED MESSAGE", then it will need to be fixed; otherwise the special text will show up to users!*** + +Generic Other Message + +In this case, the *other* message was written to be generic by trying to handle (with parentheses or some other textual device) both the plural and dual categories. + +**OLD Rules \& OLD Messages** + +one: book + +other: book(u/s) + +1  ➞ book, 2 ➞ **book(u/s)**, 3  ➞​ **book(u/s)** + +**NEW Rules \& OLD Messages** + +one: book + +two: book(u/s) + +other: book(u/s) + +1  ➞ book, 2 ➞ **book(u/s)**, 3  ➞​ **book(u/s)** + +The message can be improved by adding a message for 'two', and fixing the message for 'other' to not have the (u/s) workaround: + +**NEW Rules \& NEW Messages** + +one: book + +two: booku + +other: books + +1  ➞ book, 2 ➞ booku, 3  ➞​ books + +### Splitting Non\-Other + +In this case, the 'one' category needs to be fixed by moving some numbers to a 'two' category. + +**OLD Rules \& OLD Messages** + +one: book/u + +other: books + +1  ➞ book/u, 2 ➞ book/u, 3  ➞​ books​ + +**NEW Rules \& OLD Messages** + +one: book/u + +other: books + +1  ➞ **book/u**, 2 ➞ **books**, 3  ➞​ books​ + +This is the one case where there is a regression in quality. In order to fix the problem, the message for 'two' needs to be fixed. If the messages for 'one' was written to be generic, then it needs to be fixed as well. + +**NEW Rules \& NEW Messages** + +one: book + +two: booku + +other: books + +1  ➞ **book**, 2 ➞ **booku**, 3  ➞​ books​ + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/index/cldr-spec/transliteration-guidelines.md b/docs/site/index/cldr-spec/transliteration-guidelines.md new file mode 100644 index 00000000000..9cf742bff67 --- /dev/null +++ b/docs/site/index/cldr-spec/transliteration-guidelines.md @@ -0,0 +1,354 @@ +--- +title: Unicode Transliteration Guidelines +--- + +# Unicode Transliteration Guidelines + +## Introduction + +*This document describes guidelines for the creation and use of CLDR transliterations. Please file any feedback on this document or those charts at [Locale Bugs](https://github.com/unicode-org/cldr/blob/main/docs/requesting_changes.md).* + +Transliteration is the general process of converting characters from one script to another, where the result is roughly phonetic for languages in the target script. For example, "Phobos" and "Deimos" are transliterations of Greek mythological "Φόβος" and "Δεῖμος" into Latin letters, used to name the moons of Mars. + +Transliteration is *not* translation. Rather, transliteration is the conversion of letters from one script to another without translating the underlying words. The following shows a sample of transliteration systems: + +Sample Transliteration Systems +| Source | Translation | Transliteration | System | +|:---:|:---:|:---:|:---:| +| Αλφαβητικός | Alphabetic | Alphabētikós | Classic | +| | | Alfavi̱tikós | UNGEGN | +| しんばし | new bridge (district in Tokyo) | shimbashi | Hepburn | +| | | sinbasi | Kunrei | +| яйца Фаберже | Fabergé eggs | yaytsa Faberzhe | BGN/PCGN | +| | | jajca Faberže | Scholarly | +| | | âjca Faberže | ISO | + +***Display**. 
Some of the characters in this document may not be visible in your browser, and with some fonts the diacritics will not be correctly placed on the base letters. See [Display Problems](http://www.unicode.org/help/display_problems.html).* + +While an English speaker may not recognize that the Japanese word kyanpasu is equivalent to the English word campus, the word kyanpasu is still far easier to recognize and interpret than if the letters were left in the original script. There are several situations where this transliteration is especially useful, such as the following. See the sidebar for examples. + +- When a user views names that are entered in a world\-wide database, it is extremely helpful to view and refer to the names in the user's native script. +- When the user performs searching and indexing tasks, transliteration can retrieve information in a different script. +- When a service engineer is sent a program dump that is filled with characters from foreign scripts, it is much easier to diagnose the problem when the text is transliterated and the service engineer can recognize the characters. + +Sample Transliterations +| Source | Transliteration | +|---|---| +| 김, 국삼 | Gim, Gugsam | +| 김, 명희 | Gim, Myeonghyi | +| 정, 병호 | Jeong, Byeongho | +| ... | ... | +| たけだ, まさゆき | Takeda, Masayuki | +| ますだ, よしひこ | Masuda, Yoshihiko | +| やまもと, のぼる | Yamamoto, Noboru | +| ... | ... | +| Ρούτση, Άννα | Roútsē, Ánna | +| Καλούδης, Χρήστος | Kaloúdēs, Chrḗstos | +| Θεοδωράτου, Ελένη | Theodōrátou, Elénē | + +The term *transliteration* is sometimes given a narrow meaning, implying that the transformation is *reversible* (sometimes called *lossless*). In CLDR this is not the case; the term *transliteration* is interpreted broadly to mean both reversible and non\-reversible transforms of text. (Note that even if theoretically a transliteration system is supposed to be reversible, in source standards it is often not specified in sufficient detail in the edge cases to actually be reversible.) A non\-reversible transliteration is often called a *transcription*, or called a *lossy* or *ambiguous* transcription. + +Note that reversibility is generally only in one direction, so a transliteration from a native script to Latin may be reversible, but not the other way around. For example, Hangul is reversible, in that any Hangul to Latin to Hangul should provide the same Hangul as the input. Thus we have the following: + + 갗 → gach → 갗 + +However, for completeness, many Latin characters have fallbacks. This means that more than one Latin character may map to the same Hangul. Thus from Latin we don't have reversibility, because two different Latin source strings round\-trip back to the same Latin string. + + gach → 갗 → gach + + gac → 갗 → gach + +Transliteration can also be used to convert unfamiliar letters within the same script, such as converting Icelandic THORN (þ) to th. These are not typically reversible. + + *There is an online demo using released CLDR data at [ICU Transform Demo](https://icu4c-demos.unicode.org/icu-bin/translit).* + +## Variants + +There are many systems for transliteration between languages: the same text can be transliterated in many different ways. For example, for the Greek example above, the transliteration is classical, while the [UNGEGN](https://arhiiv.eki.ee/wgrs/) alternate has different correspondences, such as φ → f instead of φ → ph. 
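For instance, a minimal sketch of how such variants are selected through ICU's transliterator IDs (the /UNGEGN suffix picks the alternate system; exact output depends on the CLDR/ICU data version):

```java
import com.ibm.icu.text.Transliterator;

public class GreekVariantDemo {
    public static void main(String[] args) {
        // Default Greek-to-Latin transform: φ → ph
        Transliterator classic = Transliterator.getInstance("Greek-Latin");
        // UNGEGN variant of the same transform: φ → f
        Transliterator ungegn = Transliterator.getInstance("Greek-Latin/UNGEGN");

        System.out.println(classic.transliterate("Αλφαβητικός")); // e.g. Alphabētikós
        System.out.println(ungegn.transliterate("Αλφαβητικός"));  // e.g. Alfavi̱tikós
    }
}
```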
+ +CLDR provides for generic mappings from script to script (such as Cyrillic\-Latin), and also language\-specific variants (Russian\-French, or Serbian\-German). There can also be semi\-generic mappings, such as Russian\-Latin or Cyrillic\-French. These can be referred to, respectively, as script transliterations, language\-specific transliterations, or script\-language transliterations. Transliterations from other scripts to Latin are also called *Romanizations*. + +Even within particular languages, there can be variant systems according to different authorities, or even varying across time (if the authority for a system changes its recommendation). The canonical identifier that CLDR uses for these has the form: + + *source\-target/variant* + +The source (and target) can be a language or script, either using the English name or a locale code. The variant should specify the authority for the system, and if necessary for disambiguation, the year. For example, the identifier for the Russian to Latin transliteration according to the UNGEGN system would be: + +- ru\-und\_Latn/UNGEGN, or +- Russian\-Latin/UNGEGN + +If there were multiple versions of these over time, the variant would be, say, UNGEGN2006\. + +The assumption is that implementations will allow the use of fallbacks, if the exact transliteration specified is unavailable. For example, the following would be the fallback chain for the identifier Russian\-English/UNGEGN. This is similar to the *Lookup Fallback Pattern* used in [BCP 47 Tags for Identifying Languages](https://www.rfc-editor.org/info/bcp47), except that it uses a "stepladder approach" to progressively handle the fallback among source, target, and variant, with priorities being the target, source, and variant, in that order. + +- Russian\-English/UNGEGN +- Russian\-English +- Cyrillic\-English/UNGEGN +- Cyrillic\-English +- Russian\-Latin/UNGEGN +- Russian\-Latin +- Cyrillic\-Latin/UNGEGN +- Cyrillic\-Latin + +## Guidelines + +There are a number of generally desirable guidelines for script transliterations. These guidelines are rarely satisfied simultaneously, so constructing a reasonable transliteration is always a process of balancing different requirements. These requirements are most important for people who are building transliterations, but are also useful as background information for users. + +The following lists the general guidelines for Unicode CLDR transliterations: + +- *standard*: follow established systems (standards, authorities, or de facto practice) where possible, deviating sometimes where necessary for reversibility. In CLDR, the systems are generally described in the comments in the XML data files found in the in the [transforms](https://github.com/unicode-org/cldr/tree/main/common/transforms) folder online. For example, the system for Arabic transliteration in CLDR are found in the comments in [Arabic\-Latin.xml](https://github.com/unicode-org/cldr/blob/main/common/transforms/Arabic-Latin.xml); there is a reference to the [UNGEGN Arabic Tables](https://arhiiv.eki.ee/wgrs/rom1_ar.pdf). Similarly for Hebrew, which also follows the [Hebrew UNGEGN Tables](https://arhiiv.eki.ee/wgrs/rom1_he.pdf). +- *complete*: every well\-formed sequence of characters in the source script should transliterate to a sequence of characters from the target script, and vice versa. +- *predictable*: the letters themselves (without any knowledge of the languages written in that script) should be sufficient for the transliteration, based on a relatively small number of rules. 
This allows the transliteration to be performed mechanically. +- *pronounceable*: the resulting characters have reasonable pronunciations in the target script. Transliteration is not as useful if the process simply maps the characters without any regard to their pronunciation. Simply mapping by alphabetic order ("αβγδεζηθ..." to "abcdefgh...") could yield strings that might be complete and unambiguous, but the pronunciation would be completely unexpected. +- *reversible*: it is possible to recover the text in the source script from the transliteration in the target script. That is, someone that knows the transliteration rules would be able to recover the precise spelling of the original source text. For example, it is possible to go from *Elláda* back to the original Ελλάδα, while if the transliteration were *Ellada* (with no accent), it would not be possible. + +Some of these principles may not be achievable simultaneously; in particular, adherence to a standard system *and* reversibility. Often small changes in existing systems can be made to accommodate reversibility. However, where a particular system specifies a fundamentally non\-reversible transliterations, those transliterations as represented in CLDR may not be reversible. + +### Ambiguity + +In transliteration, multiple characters may produce ambiguities (non\-reversible mappings) unless the rules are carefully designed. For example, the Greek character PSI (ψ) maps to ps, but ps could also result from the sequence PI, SIGMA (πσ) since PI (π) maps to p and SIGMA (σ) maps to s. + +The Japanese transliteration standards provide a good mechanism for handling these kinds of ambiguities. Using the Japanese transliteration standards, whenever an ambiguous sequence in the target script does not result from a single letter, the transform uses an apostrophe to disambiguate it. For example, it uses that procedure to distinguish between *man'ichi* and *manichi*. Using this procedure, the Greek character PI SIGMA (πσ) maps to p's. This method is recommended for all script transliteration methods, although sometimes the character may vary: for example, "\-" is used in Korean. + +**Note**: We've had a recent proposal to consistently use the hyphenation dot for this code, thus we'd have πσ → p‧s. + +A second problem is that some characters in a target script are not normally found outside of certain contexts. For example, the small Japanese "ya" character, as in "kya" (キャ), is not normally found in isolation. To handle such characters, the Unicode transliterations currently use different conventions. + +- Tilde: "ャ" in isolation is represented as "\~ya" +- Diacritics: Greek "ς" in isolation is represented as s̱ + +**Note**: The CLDR committee is considering converging on a common representation for this. The advantage of a common representation is that it allows for easy filtering. + +For the default script transforms, the goal is to have unambiguous mappings, with variants for any common use mappings that are ambiguous (non\-reversible). In some cases, however, case may not be preserved. For example, + +| Latin | Greek | Latin | +|:---:|:---:|:---:| +| ps PS | ψ Ψ | ps PS | +| psa Psa **PsA** | ψα Ψα **ΨΑ** | psa Psa **PSA** | +| psA PSA **PSa** | ψΑ ΨΑ **Ψα** | psA PSA **Psa** | + +The following shows Greek text that is mapped to fully reversible Latin: + +| **Greek\-Latin** | | +|---|---| +| τί φῄς; γραφὴν σέ τις, ὡς ἔοικε, γέγραπται: οὐ γὰρ ἐκεῖνό γε καταγνώσομαι, ὡς σὺ ἕτερον. 
| tí phḗis; graphḕn sé tis, hōs éoike, gégraptai: ou gàr ekeînó ge katagnṓsomai, hōs sỳ héteron. | + +If the user wants a version without certain accents, then CLDR's chaining rules can be used to remove the accents. For example, the following transliterates to Latin but removes the macron accents on the long vowels. + +| **Greek\-Latin; nfd; \[\\u0304] remove; nfc** | | +|---|---| +| τί φῄς; γραφὴν σέ τις, ὡς ἔοικε, γέγραπται: οὐ γὰρ ἐκεῖνό γε καταγνώσομαι, ὡς σὺ ἕτερον. | tí phéis; graphèn sé tis, hos éoike, gégraptai: ou gàr ekeînó ge katagnósomai, hos sỳ héteron. | + +The above chaining rules, separated by semi\-colons, perform the following commands in order: + +| Rule | Description | +|---|---| +| Greek-Latin | transliterate Greek to Latin | +| nfd | convert to Unicode NFD format (separating accents from base characters) | +| [\u0304] remove | remove accents, but filter the command to only apply to a single character: [U+0304](http://unicode.org/cldr/utility/character.jsp?a=0304) ( ̄ ) COMBINING MACRON | +| nfc | convert to Unicode NFC format (rejoining accents to base characters) | + +The following transliterates to Latin but removes *all* accents. Note that the only change is to expand the filter for the remove command. + +| **Greek\-Latin; nfd; \[:nonspacing marks:] remove; nfc** | | +|---|---| +| τί φῄς; γραφὴν σέ τις, ὡς ἔοικε, γέγραπται: οὐ γὰρ ἐκεῖνό γε καταγνώσομαι, ὡς σὺ ἕτερον. | ti pheis; graphen se tis, hos eoike, gegraptai: ou gar ekeino ge katagnosomai, hos sy heteron. | + +### Pronunciation + +Standard transliteration methods often do not follow the pronunciation rules of any particular language in the target script. For example, the Japanese Hepburn system uses a "j" that has the English phonetic value (as opposed to French, German, or Spanish), but uses vowels that do not have the standard English sounds. A transliteration method might also require some special knowledge to have the correct pronunciation. For example, in the Japanese kunrei\-siki system, "ti" is pronounced as English "chee". + +This is similar to situations where there are different languages within the same script. For example, knowing that the word *Gewalt* comes from German allows a knowledgeable reader to pronounce the "w" as a "v".  When encountering a foreign word like *jawa*, there is little assurance how it is to be pronounced even when it is not a transliteration (it is just from /span\>another Latin\-script language). The *j* could be pronounced (for an English speaker) as in *jump*, or *Junker*, or *jour*; and so on. Transcriptions are only roughly phonetic, and only so when the specific pronunciation rules are understood. + +The pronunciation of the characters in the original script may also be influenced by context, which may be particularly misleading in transliteration. For, in the Bengali নিঃশব, transliterated as niḥśaba, the *visarga ḥ* is not pronounced itself (whereas elsewhere it may be) but lengthens the ś sound, and the final inherent *a* is pronounced (whereas it commonly is not), and the two inherent a's are pronounced as ɔ and ô, respectively. + +In some cases, transliteration may be heavily influenced by tradition. For example, the modern Greek letter beta (β) sounds like a "v", but a transliteration may use a b (as in biology). In that case, the user would need to know that a "b" in the transliterated word corresponded to beta (β) and is to be pronounced as a v in modern Greek. 
+ +Letters may also be transliterated differently according to their context to make the pronunciation more predictable. For example, since the Greek sequence GAMMA GAMMA (γγ) is pronounced as *ng*, the first GAMMA can be transcribed as an "n" in that context. Similarly, the transliteration can give other guidance to the pronunciation in the source language, for example, using "n" or "m" for the same Japanese character (ん) depending on context, even though there is no distinction in the source script. + +In general, predictability means that when transliterating Latin script to other scripts using reversible transliterations, English text will not produce phonetic results. This is because the pronunciation of English cannot be predicted easily from the letters in a word: e.g. *grove*, *move*, and *love* all end with "ove", but are pronounced very differently. + +### Cautions + +Reversibility may require modifications of traditional transcription methods. For example, there are two standard methods for transliterating Japanese katakana and hiragana into Latin letters. The *kunrei\-siki* method is unambiguous. The Hepburn method can be more easily pronounced by foreigners but is ambiguous. In the Hepburn method, both ZI (ジ) and DI (ヂ) are represented by "ji" and both ZU (ズ) and DU (ヅ) are represented by "zu". A slightly amended version of Hepburn, that uses "dji" for DI and "dzu" for DU, is unambiguous. + +When a sequence of two letters map to one, case mappings (uppercase and lowercase) must be handled carefully to ensure reversibility. For cased scripts, the two letters may need to have different cases, depending on the next letter. For example, the Greek letter PHI (Φ) maps to PH in Latin, but Φο maps to Pho, and not to PHo. + +Some scripts have characters that take on different shapes depending on their context. Usually, this is done at the display level (such as with Arabic) and does not require special transliteration support. However, in a few cases this is represented with different character codes, such as in Greek and Hebrew. For example, a Greek SIGMA is written in a final form (ς) at the end of words, and a non\-final form (σ) in other locations. This also requires the transform to map different characters based on the context. + +Another thing to look out for when dealing with cased scripts is that some of the characters in the target script may not be able to represent case distinctions, such as some of the IPA characters in the Latin script. + +It is useful for the reverse mapping to be complete so that arbitrary strings in the target script can be reasonably mapped back to the source script. Complete reverse mapping makes it much easier to do mechanical quality checks and so on. For example, even though the letter "q" might not be necessary in a transliteration of Greek, it can be mapped to a KAPPA (κ). Such reverse mappings will not, in general, be unambiguous. + +## Available Transliterations + +Currently Unicode CLDR offers Romanizations for certain scripts, plus transliterations between the Indic scripts (excluding Urdu). Additional script transliterations will be added in the future. + +Except where otherwise noted, all of these systems are designed to be reversible. For bicameral scripts (those with uppercase and lowercase), however, case may not be completely preserved. + +The transliterations are also designed to be complete for any sequence of the Latin letters a\-z. 
A fallback is used for a letter that is not covered by the transliteration, and default letters may be inserted as required. For example, in the Hangul transliteration, rink → 린크 → linkeu. That is, "r" is mapped to the closest other letter, and a default vowel is inserted at the end (since "nk" cannot end a syllable). + +*Preliminary [charts](http://www.unicode.org/cldr/data/charts/transforms/index.html) are available for the available transliterations. Be sure to read the known issues described there.* + +### Korean + +There are many Romanizations of Korean. The default transliteration in Unicode CLDR follows the [Korean Ministry of Culture \& Tourism Transliteration](http://www.korean.go.kr/06_new/rule/rule06.jsp) regulations (see also [English summary](https://web.archive.org/web/20070916025652/http://www.korea.net/korea/kor_loca.asp?code=A020303)). There is an optional clause 8 variant for reversibility: + +"제 8 항 학술 연구 논문 등 특수 분야에서 한글 복원을 전제로 표기할 경우에는 한글 표기를 대상으로 적는다. 이때 글자 대응은 제2장을 따르되 'ㄱ, ㄷ, ㅂ, ㄹ'은 'g, d, b, l'로만 적는다. 음가 없는 'ㅇ'은 붙임표(\-)로 표기하되 어두에서는 생략하는 것을 원칙으로 한다. 기타 분절의 필요가 있을 때에도 붙임표(\-)를 쓴다." + +*translation*: "Clause 8: When it is required to recover the original Hangul representation faithfully as in scholarly articles, ' ㄱ, ㄷ, ㅂ, ㄹ' must be always romanized as 'g, d, b, l' while the mapping for the rest of the letters remains the same as specified in clause 2\. The placeholder 'ㅇ' at the beginning of a syllable should be represented with '\-', but should be omitted at the beginning of a word. In addition, '\-' should be used in other cases where a syllable boundary needs to be explicitly marked (be disambiguated." + +There are a number of cases where this Romanization may be ambiguous, because sometimes multiple Latin letters map to a single entity (jamo) in Hangul. This happens with vowels and consonants, the latter being slightly more complicated because there are both initial and final consonants: + +| Type | Multi-Character Consonants | +|---|---| +| Initial-Only | tt pp jj | +| Initial-or-Final | kk ch ss | +| Final-Only | gs nj nh lg lm lb ls lt lp lh bs ng | + +CLDR uses the following rules for disambiguation of the possible boundaries between letters, in order. The first rule comes from Clause 8\. + +1. Don't break so as to require an implicit vowel or null consonant (if possible) +2. Don't break within Initial\-Only or Initial\-Or\-Final sequences (if possible) +3. Favor longest match first. + +If there is a single consonant between vowels, then Rule \#1 will group it with the following vowel if there is one (this is the same as the first part of Clause 8\). If there is a sequence of four consonants between vowels, then there is only one possible break (with well\-formed text). So the only ambiguities lie with two or three consonants between vowels, where there are possible multi\-character consonants involved. Even there, in most cases the resolution is simple, because there isn't a possible multi\-character consonant in the case of two, or two possible multi\-character consonants in the case of 3\. For example, in the following cases, the left side is unambiguous: + + angda \= ang\-da → 앙다 + + apda \= ap\-da → 앞다 + +There are a relatively small number of possible ambiguities, listed below using "a" as a sample vowel. + +| No. of Cons. 
| Latin | CLDR Disambiguation | Hangul | Comments | | +|---|---|---|---|---|---| +| 2 | atta | = a-tta | 아따 | Rule 1, then 2 | | +| | appa | = a-ppa | 아빠 | | | +| | ajja | = a-jja | 아짜 | | | +| | akka | = a-kka | 아까 | Rule 1, then 2 | | +| | assa | = a-ssa | 아싸 | | | +| | acha | = a-cha | 아차 | | | +| | agsa | = ag-sa | 악사 | Rule 1 | | +| | anja | = an-ja | 안자 | | | +| | anha | = an-ha | 안하 | | | +| | alga | = al-ga | 알가 | | | +| | alma | = al-ma | 알마 | | | +| | alba | = al-ba | 알바 | | | +| | alsa | = al-sa | 알사 | | | +| | alta | = al-ta | 알타 | | | +| | alpa | = al-pa | 알파 | | | +| | alha | = al-ha | 알하 | | | +| | absa | = ab-sa | 압사 | | | +| | anga | = an-ga | 안가 | | | +| 3 | agssa | = ag-ssa | 악싸 | Rule 1, then 2 | | +| | anjja | = an-jja | 안짜 | | | +| | alssa | = al-ssa | 알싸 | | | +| | altta | = al-tta | 알따 | | | +| | alppa | = al-ppa | 알빠 | | | +| | abssa | = ab-ssa | 압싸 | | | +| | akkka | = akk-ka | 앆카 | Rule 1, then 2, then 3 | | +| | asssa | = ass-sa | 았사 | | | + +For vowel sequences, the situation is simpler. Only Rule \#3 applies, so aeo \= ae\-o → 애오. + +### Japanese + +The default transliteration for Japanese uses the a slight variant of the Hepburn system. With Hepburn system, both ZI (ジ) and DI (ヂ) are represented by "ji" and both ZU (ズ) and DU (ヅ) are represented by "zu". This is amended slightly for reversibility by using "dji" for DI and "dzu" for DU. + +### Greek + +The default transliteration uses a standard transcription for Greek which is aimed at preserving etymology. The ISO 843 variant includes following differences: + +| Greek | Default | ISO 843 | +|---|---|---| +| β | b | v | +| γ* | n | g | +| η | ē | ī | +| ̔ | h | (omitted) | +| ̀ | ̀ | (omitted) | +| ~ | ~ | (omitted) | + +\* before γ, κ, ξ, χ + +### Cyrillic + +Cyrillic generally follows ISO 9 for the base Cyrillic set. There are tentative plans to add extended Cyrillic characters in the future, plus variants for GOST and other national standards. + +### Indic + +Transliteration of Indic scripts follows the ISO 15919 *Transliteration of Devanagari and related Indic scripts into Latin characters*. Internally, all Indic scripts are transliterated by converting first to an internal form, called Inter\-Indic, then from Inter\-Indic to the target script. Inter\-Indic thus provides a pivot between the different scripts, and contains a superset of correspondences for all of them. + +ISO 15919 differs from ISCII 91 in application of diacritics for certain characters. These differences are shown in the following example (illustrated with Devanagari, although the same principles apply to the other Indic scripts): + +| Devanagari | ISCII 91 | ISO 15919 | +|---|---|---| +| ऋ | ṛ | r̥ | +| ऌ | ḻ | l̥ | +| ॠ | ṝ | r̥̄ | +| ॡ | ḻ̄ | l̥̄ | +| ढ़ | d̂ha | ṛha | +| ड़ | d̂a | ṛa | + +Transliteration rules from Indic to Latin are reversible with the exception of the ZWJ and ZWNJ used to request explicit rendering effects. For example: + +| Devanagari | Romanization | Note | +|---|---|---| +| क्ष | kṣa | normal | +| क्‍ष | kṣa | explicit halant requested | +| क्‌ष | kṣa | half-consonant requested | + +Transliteration between Indic scripts are roundtrip where there are corresponding letters. Otherwise, there may be fallbacks. + +There are two particular instances where transliterations may produce unexpected results: (1\) where the final vowel is suppressed in speech, and (2\) with the transliteration of 'c'. 
+ +For example: + +| Devanagari | Romanization | Notes | +|---|---|---| +| सेन्गुप्त | Sēngupta | | +| सेनगुप्त | Sēnagupta | The final 'a' is not pronounced | +| मोनिक | Monika | | +| मोनिच | Monica | The 'c' is pronounced "ch" | + +### Others + +Unicode CLDR provides other transliterations based on the [U.S. Board on Geographic Names](https://www.usgs.gov/us-board-on-geographic-names) (BGN) transliterations. These are currently unidirectional — to Latin only. The goal is to make them bidirectional in future versions of CLDR. + +Other transliterations are generally based on the [UNGEGN: Working Group on Romanization Systems](https://arhiiv.eki.ee/wgrs/) transliterations. These systems are in wider actual implementation than most ISO standardized transliterations, and are published freely available on the web () and thus easily accessible to all. The UNGEGN also has good documentation. For example, the [UNGEGN Arabic Tables](https://arhiiv.eki.ee/wgrs/rom1_ar.pdf) not only presents the UN system, but compares it with the BGN/PCGN 1956 system, the I.G.N. System 1973, ISO 233:1984, the royal Jordanian Geographic Centre System, and the Survey of Egypt System. + +## Submitting Transliterations + +If you are interested in providing transliterations for one or more scripts, file an initial bug report at [*Locale Bugs*](http://www.unicode.org/cldr/bugs/locale-bugs). The initial bug should contain the scripts and or languages involved, and the system being followed (with a link to a full description of the proposed transliteration system), and a brief example. The proposed data can also be in that bug, or be added in a Reply to that bug. You can also file a bug in [*Locale Bugs*](http://www.unicode.org/cldr/bugs/locale-bugs) if you find a problem in an existing transliteration. + +For submission to CLDR, the data needs to supplied in the correct XML format or in the ICU format, and should follow an accepted standard (like UNGEGN, BGN, or others). + +- The format for rules is specified in [Transform\_Rules](http://www.unicode.org/reports/tr35/#Transform_Rules). It is best if the results are tested using the [ICU Transform Demo](https://icu4c-demos.unicode.org/icu-bin/translit) first, since if the data doesn't validate it would not be accepted into CLDR. +- As mentioned above, even if a transliteration is only used in certain countries or contexts CLDR can provide for them with different variant tags. +- For comparison, you can see what is currently in CLDR in the [transforms]() folder online. For example, see [Hebrew\-Latin.xml](). +- Script transliterators should cover every character in the exemplar sets for the CLDR locales using that script. +- Romanizations (Script\-Latin) should cover all the ASCII letters (some of these can be fallback mappings, such as the 'x' below). +- If the rules are very simple, they can be supplied in a spreadsheet, with two columns, such as + +| Shavian | Relation | Latin | Comments | +|:---:|:---:|:---:|---| +| \𐑐 | ↔ | p | Map all uppercase to lowercase first | +| \𐑚 | ↔ | b | | +| \𐑑 | ↔ | t | | +| \𐑒\𐑕 | ← | x | fallback | +| ... | | | | + +## More Information + +For more information, see: + +- BGN: [U.S. Board on Geographic Names](https://www.usgs.gov/us-board-on-geographic-names) +- UNGEGN: [UNITED NATIONS GROUP OF EXPERTS ON GEOGRAPHICAL NAMES: Working Group on Romanization Systems](https://arhiiv.eki.ee/wgrs/) +- [Transliteration of Non\-Roman Alphabets and Scripts (Thomas T. 
Pedersen)](http://transliteration.eki.ee/)
+- [Standards for Archival Description: Romanization](http://www.archivists.org/catalog/stds99/chapter8.html)
+- [ISO\-15919 (Hindi)](http://transliteration.eki.ee/pdf/Hindi-Marathi-Nepali.pdf)
+- [ISO\-15919 (Gujarati)](http://transliteration.eki.ee/pdf/Gujarati.pdf)
+- [ISO\-15919 (Kannada)](http://transliteration.eki.ee/pdf/Kannada.pdf)
+- [ISCII\-91](http://www.cdacindia.com/html/gist/down/iscii_d.asp)
+- [UTS \#35: Locale Data Markup Language (LDML)](http://www.unicode.org/reports/tr35/)
+
+![Unicode copyright](https://www.unicode.org/img/hb_notice.gif)
\ No newline at end of file
From 8a5b8c21f7d4ddaac0341f929a69f171efbb6be5 Mon Sep 17 00:00:00 2001
From: Chris Pyle <118906070+chpy04@users.noreply.github.com>
Date: Tue, 3 Sep 2024 11:42:20 -0400
Subject: [PATCH 04/52] CLDR-17566 Converting Dev P2 (#4008)

---
 .../updating-englishroot.md                   | 97 ++++++++++++++++++
 .../documenting-cldr-tools.md                 | 46 +++++++++
 docs/site/development/creating-the-archive.md | 61 +++++++++++
 docs/site/development/running-tests.md        | 49 +++++++++
 .../images/development/creatingTheArchive.png | Bin 0 -> 35590 bytes
 5 files changed, 253 insertions(+)
 create mode 100644 docs/site/development/cldr-development-site/updating-englishroot.md
 create mode 100644 docs/site/development/coding-cldr-tools/documenting-cldr-tools.md
 create mode 100644 docs/site/development/creating-the-archive.md
 create mode 100644 docs/site/development/running-tests.md
 create mode 100644 docs/site/images/development/creatingTheArchive.png

diff --git a/docs/site/development/cldr-development-site/updating-englishroot.md b/docs/site/development/cldr-development-site/updating-englishroot.md
new file mode 100644
index 00000000000..74e184e3da4
--- /dev/null
+++ b/docs/site/development/cldr-development-site/updating-englishroot.md
@@ -0,0 +1,97 @@
+---
+title: Updating English/Root
+---
+
+# Updating English/Root
+
+Whenever you update English or Root, there is one additional step that needs to be done for the vetting viewer and tests to work properly.
+
+Update CldrVersion.java to have the newest release in the list.
+
+## Run GenerateBirth
+
+The tool is in tools/java/org/unicode/cldr/tool/GenerateBirth.java. It requires a set of sources from all previous major CLDR releases, trunk, and a generation directory. These three directories must be structured as follows. The tool takes environment parameters for the second two.
+
+**cldr (set with \-t \, default\=CldrUtility.BASE\_DIRECTORY, set with environment variable \-DCLDR\_DIR)**
+
+... common/ ... tools/ java/ (apps such as GenerateBirth are run from here) ...
+
+**CldrUtility.ARCHIVE\_DIRECTORY**
+
+1. Create the archive ([Creating the Archive](https://cldr.unicode.org/development/creating-the-archive)) with all releases (if you don't have it already)
+2. The archive directory should have the latest version of every major and minor version (where versions before 21\.0 have the major version split across the top two fields).
+3. You will probably need to modify both CldrVersion.java and ToolConstants.java to bring them up to date.
+
+**log (set with \-l \, default\=CldrUtility.UTIL\_DATA\_DIR, set with CLDR\_DIR)**
+
+Pass an argument for \-t to specify the output directory. Takes a few minutes to run (and make sure you have set Java with enough memory)!
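For concreteness, here is one shape such a run can take when driven from a small wrapper class. This is only a sketch: the paths and the heap-size suggestion are placeholders, and the exact options accepted by GenerateBirth should be checked against the current source.

```java
// Sketch only: the directories below are placeholders; run the JVM with a large heap (e.g. -Xmx8g).
public class RunGenerateBirth {
    public static void main(String[] args) throws Exception {
        // Same effect as passing -DCLDR_DIR=... on the java command line.
        System.setProperty("CLDR_DIR", "/path/to/workspace/cldr");

        org.unicode.cldr.tool.GenerateBirth.main(new String[] {
            "-t", "/path/to/output/births", // generation (output) directory
            "-l", "/path/to/log"            // log directory (defaults to CldrUtility.UTIL_DATA_DIR)
        });
    }
}
```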
+ +The tool generates (among other things) the following two binary files (among others) in the output directory specified with \-t: + +- **outdated.data** +- **outdatedEnglish.data** + +Replacing the previous versions in /cldr/tools/java/org/unicode/cldr/util/data/births/. These files are used to support OutdatedPaths.java, which is used in CheckNew. + +Readable data is found in https://github.com/unicode\-org/cldr\-staging/tree/master/births/\* That should also be checked in, for comparison over time. Easiest to read if you paste into a spreadsheet! + +## Binary File Format + +| outdatedEnglish.data | outdated.data | +|---|---| +| **int:size** | **str:locale** | +| long:pathId str:oldValue | **int:size** | +| long:pathId str:oldValue | long:pathId | +| ... | long:pathId | +| | ... | +| | **str:locale** | +| | **int:size** | +| | long:pathId | +| | long:pathId | +| | ... | +| **\$END\$** | **\$END\$** | +| ~50KB | ~100KB | + +In a limited release, the file **SubmissionLocales.java** is set up to allow just certain locales and paths in those locales. + +## Testing + +Make sure TestOutdatedPaths.java passes. It may take some modifications, since it depends on the exact data. + +Run TestCheckCLDR and TestBasic with the option **\-prop:logKnownIssue\=false** (that option is important!). This checks that the Limited Submission is set up properly and that SubmissionLocales are correct. + + + +If you run into any problems, look below at debugging. + +**Check in the files** + +Eg https://github.com/unicode-org/cldr/pull/243 + +## Debugging + +It also generates readable log files for double checking. These will be in {workspace}/cldr\-aux/births/\/, that is: CLDRPaths.AUX\_DIRECTORY \+ "births/" \+ trunkVersion. Examples: https://unicode.org/repos/cldr-aux/births/35.0/en.txt, https://unicode.org/repos/cldr-aux/births/35.0/fr.txt. + +Their format is the following (TSV \= tab\-delimited\-values) — to view, it is probably easier to copy the files into a spreadsheet. + +- English doesn't have the E... values, but is a complete record. +- Other languages only have lines where the English value is more recently changed (younger) than the native’s. +- So what the first line below says is that French has "bengali" dating back to version 1\.1\.1, while English has "Bangla" dating back to version 30\. + +| Loc | Version | Value | PrevValue | EVersion | EValue | EPrevValue | Path | +|---|:---:|---|---|:---:|---|---|---| +| fr | 1.1.1 | bengali | � | 30 | Bangla | Bengali | //ldml/localeDisplayNames/languages/language[@type="bn"] | +| fr | 1.1.1 | galicien | � | 1.4.1 | Galician | Gallegan | //ldml/localeDisplayNames/languages/language[@type="gl"] | +| fr | 1.1.1 | kirghize | � | 24 | Kyrgyz | Kirghiz | //ldml/localeDisplayNames/languages/language[@type="ky"] | +| fr | 1.1.1 | ndébélé du Nord | � | 1.3 | North Ndebele | Ndebele, North | //ldml/localeDisplayNames/languages/language[@type="nd"] | +| fr | 1.1.1 | ndébélé du Sud | � | 1.3 | South Ndebele | Ndebele, South | //ldml/localeDisplayNames/languages/language[@type="nr"] | +| ... | | | | | | | | +| fr | 34 | exclamation \| point d’exclamation blanc \| ponctuation | exclamation \| point d’exclamation blanc | trunk | ! \| exclamation \| mark \| outlined \| punctuation \| white exclamation mark | exclamation \| mark \| outlined \| punctuation \| white exclamation mark | //ldml/annotations/annotation[@cp="❕"] | +| fr | 34 | exclamation \| point d’exclamation \| ponctuation | exclamation \| point d’exclamation | trunk | ! 
\| exclamation \| mark \| punctuation | exclamation \| mark \| punctuation | //ldml/annotations/annotation[@cp="❗"] | +| fr | 34 | cœur \| cœur point d’exclamation \| exclamation \| ponctuation | cœur \| cœur point d’exclamation | trunk | exclamation \| heart exclamation \| mark \| punctuation | exclamation \| heavy heart exclamation \| mark \| punctuation | //ldml/annotations/annotation[@cp="❣"] | +| fr | 34 | couple \| deux hommes se tenant la main \| hommes \| jumeaux | couple \| deux hommes se tenant la main \| jumeaux | trunk | couple \| Gemini \| man \| twins \| men \| holding hands \| zodiac | couple \| Gemini \| man \| twins \| two men holding hands \| zodiac | //ldml/annotations/annotation[@cp="👬"] | +| fr | 34 | couple \| deux femmes se tenant la main \| femmes \| jumelles | couple \| deux femmes se tenant la main \| jumelles | trunk | couple \| hand \| holding hands \| women | couple \| hand \| two women holding hands \| woman | //ldml/annotations/annotation[@cp="👭"] | + +A value of � indicates that there is no value for that version. + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/coding-cldr-tools/documenting-cldr-tools.md b/docs/site/development/coding-cldr-tools/documenting-cldr-tools.md new file mode 100644 index 00000000000..d206f6cbc8f --- /dev/null +++ b/docs/site/development/coding-cldr-tools/documenting-cldr-tools.md @@ -0,0 +1,46 @@ +--- +title: Documenting CLDR Tools +--- + +# Documenting CLDR Tools + +*Developers: Make sure your tool is easily accessible from the command line.* + +You can add the @CLDRTool annotation to any class in cldr\-code that has a main() function, and it will be documented as part of the JAR cldr\-code.jar is used. + +See [CLDR Tools](https://cldr.unicode.org/development/cldr-tools) for general information about obtaining and using CLDR tools. + +## Coding it + +An example from ConsoleCheckCLDR.java will start us out here + +  @CLDRTool(alias \= "check", + +  description \= "Run CheckCLDR against CLDR data") + +  public class ConsoleCheckCLDR { … + +Then, calling ```java -jar cldr-tools.jar -l``` produces: + +  *check \- Run CheckCLDR against CLDR data* + +  *\* + +  *\= org.unicode.cldr.test.ConsoleCheckCLDR* + +And then ```java -jar cldr-tools.jar check``` can be used to run this tool. All additional arguments after "check" are passed to **ConsoleCheckCLDR.main()** as arguments. + +Note these annotation parameters. Only "alias" is required. + +- **alias** \- used from the command line instead of the full class name. Also forms part of the default URL for documentation. +- **description** \- a short description of the tool. + +Additional parameters: + +- **url** \- you can specify a custom URL for the tool. This is displayed with the listing. +- **hidden** \- if non\-empty, this specifies a reason to *not* show the tool when running "java \-jar" without "\-l". For example, the main() function may be a less\-useful internal tool, or a test. +## Documenting it + +Assuming your tools’s alias is *myalias,* create a new subpage with the URL http://cldr.unicode.org/tools/myalias (a subpage of [CLDR Tools](https://cldr.unicode.org/development/cldr-tools)). Fill this page out with information about how to use your tool. 
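Putting the pieces above together, a minimal annotated tool might look like the following sketch. The class name, alias, and description are invented for illustration, and the import assumes the annotation lives in org.unicode.cldr.util; check the actual package in the cldr-code sources.

```java
import org.unicode.cldr.util.CLDRTool; // assumed package; verify against the sources

// Hypothetical example: "myalias" and MyTool are invented names.
@CLDRTool(alias = "myalias", description = "One-line summary shown in the tool listing")
public class MyTool {
    public static void main(String[] args) {
        // Everything after "myalias" on the command line arrives here unchanged.
        for (String arg : args) {
            System.out.println("argument: " + arg);
        }
    }
}
```

With this in place, running the jar with `-l` should list *myalias*, and running it with *myalias* as the first argument should dispatch to `MyTool.main` with the remaining arguments.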
+ +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/creating-the-archive.md b/docs/site/development/creating-the-archive.md new file mode 100644 index 00000000000..8909967950c --- /dev/null +++ b/docs/site/development/creating-the-archive.md @@ -0,0 +1,61 @@ +--- +title: Checking out the CLDR Archive +--- + +# Checking out the CLDR Archive + +A number of the tools in CLDR depend on access to older versions. These tools include: + +- [Generating Charts](https://cldr.unicode.org/development/cldr-big-red-switch/generating-charts) +- [Update Validity XML](https://cldr.unicode.org/development/updating-codes/update-validity-xml) +- [Updating English/Root](https://cldr.unicode.org/development/cldr-development-site/updating-englishroot) + - \[Note: add others when we find them] + - Some tests + - TestCompatibility.java + - TestTransforms.java + - TestValidity.java + - Some other tools (typically when given a version argument on the command line) + - FindPluralDifferences + - ... + +### Here's how to do that. + +1. Create an archive directory **cldr\-archive**. The Simplest is if it on the same level as your local CLDR repository. In other words, if your [CLDR\_DIR](https://cldr.unicode.org/development/cldr-development-site/running-cldr-tools) is .../workspace/cldr, then create the directory  **…/workspace/cldr\-archive**
+(Note: The Java property **ARCHIVE** can be used to overide the path to cldr\-archive). +2. Open up ToolConstants.java and look at ToolConstants.CLDR\_VERSIONS. You'll see something like: + 1. **public static final** List\ ***CLDR\_VERSIONS*** \= ImmutableList.of( + 2. "1\.1\.1", + 3. "1\.2", + 4. "1\.3", + 5. "1\.4\.1", + 6. "1\.5\.1", + 7. "1\.6\.1", + 8. "1\.7\.2", + 9. "1\.8\.1", + 10. ... + 11. "41\.0" + 12. // add to this once the release is final! + 13. ); + - NOTE: this should also match CldrVersion.java (those two need to be merged together) +3. Add the just\-released version, such as "**42\.0**" to the list  above + - Also update **DEV\_VERSION** to "43" (the next development version) + - Finally, update CldrVersion.java and make similar changes. +4. Now, run the tool **org.unicode.cldr.tool.CheckoutArchive** + - Or from the command line:
+ **mvn \-DCLDR\_DIR\=** *path\_to/cldr* **\-\-file\=tools/pom.xml \-pl cldr\-code compile \-DskipTests\=true exec:java \-Dexec.mainClass\=org.unicode.cldr.tool.CheckoutArchive  \-Dexec.args\=""** + - Note other options for this tool: +   *\-\-help* will give help +   *\-\-prune* will run a 'git workspace prune' before proceeding +   *\-\-echo* will just show the commands that would be run, without running anything + (For example,  **\-Dexec.args\="\-\-prune"** in the above command line) + +The end result (where you need all of the releases) looks something like the following: + +![alt-text](../images/development/creatingTheArchive.png) + +## Advanced Configuration + +- You can set the property  **\-DCLDR\_ARCHIVE** to point to a different parent directory for the archive +- You can set **\-DCLDR\_HAS\_ARCHIVE\=false** to tell unit tests and tools not to look for the archive + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/running-tests.md b/docs/site/development/running-tests.md new file mode 100644 index 00000000000..42bd0b31e9b --- /dev/null +++ b/docs/site/development/running-tests.md @@ -0,0 +1,49 @@ +--- +title: Running Tests +--- + +# Running Tests + +You will always need to run tests when you do a check\-in. + +1. Preconditions + - If you change the DTD, be sure to read and follow [Updating DTDs](https://cldr.unicode.org/development/updating-dtds) first. + - If you added a new feature or fixed a significant bug, add a unit test for it. + - See unittest/NumberingSystemsTest as an example. + - Remember to add to unittest/TestAll +2. Run **TestAll \-e** + - These are the unit tests in exhaustive mode + - If you are doing something you know to be simple, you could do the shorter run of just **TestAll** +3. Run **ConsoleCheckCLDR \-e \-z final\_testing \-S common,seed** + - This runs the same set of test that the Survey Tool does. + - If you know what you are doing, you can run a set of filtered tests. +4. Other tests + 1. The unit tests are not complete, so you get a better workout if you are doing anything fancy by running: + 2. [**NewLdml2IcuConverter**](https://cldr.unicode.org/development/coding-cldr-tools/newldml2icuconverter) + 3. [**Generating Charts**](https://cldr.unicode.org/development/cldr-big-red-switch/generating-charts) + 1. If you have interesting new data, write a chart for it. See subclasses of Chart.java for examples. + +## Running tests on the command line + +```bash +$ export CLDR_DIR=/path/to/svn/root/for/cldr + +$ cd $CLDR_DIR/tools/java && ant all + +$ cd $CLDR_DIR/tools/cldr-unittest && ant unittestExhaustive datacheck +``` + +\[TODO: add more commands here; can't we automate all this into a single build rule for ant?] TODO: [TODOL ticket:8864](http://unicode.org/cldr/trac/ticket/8864) + +## Debugging + +\[TODO: add more tips here] + +### Regexes + +We use a lot of regexes! + +1. There is org.unicode.cldr.util.RegexUtilities.showMismatch (and related methods) that are really useful in debugging cases where regexes fail. You hand it a pattern or matcher and a string, and it shows how far the regex got before it failed. +2. To debug RegexLookup, there is a special call you can make where you pass in a set. On return, that set is filled with a set of strings showing how far each of the regex patterns progressed. You can thus see why a string didn't match as expected. 
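A minimal sketch of the first technique, assuming a `showMismatch(Matcher, String)` overload as described above (the pattern and test string are arbitrary examples):

```java
// Sketch only: assumes RegexUtilities.showMismatch(Matcher, String) as described above.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.unicode.cldr.util.RegexUtilities;

public class ShowMismatchExample {
    public static void main(String[] args) {
        String input = "//ldml/localeDisplayNames/languages/language[@type=\"bn\"]";
        Pattern pattern =
                Pattern.compile("//ldml/localeDisplayNames/territories/territory\\[@type=\"[a-z]+\"\\]");
        Matcher matcher = pattern.matcher(input);
        if (!matcher.matches()) {
            // Prints the input with a marker showing how far the match got before failing.
            System.out.println(RegexUtilities.showMismatch(matcher, input));
        }
    }
}
```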
+ 

![Unicode copyright](https://www.unicode.org/img/hb_notice.gif)
\ No newline at end of file
diff --git a/docs/site/images/development/creatingTheArchive.png b/docs/site/images/development/creatingTheArchive.png
new file mode 100644
index 0000000000000000000000000000000000000000..1a6a51a89199414c912f11318bcf4d8108d0f557
GIT binary patch
literal 35590
[base85-encoded binary data for creatingTheArchive.png omitted]

Date: Tue, 3 Sep 2024 11:42:35 -0400
Subject: [PATCH 05/52] CLDR-17566 Converting Dev P1 (#4007)

---
 docs/site/development.md                     |  7 ++
 docs/site/development/adding-locales.md      | 43 ++++++++++++
 docs/site/development/cldr-big-red-switch.md | 31 +++++++++
 .../cldr-big-red-switch/generating-charts.md | 67 +++++++++++++++++++
 .../running-cldr-tools.md                    | 34 ++++++++++
 5 files changed, 182 insertions(+)
 create mode 100644 docs/site/development.md
 create mode 100644 docs/site/development/adding-locales.md
 create mode 100644 docs/site/development/cldr-big-red-switch.md
 create mode 100644 docs/site/development/cldr-big-red-switch/generating-charts.md
 create mode 100644 docs/site/development/cldr-development-site/running-cldr-tools.md

diff --git
a/docs/site/development.md b/docs/site/development.md new file mode 100644 index 00000000000..b19d1fd381b --- /dev/null +++ b/docs/site/development.md @@ -0,0 +1,7 @@ +--- +title: Internal Development +--- + +# Internal Development + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/adding-locales.md b/docs/site/development/adding-locales.md new file mode 100644 index 00000000000..176974acbed --- /dev/null +++ b/docs/site/development/adding-locales.md @@ -0,0 +1,43 @@ +--- +title: Adding a new locale to CLDR +--- + +# Adding a new locale to CLDR + +NOTE: this is an internal page for TC developers. Others should see [Core Data for New Locales](https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales). + +### Country Locales + +If you are just adding a new country locale (eg we have a locale for the language), you just need to add the new empty locale file, and Update Survey Tool. Otherwise: + +### Minimal Structure + +Before adding a new locale, you must have the core data: see [Core Data for New Locales](https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales) for the process. + +Here is an example: https://github.com/unicode-org/cldr/pull/59/files + +### Add Data in git + +- Before starting to add a new locale, make sure you have the minimal core data that cannot be added through the Survey Tool. See above. +- Create the new locale files. If you are adding a single new language locale, for example, language "xx" as spoken in country "YY", you will need two files: + - **common/main/xx.xml** \- The main locale file containing the core data. You can use the template in seed/main/und.xml as a starting point. + - **common/main/xx\_YY.xml** \- An empty country locale containing the identification of xx\_YY as a valid locale. You can use the template in seed/main/und\_XX.xml as a starting point. + - See files are here: https://github.com/unicode-org/cldr/tree/master/seed/main +- Add the plural rules (if available) to **common/supplemental/plurals.xml** +- Add the day period rules (if you have them ) to **common/supplemental/dayPeriods.xml** +- If you are adding a new language + - Add the language subtag to \ in + - **/common/supplemental/attributeValueValidity.xml** + - add the appropriate default content locale + - to \ in **common/supplemental/supplementalMetadata.xml** + - The default content locale is usually the locale where the most people speak the language in question. + - If the language is not already in common/supplemental/likelySubtags.xml + - Send the literate pop information to Rick, or file a bug, if the language is not already in the supplemental data. + - Once he has added, run the tool in [LikelySubtags and Default Content](https://cldr.unicode.org/development/updating-codes/likelysubtags-and-default-content) to add the new language and its associated subtags to common/supplemental/likelySubtags.xml + - Also add the English translation for any new languages in **common/main/en.xml** +- If requested, add to vendor targets (Locale.txt), and to Cldr where resources are committed. +- Run the tests (will be done automatically when a PR is created) +- Commit your work to a branch and create a Pull Request. +- The new locale will be included in Smoketest when the PR is merged, and will be in production once a push to production occurs. 
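For orientation, a minimal sketch of what the new language locale file typically looks like is shown below. This is only an illustration: "xx" is a placeholder code, and the authoritative starting point is the seed/main/und.xml template mentioned above, whose DTD path and identity elements should be copied rather than this sketch.

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!-- Hypothetical minimal common/main/xx.xml; copy the real skeleton from seed/main/und.xml -->
<ldml>
	<identity>
		<version number="$Revision$"/>
		<language type="xx"/>
	</identity>
</ldml>
```

The empty country locale common/main/xx\_YY.xml follows the same pattern, with the identity block additionally carrying a territory element (for example `<territory type="YY"/>`), per the seed/main/und\_XX.xml template.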
+ +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/cldr-big-red-switch.md b/docs/site/development/cldr-big-red-switch.md new file mode 100644 index 00000000000..68d6e47a301 --- /dev/null +++ b/docs/site/development/cldr-big-red-switch.md @@ -0,0 +1,31 @@ +--- +title: CLDR: Big Red Switch +--- + +# CLDR: Big Red Switch + +## What to do before we release for CLDR version X (current version X \= 28\): + +*All: remember to make sure that the* ***target: X*** *is set on all the X bugs, otherwise the Release Changes link won't pick them up.* + +*Items marked \* are for the "Little Red Switch": dot\-dot releases (supplemental data, ids, timezones, small spec fixes). Items marked \*\* are only done for a LRS if there was a DTD change.* + +### Trial New Version (Let us know if you need write access!) + +(For editors: [List View of BRS](https://cldr.unicode.org/development/cldr-big-red-switch/list-view-of-brs), [Spreadsheet View](https://docs.google.com/spreadsheets/d/1dIOLxKX2gW7BRDVdMBH9qr1GdxpPj8Bc1Pe-02p_92k/edit#gid=0)) + +## Contributor Message + +For each release, we add names to the Unicode CLDR Acknowledgments page: + +http://cldr.unicode.org/index/acknowledgments. + +However, names are not automatically entered there, since some people may not wish to have their names listed. If your name is not there and you would like it to be, please send me your name as it should appear on that page. Your name should be in Latin characters, optionally with names in one or more other scripts in parentheses, such as "Vladimir Weinstein (Владимир Вајнштајн)" + +**\-\-\-\- how to send this message: currently a crude process \-\-\-\-** + +1. get list of those who contributed through Survey tool (Login as TC, under 'Manage Users', click 'Email Address of Users Who Participated' (shows all users, not just your org) +2. e\-mail that list **on BCC:** the above message with a subject line of "\[CLDR X.Y Contributor Message]", and a request to please keep the subject line intact. +3. Then, the subject line can be used to filter/locate the contributor requests. + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/cldr-big-red-switch/generating-charts.md b/docs/site/development/cldr-big-red-switch/generating-charts.md new file mode 100644 index 00000000000..cc2136b6d94 --- /dev/null +++ b/docs/site/development/cldr-big-red-switch/generating-charts.md @@ -0,0 +1,67 @@ +--- +title: Generating Charts +--- + +# Generating Charts + +## Generate + +The input for this is the cldr\-staging/production file, and the output is in github cldr\-staging. **(If for the development version, the input is main or the maint branch, but we should change that.)** + +1. Make sure the settings and VM arguments are right for where you are in the release: + 1. **Start, Mid\-release, Prefinal release,** or **Final release** (see below) +2. Run GenerateAllCharts. The results for each will be in ... cldr\-staging/docs/charts/by\_type/names.currency.html and so on. +3. Spot\-check for sanity. + 1. Start from the main page (eg cldr\-staging/docs/charts/index.html), and click on each of those links. + 2. On each of the subpages, take the first chart on each page, recursively. + 3. Use the "Index" link to go back up (not the back button), and make sure it goes to the right version of the page. +4. Check into github on cldr\-staging + +## Start Release + +1. 
Make sure the version  (eg **99**) is right in ToolConstants + 1. Make sure the *last* number (eg **99\.0**) is in CLDR\_VERSIONS + 2. Set DEFAULT\_CHART\_VERSION \= "99"; +2. Add a new folder with that number, such as cldr\-staging/docs/charts/**99** +3. Create the archive ([Creating the Archive](https://cldr.unicode.org/development/creating-the-archive)) with at least the last release (if you don't have it already) +4. **Use the same VM arguments as Mid\-Release** + +## Mid\-release + +1. Use the VM arguments + 1. \-DCHART\_VERSION\=**99** + 2. \-DCHART\_STATUS\=**beta** // \=*default*, uses trunk, calls it β + +## Prefinal Release + +1. VM Arguments + 1. \-DCHART\_VERSION\=**99** + 2. \-DCHART\_STATUS\=**trunk** (uses trunk, no β. Used at the end of the release, but before the final data is in cldr\-archive) +2. In the printout from delta\_summary.txt, there is a listing of the sizes at the top + 1. Something like the following: + 1. \# dir file added deleted changed total + 1. TOTAL 30,276 3,601 10,909 2,153,094 + 2. Add those new figures to the release page + +## Final Release + +1. Make sure the settings are: + 1. \-DCHART\_VERSION\=**99** + 2. \-DCHART\_STATUS\=**release** (only uses the cldr\-archive, no β) +2. Change the page to add the new release +3. Check the redirection links on [test\-chart\-links](https://cldr.unicode.org/development/cldr-big-red-switch/test-chart-links). +4. On index.html: open it, and fix the version (eg to 25β \=\> 25\) + +## Modifying the chart programs + +The chart programs have grown over time, and need some cleanup. For example, the supplemental charts duplicate code that is now in SupplementalDataInfo.  + +### ShowLanguages. + +The messages that they use are in a file util/data/chart\_messages.html. The right cell contains the key, which is extracted by lines like: + + PrintWriter pw \= new PrintWriter(new FormattedFileWriter(index, "Zone \\u2192 Tzid", null)); + +The key will be zone\_tzid, in this case. + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/cldr-development-site/running-cldr-tools.md b/docs/site/development/cldr-development-site/running-cldr-tools.md new file mode 100644 index 00000000000..044d4317b96 --- /dev/null +++ b/docs/site/development/cldr-development-site/running-cldr-tools.md @@ -0,0 +1,34 @@ +--- +title: Running CLDR Tools +--- + +# Running CLDR Tools + +You will need to include some options to run various programs. Here are some samples, but the directories may vary depending on your configuration. + +**Standard Gorp** + +\-Dfile.encoding\=UTF\-8 + +\-Xmx3000m + +\-DCLDR\_DIR\=${workspace\_loc}/cldr + +\-DOTHER\_WORKSPACE\=${workspace\_loc}/"../Google Drive/workspace/" + +\-DCLDR\_GEN\_DIR\=${workspace\_loc}/"../Google Drive/workspace/Generated/cldr/" + +\-Dregistry\=language\-subtag\-registry + +\-DSHOW\_FILES + +The xmx is to increase memory so that you don't blow up. If you only do a few dozen locales, you don't need to set it that high. + +**Optional** + +\-f regex // to only check locales matching the regex, like ".\*" or "en\|fr\|de" ...
+ +\-DSHOW\_FILES // shows files being opened and created + + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file From 474116335562b338db551df00a828ebb7965f2a9 Mon Sep 17 00:00:00 2001 From: Chris Pyle <118906070+chpy04@users.noreply.github.com> Date: Tue, 3 Sep 2024 11:42:49 -0400 Subject: [PATCH 06/52] CLDR-17566 Converting Updating Codes P2 (#4006) --- .../updating-codes/update-validity-xml.md | 23 ++++ .../updating-population-gdp-literacy.md | 108 +++++++++++++++ .../updating-script-metadata.md | 85 ++++++++++++ .../updating-subdivision-codes.md | 129 ++++++++++++++++++ .../updating-subdivision-translations.md | 26 ++++ .../updating-codes/updating-un-codes.md | 30 ++++ 6 files changed, 401 insertions(+) create mode 100644 docs/site/development/updating-codes/update-validity-xml.md create mode 100644 docs/site/development/updating-codes/updating-population-gdp-literacy.md create mode 100644 docs/site/development/updating-codes/updating-script-metadata.md create mode 100644 docs/site/development/updating-codes/updating-subdivision-codes.md create mode 100644 docs/site/development/updating-codes/updating-subdivision-translations.md create mode 100644 docs/site/development/updating-codes/updating-un-codes.md diff --git a/docs/site/development/updating-codes/update-validity-xml.md b/docs/site/development/updating-codes/update-validity-xml.md new file mode 100644 index 00000000000..39eecb1e2ff --- /dev/null +++ b/docs/site/development/updating-codes/update-validity-xml.md @@ -0,0 +1,23 @@ +--- +title: Update Validity XML +--- + +# Update Validity XML + +1. Create the archive ([Creating the Archive](https://cldr.unicode.org/development/creating-the-archive)) with at least the last release (if you don't have it already) +2. Run GenerateValidityXML.java +3. This updates files in cldr/common/validity/. (If you set \-DSHOW\_FILES, you'll see this on the console.) + 1. New files should not be generated. If there are any, something has gone wrong, so raise this as an issue on cldr\-dev. **Note:** cldr/common/validity/currency.xml contains a comment line \- *\) of the form: + - \ + 5. Run the following (you must have all the archived versions loaded, back to cldr\-28\.0!) + 1. TestValidity \-e9 + 6. If they are ok, replace and checkin + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/updating-population-gdp-literacy.md b/docs/site/development/updating-codes/updating-population-gdp-literacy.md new file mode 100644 index 00000000000..fe5c0d674f3 --- /dev/null +++ b/docs/site/development/updating-codes/updating-population-gdp-literacy.md @@ -0,0 +1,108 @@ +--- +title: Updating Population, GDP, Literacy +--- + +# Updating Population, GDP, Literacy + +**Updated 2021\-02\-10 by Yoshito** + +Instructions are based on Chrome browser. + +## Load the World DataBank + +**The World DataBank is at (http://databank.worldbank.org/data/views/variableselection/selectvariables.aspx?source=world-development-indicators). Unfortunately, they keep changing the link. If the page has been moved, try to get to it by doing the following. Each of the links are what currently works, but that again may change.** + +1. Go to http://worldbank.org +2. Click "View More Data" in the Data section (http://data.worldbank.org/) +3. Click "Data Catalog" (http://datacatalog.worldbank.org/) +4. Search "World Development Indicators" (http://data.worldbank.org/data-catalog/world-development-indicators) +5. 
In "Data \& Resources" tab, click on the blue "Databank" link. It should open a new Window \- https://databank.worldbank.org/reports.aspx?source\=world\-development\-indicators + +Once you are there, generate a file by using the following steps. There are 3 collapsible sections, "Country", "Series", and "Time". + +- Countries + - Expand the "Country" section, click the "Countries" tab, and then click the "Select All" button on the left. You do NOT want the aggregates here, just the countries. There were 217 countries on the list when these instructions were written; if substantially more than that, you may have mistakenly included aggregates. +- Series + - Expand the "Series" section. + - Select "Population, total" + - Select "GNI, PPP (current international $)" +- Time + - Select all years starting at 2000 up to the latest available year. The latest as of this writing was "2021". Be careful here, because sometimes it will list a year as being available, but there will be no real data there, which messes up our tooling. + - The tooling will automatically handle new years. +- Click the "Download Options" link in the upper right. + - A small "Download options" box will appear. + - Select "CSV" + - Instruct your browser to save the file. +- You will receive a ZIP file named "**Data\_Extract\_From\_World\_Development\_Indicators.zip**". + - Unpack this zip file. It will contain two files. + - (From a unix command line, you can unpack it with + - "unzip \-j \-a \-a **Data\_Extract\_From\_World\_Development\_Indicators.zip"** + - to junk subdirectories and force the file to LF line endings.) + - The larger file (126kb as of 2021\-02\-10\) contains the actual data we are interested in. The file name should be something like f17e18f5\-e161\-45a9\-b357\-cba778a279fd\_Data.csv + - The smaller file is just a field definitions file that we don't care about. +- Verify that the data file is of the form: + - Country Name,Country Code,Series Name,Series Code,2000 \[YR2000],2001 \[YR2001],2004 \[YR2004],... + - Afghanistan,AFG,"Population, total",SP.POP.TOTL,19701940,20531160,23499850,24399948,25183615,... + - Afghanistan,AFG,"GNI, PPP (current international $)",NY.GNP.MKTP.PP.CD,..,..,22134851020\.6294,25406550418\.3726,27761871367\.4836,32316545463\.8146,... + - Albania,ALB,"Population, total",SP.POP.TOTL,3089027,3060173,3026939,3011487,2992547,2970017,... + - ... +- Rename it to **world\_bank\_data.csv** and save in {**cldr}/tools/cldr\-code/src/main/resources/org/****unicode****/cldr/util/data/external/** +- Diff the old version vs. the current. +- If the format changes, you'll have to modify WBLine in AddPopulationData.java to have the right order and contents. + +## Load UN Literacy Data + +1. Goto http://unstats.un.org/unsd/demographic/products/socind/default.htm +2. Click on "Education" +3. Click on "Table 4a \- Literacy" +4. Download data \- save as a temporary file +5. Open in Excel, OpenOffice, or Numbers \- save as cldr/tools/java/org/unicode/cldr/util/data/external/un\_literacy.csv (Windows Comma Separated) + 1. If it has multiple sheets, you want the one that says "Data", and looks like: +6. Table 4a. Literacy +7. Last update: December 2012 +8. Country or area Year Adult (15\+) literacy rate Youth (15\-24\) literacy rate +9. Total Men Women Total Men Women +10. Albania 2008 96 97 95 99 99 99 +11. Diff the old version vs. the current. +12. If the format changes, you'll have to modify the loadUnLiteracy() method in **org/unicode/cldr/tool/AddPopulationData.java** +13.
Note that the content does not seem to have changed since 2012, but the page says "*Please note this page is currently under revision*." + 1. If there is no change to the data (still no change 10 years later), there is no reason to commit a new version of the file. + 2. See also [CLDR\-15923](https://unicode-org.atlassian.net/browse/CLDR-15923) + +## Load CIA Factbook + +**Note:** Pages in original instruction were moved to below. These pages no longer provide text version compatible with files in CLDR. ([CLDR\-14470](https://unicode-org.atlassian.net/browse/CLDR-14470)) + +- Population: https://www.cia.gov/the-world-factbook/field/population +- Real GDP (purchasing power parity): https://www.cia.gov/the-world-factbook/field/real-gdp-purchasing-power-parity +1. All files are saved in **cldr/tools/java/org/unicode/cldr/util/data/external/** +2. Goto: https://www.cia.gov/library/publications/the-world-factbook/index.html +3. Goto the "References" tab, and click on "Guide to Country Comparisons" +4. Expand "People and Society" and click on "Population" \- + 1. There's a "download" icon in the right side of the header. Right click it, Save Link As... call it + 2. **factbook\_population.txt** + 3. **You may need to delete header lines. The first line should begin with "1 China … " or similar.** +5. Back up a page, then Expand "Economy" and click on "GDP (purchasing power parity)" + 1. Right Click on DownloadData, Save Link As... call it + 2. **factbook\_gdp\_ppp.txt** + 3. **You may need to delete header lines. The first line should begin with "1 China … " or similar.** +6. Literacy \- **No longer works, so we need to revise program \- They are still publishing updates to the data at this page, we just need to write some code to put the data into a form we can use, see** [**CLDR\-9756 (comment 4\)**](https://unicode-org.atlassian.net/browse/CLDR-9756?focusedCommentId=118608) + 1. ~~https://www.cia.gov/library/publications/the-world-factbook/fields/2103.html~~ maybe https://www.cia.gov/library/publications/the-world-factbook/fields/370.html ? + 2. ~~Right Click on "Download Data", Save Link As... Call it~~ + 3. ~~**factbook\_literacy.txt**~~ +7. Diff the old version vs. the current. +8. If the format changes, you'll have to modify the loadFactbookLiteracy()) method in **org/unicode/cldr/tool/AddPopulationData.java** + +## Convert the data + +1. If you saw any different country names above, you'll need to edit external/alternate\_country\_names.txt to add them. + 1. For example, we needed to add Czechia in 2016\. +2. Q: How would I know? + 1. If two\-letter non\-countries are added, then you'll need to adjust StandardCodes.isCountry. +3. Q: How would I know? + 1. Run "AddPopulationData *\-DADD\_POP*\=**true"** and look for errors. +4. **java \-jar \-DADD\_POP\=true \-DCLDR\_DIR\=${HOME}/src/cldr cldr.jar org.unicode.cldr.tool.AddPopulationData** +5. Once everything looks ok, check everything in to git. +6. 
Once done, then run the ConvertLanguageData tool as on [Update Language Script Info](https://cldr.unicode.org/development/updating-codes/update-language-script-info) + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/updating-script-metadata.md b/docs/site/development/updating-codes/updating-script-metadata.md new file mode 100644 index 00000000000..7ab8a0342e0 --- /dev/null +++ b/docs/site/development/updating-codes/updating-script-metadata.md @@ -0,0 +1,85 @@ +--- +title: Updating Script Metadata +--- + +# Updating Script Metadata + +### New Unicode scripts + +We should work on script metadata early for a Unicode version, so that it is available for tools (such as Mark's "UCA" tools). + +- Unicode 9/CLDR 29: New scripts in CLDR but not yet in ICU caused trouble. +- Unicode 10: Working on a pre\-CLDR\-31 branch, plan to merge into CLDR trunk after CLDR 31 is done. +- Should the script metadata code live in the Unicode Tools, so that we don't need a CLDR branch during early Unicode next\-version work? + +If the new Unicode version's PropertyValueAliases.txt does not have lines for Block and Script properties yet, then create a preliminary version. Diff the Blocks.txt file and UnicodeData.txt to find new scripts. Get the script codes from . Follow existing patterns for block and script names, especially for abbreviations. Do not add abbreviations (which differ from the long forms) unless there is a well\-established pattern in the existing data. + +Aside from instructions below for all script metadata changes, new script codes need English names (common/main/en.xml) and need to be added to common/supplemental/coverageLevels, under key %script100, so that the new script names will show up in the survey tool. For example, see the [changes for new Unicode 8 scripts](https://unicode-org.atlassian.net/browse/CLDR-8109). + +Can we add new scripts in CLDR *trunk* before or only after adding them to CLDR's copy of ICU4J? We did add new Unicode 9 scripts in CLDR 29 before adding them to ICU4J. The CLDR unit tests do not fail any more for scripts that are newer than the Unicode version in CLDR's copy of ICU. + +### Sample characters + +We need sample characters for the "UCA" tools for generating FractionalUCA.txt. + +Look for patterns of what kinds of characters we have picked for other scripts, for example the script's letter "KA". We basically want a character where people say "that looks Greek", and the same shape should not be used in multiple scripts. So for Latin we use "L", not "A". We usually prefer consonants, if applicable, but it is more important that a character look unique across scripts. It does want to be a *letter*, and if possible should not be a combining mark. It would be nice if the letters were commonly used in the majority language, if there are multiple. Compare with the [charts for existing scripts](http://www.unicode.org/charts/), especially related ones. + +### Editing the spreadsheet + +Google Spreadsheet: [Script Metadata](https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o/edit#gid=0) + +Use and copy cell formulas rather than duplicating contents, if possible. Look for which cells have formulas in existing data, especially for Unicode 1\.1 and 7\.0 scripts. + +For example, + +- Script names should only be entered on the LikelyLanguage sheet. Other sheets should use a formula to map from the script code. 
+- On the Samples sheet, use a formula to map from the code point to the actual character. This is especially important for avoiding mistakes since almost no one will have font support for the new scripts, which means that most people will see "Tofu" glyphs for the sample characters. + +### Script Metadata properties file +1. Go to the spreadsheet [Script Metadata](https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o/edit#gid=0) + 1. File\>Download as\>Comma Separated Values + 2. Location/Name \= {CLDR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/Script\_Metadata.csv + 3. Refresh files (eclipse), then compare with previous version for sanity check. If there are no new scripts for target Unicode version of CLDR release you're working on, then skip the rest of steps below. For example, script "Toto" is ignore for CLDR 39 because target Unicode release of CLDR 39 is Unicode 13 and "Toto" will be added in Unicode 14\. +2. **Note: VM arguments** + 1. Each tool (and test) needs   \-DCLDR\_DIR\=/usr/local/google/home/mscherer/cldr/uni/src   (or wherever your repo root is) + 2. It is easiest to set this once in the global Preferences, rather than in the Run Configuration for each tool. + 3. Most of these tools also need   \-DSCRIPT\_UNICODE\_VERSION\=14   (set to the upcoming Unicode version), but it is easier to edit the ScriptMetadata.java line that sets the UNICODE\_VERSION variable. + 4. Run {cldr}/tools/cldr\-code/src/test/java/org/unicode/cldr/unittest/TestScriptMetadata.java + 5. A common error is if some of the data from the spreadsheet is missing, or has incorrect values. +3. Run GenerateScriptMetadata, which will produce a modified [common/properties/scriptMetadata.txt](https://github.com/unicode-org/cldr/blob/main/common/properties/scriptMetadata.txt) file. + 1. If this ignores the new scripts: Check the \-DSCRIPT\_UNICODE\_VERSION or the ScriptMetadata.java UNICODE\_VERSION. + 2. Add the English script names (from the script metadata spreadsheet) to common/main/en.xml. + 3. Add the French script names from [ISO 15924](https://www.unicode.org/iso15924/iso15924-codes.html) to common/main/fr.xml, but mark them as draft\="provisional". + 4. Add the script codes to common/supplemental/coverageLevels.xml (under key %script100\) so that the new script names will show up in the CLDR survey tool. + 1. See [\#8109\#comment:4](https://unicode-org.atlassian.net/browse/CLDR-8109#comment:4) [r11491](https://github.com/unicode-org/cldr/commit/1d6f2a4db84cc449983c7a01e5a2679dc1827598) + 2. See changes for Unicode 10: + 3. See changes for Unicode 12: [CLDR\-11478](https://unicode-org.atlassian.net/browse/CLDR-11478) [commit/647ce01](https://github.com/unicode-org/cldr/commit/be3000629ca3af2ae77de6304480abefe647ce01) + 5. Maybe add the script codes to TestCoverageLevel.java variable script100\. + 1. Starting with [cldr/pull/1296](https://github.com/unicode-org/cldr/pull/1296) we should not need to list a script here explicitly unless it is Identifier\_Type\=Recommended. + 6. Remove new script codes from $scriptNonUnicode in common/supplemental/attributeValueValidity.xml if needed + 7. For the following step to work as expected, the CLDR copy of the IANA BCP 47 language subtag registry must be updated (at least with the new script codes). + 1. Copy the latest version of https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry to {CLDR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/language\-subtag\-registry + 2. 
Consider copying only the new script subtags (and making a note near the top of the CLDR file, or lines like "Comments: Unicode 14 script manually added 2021\-06\-01") to avoid having to update other parts of CLDR. + 8. Run GenerateValidityXML.java like this: + 1. See [Update Validity XML](https://cldr.unicode.org/development/updating-codes/update-validity-xml) + 2. This needs the previous version of CLDR in a sibling folder. + 1. see [Creating the Archive](https://cldr.unicode.org/development/creating-the-archive) for details on running the CheckoutArchive tool + 3. Now run GenerateValidityXML.java + 4. If this crashes with a NullPointerException trying to create a Validity object, check that ToolConstants.LAST\_RELEASE\_VERSION is set to the actual last release. + 1. Currently, the CHART\_VERSION must be a simple integer, no ".1" suffix. + 9. At least script.xml should show the new scripts. The generator overwrites the source data file; use ```git diff``` or ```git difftool``` to make sure the new scripts have been added. + 10. Run GenerateMaximalLocales, [as described on the likelysubtags page](https://cldr.unicode.org/development/updating-codes/likelysubtags-and-default-content), which generates another two files. + 11. Compare the latest git master files with the generated ones:  meld  common/supplemental  ../Generated/cldr/supplemental + 1. Copy likelySubtags.xml and supplementalMetadata.xml to the latest git master if they have changes. + 12. Compare generated files with previous versions for sanity check. + 13. Run the CLDR unit tests. + 1. Project cldr\-core: Debug As \> Maven test + 14. These tests have sometimes failed: + 1. LikelySubtagsTest + 2. TestInheritance + 3. They may need special adjustments, for example in GenerateMaximalLocales.java adding an extra entry to its MAX\_ADDITIONS or LANGUAGE\_OVERRIDES. +4. Check in the updated files. + +Problems are typically because a non\-standard name is used for a territory name. That can be fixed and the process rerun. + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/updating-subdivision-codes.md b/docs/site/development/updating-codes/updating-subdivision-codes.md new file mode 100644 index 00000000000..0b8c1455939 --- /dev/null +++ b/docs/site/development/updating-codes/updating-subdivision-codes.md @@ -0,0 +1,129 @@ +--- +title: Updating Subdivision Codes +--- + +# Updating Subdivision Codes + +## Main Process + +1. Get the latest version of the iso subdivision xml file from https://www.iso.org/obp/ui/ (you'll need a password) and add it to a cldr\-private directory: + 1. Click on the XML button to download a zip, and unzip into folder **iso\_country\_code\_ALL\_xml** + 2. Open **iso\_country\_codes.xml** in that folder. Find the generated line, eg \ + 3. Add that date to the folder name, **2016\-12\-09\_iso\_country\_code\_ALL\_xml** + 4. Post that folder into [/cldr\-private/external/iso\_country\_codes](https://goto.google.com/isocountrycodes)/ if not already there. + 5. Copy the contents of the folder to {cldr\-private}/iso\_country\_codes/iso\_country\_codes.xml also (overriding current contents. + 6. Make sure that you have defined \-DCLDR\_PRIVATE\_DATA\="\/cldr\-private/" + 7. ~~Diff just to see what's new.~~ + 1. Actually, this step is too painful, because ISO doesn't have a canonical XML format. So elements of a table come in random order... Sometimes + 1. \AZ\-ORD\ + 2. \AZ\-SAD\ + 2. And sometimes the reverse! + 3. 
May add diffs generation to GenerateSubdivisions... + 8. Run GenerateSubdivisions; it will create a number of files. The important ones are: + 9. {generated}/subdivision/subdivisions.xml + 10. {generated}/subdivision/subdivisionAliases.txt + 11. {generated}/subdivision/en.xml + 12. Diff {generated}**subdivisions.xml** and {workspace}/cldr/common/supplemental/**subdivisions.xml** + 1. If they are not different (other than date/version/revision), skip to Step 4\. + 2. Copy the generated contents into the cldr file, and save. + 3. Make sure the added IDs make sense. + 4. Verify that we NEVER remove an ID. See [\#8735](http://unicode.org/cldr/trac/ticket/8735). + 1. An ID may be deprecated; in that case it should show up in **subdivisionAliases.txt** *if there is a good substitute.* + 2. We may need to add a 4\-letter code in case ISO messes up. + 3. In either of these cases, change GenerateSubdivisions.java to do the right thing. + 5. Save the Diffs, since they are useful for updating aliases. See example at end. + 13. Open up {workspace}/cldr/common/supplemental/**supplementalMetadata.xml** + 1. Search for \ + 2. Replace the line after that up to the line before \ with the contents of **subdivisionAliases.txt** + 3. Do a diff with the last release version. The new file should preserve the old aliases. + 1. *Note: there is a tool problem where some lines are duplicated. For now, check and fix them.* + 2. *If a line is duplicated, when you run the tests they will show as errors.* + 3. Make sure the changes make sense. + 4. ***IN PARTICULAR, make sure that NO former types (in*** ***uncommented*** ***lines) disappear! That is, restore any such lines before committing. Put them below the line:*** + - \ + 5. ***(Ideally the tool would do that, but we're not quite there.)*** + 14. Use the names to add more aliases. (See Fixing ?? below.) Check https://www.iso.org/obp/ui/#iso:code:3166:TW (replacing TW by the country code) to see notes there. +2. Put **en.xml** into {workspace}/cldr/common/subdivisions/ + 1. You'll overwrite the one there. The new one reuses all the old names where they exist. + 2. Do a diff with the last release. + 1. Make sure the added names (from ISO) are consistent. + 2. Verify that we NEVER remove an ID. (The deprecated ones move down, but don't disappear). +3. Run the [Update Validity XML](https://cldr.unicode.org/development/updating-codes/update-validity-xml) steps to produce a new {workspace}/cldr/common/validity/subdivision.xml + 1. Don't bother with the others, but diff and update that one. + 2. A code may move to deprecated, but it should never disappear. If you find that, then revisit \#4 (supplementalMetadata) above +4. Run the tests + 1. You may get some collisions in English. Those need to be fixed. + 2. Google various combinations like \[country code \ \] to find articles like [ISO\_3166\-2:UG](https://en.wikipedia.org/wiki/ISO_3166-2:UG), then make a fix. + 3. Often a sub\-subdivision has the same name as a subdivision. When that is the case, add a qualifier to the lesser known one, like "City" or "District". + 4. Sometimes a name will change in ISO to correct a mistake, which can cause a collision. +5. Fix the ?? in supplemental data (where possible; see below) + +## Fixing ?? + +1. If there are no known new subdivisions that the old ones should map to, you'll see commented\-out lines in **supplementalMetadata** like: + - \ \ ?? \-\-\> +2. For as many of these as possible, see if there is a mapping to one or more new subdivisions.
That is, where possible, track down the best code(s) to map all of these to, and uncomment the line, and move BELOW \ + - Note that for the name comment, change \ + +**\** + +\ + +**\** + +\ + +\ + +\ + +... + +### New data + +\ + +\ + +\ + +**\** + +\ + +**\** + +\ + +... + +### Exact matches + +From this, we can see that items have been renamed. Easiest to add the type values and contains values to a [spreadsheet](https://docs.google.com/spreadsheets/d/1i3YAhD9ADP6d4j6p4s3lY0psNdlOuknBr4ZrX1mihCw/edit) (use regex to extract), marking with old/new. Then sort, and pick out the ones that match. + +| Source | | old | new | contents | Mechanical | +|---|---|---|---|---|---| +| \ | FR | "H" | | "2A 2B" | \ | +| \ | FR | | "COR" | "2A 2B" | | + +### Partial Matches + +Rearrange the leftovers to see if there is any OLD \=\> NEW1\+NEW2\... cases or OLD1 \= NEW, OLD2\=NEW cases. For example, for FR we get Q\=\>NOR and P\=\>NOR. Remember that these are "best fit", so there may be small discrepancies. + +| Source | | old | new | contents | Mechanical | | +|---|---|---|---|---|---|---| +| Source | | old | new | contents | Mechanical | Fixed ?? cases | +| \ | FR | "Q" | | "27 76" | \ | \ | +| \ | FR | "P" | | "14 50 61" | \ | \ | +| \ | FR | | "NOR" | "14 27 50 61 76" | | | + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/updating-subdivision-translations.md b/docs/site/development/updating-codes/updating-subdivision-translations.md new file mode 100644 index 00000000000..96cc8fdf813 --- /dev/null +++ b/docs/site/development/updating-codes/updating-subdivision-translations.md @@ -0,0 +1,26 @@ +--- +title: Updating Subdivision Translations +--- + +# Updating Subdivision Translations + +1. Make sure that that the subdivisions are updated first as per [Updating Subdivision Codes](https://cldr.unicode.org/development/updating-codes/updating-subdivision-codes) +2. Make sure you have completed [Maven Setup](https://cldr.unicode.org/development/maven) +3. Run tool WikiSubdivisionLanguages +4. ~~mvn \-DCLDR\_DIR\=**\_\_\_\_\_\_\_\_/cldr**\-Dexec.mainClass\=org.unicode.cldr.tool.GenerateLanguageContainment exec:java \-pl cldr\-rdf~~ + 1. STEVEN LOOMIS 2022\-0829 \- this does not make sense here. +5. Sanity check result, run tests. + +### NOTES +1. Should only add values, never change what is there beforehand. + 1. Currently excludes items: + 1. That fail exemplar check (broad test, allows any letters in script). + 2. Many of these are reparable, but need manual work. + 2. Currently renames items that collide *within country*. + 1. Uses superscript 2, 3 for alternates. More than 3 alternates, it excludes since there is probably a more serious problem. + 3. Needs a couple more locales: zh\_Hant, de\_CH, fil not working yet. + 4. The Language List is in the query file **{workspace}cldr/tools/cldr\-rdf/src/main/resources/org/unicode/cldr/rdf/sparql/wikidata\-wikisubdivisionLanguages.sparql** +2. Check in + 1. 
Make sure you also check in **{workspace}/cldr/tools/cldr\-rdf/external/\*.tsv** (intermediate tables, for tracking) + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/updating-un-codes.md b/docs/site/development/updating-codes/updating-un-codes.md new file mode 100644 index 00000000000..a2aec71a012 --- /dev/null +++ b/docs/site/development/updating-codes/updating-un-codes.md @@ -0,0 +1,30 @@ +--- +title: Updating UN Codes +--- + +# Updating UN Codes + +1. UN M49 + 1. Open https://unstats.un.org/unsd/methodology/m49/overview/ + 2. Hit the Copy button, to copy all the data to the clipboard + 3. Open ...workspace/cldr/tools/java/org/unicode/cldr/util/data/external/UnCodes.txt + 4. Hit paste. You should see tab\-separated fields + 5. Save +2. Note: "git diff \-\-word\-diff" is helpful for finding that, for example, only a column was added. + +### EU +1. Go to  [https://europa.eu/european\-union/about\-eu/countries\_en](https://european-union.europa.eu/principles-countries-history/eu-countries_en) +2. **Note: The instructions below don't work. Manually update tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/external/EuCode.txt** +3. ~~(Old instructions:  do the same with https://europa.eu/european\-union/about\-eu/countries/member\-countries\_en, into util/data/external/eu\_member\_states\_raw.txt  BROKEN LINK )~~ +4. ~~Find the section "The XX member countries of the EU: (may be a link at the bottom or sidebar)~~ +5. ~~Copy and paste into ...workspace/cldr/tools/java/org/unicode/cldr/util/data/external/EuCodes.txt~~ +6. ~~Compare with last revision; if there are differences, update containment.~~  + 1. ~~If there are no real differences, do not bother updating EuCodes.txt~~ + 2. ~~Note: "git diff \-\-word\-diff" is helpful for finding that, for example, only whitespace changed.~~ + 3. ~~Record the latest version that's been synced as a meta\-data//This is new (Aug 2020\)!~~ + 4. ~~Q: Not sure how or where to do this?~~ + +### Run TestUnContainment +1.
```mvn -Dorg.unicode.cldr.unittest.testArgs='-n -q -filter:TestUnContainment' --file=tools/pom.xml -pl cldr-code test -Dtest=TestShim``` + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file From 00115907cc1ee95a86bf8c6c8c3d57a1301f01a1 Mon Sep 17 00:00:00 2001 From: Chris Pyle <118906070+chpy04@users.noreply.github.com> Date: Tue, 3 Sep 2024 11:43:04 -0400 Subject: [PATCH 07/52] CLDR-17566 Converting Updating Codes P1 (#4005) --- .../external-version-metadata.md | 29 +++++++ .../likelysubtags-and-default-content.md | 24 ++++++ .../updating-codes/update-currency-codes.md | 62 ++++++++++++++ .../update-language-script-info.md | 41 ++++++++++ .../language-script-description.md | 23 ++++++ .../update-languagescriptregion-subtags.md | 82 +++++++++++++++++++ .../update-time-zone-data-for-zoneparser.md | 35 ++++++++ 7 files changed, 296 insertions(+) create mode 100644 docs/site/development/updating-codes/external-version-metadata.md create mode 100644 docs/site/development/updating-codes/likelysubtags-and-default-content.md create mode 100644 docs/site/development/updating-codes/update-currency-codes.md create mode 100644 docs/site/development/updating-codes/update-language-script-info.md create mode 100644 docs/site/development/updating-codes/update-language-script-info/language-script-description.md create mode 100644 docs/site/development/updating-codes/update-languagescriptregion-subtags.md create mode 100644 docs/site/development/updating-codes/update-time-zone-data-for-zoneparser.md diff --git a/docs/site/development/updating-codes/external-version-metadata.md b/docs/site/development/updating-codes/external-version-metadata.md new file mode 100644 index 00000000000..b0ab87557d9 --- /dev/null +++ b/docs/site/development/updating-codes/external-version-metadata.md @@ -0,0 +1,29 @@ +--- +title: Updating External Version Metadata +--- + +# Updating External Version Metadata + +## Updating Metadata + +[CLDR\-15005](https://unicode-org.atlassian.net/browse/CLDR-15005) is for updating the process for external metadata versions. 
The following table is out of date with [common/properties/external\_data\_versions.tsv](https://github.com/unicode-org/cldr/blob/main/common/properties/external_data_versions.tsv) + +### TODO: Need to add instructions for updating external metadata + +~~The following tells how to get the version info for imported data used in a CLDR release.~~ + +| Data | File | Version Info | Date | +|---|---|---|---| +| UN literacy data | [un_literacy.csv](https://github.com/unicode-org/cldr/blob/master/tools/java/org/unicode/cldr/util/data/external/un_literacy.csv) | Date at top | 2012-08 | +| Worldbank data | [world_bank_data.csv](https://github.com/unicode-org/cldr/blob/master/tools/java/org/unicode/cldr/util/data/external/world_bank_data.csv) | Date at bottom | 2020-12-16 | +| Factbook data | [factbook_population.txt](https://github.com/unicode-org/cldr/blob/master/tools/java/org/unicode/cldr/util/data/external/factbook_population.txt) | record when downloaded in TBD | | +| ISO 636 (language) data | [iso-639-3-version.tab](https://github.com/unicode-org/cldr/blob/master/tools/java/org/unicode/cldr/util/data/iso-639-3-version.tab) | Date in YYYYMMDD format | 2021-02-02 | +| ISO subdivision codes | iso subdivision codes | record when downloaded in TBD | | +| ISO subdivision names | iso subdivision names | record when downloaded in TBD | | +| ISO currency data | iso currency data | record when downloaded in TBD | | +| Timezone IDs (tzdb) | timezones (tz) | Release date on [IANA time zone DB](https://www.iana.org/time-zones) | 2021-01-24 (2021a) | +| Top level domains | [tlds-alpha-by-domain.txt](https://github.com/unicode-org/cldr/blob/master/tools/java/org/unicode/cldr/util/data/tlds-alpha-by-domain.txt) | Date at top | 2021-02-17 | +| Language Groups | TBD | Record when downloaded in TBD | | +| UN / EU Codes | TBD | Record when downloaded in TBD | | + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/likelysubtags-and-default-content.md b/docs/site/development/updating-codes/likelysubtags-and-default-content.md new file mode 100644 index 00000000000..16da0776523 --- /dev/null +++ b/docs/site/development/updating-codes/likelysubtags-and-default-content.md @@ -0,0 +1,24 @@ +--- +title: LikelySubtags and Default Content +--- + +# LikelySubtags and Default Content + +1. First make sure that you do [Update Language/Script/Region Subtags](https://cldr.unicode.org/development/updating-codes/update-languagescriptregion-subtags) first +2. Run GenerateMaximalLocales with VM argument ```-DCLDR_DIR``` set to your cldr directory to generate the likely subtag data **AND** the default content locales. + 1. If you are trying to debug, add the VM argument ```-DGenerateMaximalLocalesDebug``` +3. Input data: + 1. Data comes from territory/language information in supplemental data. + 1. However, it is supplemented by **LANGUAGE\_OVERRIDES** in GenerateMaximalLocales.java + 1. If there is no territory/language information in supplemental data for a language, add it to **LANGUAGE\_OVERRIDES**. + 2. If the mapping changes when it shouldn't (there are some special cases), add to **LANGUAGE\_OVERRIDES.** +4. Output: + 1. Creates {CLDR\_DIR}/../Generated/cldr/supplemental/likelySubtags.xml and {CLDR\_DIR}/../Generated/cldr/supplemental/supplementalMetadata.xml + 2. Diff with {CLDR\_DIR}/common/supplemental/likelySubtags.xml and {CLDR\_DIR}/common/supplemental/supplementalMetadata.xml + 3. 
Be very careful to diff everything and check for errors. + 1. Watch especially for backwards incompatible changes; that is, changes rather than just additions. + 2. Look at the above to handle that with **LANGUAGE\_OVERRIDES.** + 4. Run tests, fix input data, and iterate as necessary. + 1. Copy into the svn workspace and commit. + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/update-currency-codes.md b/docs/site/development/updating-codes/update-currency-codes.md new file mode 100644 index 00000000000..00113ec744e --- /dev/null +++ b/docs/site/development/updating-codes/update-currency-codes.md @@ -0,0 +1,62 @@ +--- +title: Update Currency Codes +--- + +# Update Currency Codes + +- Go to https://www.six-group.com/en/products-services/financial-information/data-standards.html#scrollTo=currency-codes +- Take the link for "Current Currency and Funds": ["List one (XML)"](https://www.six-group.com/dam/download/financial-information/data-center/iso-currrency/amendments/lists/list_one.xml) +- Save the page as {cldr}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/dl\_iso\_table\_a1\.xml +- ```curl 'https://www.six-group.com/dam/download/financial-information/data-center/iso-currrency/lists/list_one.xml' > tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/dl_iso_table_a1.xml``` +- Take the link for "Historic denominations": "[List three (XML)](https://www.six-group.com/dam/download/financial-information/data-center/iso-currrency/amendments/lists/list_three.xml)" +- Save the page as {cldr}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/dl\_iso\_table\_a3\.xml +- ```curl 'https://www.six-group.com/dam/download/financial-information/data-center/iso-currrency/lists/list_three.xml' > tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/dl_iso_table_a3.xml``` +- **Use git diff to sanity check the two XML files against the old, and check them in.** + - **"git diff \-w" is helpful to ignore whitespace. If there are only whitespace changes, there's no need to check them in.** +- **Check the** [**ISO amendments**](https://www.six-group.com/en/products-services/financial-information/data-standards.html#scrollTo=amendments) **to get changes that will happen during the current cycle.** + - Example: https://www.six-group.com/dam/download/financial-information/data-center/iso-currrency/amendments/dl_currency_iso_amendment_170.pdf + - It appears right now like there is no good way to collect all the amendments that are applicable, except to change "170" in the above link by incrementing until error \#404 results. So: + - *Review all amendments that are dated after the previous update , and patch the XML files and the* ```supplementalData.xml``` *as below.* + - *Record the last number viewed in the URL above.* + - *(There is a "download all amendments" link now that has a spreadsheet summary.)* + - **Record the version: See** [**Updating External Metadata**](https://cldr.unicode.org/development/updating-codes/external-version-metadata) + - If there are no diffs in the two iso tables, and no relevant changes in the amendments, you are done. + - Run ```CountItems -Dmethod=generateCurrencyItems``` to generate the new currency list. + - If any currency is missing from ISO4217\.txt, the program will throw an exception and will print a list of items at the end that need to be added to the ISO4217\.txt file. Add as described below. 
+ - Once the necessary codes are added to ISO4217\.txt, repeat the CountItems \-Dmethod\=generateCurrencyItems until it runs cleanly. + - If any country changes the use of a currency, verify that there is a corresponding entry in SupplementalData + - Since ISO doesn't publish the exact date change (usually just a month), you may need to do some additional research to see if you can determine the exact date when a new currency becomes active, or when an old currency becomes inactive. If you can't find the exact date, use the last day of the month ISO publishes for an old currency expiring. + - For new stuff, see below. + - Adding a currency: + - Make sure the new code exists in common/bcp47/currency.xml. The currency code should be in lower case, and make sure the "since" release corresponds to the next release of CLDR that will publish using this data. + - In SupplementalData: + - If it has unusual rounding or number of digits, add to: + - \ + - \ + - ... + - For each country in which it comes into use, add a line for when it becomes valid + - \ + - \ + - Add the code to the file java/org/unicode/cldr/util/data/ISO4217\.txt. This is important, since it is used to get the valid codes for the survey tool. + - Example: + - currency \| TRY \| new Turkish Lira \| TR \| TURKEY \| C + - Mark the old code in java/org/unicode/cldr/util/data/ISO4217\.txt as deprecated. + - currency \| TRL \| Old Turkish Lira \| TR \| TURKEY \| O + - Changing currency. + - If the currency goes out of use in a country, then add the last day of use, such as: + - \ + - \ + - \=\> + - \ + - \ + - Edit common/main/en.xml to add the new names (or change old ones) based on the descriptions. + - If there is a collision between a new and old name, the old one typically changes to the currency name with the date range + - "currency\_name (1983\-2003\)". + - Check in your changes + - common/bcp47/currency.xml + - tools/java/org/unicode/cldr/util/data/ISO4217\.txt + - common/main/en.xml + - common/supplemental/supplementalData.xml +- ***Note: We no longer maintain the list of currency in supplementalMetadata.xml (***[***\#4298***](http://unicode.org/cldr/trac/ticket/4298)***). The list is currently maintained by bcp47/currency.xml. We need to move the code used for checking list of ISO currency (and its numeric code mapping) currently in ICU tools repository (http://source.icu-project.org/repos/icu/tools/trunk/currency/).*** + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/update-language-script-info.md b/docs/site/development/updating-codes/update-language-script-info.md new file mode 100644 index 00000000000..fd663ee9a73 --- /dev/null +++ b/docs/site/development/updating-codes/update-language-script-info.md @@ -0,0 +1,41 @@ +--- +title: Update Language Script Info +--- + +# Update Language Script Info + +### Main + +1. https://github.com/unicode-org/cldr/tree/main/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data has files with this form: + 1. **country\_language\_population.tsv** + 2. **language\_script.tsv** + 3. For a descriptions of the contents, see [Language Script Guidelines](https://cldr.unicode.org/development/updating-codes/update-language-script-info/language-script-description) + 1. Do not edit the above files with a plain text editor; they are tab\-delimited UTF\-8 with many fields and should be imported/edited with a spreadsheet editor. Excel or Google sheets should also work fine. +2. 
The world bank, un, and factbook data should be updated as per [Updating Population, GDP, Literacy](https://cldr.unicode.org/development/updating-codes/updating-population-gdp-literacy) +3. Note that there is an auxiliary file **util/data/external/other\_country\_data.txt**, which contains data that supplements the others. If there are errors below because the country population is less than the language population, then that file may need updating. + 1. Run the tool **ConvertLanguageData**. + 1. \-DADD\_POP\=**true**; for error messages. + 1. If there are any different country names, you'll get an error:  edit external/alternate\_country\_names.txt to add them. + 2. Look for failures in the language vs script data, following the line: + - Problems in **language\_script.tsv** + 3. Look for Territory Language data, following the line: + - **Possible Failures ...** + - In Basic Data but not Population \> 20% + - and the reverse. + 4. Look for general problems, following the line: + - **Failures in Output.** + - It will also warn if a country doesn't have an official or de facto official language. + 5. Work until resolved. + 2. *The tool updates in place* **{cldrdata}/common/supplemental/supplementalData.xml** + 3. Carefully diff + 4. Then run QuickCheck to verify that the DTD is in order, and commit. + +### Update the supplementalData.xml \ + +1. For UN M.49 codes, see [Updating UN Codes](https://cldr.unicode.org/development/updating-codes/updating-un-codes) +2. For the UN, go to https://www.un.org/en/member-states/index.html. Copy the table, and paste into util/data/external/un\_member\_states\_raw.txt. Diff with old. **BROKEN LINK** +3. For the EU, see instructions on [Updating UN Codes](https://cldr.unicode.org/development/updating-codes/updating-un-codes) +4. For the EZ, do the same with , into util/data/external/ez\_member\_states\_raw.txt  **BROKEN LINK** + 1. If there are changes, update \ + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/update-language-script-info/language-script-description.md b/docs/site/development/updating-codes/update-language-script-info/language-script-description.md new file mode 100644 index 00000000000..777ff0abc60 --- /dev/null +++ b/docs/site/development/updating-codes/update-language-script-info/language-script-description.md @@ -0,0 +1,23 @@ +--- +title: Language Script Description +--- + +# Language Script Description + +The language\_script spreadsheet should list all of the language / script combinations that are in common modern use. The countries are not important, since their function has been overtaken by the country\_language\_population spreadsheet. + +1. If the language and script are both modern, and the script is a major way to write the language in some country, then we should see that line marked as **primary**. +2. Otherwise it should be marked **secondary**. + +Every language that is in official use in any country according to country\_language\_population  should have at least one primary script in the language\_script spreadsheet. + +If a language has multiple primary scripts, then it should not appear without the script tag in the country\_language\_population.tsv. For example, we should not see "az", but rather "az\_Cyrl", "az\_Latn", and so on. For each country where the language is used, we should see figures on the script\-specific values. The values may overlap, that is, we may see az\_Cyrl at 60% and az\_Latn at 55%. 
However, the combination with the predominantly used script **must** have a larger figure than the others. + +This is also reflected in CLDR main: languages with multiple scripts will have that reflected in their structure (eg sr\-Cyrl\-RS), with aliases for the language\-region combinations. + +Files in https://github.com/unicode-org/cldr/tree/main/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data + +1. country\_language\_population.tsv +2. language\_script.tsv + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/update-languagescriptregion-subtags.md b/docs/site/development/updating-codes/update-languagescriptregion-subtags.md new file mode 100644 index 00000000000..51b35bd05e7 --- /dev/null +++ b/docs/site/development/updating-codes/update-languagescriptregion-subtags.md @@ -0,0 +1,82 @@ +--- +title: Update Language/Script/Region Subtags +--- + +# Update Language/Script/Region Subtags + +### Updated 2021\-02\-17 by Yoshito Umaoka + +### This updates language codes, script codes, and territory codes. + +- First get the latest ISO 639\-3 from https://iso639-3.sil.org/code_tables/download_tables + - Download the zip file containing the UTF\-8 tables, it will have a name like iso\-639\-3\_Code\_Tables\_20210202\.zip + - Unpack the zip file and update files below with the latest version: + - {CLDR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/iso\-639\-3\.tab + - {CLDR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/iso\-639\-3\_Name\_Index.tab + - {CLDR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/iso\-639\-3\-macrolanguages.tab + - {CLDR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/iso\-639\-3\_Retirements.tab + - Take the **latest** version number of the zip files (e.g. iso\-639\-3\_Code\_Tables\_**20210202**.zip), and paste into + - {CLDR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/iso\-639\-3\-version.tab +- Go to http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry + - (you can set up a watch for changes in this page with http://www.watchthatpage.com ) + - Save as {CLDR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/language\-subtag\-registry +- Go to http://data.iana.org/TLD/ + - Right\-click on [tlds\-alpha\-by\-domain.txt](http://data.iana.org/TLD/tlds-alpha-by-domain.txt) save as + - {{CLDR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util//data/[tlds\-alpha\-by\-domain.txt](http://data.iana.org/TLD/tlds-alpha-by-domain.txt) +- If using Eclipse, refresh the files +- Diff each with the old copy to check for consistency + - Certain of the steps below require that you note certain differences. +- Check if there is a new macrolanguage (marked with M in the second column of the iso\-639\-3\.tab file). (Should automate this, but there typically aren't that many new/changed entries). 
+- **Update tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/external/iso\_3166\_status.txt** + - Go to https://www.iso.org/obp/ui/#iso:pub:PUB500001:en + - Click **Full List of Country Codes** + - Run the tool **CompareIso3166\_1Status** + - Click on the "Officially Assigned" code type and also the "Other Codes" code type + - Compare total counts with the tool output: for example, "*formerly\_used \|\| 22*" should coincide with 22 Formerly Used codes + - If something is wrong, you'll have to scroll through the code list and/or dig around for the updates. +- Check whether ISO has done something destabilizing with codes; if so, you need to handle it specially. +- **Record the version: See [Updating External Metadata](https://cldr.unicode.org/development/updating-codes/external-version-metadata)** +- Do validity checks and regenerate: for details see [Validity](https://cldr.unicode.org/development/updating-codes/update-validity-xml) + - You'll have to do this again in [Updating Subdivision Codes](https://cldr.unicode.org/development/updating-codes/updating-subdivision-codes). +- Edit common/main/en.xml to add any new names, based on the Descriptions in the registry file. + - *You only need to add new languages and scripts that we add to supplementalMetaData.* + - But you need all territories. + - Any new macrolanguages need a language alias. + - Diff for a sanity check. +- If a code becomes deprecated, then add it to supplementalMetadata under \ + - If there is a single replacement, add it. + - Territories can have multiple replacements. Put them in population order. +- There are a few territories that don't yet have a top\-level domain (TLD) assigned, such as "BQ" or "SS". + - If there are new ones added in tlds\-alpha\-by\-domain.txt for a territory already in CLDR, update {cldrdata}\\tools\\java\\org\\unicode\\cldr\\util\\data\\territory\_codes.txt with the new TLD (usually the same as the country code). +- For new territories (regions) **// TODO: automate this more** + - Add to the territoryContainment in supplementalData.xml + - The data for that is at the UN site: + - With data from the EU at + - Add to territory\_codes.txt + - Use the UN mapping above for the 3\-letter and 3\-number codes. + - FIPS is a withdrawn standard as of 2008, so any new territories won't have a FIPS10 code. + - Look at tlds\-alpha\-by\-domain.txt to see if the new territory has a TLD assigned yet. + - Rerun CountItems above. + - Add metazone mappings as needed. (Usually John \- requires research) + - Add the country/lang/population data (Usually Rick \- requires research) + - Add the currency data (Usually John \- requires research) + - ~~Update util/data/territory\_codes.txt~~ + - ~~This step will be different once the data is moved into SupplementalData.xml~~ + - ~~Todo: fix GenerateEnums around Utility.getUTF8Data("territory\_codes.txt");~~ +- Then run GenerateEnums.java, and make sure it completes with no exceptions. Fix any problems it reports. + - Missing alpha3 for: xx, or "In RFC 4646 but not in CLDR: \[EA, EZ, IC, UN]" + - Ignore if it is {EA, EZ, IC, UN}. Otherwise it means you needed to do "For new territories" above. +- Collision with: xx + - Ignore if it is {{MM, BU, 104}, {TP, TL, 626}, {YU, CS, 891}, {ZR, CD, 180}} +- Not in World but in CLDR: \[002, 003, 005, 009, 011, 013, 014, 015, 017\...
Ignore 3\-digit codes + - (should have exception lists in the tool for the Ignores above) +- Run **ConsoleCheckCLDR \-f en \-z FINAL\_TESTING \-e** + - If you missed any codes, you will get an error message: "Unexpected Attribute Value" +- Run all the unit tests. + - If you get a failure in LikelySubtagsTest because of a new region, you can hack around it with something like: + - \ + - \ + - You may also have to fix the coverageLevels.txt file for an error like: + - Error: (TestCoverageLevel.java:604\) Comprehensive \& no exception for path \=\> //ldml/localeDisplayNames/territories/territory\[@type\="202"] + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file diff --git a/docs/site/development/updating-codes/update-time-zone-data-for-zoneparser.md b/docs/site/development/updating-codes/update-time-zone-data-for-zoneparser.md new file mode 100644 index 00000000000..f1e7c6381c2 --- /dev/null +++ b/docs/site/development/updating-codes/update-time-zone-data-for-zoneparser.md @@ -0,0 +1,35 @@ +--- +title: Update Time Zone Data for ZoneParser +--- + +# Update Time Zone Data for ZoneParser + +Note: This is usually done as part of the full time zone data update process. + +1. Download the latest release from the IANA Time Zone Database page: https://www.iana.org/time\-zones + - There are 3 links available for the latest version. Select the complete distribution tzdb\-\.tar.lz (e.g. tzdb\-2021a.tar.lz). + - Extract the entire contents to a work directory. + - **Note**: The data\-only distribution contains the minimum set of files you really need. However, you cannot use the convenient make target without the code; the complete distribution package contains the code. +2. Run the make target rearguard\_tarballs\_version + - This target creates a "rearguard" version of the zoneinfo files under the directory tzdataunknown\-rearguard.dir. + - **Note**: If you specify a version (e.g. VERSION\=2021a\) when invoking the target, "unknown" will be replaced with the specified version (e.g. tzdata2021a\-rearguard.dir), but that is not important for these instructions. + - A standard zoneinfo file may use negative daylight saving time offsets. CLDR code currently cannot handle negative daylight saving time offsets. The "rearguard" version is designed for tools without negative daylight saving time support. +3. Copy the files generated by the previous step to {CLDR\_DIR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data + - Below is the list of files to include: + - africa + - antarctica + - asia + - australasia + - backward + - etcetera + - europe + - leapseconds + - northamerica + - southamerica + - zone.tab + - **Note**: leapseconds might be removed from the list later. +4. Edit the file {CLDR\_DIR}/tools/cldr\-code/src/main/resources/org/unicode/cldr/util/data/tzdb\-version.txt + - This file contains just one line of text specifying the version of the Time Zone Database, e.g. 2021a. +5.
**Record the version: See** [**Updating External Metadata**](https://cldr.unicode.org/development/updating-codes/external-version-metadata) + +![Unicode copyright](https://www.unicode.org/img/hb_notice.gif) \ No newline at end of file From b412baef198117afd0ac40999745c46ba840d65d Mon Sep 17 00:00:00 2001 From: Tom Bishop Date: Wed, 4 Sep 2024 12:55:29 -0400 Subject: [PATCH 08/52] CLDR-14913 Add Download XML button to Survey Tool (#4010) --- tools/cldr-apps/js/src/esm/cldrAnnounce.mjs | 19 +- .../cldr-apps/js/src/esm/cldrGenerateVxml.mjs | 88 +++++ tools/cldr-apps/js/src/esm/cldrText.mjs | 1 + tools/cldr-apps/js/src/esm/cldrVueMap.mjs | 4 +- tools/cldr-apps/js/src/views/GenerateVxml.vue | 75 +++++ tools/cldr-apps/js/src/views/MainMenu.vue | 1 + .../java/org/unicode/cldr/web/DBUtils.java | 7 +- .../unicode/cldr/web/OutputFileManager.java | 245 +++++--------- .../java/org/unicode/cldr/web/STFactory.java | 11 +- .../unicode/cldr/web/VettingViewerQueue.java | 3 +- .../org/unicode/cldr/web/VxmlGenerator.java | 70 ++++ .../java/org/unicode/cldr/web/VxmlQueue.java | 305 ++++++++++++++++++ .../java/org/unicode/cldr/web/XPathTable.java | 5 +- .../unicode/cldr/web/api/GenerateVxml.java | 111 +++++++ 14 files changed, 765 insertions(+), 180 deletions(-) create mode 100644 tools/cldr-apps/js/src/esm/cldrGenerateVxml.mjs create mode 100644 tools/cldr-apps/js/src/views/GenerateVxml.vue create mode 100644 tools/cldr-apps/src/main/java/org/unicode/cldr/web/VxmlGenerator.java create mode 100644 tools/cldr-apps/src/main/java/org/unicode/cldr/web/VxmlQueue.java create mode 100644 tools/cldr-apps/src/main/java/org/unicode/cldr/web/api/GenerateVxml.java diff --git a/tools/cldr-apps/js/src/esm/cldrAnnounce.mjs b/tools/cldr-apps/js/src/esm/cldrAnnounce.mjs index 056611e93aa..52bc0fda4cc 100644 --- a/tools/cldr-apps/js/src/esm/cldrAnnounce.mjs +++ b/tools/cldr-apps/js/src/esm/cldrAnnounce.mjs @@ -6,12 +6,6 @@ import * as cldrAjax from "./cldrAjax.mjs"; import * as cldrSchedule from "./cldrSchedule.mjs"; import * as cldrStatus from "./cldrStatus.mjs"; -/** - * This should be false for production. It can be made true during debugging, which - * may be useful for performance testing. - */ -const DISABLE_ANNOUNCEMENTS = false; - const CLDR_ANNOUNCE_DEBUG = false; const ANNOUNCE_REFRESH_SECONDS = 60; // one minute @@ -38,6 +32,16 @@ const MOST_RECENT_ID_UNKNOWN = -1; // must be less than zero */ let alreadyGotId = MOST_RECENT_ID_UNKNOWN; +/** + * Ordinarily announcements are enabled. They may be temporarily disabled during + * critical operations such as VXML generation, or for debugging. 
+ */ +let announcementsEnabled = true; + +function enableAnnouncements(enable) { + announcementsEnabled = Boolean(enable); +} + /** * Get the number of unread announcements, to display in the main menu * @@ -60,7 +64,7 @@ async function getUnreadCount(setUnreadCount) { * we're only getting the number of unread announcements to display in the main header */ async function refresh(viewCallbackSetData, viewCallbackSetCounts) { - if (DISABLE_ANNOUNCEMENTS) { + if (!announcementsEnabled) { return; } if (viewCallbackSetData) { @@ -185,6 +189,7 @@ export { canAnnounce, canChooseAllOrgs, compose, + enableAnnouncements, getUnreadCount, refresh, resetSchedule, diff --git a/tools/cldr-apps/js/src/esm/cldrGenerateVxml.mjs b/tools/cldr-apps/js/src/esm/cldrGenerateVxml.mjs new file mode 100644 index 00000000000..24f20c3b37a --- /dev/null +++ b/tools/cldr-apps/js/src/esm/cldrGenerateVxml.mjs @@ -0,0 +1,88 @@ +/* + * cldrGenerateVxml: for Survey Tool feature "Generate VXML". The display logic is in GenerateVxml.vue. + */ +import * as cldrAjax from "./cldrAjax.mjs"; +import * as cldrAnnounce from "./cldrAnnounce.mjs"; +import * as cldrNotify from "./cldrNotify.mjs"; +import * as cldrStatus from "./cldrStatus.mjs"; + +const SECONDS_IN_MS = 1000; + +const NORMAL_RETRY = 10 * SECONDS_IN_MS; // "Normal" retry: starting or about to start + +const VXML_URL = "api/vxml"; + +// These must match the back end; used in requests +class LoadingPolicy { + static START = "START"; // start generating vxml + static CONTINUE = "CONTINUE"; // continue generating vxml + static STOP = "STOP"; // stop (cancel) generating vxml +} + +// These must match the back end; used in responses +class Status { + static INIT = "INIT"; // before making a request (back end does not have INIT) + static WAITING = "WAITING"; // waiting on other users/tasks + static PROCESSING = "PROCESSING"; // in progress + static READY = "READY"; // finished successfully + static STOPPED = "STOPPED"; // due to error or cancellation (LoadingPolicy.STOP) +} + +let canGenerate = false; + +let callbackToSetData = null; + +function canGenerateVxml() { + return canGenerate; +} + +function viewMounted(setData) { + callbackToSetData = setData; + const perm = cldrStatus.getPermissions(); + canGenerate = Boolean(perm?.userIsAdmin); +} + +function start() { + // Disable announcements during VXML generation to reduce risk of interference + cldrAnnounce.enableAnnouncements(false); + requestVxml(LoadingPolicy.START); +} + +function fetchStatus() { + if (!canGenerate || "generate_vxml" !== cldrStatus.getCurrentSpecial()) { + canGenerate = false; + } else if (canGenerate) { + requestVxml(LoadingPolicy.CONTINUE); + } +} + +function stop() { + requestVxml(LoadingPolicy.STOP); +} + +function requestVxml(loadingPolicy) { + const args = { loadingPolicy: loadingPolicy }; + const init = cldrAjax.makePostData(args); + cldrAjax + .doFetch(VXML_URL, init) + .then(cldrAjax.handleFetchErrors) + .then((r) => r.json()) + .then(setVxmlData) + .catch((e) => { + cldrNotify.exception(e, "generating VXML"); + }); +} + +function setVxmlData(data) { + if (!callbackToSetData) { + return; + } + callbackToSetData(data); + if (data.status === Status.WAITING || data.status === Status.PROCESSING) { + window.setTimeout(fetchStatus.bind(this), NORMAL_RETRY); + } else if (data.status === Status.READY || data.status === Status.STOPPED) { + cldrAnnounce.enableAnnouncements(true); // restore + } +} + +export { Status, canGenerateVxml, start, stop, viewMounted }; diff --git 
a/tools/cldr-apps/js/src/esm/cldrText.mjs b/tools/cldr-apps/js/src/esm/cldrText.mjs index 08fdf0535ef..d3991685e86 100644 --- a/tools/cldr-apps/js/src/esm/cldrText.mjs +++ b/tools/cldr-apps/js/src/esm/cldrText.mjs @@ -497,6 +497,7 @@ const strings = { special_forum: "Forum Posts", special_forum_participation: "Forum Participation", special_general: "General Info", + special_generate_vxml: "Generate VXML", special_list_emails: "List Email Addresses", special_list_users: "List Users", special_locales: "Locale List", diff --git a/tools/cldr-apps/js/src/esm/cldrVueMap.mjs b/tools/cldr-apps/js/src/esm/cldrVueMap.mjs index 875d7e8c051..7f396abdc74 100644 --- a/tools/cldr-apps/js/src/esm/cldrVueMap.mjs +++ b/tools/cldr-apps/js/src/esm/cldrVueMap.mjs @@ -1,11 +1,10 @@ -import * as cldrLoad from "./cldrLoad.mjs"; - import AboutPanel from "../views/AboutPanel.vue"; import AnnouncePanel from "../views/AnnouncePanel.vue"; import AddUser from "../views/AddUser.vue"; import AutoImport from "../views/AutoImport.vue"; import DowngradedVotes from "../views/DowngradedVotes.vue"; import GeneralInfo from "../views/GeneralInfo.vue"; +import GenerateVxml from "../views/GenerateVxml.vue"; import LockAccount from "../views/LockAccount.vue"; import LookUp from "../views/LookUp.vue"; import MainMenu from "../views/MainMenu.vue"; @@ -27,6 +26,7 @@ const specialToComponentMap = { auto_import: AutoImport, downgraded: DowngradedVotes, general: GeneralInfo, // see cldrLoad.GENERAL_SPECIAL + generate_vxml: GenerateVxml, lock_account: LockAccount, lookup: LookUp, menu: MainMenu, diff --git a/tools/cldr-apps/js/src/views/GenerateVxml.vue b/tools/cldr-apps/js/src/views/GenerateVxml.vue new file mode 100644 index 00000000000..9f0c060e4e0 --- /dev/null +++ b/tools/cldr-apps/js/src/views/GenerateVxml.vue @@ -0,0 +1,75 @@ + + + + + diff --git a/tools/cldr-apps/js/src/views/MainMenu.vue b/tools/cldr-apps/js/src/views/MainMenu.vue index dc51b6d3b51..1acbb88629a 100644 --- a/tools/cldr-apps/js/src/views/MainMenu.vue +++ b/tools/cldr-apps/js/src/views/MainMenu.vue @@ -2,6 +2,7 @@