Skip to content

Commit

Permalink
CLDR-17884 Update CIA Factbook
Browse files (browse the repository at this point in the history)
CLDR-17884 Check alternate country names without parentheses too

CLDR-17884 Regex match

add country note

CLDR-17884 Remove world bank aggregates
  • Loading branch information
conradarcturus committed Aug 15, 2024
1 parent 936525b commit 78146cb
Show file tree
Hide file tree
Showing 8 changed files with 876 additions and 884 deletions.
750 changes: 380 additions & 370 deletions common/supplemental/supplementalData.xml

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,13 @@ static ArrayList<Pair<WBLine, Integer>> parseHeader(final String[] pieces) {
}
}

enum FBLine {
Rank,
Country,
enum FactbookLine {
CountryName,
CountrySlug,
Value,
Year;
DateOfInformation,
Ranking,
Region;

String get(String[] pieces) {
return pieces[ordinal()];
Expand Down Expand Up @@ -279,16 +281,13 @@ private static void loadFactbookInfo(String filename, final Counter2<String> fac
new LineHandler() {
@Override
public boolean handle(String line) {
if (line.length() == 0
|| line.startsWith("This tab")
|| line.startsWith("Rank")
|| line.startsWith(" This file")) {
String[] pieces = splitCommaSeparated(line);
String countryName = FactbookLine.CountryName.get(pieces);
if (countryName.equals("name")) {
return false;
}
String[] pieces = line.split("\\s{2,}");
String code =
CountryCodeConverter.getCodeFromName(
FBLine.Country.get(pieces), true, missing);
CountryCodeConverter.getCodeFromName(countryName, true, missing);
if (code == null) {
return false;
}
Expand All @@ -299,7 +298,7 @@ public boolean handle(String line) {
return false;
}
code = code.toUpperCase(Locale.ENGLISH);
String valueString = FBLine.Value.get(pieces).trim();
String valueString = FactbookLine.Value.get(pieces).trim();
if (valueString.startsWith("$")) {
valueString = valueString.substring(1);
}
Expand Down Expand Up @@ -539,8 +538,8 @@ static List<Pair<String, Double>> getUnLiteracy(Output<Boolean> hadErr) throws I
loadFactbookLiteracy();
loadUnLiteracy();

loadFactbookInfo("external/factbook_gdp_ppp.txt", factbook_gdp);
loadFactbookInfo("external/factbook_population.txt", factbook_population);
loadFactbookInfo("external/factbook_gdp_ppp.csv", factbook_gdp);
loadFactbookInfo("external/factbook_population.csv", factbook_population);
CldrUtility.handleFile("external/other_country_data.txt", new MyLineHandler(other));

loadWorldBankInfo();
Expand Down Expand Up @@ -581,7 +580,7 @@ static List<Pair<String, Double>> getUnLiteracy(Output<Boolean> hadErr) throws I
}
if (myErrors.length() != 0) {
throw new IllegalArgumentException(
"Missing Country values, the following and add to external/other_country_data to fix, chaning the 0 to the real value:"
"Missing Country values, the following and add to external/other_country_data to fix, changing the 0 to the real value:"
+ myErrors);
}
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,17 @@ SY; Syria; Syrian Arab Republic
SZ; Eswatini; eSwatini; Swaziland
SZ; Eswatini; Swaziland

SH; Saint Helena; Saint Helena
SH; Saint Helena; St. Helena
SH; Saint Helena; Saint Helena, Ascension, and Tristan da Cunha
SH; Saint Helena; Saint Helena, Ascension and Tristan da Cunha
SH; Saint Helena; Saint Helena ex. dep.

TL; East Timor; Timor-Leste
TL; East Timor; East Timor

TR; Turkey; Turkiye
TR; Turkey; Turkey (Turkiye)
TR; ; Turkey


Expand Down Expand Up @@ -198,11 +202,11 @@ RE; ; Reunion
PS; ; Palestinian Territory
CD; ; Congo, Democratic Republic
FX; ; France, Metropolitan
SH; ; St. Helena
SJ; ; Svalbard and Jan Mayen Islands
VA; ; Vatican
CW; ; Netherlands Antilles
WF; ; Wallis and Futuna Islands
WF; ; Wallis and Futuna
HM; ; Heard and McDonald Islands
PM; ; St. Pierre and Miquelon

Expand All @@ -220,34 +224,9 @@ UK;; U.K.
RS;; Yugoslavia
KM;; Comros

skip; skip; Arab World
skip; skip; Caribbean small states
skip; skip; Country Name
skip; skip; East Asia & Pacific (all income levels)
skip; skip; East Asia & Pacific (developing only)
skip; skip; Euro area
skip; skip; Europe & Central Asia (all income levels)
skip; skip; Europe & Central Asia (developing only)
skip; skip; Heavily indebted poor countries (HIPC)
skip; skip; High income
skip; skip; High income: nonOECD
skip; skip; High income: OECD
skip; skip; Latin America & Caribbean (all income levels)
skip; skip; Latin America & Caribbean (developing only)
skip; skip; Least developed countries: UN classification
skip; skip; Low & middle income
skip; skip; Low income
skip; skip; Lower middle income
skip; skip; Middle East & North Africa (all income levels)
skip; skip; Middle East & North Africa (developing only)
skip; skip; Middle income
skip; skip; OECD members
skip; skip; Other small states
skip; skip; Pacific island small states
skip; skip; Small states
skip; skip; South Asia
skip; skip; Sub-Saharan Africa (all income levels)
skip; skip; Sub-Saharan Africa (developing only)
skip; skip; Sudan (pre-secession)
skip; skip; Upper middle income
skip; skip; Paracel Islands
419; Latin America & Caribbean; Latin America & Caribbean
419; Latin America & Caribbean; Latin America & the Caribbean

# Many of the skipped values below are aggregates from world_bank_data that we can ignore since they don't correspond to UN country groups

skip; skip; Paracel Islands
Loading

0 comments on commit 78146cb

Please sign in to comment.