Skip to content

Commit

Permalink
Tiny amount of comment cleanup and minor code refactoring in WritingS…
Browse files Browse the repository at this point in the history
…ystems
  • Loading branch information
tombogle committed Dec 20, 2024
1 parent 7004a47 commit a8eb262
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 73 deletions.
112 changes: 47 additions & 65 deletions SIL.WritingSystems/LanguageLookup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ public LanguageLookup(bool ensureDefaultTags = false)

foreach (AllTagEntry entry in rootObject)
{
// tags starting with x- have undefined structure so ignoring them as well as deprecated tags
// tags starting with _ may provide some sort of undefined variant information, but we ignore them as well
// Tags starting with x- have undefined structure, so ignoring them as well as deprecated tags.
// Tags starting with _ may provide some sort of undefined variant information, but we ignore them as well.
if (!entry.deprecated && !entry.tag.StartsWith("x-", StringComparison.Ordinal) && !entry.tag.StartsWith("_", StringComparison.Ordinal))
{
AddLanguage(entry.tag, entry.iso639_3, entry.full, entry.name, entry.localname, entry.region, entry.names, entry.regions, entry.tags, entry.iana, entry.regionName);
Expand All @@ -82,10 +82,10 @@ public LanguageLookup(bool ensureDefaultTags = false)

/// <summary>
/// Some languages in langtags.json have not been normalized to have a default tag without a script marker
/// in one of its entries. For some uses of the data, we really want to see only the default tags but we
/// also don't want to not see any languages. So scan through the data for cases where every tag associated
/// in one of its entries. For some uses of the data, we really want to see only the default tags, but we
/// also don't want to not see any languages. So scan through the data for cases where every tag associated
/// with a language contains a script marker and choose one as the default to receive a minimal tag that is
/// equal to the language code alone. (The one found in the most countries is chosen by default.)
/// equal to the language code alone. (The one found in the most countries is chosen by default.)
/// </summary>
private void EnsureDefaultTags()
{
Expand All @@ -99,11 +99,7 @@ private void EnsureDefaultTags()
for (var i = 0; i < tagList.Count; ++i)
{
var tag = tagList[i];
string language;
string script;
string region;
string variant;
if (!TryGetParts(tag, out language, out script, out region, out variant))
if (!TryGetParts(tag, out var language, out var script, out _, out _))
{
prevLang = tag; // shouldn't happen, but if it does...
continue;
Expand Down Expand Up @@ -149,41 +145,34 @@ private void EnsureDefaultTags()
private bool AddLanguage(string code, string threelettercode, string full = null,
string name = null, string localName = null, string region = null, List<string> names = null, List<string> regions = null, List<string> tags = null, List<string> ianaNames = null, string regionName = null)
{
string primarycountry;
string primaryCountry;
if (region == null)
{
primarycountry = "";
primaryCountry = "";
}
else if (StandardSubtags.IsValidIso3166RegionCode(region))
{
if (StandardSubtags.IsPrivateUseRegionCode(region))
{
if (region == "XK")
{
primarycountry = "Kosovo";
}
else
{
primarycountry = "Unknown private use";
}
primaryCountry = region == "XK" ? "Kosovo" : "Unknown private use";
}
else
{
primarycountry = StandardSubtags.RegisteredRegions[region].Name; // convert to full region name
primaryCountry = StandardSubtags.RegisteredRegions[region].Name; // convert to full region name
}
}
else
{
primarycountry = "Invalid region";
primaryCountry = "Invalid region";
}
LanguageInfo language = new LanguageInfo
{
LanguageTag = code,
ThreeLetterTag = threelettercode,
// DesiredName defaults to Names[0], which is set below.
PrimaryCountry = primarycountry
PrimaryCountry = primaryCountry
};
language.Countries.Add(primarycountry);
language.Countries.Add(primaryCountry);

if (regions != null)
{
Expand Down Expand Up @@ -221,15 +210,15 @@ private bool AddLanguage(string code, string threelettercode, string full = null
// add each name that is not already in language.Names
language.Names.AddRange(names.Select(n => n.Trim()).Where(n => !language.Names.Contains(n)));
}
// If we end up needing to add the language code, that reflects a deficiency in the data. But
// having a bogus name value probably hurts less that not having any name at all. The sort
// If we end up needing to add the language code, that reflects a deficiency in the data. But
// having a bogus name value probably hurts less that not having any name at all. The sort
// process mentioned above using the language tag as well as the first two items in the Names list.
Debug.Assert(language.Names.Count > 0);
if (language.Names.Count == 0)
language.Names.Add(code);

// add language to _codeToLanguageIndex and _nameToLanguageIndex
// if 2 letter code then add both 2 and 3 letter codes to _codeToLanguageIndex
// if 2-letter code, then add both 2 and 3-letter codes to _codeToLanguageIndex

_codeToLanguageIndex[code] = language;
if (full != null && !string.Equals(full, code))
Expand All @@ -244,26 +233,25 @@ private bool AddLanguage(string code, string threelettercode, string full = null

if (tags != null)
{
foreach (string langtag in tags)
foreach (string langTag in tags)
{
_codeToLanguageIndex[langtag] = language;
_codeToLanguageIndex[langTag] = language;
}
}

foreach (string langname in language.Names)
GetOrCreateListFromName(langname).Add(language);
foreach (string langName in language.Names)
GetOrCreateListFromName(langName).Add(language);
// add to _countryToLanguageIndex
foreach (var country in language.Countries)
{
if (!string.IsNullOrEmpty(country))
{
List<LanguageInfo> list;
if (!_countryToLanguageIndex.TryGetValue(country, out list))
if (!_countryToLanguageIndex.TryGetValue(country, out var list))
{
list = new List<LanguageInfo>();
_countryToLanguageIndex[country] = list;
}
list.Add(language);
list.Add(language);
}
}

Expand All @@ -289,8 +277,7 @@ internal List<LanguageInfo> LanguagesWithoutRegions()

private List<LanguageInfo> GetOrCreateListFromName(string name)
{
List<LanguageInfo> languages;
if (!_nameToLanguageIndex.TryGetValue(name, out languages))
if (!_nameToLanguageIndex.TryGetValue(name, out var languages))
{
languages = new List<LanguageInfo>();
_nameToLanguageIndex.Add(name, languages);
Expand All @@ -305,13 +292,12 @@ private List<LanguageInfo> GetOrCreateListFromName(string name)
public LanguageInfo GetLanguageFromCode(string isoCode)
{
Guard.AgainstNullOrEmptyString(isoCode, "Parameter to GetLanguageFromCode must not be null or empty.");
LanguageInfo languageInfo = null;
_codeToLanguageIndex.TryGetValue(isoCode, out languageInfo);
_codeToLanguageIndex.TryGetValue(isoCode, out var languageInfo);
return languageInfo;
}

/// <summary>
/// Get an list of languages that match the given string in some way (code, name, country)
/// Get a list of languages that match the given string in some way (code, name, country)
/// </summary>
public IEnumerable<LanguageInfo> SuggestLanguages(string searchString)
{
Expand All @@ -322,13 +308,13 @@ public IEnumerable<LanguageInfo> SuggestLanguages(string searchString)

if (searchString == "*")
{
// there will be duplicate LanguageInfo entries for 2 and 3 letter codes and equivalent tags
// There will be duplicate LanguageInfo entries for 2 and 3-letter codes and equivalent tags.
var all_languages = new HashSet<LanguageInfo>(_codeToLanguageIndex.Select(l => l.Value));
foreach (LanguageInfo languageInfo in all_languages.OrderBy(l => l, new ResultComparer(searchString)))
yield return languageInfo;
}
// if the search string exactly matches a hard-coded way to say "sign", show all the sign languages
// there will be duplicate LanguageInfo entries for equivalent tags
// If the search string exactly matches a hard-coded way to say "sign", show all the sign languages.
// There will be duplicate LanguageInfo entries for equivalent tags
else if (new []{"sign", "sign language","signes", "langage des signes", "señas","lenguaje de señas"}.Contains(searchString.ToLowerInvariant()))
{
var parallelSearch = new HashSet<LanguageInfo>(_codeToLanguageIndex.AsParallel().Select(li => li.Value).Where(l =>
Expand All @@ -342,10 +328,10 @@ public IEnumerable<LanguageInfo> SuggestLanguages(string searchString)
{
IEnumerable<LanguageInfo> matchOnCode = from x in _codeToLanguageIndex where x.Key.StartsWith(searchString, StringComparison.InvariantCultureIgnoreCase) select x.Value;
List<LanguageInfo>[] matchOnName = (from x in _nameToLanguageIndex where x.Key.StartsWith(searchString, StringComparison.InvariantCultureIgnoreCase) select x.Value).ToArray();
// Apostrophes can cause trouble in lookup. Unicode TR-29 inexplicably says to use
// Apostrophes can cause trouble in lookup. Unicode TR-29 inexplicably says to use
// u2019 (RIGHT SINGLE QUOTATION MARK) for the English apostrophe when it also defines
// u02BC (MODIFIER LETTER APOSTROPHE) as a Letter character. Users are quite likely to
// type the ASCII apostrophe (u0027) which is defined as Punctuation. The current
// u02BC (MODIFIER LETTER APOSTROPHE) as a Letter character. Users are quite likely to
// type the ASCII apostrophe (u0027) which is defined as Punctuation. The current
// data appears to use u2019 in several language names, which means that users might
// end up thinking the language isn't in our database.
// See https://silbloom.myjetbrains.com/youtrack/issue/BL-6339.
Expand Down Expand Up @@ -393,9 +379,9 @@ public ResultComparer(string searchString)
/// <summary>
/// Sorting the languages for display is tricky: we want the most relevant languages at the
/// top of the list, so we can't simply sort alphabetically by language name or by language tag,
/// but need to take both items into account together with the current search string. Ordering
/// but need to take both items into account together with the current search string. Ordering
/// by relevance is clearly impossible since we'd have to read the user's mind and apply that
/// knowledge to the data. But the heuristics we use here may be better than nothing...
/// knowledge to the data. But the heuristics we use here may be better than nothing...
/// </summary>
public int Compare(LanguageInfo x, LanguageInfo y)
{
Expand All @@ -404,7 +390,7 @@ public int Compare(LanguageInfo x, LanguageInfo y)

// Favor ones where some language name matches the search string to solve BL-1141
// We restrict this to the top 2 names of each language, and to cases where the
// corresponding names of the two languages are different. (If both language names
// corresponding names of the two languages are different. (If both language names
// match the search string, there's no good reason to favor one over the other!)
if (!x.Names[0].Equals(y.Names[0], StringComparison.InvariantCultureIgnoreCase))
{
Expand All @@ -415,39 +401,34 @@ public int Compare(LanguageInfo x, LanguageInfo y)
}
else if (x.Names.Count == 1 || y.Names.Count == 1 || !x.Names[1].Equals(y.Names[1], StringComparison.InvariantCultureIgnoreCase))
{
// If we get here, x.Names[0] == y.Names[0]. If both equal the search string, then neither x.Names[1]
// If we get here, x.Names[0] == y.Names[0]. If both equal the search string, then neither x.Names[1]
// nor y.Names[1] should equal the search string since the code adding to Names checks for redundancy.
// Also it's possible that neither x.Names[1] nor y.Names[1] exists at this point in the code, or that
// Also, it's possible that neither x.Names[1] nor y.Names[1] exists at this point in the code, or that
// only one of them exists, or that both of them exist (in which case they are not equal).
if (x.Names.Count > 1 && x.Names[1].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
return -1;
if (y.Names.Count > 1 && y.Names[1].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
return 1;
}

// Favor a language whose tag matches the search string exactly. (equal tags are handled above)
// Favor a language whose tag matches the search string exactly. (equal tags are handled above)
if (x.LanguageTag.Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
return -1;
if (y.LanguageTag.Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
return 1;

// written this way to avoid having to catch predictable exceptions as the user is typing
string xlanguage;
string ylanguage;
string script;
string region;
string variant;
var xtagParses = TryGetParts(x.LanguageTag, out xlanguage, out script, out region, out variant);
var ytagParses = TryGetParts(y.LanguageTag, out ylanguage, out script, out region, out variant);
var bothTagLanguagesMatchSearch = xtagParses && ytagParses && xlanguage == ylanguage &&
_searchString.Equals(xlanguage, StringComparison.InvariantCultureIgnoreCase);
var xTagParses = TryGetParts(x.LanguageTag, out var xLanguage, out _, out _, out _);
var yTagParses = TryGetParts(y.LanguageTag, out var yLanguage, out _, out _, out _);
var bothTagLanguagesMatchSearch = xTagParses && yTagParses && xLanguage == yLanguage &&
_searchString.Equals(xLanguage, StringComparison.InvariantCultureIgnoreCase);
if (!bothTagLanguagesMatchSearch)
{
// One of the tag language pieces may match the search string even though not both match. In that case,
// One of the tag language pieces may match the search string even though not both match. In that case,
// sort the matching language earlier in the list.
if (xtagParses && _searchString.Equals(xlanguage, StringComparison.InvariantCultureIgnoreCase))
if (xTagParses && _searchString.Equals(xLanguage, StringComparison.InvariantCultureIgnoreCase))
return -1; // x.Tag's language part matches search string exactly, so sort it earlier in the list.
if (ytagParses && _searchString.Equals(ylanguage, StringComparison.InvariantCultureIgnoreCase))
if (yTagParses && _searchString.Equals(yLanguage, StringComparison.InvariantCultureIgnoreCase))
return 1; // y.Tag's language part matches search string exactly, so sort it earlier in the list.
}

Expand Down Expand Up @@ -481,7 +462,7 @@ public int Compare(LanguageInfo x, LanguageInfo y)
}

// Editing distance to a language name is not useful when we've detected that the user appears to be
// typing a language tag in that both language tags match what the user has typed. (For example,
// typing a language tag in that both language tags match what the user has typed. (For example,
// it gives a strange and unwanted order to the variants of zh.) In such a case we just order the
// matching codes by length (already done) and then alphabetically by code, skipping the sort by
// editing distance to the language names.
Expand All @@ -504,7 +485,8 @@ public int Compare(LanguageInfo x, LanguageInfo y)
return res;
}

// Alphabetize by Language tag if 3 characters or less or if there is a hyphen after the first 2 or 3 chars
// Alphabetize by Language tag if 3 characters or fewer, or if there is a hyphen
// after the first 2 or 3 chars.
if (_lowerSearch.Length <= 3 || _lowerSearch.IndexOf('-') == 3 || _lowerSearch.IndexOf('-') == 4)
{
return string.Compare(x.LanguageTag, y.LanguageTag, StringComparison.InvariantCultureIgnoreCase);
Expand Down
13 changes: 6 additions & 7 deletions SIL.WritingSystems/StandardSubtags.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ static StandardSubtags()
Iso3Languages = RegisteredLanguages.Where(l => !string.IsNullOrEmpty(l.Iso3Code)).ToDictionary(l => l.Iso3Code, StringComparer.InvariantCultureIgnoreCase);
}

internal static void InitialiseIanaSubtags(string twotothreecodes, string subtagregistry)
internal static void InitialiseIanaSubtags(string twoToThreeCodes, string subtagRegistry)
{
// JohnT: can't find anywhere else to document this, so here goes: TwoToThreeMap is a file adapted from
// FieldWorks Ethnologue\Data\iso-639-3_20080804.tab, by discarding all but the first column (3-letter
Expand All @@ -29,8 +29,8 @@ internal static void InitialiseIanaSubtags(string twotothreecodes, string subtag
// Iana code, and the string after it is the one we want to return as the corresponding ISO3Code.
// The following block of code assembles these lines into a map we can use to fill this slot properly
// when building the main table.
var twoToThreeMap = TwoAndThreeMap(twotothreecodes, false);
string[] ianaSubtagsAsStrings = subtagregistry.Split(new[] { "%%" }, StringSplitOptions.None);
var twoToThreeMap = TwoAndThreeMap(twoToThreeCodes, false);
string[] ianaSubtagsAsStrings = subtagRegistry.Split(new[] { "%%" }, StringSplitOptions.None);

var languages = new List<LanguageSubtag>();
var scripts = new List<ScriptSubtag>();
Expand All @@ -57,9 +57,9 @@ internal static void InitialiseIanaSubtags(string twotothreecodes, string subtag
continue;
if (component.Split(':').Length < 2) // the description for ia (Interlingua) is spread over 2 lines
{
if (descriptions.Count() > 0)
if (descriptions.Count > 0)
{
description = description + component.Substring(1);
description += component.Substring(1);
descriptions.Clear();
descriptions.Add(description);
}
Expand Down Expand Up @@ -127,8 +127,7 @@ internal static void InitialiseIanaSubtags(string twotothreecodes, string subtag
switch (type)
{
case "language":
string iso3Code;
if (!twoToThreeMap.TryGetValue(subtag, out iso3Code))
if (!twoToThreeMap.TryGetValue(subtag, out var iso3Code))
iso3Code = subtag;
languages.Add(new LanguageSubtag(subtag, description, false, iso3Code, descriptions, macrolanguage, deprecated));
break;
Expand Down
2 changes: 1 addition & 1 deletion SIL.WritingSystems/WritingSystemDefinition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ public virtual string DisplayLabel
get
{
//jh (Oct 2010) made it start with RFC5646 because all ws's in a lang start with the
//same abbreviation, making imppossible to see (in SOLID for example) which you chose.
//same abbreviation, making it impossible to see (in SOLID for example) which you chose.
bool languageIsUnknown = _languageTag.Equals(WellKnownSubtags.UnlistedLanguage, StringComparison.OrdinalIgnoreCase);
if (!string.IsNullOrEmpty(_languageTag) && !languageIsUnknown)
{
Expand Down

0 comments on commit a8eb262

Please sign in to comment.