diff --git a/JSTests/ChangeLog b/JSTests/ChangeLog index 04d628b0e594d..9830122fcccda 100644 --- a/JSTests/ChangeLog +++ b/JSTests/ChangeLog @@ -1,3 +1,23 @@ +2020-09-11 Yusuke Suzuki + + [JSC] Canonicalize "true" unicode extension type value to "" + https://bugs.webkit.org/show_bug.cgi?id=216224 + + Reviewed by Ross Kirsling. + + * stress/intl-collator-co-extension.js: + (explicitTrueBeforeICU67): Deleted. + * stress/intl-collator.js: + (shouldBe.testCollator.Intl.Collator): + (explicitTrueBeforeICU67): Deleted. + * stress/intl-datetimeformat.js: + * stress/intl-locale.js: + * stress/intl-numberformat.js: + * stress/intl-object.js: + * stress/intl-pluralrules.js: + * stress/intl-relativetimeformat.js: + * test262/expectations.yaml: + 2020-09-11 Yusuke Suzuki [JSC] attribute-change transition should not pin Structure diff --git a/JSTests/stress/intl-collator-co-extension.js b/JSTests/stress/intl-collator-co-extension.js index c1487840b49ae..52121b3ca7525 100644 --- a/JSTests/stress/intl-collator-co-extension.js +++ b/JSTests/stress/intl-collator-co-extension.js @@ -15,10 +15,6 @@ function shouldBeArray(actual, expected) { } } -function explicitTrueBeforeICU67() { - return $vm.icuVersion() < 67 ? 
'-true' : ''; -} - shouldBeArray(["AE", "\u00C4"].sort(new Intl.Collator("de", {usage: "sort"}).compare), ["\u00C4", "AE"]); shouldBeArray(["AE", "\u00C4"].sort(new Intl.Collator("de", {usage: "search"}).compare), ["AE", "\u00C4"]); shouldBe(new Intl.Collator("de", {usage: "sort"}).resolvedOptions().locale, "de"); @@ -40,8 +36,8 @@ shouldBeArray(["2", "10"].sort(new Intl.Collator("de-U-kn-x-0", {usage: "search" shouldBe(new Intl.Collator("en-US-x-twain", {usage: "search"}).resolvedOptions().locale, "en-US"); -shouldBe(new Intl.Collator("de-u-kn", {usage: "sort"}).resolvedOptions().locale, "de-u-kn" + explicitTrueBeforeICU67()); -shouldBe(new Intl.Collator("de-u-kn", {usage: "search"}).resolvedOptions().locale, "de-u-kn" + explicitTrueBeforeICU67()); +shouldBe(new Intl.Collator("de-u-kn", {usage: "sort"}).resolvedOptions().locale, "de-u-kn"); +shouldBe(new Intl.Collator("de-u-kn", {usage: "search"}).resolvedOptions().locale, "de-u-kn"); shouldBeArray(["a", "ae", "ä", "æ"].sort(new Intl.Collator("de-u-co-phonebk").compare), ["a", "ae", "ä", "æ"]); shouldBeArray(["a", "ae", "ä", "æ"].sort(new Intl.Collator("de").compare), ["a", "ä", "ae", "æ"]); diff --git a/JSTests/stress/intl-collator.js b/JSTests/stress/intl-collator.js index a2e405014f555..b0c12871c2d85 100644 --- a/JSTests/stress/intl-collator.js +++ b/JSTests/stress/intl-collator.js @@ -19,10 +19,6 @@ function shouldThrow(func, errorType) { throw new Error(`Expected ${errorType.name}!`); } -function explicitTrueBeforeICU67() { - return $vm.icuVersion() < 67 ? '-true' : ''; -} - // 10.1 The Intl.Collator Constructor // The Intl.Collator constructor is a standard built-in property of the Intl object. @@ -85,8 +81,8 @@ shouldBe(testCollator(Intl.Collator('en-u-co-abcd'), [{locale: 'en'}]), true); shouldBe(testCollator(Intl.Collator('de-u-co-phonebk'), [{locale: 'de-u-co-phonebk', collation: 'phonebk'}, {locale: 'de'}]), true); // The 'kn' key is processed correctly. 
-shouldBe(testCollator(Intl.Collator('en-u-kn'), [{locale: 'en-u-kn' + explicitTrueBeforeICU67(), numeric: true}]), true); -shouldBe(testCollator(Intl.Collator('en-u-kn-true'), [{locale: 'en-u-kn' + explicitTrueBeforeICU67(), numeric: true}]), true); +shouldBe(testCollator(Intl.Collator('en-u-kn'), [{locale: 'en-u-kn', numeric: true}]), true); +shouldBe(testCollator(Intl.Collator('en-u-kn-true'), [{locale: 'en-u-kn', numeric: true}]), true); shouldBe(testCollator(Intl.Collator('en-u-kn-false'), [{locale: 'en-u-kn-false', numeric: false}]), true); shouldBe(testCollator(Intl.Collator('en-u-kn-abcd'), [{locale: 'en'}]), true); @@ -150,7 +146,7 @@ shouldThrow(() => Intl.Collator('en', { get ignorePunctuation() { throw new Erro // Options override the language tag. shouldBe(testCollator(Intl.Collator('en-u-kn-true', {numeric: false}), [{locale: 'en', numeric: false}]), true); shouldBe(testCollator(Intl.Collator('en-u-kn-false', {numeric: true}), [{locale: 'en', numeric: true}]), true); -shouldBe(testCollator(Intl.Collator('en-u-kn-true', {numeric: true}), [{locale: 'en-u-kn' + explicitTrueBeforeICU67(), numeric: true}]), true); +shouldBe(testCollator(Intl.Collator('en-u-kn-true', {numeric: true}), [{locale: 'en-u-kn', numeric: true}]), true); shouldBe(testCollator(Intl.Collator('en-u-kn-false', {numeric: false}), [{locale: 'en-u-kn-false', numeric: false}]), true); // Options and extension keys are processed correctly. @@ -192,7 +188,7 @@ shouldBe( JSON.stringify(Intl.Collator.supportedLocalesOf('En-laTn-us-variAnt-fOObar-1abc-U-kn-tRue-A-aa-aaa-x-RESERVED')), $vm.icuVersion() >= 67 ? '["en-Latn-US-1abc-foobar-variant-a-aa-aaa-u-kn-x-reserved"]' - : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-true-x-reserved"]' + : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-x-reserved"]' ); // Throws on problems with length, get, or toString. 
shouldThrow(() => Intl.Collator.supportedLocalesOf(Object.create(null, { length: { get() { throw Error() } } })), Error); diff --git a/JSTests/stress/intl-datetimeformat.js b/JSTests/stress/intl-datetimeformat.js index bc14e1998188a..061f797720261 100644 --- a/JSTests/stress/intl-datetimeformat.js +++ b/JSTests/stress/intl-datetimeformat.js @@ -84,7 +84,7 @@ shouldBe( JSON.stringify(Intl.DateTimeFormat.supportedLocalesOf('En-laTn-us-variAnt-fOObar-1abc-U-kn-tRue-A-aa-aaa-x-RESERVED')), $vm.icuVersion() >= 67 ? '["en-Latn-US-1abc-foobar-variant-a-aa-aaa-u-kn-x-reserved"]' - : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-true-x-reserved"]' + : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-x-reserved"]' ); // Throws on problems with length, get, or toString. shouldThrow(() => Intl.DateTimeFormat.supportedLocalesOf(Object.create(null, { length: { get() { throw new Error(); } } })), Error); diff --git a/JSTests/stress/intl-locale.js b/JSTests/stress/intl-locale.js index 04e525979fdc0..def108c408457 100644 --- a/JSTests/stress/intl-locale.js +++ b/JSTests/stress/intl-locale.js @@ -150,7 +150,7 @@ shouldBe( new Intl.Locale('En-laTn-us-variAnt-fOObar-1abc-U-kn-tRue-A-aa-aaa-x-RESERVED').toString(), $vm.icuVersion() >= 67 ? 
'en-Latn-US-1abc-foobar-variant-a-aa-aaa-u-kn-x-reserved' - : 'en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-true-x-reserved' + : 'en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-x-reserved' ); shouldBe(new Intl.Locale('cel-gaulish', { script: 'Arab', numberingSystem: 'gujr' }).toString(), 'xtg-Arab-u-nu-gujr-x-cel-gaulish'); shouldBe(new Intl.Locale('en-Latn-US-u-ca-gregory-co-phonebk-hc-h12-kf-upper-kn-false-nu-latn').toString(), 'en-Latn-US-u-ca-gregory-co-phonebk-hc-h12-kf-upper-kn-false-nu-latn'); diff --git a/JSTests/stress/intl-numberformat.js b/JSTests/stress/intl-numberformat.js index facadca0e3154..56c577417645f 100644 --- a/JSTests/stress/intl-numberformat.js +++ b/JSTests/stress/intl-numberformat.js @@ -225,7 +225,7 @@ shouldBe( JSON.stringify(Intl.NumberFormat.supportedLocalesOf('En-laTn-us-variAnt-fOObar-1abc-U-kn-tRue-A-aa-aaa-x-RESERVED')), $vm.icuVersion() >= 67 ? '["en-Latn-US-1abc-foobar-variant-a-aa-aaa-u-kn-x-reserved"]' - : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-true-x-reserved"]' + : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-x-reserved"]' ); // Throws on problems with length, get, or toString. shouldThrow(() => Intl.NumberFormat.supportedLocalesOf(Object.create(null, { length: { get() { throw new Error(); } } })), Error); diff --git a/JSTests/stress/intl-object.js b/JSTests/stress/intl-object.js index 40cff6612d14b..0390f8c738f8e 100644 --- a/JSTests/stress/intl-object.js +++ b/JSTests/stress/intl-object.js @@ -72,7 +72,7 @@ shouldBe( JSON.stringify(Intl.getCanonicalLocales('En-laTn-us-variAnt-fOObar-1abc-U-kn-tRue-A-aa-aaa-x-RESERVED')), $vm.icuVersion() >= 67 ? '["en-Latn-US-1abc-foobar-variant-a-aa-aaa-u-kn-x-reserved"]' - : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-true-x-reserved"]' + : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-x-reserved"]' ); // Throws on problems with length, get, or toString. 
shouldThrow(() => Intl.getCanonicalLocales(Object.create(null, { length: { get() { throw new Error(); } } })), Error); diff --git a/JSTests/stress/intl-pluralrules.js b/JSTests/stress/intl-pluralrules.js index a136d46f41ad4..a438eace1d664 100644 --- a/JSTests/stress/intl-pluralrules.js +++ b/JSTests/stress/intl-pluralrules.js @@ -83,7 +83,7 @@ shouldBe( JSON.stringify(Intl.PluralRules.supportedLocalesOf('En-laTn-us-variAnt-fOObar-1abc-U-kn-tRue-A-aa-aaa-x-RESERVED')), $vm.icuVersion() >= 67 ? '["en-Latn-US-1abc-foobar-variant-a-aa-aaa-u-kn-x-reserved"]' - : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-true-x-reserved"]' + : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-x-reserved"]' ); // Throws on problems with length, get, or toString. shouldThrow(() => Intl.PluralRules.supportedLocalesOf(Object.create(null, { length: { get() { throw new Error(); } } })), Error); diff --git a/JSTests/stress/intl-relativetimeformat.js b/JSTests/stress/intl-relativetimeformat.js index 450282fb9133b..8c14346d0bf73 100644 --- a/JSTests/stress/intl-relativetimeformat.js +++ b/JSTests/stress/intl-relativetimeformat.js @@ -67,7 +67,7 @@ shouldBe( JSON.stringify(Intl.RelativeTimeFormat.supportedLocalesOf('En-laTn-us-variAnt-fOObar-1abc-U-kn-tRue-A-aa-aaa-x-RESERVED')), $vm.icuVersion() >= 67 ? 
'["en-Latn-US-1abc-foobar-variant-a-aa-aaa-u-kn-x-reserved"]' - : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-true-x-reserved"]' + : '["en-Latn-US-variant-foobar-1abc-a-aa-aaa-u-kn-x-reserved"]' ); shouldThrow(() => Intl.RelativeTimeFormat.supportedLocalesOf('no-bok'), RangeError); diff --git a/JSTests/test262/expectations.yaml b/JSTests/test262/expectations.yaml index 253210761b5b0..9950389b06285 100644 --- a/JSTests/test262/expectations.yaml +++ b/JSTests/test262/expectations.yaml @@ -1452,9 +1452,6 @@ test/built-ins/TypedArrayConstructors/of/custom-ctor.js: test/built-ins/TypedArrayConstructors/of/new-instance-using-custom-ctor.js: default: 'TypeError: TypedArray.of requires its this argument to subclass a TypedArray constructor (Testing with Float64Array.)' strict mode: 'TypeError: TypedArray.of requires its this argument to subclass a TypedArray constructor (Testing with Float64Array.)' -test/intl402/Collator/missing-unicode-ext-value-defaults-to-true.js: - default: "Test262Error: \"kn-true\" is returned in locale, but shouldn't be. Expected SameValue(«7», «-1») to be true" - strict mode: "Test262Error: \"kn-true\" is returned in locale, but shouldn't be. 
Expected SameValue(«7», «-1») to be true" test/intl402/DateTimeFormat/prototype/format/timedatestyle-en.js: default: 'Test262Error: Result for full with {} Expected SameValue(«14:12:47 PM Coordinated Universal Time», «14:12:47 Coordinated Universal Time») to be true' strict mode: 'Test262Error: Result for full with {} Expected SameValue(«14:12:47 PM Coordinated Universal Time», «14:12:47 Coordinated Universal Time») to be true' @@ -1497,9 +1494,6 @@ test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-region.js: test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-subdivision.js: default: 'Test262Error: Expected SameValue(«und-NO-u-sd-no23», «und-NO-u-sd-no50») to be true' strict mode: 'Test262Error: Expected SameValue(«und-NO-u-sd-no23», «und-NO-u-sd-no50») to be true' -test/intl402/Intl/getCanonicalLocales/unicode-ext-canonicalize-yes-to-true.js: - default: 'Test262Error: Expected SameValue(«und-u-kb-true», «und-u-kb») to be true' - strict mode: 'Test262Error: Expected SameValue(«und-u-kb-true», «und-u-kb») to be true' test/intl402/Intl/getCanonicalLocales/unicode-ext-key-with-digit.js: default: 'Test262Error: Expected SameValue(«en-u-0c-yes», «en-u-0c») to be true' strict mode: 'Test262Error: Expected SameValue(«en-u-0c-yes», «en-u-0c») to be true' @@ -1509,21 +1503,12 @@ test/intl402/Locale/constructor-apply-options-canonicalizes-twice.js: test/intl402/Locale/constructor-non-iana-canon.js: default: 'Test262Error: new Intl.Locale("mo").maximize().toString() returns "ro-Latn-RO" Expected SameValue(«ro», «ro-Latn-RO») to be true' strict mode: 'Test262Error: new Intl.Locale("mo").maximize().toString() returns "ro-Latn-RO" Expected SameValue(«ro», «ro-Latn-RO») to be true' -test/intl402/Locale/constructor-options-numeric-undefined.js: - default: 'Test262Error: new Intl.Locale("en-u-kn-true", {numeric: undefined}).toString() returns "en-u-kn" Expected SameValue(«en-u-kn-true», «en-u-kn») to be true' - strict mode: 'Test262Error: new 
Intl.Locale("en-u-kn-true", {numeric: undefined}).toString() returns "en-u-kn" Expected SameValue(«en-u-kn-true», «en-u-kn») to be true' -test/intl402/Locale/constructor-options-numeric-valid.js: - default: 'Test262Error: new Intl.Locale("en", {numeric: true}).toString() returns "true" Expected SameValue(«en-u-kn-true», «en-u-kn») to be true' - strict mode: 'Test262Error: new Intl.Locale("en", {numeric: true}).toString() returns "true" Expected SameValue(«en-u-kn-true», «en-u-kn») to be true' test/intl402/Locale/constructor-options-region-valid.js: default: "Test262Error: new Intl.Locale('en', {region: \"554\"}).toString() returns \"en-NZ\" Expected SameValue(«en-554», «en-NZ») to be true" strict mode: "Test262Error: new Intl.Locale('en', {region: \"554\"}).toString() returns \"en-NZ\" Expected SameValue(«en-554», «en-NZ») to be true" test/intl402/Locale/constructor-tag.js: default: 'Test262Error: new Intl.Locale("sl-ROZAJ-BISKE-1994").toString() returns "sl-1994-biske-rozaj" Expected SameValue(«sl-rozaj-biske-1994», «sl-1994-biske-rozaj») to be true' strict mode: 'Test262Error: new Intl.Locale("sl-ROZAJ-BISKE-1994").toString() returns "sl-1994-biske-rozaj" Expected SameValue(«sl-rozaj-biske-1994», «sl-1994-biske-rozaj») to be true' -test/intl402/Locale/getters.js: - default: 'Test262Error: Expected SameValue(«de-Latn-DE-u-ca-gregory-co-phonebk-hc-h23-kf-true-kn-false-nu-latn», «de-Latn-DE-u-ca-gregory-co-phonebk-hc-h23-kf-kn-false-nu-latn») to be true' - strict mode: 'Test262Error: Expected SameValue(«de-Latn-DE-u-ca-gregory-co-phonebk-hc-h23-kf-true-kn-false-nu-latn», «de-Latn-DE-u-ca-gregory-co-phonebk-hc-h23-kf-kn-false-nu-latn») to be true' test/intl402/Locale/likely-subtags-grandfathered.js: default: 'Test262Error: Expected SameValue(«cmn», «zh») to be true' strict mode: 'Test262Error: Expected SameValue(«cmn», «zh») to be true' diff --git a/Source/JavaScriptCore/ChangeLog b/Source/JavaScriptCore/ChangeLog index 4f35e977f15e2..1b80285486674 100644 --- 
a/Source/JavaScriptCore/ChangeLog +++ b/Source/JavaScriptCore/ChangeLog @@ -1,3 +1,40 @@ +2020-09-11 Yusuke Suzuki + + [JSC] Canonicalize "true" unicode extension type value to "" + https://bugs.webkit.org/show_bug.cgi?id=216224 + + Reviewed by Ross Kirsling. + + Unicode Technical Standard #35 defines that a unicode extension type value of "true" should be canonicalized to "". + This patch implements it by extracting unicode extension subtags and replacing "true" with "". + + * runtime/IntlLocale.cpp: + (JSC::LocaleIDBuilder::toCanonical): + (JSC::IntlLocale::keywordValue const): + (JSC::IntlLocale::calendar): + (JSC::IntlLocale::caseFirst): + (JSC::IntlLocale::collation): + (JSC::IntlLocale::hourCycle): + (JSC::IntlLocale::numberingSystem): + (JSC::IntlLocale::numeric): + * runtime/IntlLocale.h: + * runtime/IntlLocalePrototype.cpp: + (JSC::IntlLocalePrototypeGetterCalendar): + (JSC::IntlLocalePrototypeGetterCaseFirst): + (JSC::IntlLocalePrototypeGetterCollation): + (JSC::IntlLocalePrototypeGetterHourCycle): + (JSC::IntlLocalePrototypeGetterNumberingSystem): + * runtime/IntlObject.cpp: + (JSC::unicodeExtensionSubTags): + (JSC::canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization): + (JSC::languageTagForLocaleID): + (JSC::resolveLocale): + * runtime/IntlObject.h: + * runtime/IntlObjectInlines.h: + (JSC::computeTwoCharacters16Code): + * runtime/StringPrototype.cpp: + (JSC::computeTwoCharacters16Code): Deleted. 
+ 2020-09-11 Yusuke Suzuki [JSC] attribute-change transition should not pin Structure diff --git a/Source/JavaScriptCore/runtime/IntlLocale.cpp b/Source/JavaScriptCore/runtime/IntlLocale.cpp index 4aa2a50baf65f..ebe789147d7c0 100644 --- a/Source/JavaScriptCore/runtime/IntlLocale.cpp +++ b/Source/JavaScriptCore/runtime/IntlLocale.cpp @@ -91,11 +91,12 @@ CString LocaleIDBuilder::toCanonical() { ASSERT(m_buffer.size()); - Vector result; - auto status = callBufferProducingFunction(uloc_canonicalize, m_buffer.data(), result); + Vector buffer; + auto status = callBufferProducingFunction(uloc_canonicalize, m_buffer.data(), buffer); if (U_FAILURE(status)) return CString(); + auto result = canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(WTFMove(buffer)); return CString(result.data(), result.size()); } @@ -189,9 +190,15 @@ String IntlLocale::keywordValue(ASCIILiteral key, bool isBoolean) const uloc_getKeywordValue(m_localeID.data(), key.characters(), buffer.data(), bufferLength + 1, &status); } ASSERT(U_SUCCESS(status)); - - const char* value = !isBoolean ? uloc_toUnicodeLocaleType(key.characters(), buffer.data()) : buffer.data(); - return value ? 
String(value) : emptyString(); + if (isBoolean) + return String(buffer.data()); + const char* value = uloc_toUnicodeLocaleType(key.characters(), buffer.data()); + if (!value) + return nullString(); + String result(value); + if (result == "true"_s) + return emptyString(); + return result; } // https://tc39.es/ecma402/#sec-Intl.Locale @@ -404,41 +411,41 @@ const String& IntlLocale::region() // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.calendar const String& IntlLocale::calendar() { - if (m_calendar.isNull()) + if (!m_calendar) m_calendar = keywordValue("calendar"_s); - return m_calendar; + return m_calendar.value(); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.caseFirst const String& IntlLocale::caseFirst() { - if (m_caseFirst.isNull()) + if (!m_caseFirst) m_caseFirst = keywordValue("colcasefirst"_s); - return m_caseFirst; + return m_caseFirst.value(); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.collation const String& IntlLocale::collation() { - if (m_collation.isNull()) + if (!m_collation) m_collation = keywordValue("collation"_s); - return m_collation; + return m_collation.value(); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.hourCycle const String& IntlLocale::hourCycle() { - if (m_hourCycle.isNull()) + if (!m_hourCycle) m_hourCycle = keywordValue("hours"_s); - return m_hourCycle; + return m_hourCycle.value(); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.numberingSystem const String& IntlLocale::numberingSystem() { - if (m_numberingSystem.isNull()) + if (!m_numberingSystem) m_numberingSystem = keywordValue("numbers"_s); - return m_numberingSystem; + return m_numberingSystem.value(); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.numeric @@ -446,7 +453,7 @@ TriState IntlLocale::numeric() { constexpr bool isBoolean = true; if (m_numeric == TriState::Indeterminate) - m_numeric = triState(keywordValue("colnumeric"_s, isBoolean) == "yes"); + m_numeric = triState(keywordValue("colnumeric"_s, isBoolean) == 
"yes"_s); return m_numeric; } diff --git a/Source/JavaScriptCore/runtime/IntlLocale.h b/Source/JavaScriptCore/runtime/IntlLocale.h index ff50e7f76a50a..1c5dd0a38771e 100644 --- a/Source/JavaScriptCore/runtime/IntlLocale.h +++ b/Source/JavaScriptCore/runtime/IntlLocale.h @@ -60,6 +60,7 @@ class IntlLocale final : public JSNonFinalObject { const String& language(); const String& script(); const String& region(); + const String& calendar(); const String& caseFirst(); const String& collation(); @@ -83,11 +84,11 @@ class IntlLocale final : public JSNonFinalObject { String m_language; String m_script; String m_region; - String m_calendar; - String m_caseFirst; - String m_collation; - String m_hourCycle; - String m_numberingSystem; + Optional m_calendar; + Optional m_caseFirst; + Optional m_collation; + Optional m_hourCycle; + Optional m_numberingSystem; TriState m_numeric { TriState::Indeterminate }; }; diff --git a/Source/JavaScriptCore/runtime/IntlLocalePrototype.cpp b/Source/JavaScriptCore/runtime/IntlLocalePrototype.cpp index 4a8b84398eb08..80e070503dec9 100644 --- a/Source/JavaScriptCore/runtime/IntlLocalePrototype.cpp +++ b/Source/JavaScriptCore/runtime/IntlLocalePrototype.cpp @@ -166,7 +166,7 @@ EncodedJSValue JSC_HOST_CALL IntlLocalePrototypeGetterCalendar(JSGlobalObject* g return throwVMTypeError(globalObject, scope, "Intl.Locale.prototype.calendar called on value that's not an object initialized as a Locale"_s); const String& calendar = locale->calendar(); - RELEASE_AND_RETURN(scope, JSValue::encode(calendar.isEmpty() ? jsUndefined() : jsString(vm, calendar))); + RELEASE_AND_RETURN(scope, JSValue::encode(calendar.isNull() ? 
jsUndefined() : jsString(vm, calendar))); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.caseFirst @@ -180,7 +180,7 @@ EncodedJSValue JSC_HOST_CALL IntlLocalePrototypeGetterCaseFirst(JSGlobalObject* return throwVMTypeError(globalObject, scope, "Intl.Locale.prototype.caseFirst called on value that's not an object initialized as a Locale"_s); const String& caseFirst = locale->caseFirst(); - RELEASE_AND_RETURN(scope, JSValue::encode(caseFirst.isEmpty() ? jsUndefined() : jsString(vm, caseFirst))); + RELEASE_AND_RETURN(scope, JSValue::encode(caseFirst.isNull() ? jsUndefined() : jsString(vm, caseFirst))); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.collation @@ -194,7 +194,7 @@ EncodedJSValue JSC_HOST_CALL IntlLocalePrototypeGetterCollation(JSGlobalObject* return throwVMTypeError(globalObject, scope, "Intl.Locale.prototype.collation called on value that's not an object initialized as a Locale"_s); const String& collation = locale->collation(); - RELEASE_AND_RETURN(scope, JSValue::encode(collation.isEmpty() ? jsUndefined() : jsString(vm, collation))); + RELEASE_AND_RETURN(scope, JSValue::encode(collation.isNull() ? jsUndefined() : jsString(vm, collation))); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.hourCycle @@ -208,7 +208,7 @@ EncodedJSValue JSC_HOST_CALL IntlLocalePrototypeGetterHourCycle(JSGlobalObject* return throwVMTypeError(globalObject, scope, "Intl.Locale.prototype.hourCycle called on value that's not an object initialized as a Locale"_s); const String& hourCycle = locale->hourCycle(); - RELEASE_AND_RETURN(scope, JSValue::encode(hourCycle.isEmpty() ? jsUndefined() : jsString(vm, hourCycle))); + RELEASE_AND_RETURN(scope, JSValue::encode(hourCycle.isNull() ? 
jsUndefined() : jsString(vm, hourCycle))); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.numeric @@ -235,7 +235,7 @@ EncodedJSValue JSC_HOST_CALL IntlLocalePrototypeGetterNumberingSystem(JSGlobalOb return throwVMTypeError(globalObject, scope, "Intl.Locale.prototype.numberingSystem called on value that's not an object initialized as a Locale"_s); const String& numberingSystem = locale->numberingSystem(); - RELEASE_AND_RETURN(scope, JSValue::encode(numberingSystem.isEmpty() ? jsUndefined() : jsString(vm, numberingSystem))); + RELEASE_AND_RETURN(scope, JSValue::encode(numberingSystem.isNull() ? jsUndefined() : jsString(vm, numberingSystem))); } // https://tc39.es/ecma402/#sec-Intl.Locale.prototype.language diff --git a/Source/JavaScriptCore/runtime/IntlObject.cpp b/Source/JavaScriptCore/runtime/IntlObject.cpp index 301bdd13eb670..fa533c3140e87 100644 --- a/Source/JavaScriptCore/runtime/IntlObject.cpp +++ b/Source/JavaScriptCore/runtime/IntlObject.cpp @@ -190,6 +190,47 @@ Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSV return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); } +static Vector unicodeExtensionComponents(StringView extension) +{ + // UnicodeExtensionSubtags (extension) + // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags + + auto extensionLength = extension.length(); + if (extensionLength < 3) + return { }; + + Vector subtags; + size_t subtagStart = 3; // Skip initial -u-. + size_t valueStart = 3; + bool isLeading = true; + for (size_t index = subtagStart; index < extensionLength; ++index) { + if (extension[index] == '-') { + if (index - subtagStart == 2) { + // Tag is a key, first append prior key's value if there is one. 
+ if (subtagStart - valueStart > 1) + subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1)); + subtags.append(extension.substring(subtagStart, index - subtagStart)); + valueStart = index + 1; + isLeading = false; + } else if (isLeading) { + // Leading subtags before first key. + subtags.append(extension.substring(subtagStart, index - subtagStart)); + valueStart = index + 1; + } + subtagStart = index + 1; + } + } + if (extensionLength - subtagStart == 2) { + // Trailing an extension key, first append prior key's value if there is one. + if (subtagStart - valueStart > 1) + subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1)); + valueStart = subtagStart; + } + // Append final key's value. + subtags.append(extension.substring(valueStart, extensionLength - valueStart)); + return subtags; +} + Vector localeIDBufferForLanguageTag(const CString& tag) { if (!tag.length()) @@ -212,6 +253,71 @@ Vector localeIDBufferForLanguageTag(const CString& tag) return buffer; } +Vector canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(Vector&& buffer) +{ + StringView locale(buffer.data(), buffer.size()); + ASSERT(locale.is8Bit()); + size_t extensionIndex = locale.find("-u-"); + if (extensionIndex == notFound) + return WTFMove(buffer); + + // Since ICU's canonicalization is incomplete, we need to perform some of canonicalization here. + size_t extensionLength = locale.length() - extensionIndex; + size_t end = extensionIndex + 3; + while (end < locale.length()) { + end = locale.find('-', end); + if (end == notFound) + break; + // Found another singleton. + if (end + 2 < locale.length() && locale[end + 2] == '-') { + extensionLength = end - extensionIndex; + break; + } + end++; + } + + Vector result; + result.append(buffer.data(), extensionIndex + 2); // "-u" is included. 
+ StringView extension = locale.substring(extensionIndex, extensionLength); + ASSERT(extension.is8Bit()); + auto subtags = unicodeExtensionComponents(extension); + for (unsigned index = 0; index < subtags.size();) { + auto subtag = subtags[index]; + ASSERT(subtag.is8Bit()); + result.append('-'); + result.append(subtag.characters8(), subtag.length()); + + if (subtag.length() != 2) { + ++index; + continue; + } + ASSERT(subtag.length() == 2); + + // This is unicode extension key. + unsigned valueIndexStart = index + 1; + unsigned valueIndexEnd = valueIndexStart; + for (; valueIndexEnd < subtags.size(); ++valueIndexEnd) { + if (subtags[valueIndexEnd].length() == 2) + break; + } + // [valueIndexStart, valueIndexEnd) is value of this unicode extension. If there is no value, valueIndexStart == valueIndexEnd. + + for (unsigned valueIndex = valueIndexStart; valueIndex < valueIndexEnd; ++valueIndex) { + auto value = subtags[valueIndex]; + if (value != "true"_s) { + result.append('-'); + result.append(value.characters8(), value.length()); + } + } + index = valueIndexEnd; + } + + unsigned remainingStart = extensionIndex + extensionLength; + unsigned remainingLength = buffer.size() - remainingStart; + result.append(buffer.data() + remainingStart, remainingLength); + return result; +} + String languageTagForLocaleID(const char* localeID, bool isImmortal) { Vector buffer; @@ -219,12 +325,15 @@ String languageTagForLocaleID(const char* localeID, bool isImmortal) if (U_FAILURE(status)) return String(); - // This is used to store into static variables that may be shared across JSC execution threads. - // This must be immortal to make concurrent ref/deref safe. - if (isImmortal) - return StringImpl::createStaticStringImpl(buffer.data(), buffer.size()); + auto createResult = [&](Vector&& buffer) -> String { + // This is used to store into static variables that may be shared across JSC execution threads. + // This must be immortal to make concurrent ref/deref safe. 
+ if (isImmortal) + return StringImpl::createStaticStringImpl(buffer.data(), buffer.size()); + return String(buffer.data(), buffer.size()); + }; - return String(buffer.data(), buffer.size()); + return createResult(canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(WTFMove(buffer))); } // Ensure we have xx-ZZ whenever we have xx-Yyyy-ZZ. @@ -694,45 +803,6 @@ static MatcherResult bestFitMatcher(JSGlobalObject* globalObject, const HashSet< return lookupMatcher(globalObject, availableLocales, requestedLocales); } -static void unicodeExtensionSubTags(const String& extension, Vector& subtags) -{ - // UnicodeExtensionSubtags (extension) - // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags - - auto extensionLength = extension.length(); - if (extensionLength < 3) - return; - - size_t subtagStart = 3; // Skip initial -u-. - size_t valueStart = 3; - bool isLeading = true; - for (size_t index = subtagStart; index < extensionLength; ++index) { - if (extension[index] == '-') { - if (index - subtagStart == 2) { - // Tag is a key, first append prior key's value if there is one. - if (subtagStart - valueStart > 1) - subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1)); - subtags.append(extension.substring(subtagStart, index - subtagStart)); - valueStart = index + 1; - isLeading = false; - } else if (isLeading) { - // Leading subtags before first key. - subtags.append(extension.substring(subtagStart, index - subtagStart)); - valueStart = index + 1; - } - subtagStart = index + 1; - } - } - if (extensionLength - subtagStart == 2) { - // Trailing an extension key, first append prior key's value if there is one. - if (subtagStart - valueStart > 1) - subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1)); - valueStart = subtagStart; - } - // Append final key's value. 
- subtags.append(extension.substring(valueStart, extensionLength - valueStart)); -} - constexpr ASCIILiteral relevantExtensionKeyString(RelevantExtensionKey key) { switch (key) { @@ -756,9 +826,9 @@ ResolvedLocale resolveLocale(JSGlobalObject* globalObject, const HashSet String foundLocale = matcherResult.locale; - Vector extensionSubtags; + Vector extensionSubtags; if (!matcherResult.extension.isNull()) - unicodeExtensionSubTags(matcherResult.extension, extensionSubtags); + extensionSubtags = unicodeExtensionComponents(matcherResult.extension); ResolvedLocale resolved; resolved.dataLocale = foundLocale; @@ -776,9 +846,10 @@ ResolvedLocale resolveLocale(JSGlobalObject* globalObject, const HashSet size_t keyPos = extensionSubtags.find(keyString); if (keyPos != notFound) { if (keyPos + 1 < extensionSubtags.size() && extensionSubtags[keyPos + 1].length() > 2) { - const String& requestedValue = extensionSubtags[keyPos + 1]; - if (keyLocaleData.contains(requestedValue)) { - value = requestedValue; + StringView requestedValue = extensionSubtags[keyPos + 1]; + auto dataPos = keyLocaleData.find(requestedValue); + if (dataPos != notFound) { + value = keyLocaleData[dataPos]; supportedExtensionAddition = makeString('-', keyString, '-', value); } } else if (keyLocaleData.contains("true"_s)) { diff --git a/Source/JavaScriptCore/runtime/IntlObject.h b/Source/JavaScriptCore/runtime/IntlObject.h index eb190d76e6345..a958209636cd9 100644 --- a/Source/JavaScriptCore/runtime/IntlObject.h +++ b/Source/JavaScriptCore/runtime/IntlObject.h @@ -113,6 +113,8 @@ String bestAvailableLocale(const HashSet& availableLocales, const String template String bestAvailableLocale(const String& requestedLocale, Predicate); Vector numberingSystemsForLocale(const String& locale); +Vector canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(Vector&&); + bool isUnicodeLocaleIdentifierType(StringView); bool isUnicodeLanguageSubtag(StringView); diff --git 
a/Source/JavaScriptCore/runtime/IntlObjectInlines.h b/Source/JavaScriptCore/runtime/IntlObjectInlines.h index e8e3f910b08f1..48e340d22183d 100644 --- a/Source/JavaScriptCore/runtime/IntlObjectInlines.h +++ b/Source/JavaScriptCore/runtime/IntlObjectInlines.h @@ -34,6 +34,12 @@ namespace JSC { +template +static constexpr uint32_t computeTwoCharacters16Code(const StringType& string) +{ + return static_cast(string.characterAt(0)) | (static_cast(static_cast(string.characterAt(1))) << 16); +} + template String bestAvailableLocale(const String& locale, Predicate predicate) { // BestAvailableLocale (availableLocales, locale) diff --git a/Source/JavaScriptCore/runtime/StringPrototype.cpp b/Source/JavaScriptCore/runtime/StringPrototype.cpp index 1ef2cb315a609..1a2dc5f58a292 100644 --- a/Source/JavaScriptCore/runtime/StringPrototype.cpp +++ b/Source/JavaScriptCore/runtime/StringPrototype.cpp @@ -1541,12 +1541,6 @@ EncodedJSValue JSC_HOST_CALL stringProtoFuncLocaleCompare(JSGlobalObject* global RELEASE_AND_RETURN(scope, JSValue::encode(collator->compareStrings(globalObject, string, that))); } -template -static constexpr uint32_t computeTwoCharacters16Code(const StringType& string) -{ - return static_cast(string.characterAt(0)) | (static_cast(static_cast(string.characterAt(1))) << 16); -} - enum class CaseConversionMode { Upper, Lower, diff --git a/Source/WTF/ChangeLog b/Source/WTF/ChangeLog index 831ede8007f9d..ae7c3232cd321 100644 --- a/Source/WTF/ChangeLog +++ b/Source/WTF/ChangeLog @@ -1,3 +1,14 @@ +2020-09-11 Yusuke Suzuki + + [JSC] Canonicalize "true" unicode extension type value to "" + https://bugs.webkit.org/show_bug.cgi?id=216224 + + Reviewed by Ross Kirsling. + + * wtf/text/StringView.h: + (WTF::StringView::characterAt const): + (WTF::StringView::operator[] const): + 2020-09-11 Myles C. 
Maxfield [Cocoa] Migrate CoreText-specific code from ***Cocoa.mm to ***CoreText.cpp diff --git a/Source/WTF/wtf/text/StringView.h b/Source/WTF/wtf/text/StringView.h index 920fbfb7d1f3d..84024ee74ce95 100644 --- a/Source/WTF/wtf/text/StringView.h +++ b/Source/WTF/wtf/text/StringView.h @@ -79,6 +79,7 @@ class StringView final { explicit operator bool() const; bool isNull() const; + UChar characterAt(unsigned index) const; UChar operator[](unsigned index) const; class CodeUnits; @@ -499,7 +500,7 @@ inline StringView StringView::substring(unsigned start, unsigned length) const return result; } -inline UChar StringView::operator[](unsigned index) const +inline UChar StringView::characterAt(unsigned index) const { ASSERT(index < length()); if (is8Bit()) @@ -507,6 +508,11 @@ inline UChar StringView::operator[](unsigned index) const return characters16()[index]; } +inline UChar StringView::operator[](unsigned index) const +{ + return characterAt(index); +} + inline bool StringView::contains(UChar character) const { return find(character) != notFound;