Skip to content

Commit

Permalink
Bump ICU4X to 1.3 (#3306)
Browse files Browse the repository at this point in the history
* Bump ICU4X to 1.3

* Fix build on --no-default-features

* Fix wasm build

* Fix tests

* Add locale resolver hack

* Fix panics

* Pin ICU4X related deps to minor versions
  • Loading branch information
jedel1043 authored Sep 26, 2023
1 parent 60c9583 commit 57604ae
Show file tree
Hide file tree
Showing 39 changed files with 811 additions and 2,008 deletions.
1,481 changes: 411 additions & 1,070 deletions Cargo.lock

Large diffs are not rendered by default.

36 changes: 18 additions & 18 deletions boa_engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ rust-version.workspace = true
profiler = ["boa_profiler/profiler"]
deser = ["boa_interner/serde", "boa_ast/serde"]
intl = [
"boa_icu_provider/full",
"icu_normalizer/serde",
"icu_normalizer/std",
"dep:boa_icu_provider",
"dep:icu_locid_transform",
"dep:icu_locid",
"dep:icu_datetime",
"dep:icu_plurals",
"dep:icu_provider",
"dep:icu_calendar",
"dep:icu_collator",
"dep:icu_casemapping",
"dep:icu_casemap",
"dep:icu_list",
"dep:icu_segmenter",
"dep:writeable",
Expand Down Expand Up @@ -53,7 +53,6 @@ boa_profiler.workspace = true
boa_macros.workspace = true
boa_ast.workspace = true
boa_parser.workspace = true
boa_icu_provider.workspace = true
serde = { version = "1.0.188", features = ["derive", "rc"] }
serde_json = "1.0.107"
rand = "0.8.5"
Expand All @@ -77,24 +76,25 @@ num_enum = "0.7.0"
pollster = "0.3.0"
thin-vec = "0.2.12"
itertools = { version = "0.11.0", default-features = false }
icu_normalizer = "1.2.0"
icu_normalizer = "~1.3.0"

# intl deps
icu_locid_transform = { version = "1.2.1", features = ["std", "serde"], optional = true }
icu_locid = { version = "1.2.0", features = ["serde"], optional = true }
icu_datetime = { version = "1.2.1", features = ["serde", "experimental"], optional = true }
icu_calendar = { version = "1.2.0", optional = true }
icu_collator = { version = "1.2.0", features = ["serde"], optional = true }
icu_plurals = { version = "1.2.0", features = ["serde"], optional = true }
icu_provider = { version = "1.2.0", optional = true }
icu_list = { version = "1.2.0", features = ["serde"], optional = true }
icu_casemapping = { version = "0.7.2", features = ["serde"], optional = true}
icu_segmenter = { version = "1.2.1", features = ["serde"], optional = true }
writeable = { version = "0.5.2", optional = true }
yoke = { version = "0.7.1", optional = true }
zerofrom = { version = "0.1.2", optional = true }
boa_icu_provider = {workspace = true, features = ["std"], optional = true }
sys-locale = { version = "0.3.1", optional = true }
fixed_decimal = { version = "0.5.4", features = ["ryu"], optional = true}
icu_provider = { version = "~1.3.0", optional = true }
icu_locid = { version = "~1.3.0", features = ["serde"], optional = true }
icu_locid_transform = { version = "~1.3.0", default-features = false, features = ["std", "serde"], optional = true }
icu_datetime = { version = "~1.3.0", default-features = false, features = ["serde", "experimental"], optional = true }
icu_calendar = { version = "~1.3.0", default-features = false, optional = true }
icu_collator = { version = "~1.3.0", default-features = false, features = ["serde"], optional = true }
icu_plurals = { version = "~1.3.0", default-features = false, features = ["serde"], optional = true }
icu_list = { version = "~1.3.0", default-features = false, features = ["serde"], optional = true }
icu_casemap = { version = "~1.3.0", default-features = false, features = ["serde"], optional = true}
icu_segmenter = { version = "~1.3.0", default-features = false, features = ["auto", "serde"], optional = true }
writeable = { version = "~0.5.3", optional = true }
yoke = { version = "~0.7.2", optional = true }
zerofrom = { version = "~0.1.3", optional = true }
fixed_decimal = { version = "~0.5.4", features = ["ryu"], optional = true}

[dev-dependencies]
criterion = "0.5.1"
Expand Down
28 changes: 6 additions & 22 deletions boa_engine/src/builtins/intl/collator/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use boa_gc::{custom_trace, Finalize, Trace};
use boa_profiler::Profiler;
use icu_collator::{
provider::CollationMetadataV1Marker, AlternateHandling, CaseFirst, MaxVariable, Numeric,
provider::CollationMetadataV1Marker, AlternateHandling, CaseFirst, Collator as NativeCollator,
MaxVariable, Numeric,
};

use icu_locid::{
Expand Down Expand Up @@ -39,6 +40,7 @@ use super::{
mod options;
pub(crate) use options::*;

#[derive(Debug)]
pub struct Collator {
locale: Locale,
collation: Value,
Expand All @@ -47,7 +49,7 @@ pub struct Collator {
usage: Usage,
sensitivity: Sensitivity,
ignore_punctuation: bool,
collator: icu_collator::Collator,
collator: NativeCollator,
bound_compare: Option<JsFunction>,
}

Expand All @@ -65,22 +67,6 @@ impl Collator {
}
}

impl std::fmt::Debug for Collator {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Collator")
.field("locale", &self.locale)
.field("collation", &self.collation)
.field("numeric", &self.numeric)
.field("case_first", &self.case_first)
.field("usage", &self.usage)
.field("sensitivity", &self.sensitivity)
.field("ignore_punctuation", &self.ignore_punctuation)
.field("collator", &"ICUCollator")
.field("bound_compare", &self.bound_compare)
.finish()
}
}

#[derive(Debug, Clone)]
pub(in crate::builtins::intl) struct CollatorLocaleOptions {
collation: Option<Value>,
Expand Down Expand Up @@ -346,10 +332,8 @@ impl BuiltInConstructor for Collator {
.then_some((AlternateHandling::Shifted, MaxVariable::Punctuation))
.unzip();

let collator = context
.icu()
.provider()
.try_new_collator(&collator_locale, {
let collator =
NativeCollator::try_new_unstable(&context.icu().provider(), &collator_locale, {
let mut options = icu_collator::CollatorOptions::new();
options.strength = strength;
options.case_level = case_level;
Expand Down
44 changes: 25 additions & 19 deletions boa_engine/src/builtins/intl/list_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,13 @@ use super::{

mod options;
pub(crate) use options::*;

#[derive(Debug)]
pub struct ListFormat {
locale: Locale,
typ: ListFormatType,
style: ListLength,
formatter: ListFormatter,
}

impl std::fmt::Debug for ListFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ListFormat")
.field("locale", &self.locale)
.field("typ", &self.typ)
.field("style", &self.style)
.field("formatter", &"ListFormatter")
.finish()
}
native: ListFormatter,
}

impl Service for ListFormat {
Expand Down Expand Up @@ -146,6 +137,25 @@ impl BuiltInConstructor for ListFormat {
// 16. Let dataLocaleData be localeData.[[<dataLocale>]].
// 17. Let dataLocaleTypes be dataLocaleData.[[<type>]].
// 18. Set listFormat.[[Templates]] to dataLocaleTypes.[[<style>]].
let data_locale = DataLocale::from(&locale);
let formatter = match typ {
ListFormatType::Conjunction => ListFormatter::try_new_and_with_length_unstable(
&context.icu().provider(),
&data_locale,
style,
),
ListFormatType::Disjunction => ListFormatter::try_new_or_with_length_unstable(
&context.icu().provider(),
&data_locale,
style,
),
ListFormatType::Unit => ListFormatter::try_new_unit_with_length_unstable(
&context.icu().provider(),
&data_locale,
style,
),
}
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;

// 2. Let listFormat be ? OrdinaryCreateFromConstructor(NewTarget, "%ListFormat.prototype%", « [[InitializedListFormat]], [[Locale]], [[Type]], [[Style]], [[Templates]] »).
let prototype =
Expand All @@ -154,14 +164,10 @@ impl BuiltInConstructor for ListFormat {
context.root_shape(),
prototype,
ObjectData::list_format(Self {
formatter: context
.icu()
.provider()
.try_new_list_formatter(&DataLocale::from(&locale), typ, style)
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?,
locale,
typ,
style,
native: formatter,
}),
);

Expand Down Expand Up @@ -225,7 +231,7 @@ impl ListFormat {

// 4. Return ! FormatList(lf, stringList).
Ok(lf
.formatter
.native
.format_to_string(strings.into_iter().map(|s| s.to_std_string_escaped()))
.into())
}
Expand Down Expand Up @@ -349,7 +355,7 @@ impl ListFormat {

// 1. Let parts be ! CreatePartsFromList(listFormat, list).
let mut parts = PartsCollector(Vec::new());
lf.formatter
lf.native
.format(strings)
.write_to_parts(&mut parts)
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;
Expand Down
69 changes: 47 additions & 22 deletions boa_engine/src/builtins/intl/locale/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ use icu_locid::{
LanguageIdentifier, Locale,
};
use icu_locid_transform::LocaleCanonicalizer;
use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata, KeyedDataMarker};
use icu_provider::{
DataError, DataErrorKind, DataLocale, DataProvider, DataRequest, DataRequestMetadata,
KeyedDataMarker,
};
use icu_segmenter::provider::WordBreakDataV1Marker;
use indexmap::IndexSet;

Expand Down Expand Up @@ -184,28 +187,38 @@ pub(crate) fn best_available_locale<M: KeyedDataMarker>(
},
);

if let Ok(req) = response {
// `metadata.locale` returns None when the provider doesn't have a fallback mechanism,
// but supports the required locale. However, if the provider has a fallback mechanism,
// this will return `Some(locale)`, where the locale is the used locale after applying
// the fallback algorithm, even if the used locale is exactly the same as the required
// locale.
match req.metadata.locale {
// TODO: ugly hack to accept locales that fall back to "und" in the collator/segmenter services
Some(loc)
if loc == candidate
|| (loc.is_empty()
&& [
CollationMetadataV1Marker::KEY.path(),
WordBreakDataV1Marker::KEY.path(),
]
.contains(&M::KEY.path())) =>
{
return Some(candidate.into_locale().id)
match response {
Ok(req) => {
// `metadata.locale` returns None when the provider doesn't have a fallback mechanism,
// but supports the required locale. However, if the provider has a fallback mechanism,
// this will return `Some(locale)`, where the locale is the used locale after applying
// the fallback algorithm, even if the used locale is exactly the same as the required
// locale.
match req.metadata.locale {
// TODO: ugly hack to accept locales that fall back to "und" in the collator/segmenter services
Some(loc)
if loc == candidate
|| (loc.is_empty()
&& [
CollationMetadataV1Marker::KEY.path(),
WordBreakDataV1Marker::KEY.path(),
]
.contains(&M::KEY.path())) =>
{
return Some(candidate.into_locale().id)
}
None => return Some(candidate.into_locale().id),
_ => {}
}
None => return Some(candidate.into_locale().id),
_ => {}
}
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
}) => {
// This is essentially the same hack as above but for singleton keys
return Some(candidate.into_locale().id);
}
Err(_) => {}
}

// b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. If that character does not occur, return undefined.
Expand Down Expand Up @@ -241,11 +254,23 @@ pub(crate) fn best_locale_for_provider<M: KeyedDataMarker>(
candidate: LanguageIdentifier,
provider: &(impl DataProvider<M> + ?Sized),
) -> Option<LanguageIdentifier> {
// Yet another hack to add to the list...
// This one exists because singleton markers like `WordBreakDataV1Marker` return an error if
// they receive a request with a locale, since they don't really need one. In that case, we
// check whether the key is a singleton and, if so, return the candidate as is.
if M::KEY.metadata().singleton {
return Some(candidate);
}

let response = DataProvider::<M>::load(
provider,
DataRequest {
locale: &DataLocale::from(&candidate),
metadata: DataRequestMetadata::default(),
metadata: {
let mut md = DataRequestMetadata::default();
md.silent = true;
md
},
},
)
.ok()?;
Expand Down
10 changes: 7 additions & 3 deletions boa_engine/src/builtins/intl/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
//! Boa's implementation of ECMAScript's global `Intl` object.
//!
//! `Intl` is a built-in object that has properties and methods for i18n. It's not a function object.
//! The `Intl` namespace object contains several constructors as well as functionality common to the
//! internationalization constructors and other language sensitive functions. Collectively, they
//! comprise the ECMAScript Internationalization API, which provides language sensitive string
//! comparison, number formatting, date and time formatting, and more.
//!
//! More information:
//! - [ECMAScript reference][spec]
//! - [MDN documentation][mdn]
//!
//!
//! [spec]: https://tc39.es/ecma402/#intl-object
#![allow(clippy::string_lit_as_bytes)]
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl
use crate::{
builtins::{Array, BuiltInBuilder, BuiltInObject, IntrinsicObject},
Expand Down
21 changes: 16 additions & 5 deletions boa_engine/src/builtins/intl/plural_rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,22 @@ impl BuiltInConstructor for PluralRules {
context.icu(),
);

let native = context
.icu()
.provider()
.try_new_plural_rules(&DataLocale::from(&locale), rule_type)
.map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;
let native = match rule_type {
PluralRuleType::Cardinal => NativePluralRules::try_new_cardinal_unstable(
&context.icu().provider(),
&DataLocale::from(&locale),
),
PluralRuleType::Ordinal => NativePluralRules::try_new_ordinal_unstable(
&context.icu().provider(),
&DataLocale::from(&locale),
),
_ => {
return Err(JsNativeError::typ()
.with_message("unimplemented plural rule type")
.into())
}
}
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;

let proto = get_prototype_from_constructor(
new_target,
Expand Down
Loading

0 comments on commit 57604ae

Please sign in to comment.