Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump ICU4X to 1.3 #3306

Merged
merged 7 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,481 changes: 411 additions & 1,070 deletions Cargo.lock

Large diffs are not rendered by default.

36 changes: 18 additions & 18 deletions boa_engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ rust-version.workspace = true
profiler = ["boa_profiler/profiler"]
deser = ["boa_interner/serde", "boa_ast/serde"]
intl = [
"boa_icu_provider/full",
"icu_normalizer/serde",
"icu_normalizer/std",
"dep:boa_icu_provider",
"dep:icu_locid_transform",
"dep:icu_locid",
"dep:icu_datetime",
"dep:icu_plurals",
"dep:icu_provider",
"dep:icu_calendar",
"dep:icu_collator",
"dep:icu_casemapping",
"dep:icu_casemap",
"dep:icu_list",
"dep:icu_segmenter",
"dep:writeable",
Expand Down Expand Up @@ -53,7 +53,6 @@ boa_profiler.workspace = true
boa_macros.workspace = true
boa_ast.workspace = true
boa_parser.workspace = true
boa_icu_provider.workspace = true
serde = { version = "1.0.188", features = ["derive", "rc"] }
serde_json = "1.0.107"
rand = "0.8.5"
Expand All @@ -77,24 +76,25 @@ num_enum = "0.7.0"
pollster = "0.3.0"
thin-vec = "0.2.12"
itertools = { version = "0.11.0", default-features = false }
icu_normalizer = "1.2.0"
icu_normalizer = "~1.3.0"

# intl deps
icu_locid_transform = { version = "1.2.1", features = ["std", "serde"], optional = true }
icu_locid = { version = "1.2.0", features = ["serde"], optional = true }
icu_datetime = { version = "1.2.1", features = ["serde", "experimental"], optional = true }
icu_calendar = { version = "1.2.0", optional = true }
icu_collator = { version = "1.2.0", features = ["serde"], optional = true }
icu_plurals = { version = "1.2.0", features = ["serde"], optional = true }
icu_provider = { version = "1.2.0", optional = true }
icu_list = { version = "1.2.0", features = ["serde"], optional = true }
icu_casemapping = { version = "0.7.2", features = ["serde"], optional = true}
icu_segmenter = { version = "1.2.1", features = ["serde"], optional = true }
writeable = { version = "0.5.2", optional = true }
yoke = { version = "0.7.1", optional = true }
zerofrom = { version = "0.1.2", optional = true }
boa_icu_provider = {workspace = true, features = ["std"], optional = true }
sys-locale = { version = "0.3.1", optional = true }
fixed_decimal = { version = "0.5.4", features = ["ryu"], optional = true}
icu_provider = { version = "~1.3.0", optional = true }
icu_locid = { version = "~1.3.0", features = ["serde"], optional = true }
icu_locid_transform = { version = "~1.3.0", default-features = false, features = ["std", "serde"], optional = true }
icu_datetime = { version = "~1.3.0", default-features = false, features = ["serde", "experimental"], optional = true }
icu_calendar = { version = "~1.3.0", default-features = false, optional = true }
icu_collator = { version = "~1.3.0", default-features = false, features = ["serde"], optional = true }
icu_plurals = { version = "~1.3.0", default-features = false, features = ["serde"], optional = true }
icu_list = { version = "~1.3.0", default-features = false, features = ["serde"], optional = true }
icu_casemap = { version = "~1.3.0", default-features = false, features = ["serde"], optional = true}
icu_segmenter = { version = "~1.3.0", default-features = false, features = ["auto", "serde"], optional = true }
writeable = { version = "~0.5.3", optional = true }
yoke = { version = "~0.7.2", optional = true }
zerofrom = { version = "~0.1.3", optional = true }
fixed_decimal = { version = "~0.5.4", features = ["ryu"], optional = true}

[dev-dependencies]
criterion = "0.5.1"
Expand Down
28 changes: 6 additions & 22 deletions boa_engine/src/builtins/intl/collator/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use boa_gc::{custom_trace, Finalize, Trace};
use boa_profiler::Profiler;
use icu_collator::{
provider::CollationMetadataV1Marker, AlternateHandling, CaseFirst, MaxVariable, Numeric,
provider::CollationMetadataV1Marker, AlternateHandling, CaseFirst, Collator as NativeCollator,
MaxVariable, Numeric,
};

use icu_locid::{
Expand Down Expand Up @@ -39,6 +40,7 @@ use super::{
mod options;
pub(crate) use options::*;

#[derive(Debug)]
pub struct Collator {
locale: Locale,
collation: Value,
Expand All @@ -47,7 +49,7 @@ pub struct Collator {
usage: Usage,
sensitivity: Sensitivity,
ignore_punctuation: bool,
collator: icu_collator::Collator,
collator: NativeCollator,
bound_compare: Option<JsFunction>,
}

Expand All @@ -65,22 +67,6 @@ impl Collator {
}
}

impl std::fmt::Debug for Collator {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Collator")
.field("locale", &self.locale)
.field("collation", &self.collation)
.field("numeric", &self.numeric)
.field("case_first", &self.case_first)
.field("usage", &self.usage)
.field("sensitivity", &self.sensitivity)
.field("ignore_punctuation", &self.ignore_punctuation)
.field("collator", &"ICUCollator")
.field("bound_compare", &self.bound_compare)
.finish()
}
}

#[derive(Debug, Clone)]
pub(in crate::builtins::intl) struct CollatorLocaleOptions {
collation: Option<Value>,
Expand Down Expand Up @@ -346,10 +332,8 @@ impl BuiltInConstructor for Collator {
.then_some((AlternateHandling::Shifted, MaxVariable::Punctuation))
.unzip();

let collator = context
.icu()
.provider()
.try_new_collator(&collator_locale, {
let collator =
NativeCollator::try_new_unstable(&context.icu().provider(), &collator_locale, {
let mut options = icu_collator::CollatorOptions::new();
options.strength = strength;
options.case_level = case_level;
Expand Down
44 changes: 25 additions & 19 deletions boa_engine/src/builtins/intl/list_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,13 @@ use super::{

mod options;
pub(crate) use options::*;

#[derive(Debug)]
pub struct ListFormat {
locale: Locale,
typ: ListFormatType,
style: ListLength,
formatter: ListFormatter,
}

impl std::fmt::Debug for ListFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ListFormat")
.field("locale", &self.locale)
.field("typ", &self.typ)
.field("style", &self.style)
.field("formatter", &"ListFormatter")
.finish()
}
native: ListFormatter,
}

impl Service for ListFormat {
Expand Down Expand Up @@ -146,6 +137,25 @@ impl BuiltInConstructor for ListFormat {
// 16. Let dataLocaleData be localeData.[[<dataLocale>]].
// 17. Let dataLocaleTypes be dataLocaleData.[[<type>]].
// 18. Set listFormat.[[Templates]] to dataLocaleTypes.[[<style>]].
let data_locale = DataLocale::from(&locale);
let formatter = match typ {
ListFormatType::Conjunction => ListFormatter::try_new_and_with_length_unstable(
&context.icu().provider(),
&data_locale,
style,
),
ListFormatType::Disjunction => ListFormatter::try_new_or_with_length_unstable(
&context.icu().provider(),
&data_locale,
style,
),
ListFormatType::Unit => ListFormatter::try_new_unit_with_length_unstable(
&context.icu().provider(),
&data_locale,
style,
),
}
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;

// 2. Let listFormat be ? OrdinaryCreateFromConstructor(NewTarget, "%ListFormat.prototype%", « [[InitializedListFormat]], [[Locale]], [[Type]], [[Style]], [[Templates]] »).
let prototype =
Expand All @@ -154,14 +164,10 @@ impl BuiltInConstructor for ListFormat {
context.root_shape(),
prototype,
ObjectData::list_format(Self {
formatter: context
.icu()
.provider()
.try_new_list_formatter(&DataLocale::from(&locale), typ, style)
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?,
locale,
typ,
style,
native: formatter,
}),
);

Expand Down Expand Up @@ -225,7 +231,7 @@ impl ListFormat {

// 4. Return ! FormatList(lf, stringList).
Ok(lf
.formatter
.native
.format_to_string(strings.into_iter().map(|s| s.to_std_string_escaped()))
.into())
}
Expand Down Expand Up @@ -349,7 +355,7 @@ impl ListFormat {

// 1. Let parts be ! CreatePartsFromList(listFormat, list).
let mut parts = PartsCollector(Vec::new());
lf.formatter
lf.native
.format(strings)
.write_to_parts(&mut parts)
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;
Expand Down
69 changes: 47 additions & 22 deletions boa_engine/src/builtins/intl/locale/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ use icu_locid::{
LanguageIdentifier, Locale,
};
use icu_locid_transform::LocaleCanonicalizer;
use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata, KeyedDataMarker};
use icu_provider::{
DataError, DataErrorKind, DataLocale, DataProvider, DataRequest, DataRequestMetadata,
KeyedDataMarker,
};
use icu_segmenter::provider::WordBreakDataV1Marker;
use indexmap::IndexSet;

Expand Down Expand Up @@ -184,28 +187,38 @@ pub(crate) fn best_available_locale<M: KeyedDataMarker>(
},
);

if let Ok(req) = response {
// `metadata.locale` returns None when the provider doesn't have a fallback mechanism,
// but supports the required locale. However, if the provider has a fallback mechanism,
// this will return `Some(locale)`, where the locale is the used locale after applying
// the fallback algorithm, even if the used locale is exactly the same as the required
// locale.
match req.metadata.locale {
// TODO: ugly hack to accept locales that fallback to "und" in the collator/segmenter services
Some(loc)
if loc == candidate
|| (loc.is_empty()
&& [
CollationMetadataV1Marker::KEY.path(),
WordBreakDataV1Marker::KEY.path(),
]
.contains(&M::KEY.path())) =>
{
return Some(candidate.into_locale().id)
match response {
Ok(req) => {
// `metadata.locale` returns None when the provider doesn't have a fallback mechanism,
// but supports the required locale. However, if the provider has a fallback mechanism,
// this will return `Some(locale)`, where the locale is the used locale after applying
// the fallback algorithm, even if the used locale is exactly the same as the required
// locale.
match req.metadata.locale {
// TODO: ugly hack to accept locales that fall back to "und" in the collator/segmenter services
Some(loc)
if loc == candidate
|| (loc.is_empty()
&& [
CollationMetadataV1Marker::KEY.path(),
WordBreakDataV1Marker::KEY.path(),
]
.contains(&M::KEY.path())) =>
{
return Some(candidate.into_locale().id)
}
None => return Some(candidate.into_locale().id),
_ => {}
}
None => return Some(candidate.into_locale().id),
_ => {}
}
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
}) => {
// This is essentially the same hack as above but for singleton keys
return Some(candidate.into_locale().id);
}
Err(_) => {}
}

// b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. If that character does not occur, return undefined.
Expand Down Expand Up @@ -241,11 +254,23 @@ pub(crate) fn best_locale_for_provider<M: KeyedDataMarker>(
candidate: LanguageIdentifier,
provider: &(impl DataProvider<M> + ?Sized),
) -> Option<LanguageIdentifier> {
// Another hack to add to the list...
// This one exists because singleton markers like `WordBreakDataV1Marker` return an error if
// they receive a request with a locale, since they don't really need one. In this case, we
// can check if the key is one of those kinds and return the candidate as it is.
if M::KEY.metadata().singleton {
return Some(candidate);
}

let response = DataProvider::<M>::load(
provider,
DataRequest {
locale: &DataLocale::from(&candidate),
metadata: DataRequestMetadata::default(),
metadata: {
let mut md = DataRequestMetadata::default();
md.silent = true;
md
},
},
)
.ok()?;
Expand Down
10 changes: 7 additions & 3 deletions boa_engine/src/builtins/intl/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
//! Boa's implementation of ECMAScript's global `Intl` object.
//!
//! `Intl` is a built-in object that has properties and methods for i18n. It's not a function object.
//! The `Intl` namespace object contains several constructors as well as functionality common to the
//! internationalization constructors and other language sensitive functions. Collectively, they
//! comprise the ECMAScript Internationalization API, which provides language sensitive string
//! comparison, number formatting, date and time formatting, and more.
//!
//! More information:
//! - [ECMAScript reference][spec]
//! - [MDN documentation][mdn]
//!
//!
//! [spec]: https://tc39.es/ecma402/#intl-object

#![allow(clippy::string_lit_as_bytes)]
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl

use crate::{
builtins::{Array, BuiltInBuilder, BuiltInObject, IntrinsicObject},
Expand Down
21 changes: 16 additions & 5 deletions boa_engine/src/builtins/intl/plural_rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,22 @@ impl BuiltInConstructor for PluralRules {
context.icu(),
);

let native = context
.icu()
.provider()
.try_new_plural_rules(&DataLocale::from(&locale), rule_type)
.map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;
let native = match rule_type {
PluralRuleType::Cardinal => NativePluralRules::try_new_cardinal_unstable(
&context.icu().provider(),
&DataLocale::from(&locale),
),
PluralRuleType::Ordinal => NativePluralRules::try_new_ordinal_unstable(
&context.icu().provider(),
&DataLocale::from(&locale),
),
_ => {
return Err(JsNativeError::typ()
.with_message("unimplemented plural rule type")
.into())
}
}
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;

let proto = get_prototype_from_constructor(
new_target,
Expand Down
Loading