Skip to content

Commit

Permalink
Bump ICU4X to 1.3
Browse files (browse the repository at this point in the history)
  • Loading branch information
jedel1043 committed Sep 25, 2023
1 parent 25c120b commit 408eaa4
Show file tree
Hide file tree
Showing 33 changed files with 563 additions and 1,749 deletions.
1,481 changes: 411 additions & 1,070 deletions Cargo.lock

Large diffs are not rendered by default.

34 changes: 17 additions & 17 deletions boa_engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ rust-version.workspace = true
profiler = ["boa_profiler/profiler"]
deser = ["boa_interner/serde", "boa_ast/serde"]
intl = [
"boa_icu_provider/full",
"icu_normalizer/serde",
"icu_normalizer/std",
"dep:boa_icu_provider",
"dep:icu_locid_transform",
"dep:icu_locid",
"dep:icu_datetime",
"dep:icu_plurals",
"dep:icu_provider",
"dep:icu_calendar",
"dep:icu_collator",
"dep:icu_casemapping",
"dep:icu_casemap",
"dep:icu_list",
"dep:icu_segmenter",
"dep:writeable",
Expand Down Expand Up @@ -53,7 +53,6 @@ boa_profiler.workspace = true
boa_macros.workspace = true
boa_ast.workspace = true
boa_parser.workspace = true
boa_icu_provider.workspace = true
serde = { version = "1.0.188", features = ["derive", "rc"] }
serde_json = "1.0.107"
rand = "0.8.5"
Expand All @@ -77,22 +76,23 @@ num_enum = "0.7.0"
pollster = "0.3.0"
thin-vec = "0.2.12"
itertools = { version = "0.11.0", default-features = false }
icu_normalizer = "1.2.0"
icu_normalizer = "1.3.0"

# intl deps
icu_locid_transform = { version = "1.2.1", features = ["std", "serde"], optional = true }
icu_locid = { version = "1.2.0", features = ["serde"], optional = true }
icu_datetime = { version = "1.2.1", features = ["serde", "experimental"], optional = true }
icu_calendar = { version = "1.2.0", optional = true }
icu_collator = { version = "1.2.0", features = ["serde"], optional = true }
icu_plurals = { version = "1.2.0", features = ["serde"], optional = true }
icu_provider = { version = "1.2.0", optional = true }
icu_list = { version = "1.2.0", features = ["serde"], optional = true }
icu_casemapping = { version = "0.7.2", features = ["serde"], optional = true}
icu_segmenter = { version = "1.2.1", features = ["serde"], optional = true }
writeable = { version = "0.5.2", optional = true }
yoke = { version = "0.7.1", optional = true }
zerofrom = { version = "0.1.2", optional = true }
boa_icu_provider = {workspace = true, features = ["std"], optional = true }
icu_provider = { version = "1.3.0", optional = true }
icu_locid = { version = "1.3.0", features = ["serde"], optional = true }
icu_locid_transform = { version = "1.3.0", default-features = false, features = ["std", "serde"], optional = true }
icu_datetime = { version = "1.3.0", default-features = false, features = ["serde", "experimental"], optional = true }
icu_calendar = { version = "1.3.0", default-features = false, optional = true }
icu_collator = { version = "1.3.0", default-features = false, features = ["serde"], optional = true }
icu_plurals = { version = "1.3.0", default-features = false, features = ["serde"], optional = true }
icu_list = { version = "1.3.0", default-features = false, features = ["serde"], optional = true }
icu_casemap = { version = "1.3.0", default-features = false, features = ["serde"], optional = true}
icu_segmenter = { version = "1.3.0", default-features = false, features = ["auto", "serde"], optional = true }
writeable = { version = "0.5.3", optional = true }
yoke = { version = "0.7.2", optional = true }
zerofrom = { version = "0.1.3", optional = true }
sys-locale = { version = "0.3.1", optional = true }
fixed_decimal = { version = "0.5.4", features = ["ryu"], optional = true}

Expand Down
28 changes: 6 additions & 22 deletions boa_engine/src/builtins/intl/collator/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use boa_gc::{custom_trace, Finalize, Trace};
use boa_profiler::Profiler;
use icu_collator::{
provider::CollationMetadataV1Marker, AlternateHandling, CaseFirst, MaxVariable, Numeric,
provider::CollationMetadataV1Marker, AlternateHandling, CaseFirst, Collator as NativeCollator,
MaxVariable, Numeric,
};

use icu_locid::{
Expand Down Expand Up @@ -39,6 +40,7 @@ use super::{
mod options;
pub(crate) use options::*;

#[derive(Debug)]
pub struct Collator {
locale: Locale,
collation: Value,
Expand All @@ -47,7 +49,7 @@ pub struct Collator {
usage: Usage,
sensitivity: Sensitivity,
ignore_punctuation: bool,
collator: icu_collator::Collator,
collator: NativeCollator,
bound_compare: Option<JsFunction>,
}

Expand All @@ -65,22 +67,6 @@ impl Collator {
}
}

impl std::fmt::Debug for Collator {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Collator")
.field("locale", &self.locale)
.field("collation", &self.collation)
.field("numeric", &self.numeric)
.field("case_first", &self.case_first)
.field("usage", &self.usage)
.field("sensitivity", &self.sensitivity)
.field("ignore_punctuation", &self.ignore_punctuation)
.field("collator", &"ICUCollator")
.field("bound_compare", &self.bound_compare)
.finish()
}
}

#[derive(Debug, Clone)]
pub(in crate::builtins::intl) struct CollatorLocaleOptions {
collation: Option<Value>,
Expand Down Expand Up @@ -346,10 +332,8 @@ impl BuiltInConstructor for Collator {
.then_some((AlternateHandling::Shifted, MaxVariable::Punctuation))
.unzip();

let collator = context
.icu()
.provider()
.try_new_collator(&collator_locale, {
let collator =
NativeCollator::try_new_unstable(&context.icu().provider(), &collator_locale, {
let mut options = icu_collator::CollatorOptions::new();
options.strength = strength;
options.case_level = case_level;
Expand Down
44 changes: 25 additions & 19 deletions boa_engine/src/builtins/intl/list_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,13 @@ use super::{

mod options;
pub(crate) use options::*;

#[derive(Debug)]
pub struct ListFormat {
locale: Locale,
typ: ListFormatType,
style: ListLength,
formatter: ListFormatter,
}

impl std::fmt::Debug for ListFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ListFormat")
.field("locale", &self.locale)
.field("typ", &self.typ)
.field("style", &self.style)
.field("formatter", &"ListFormatter")
.finish()
}
native: ListFormatter,
}

impl Service for ListFormat {
Expand Down Expand Up @@ -146,6 +137,25 @@ impl BuiltInConstructor for ListFormat {
// 16. Let dataLocaleData be localeData.[[<dataLocale>]].
// 17. Let dataLocaleTypes be dataLocaleData.[[<type>]].
// 18. Set listFormat.[[Templates]] to dataLocaleTypes.[[<style>]].
let data_locale = DataLocale::from(&locale);
let formatter = match typ {
ListFormatType::Conjunction => ListFormatter::try_new_and_with_length_unstable(
&context.icu().provider(),
&data_locale,
style,
),
ListFormatType::Disjunction => ListFormatter::try_new_or_with_length_unstable(
&context.icu().provider(),
&data_locale,
style,
),
ListFormatType::Unit => ListFormatter::try_new_unit_with_length_unstable(
&context.icu().provider(),
&data_locale,
style,
),
}
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;

// 2. Let listFormat be ? OrdinaryCreateFromConstructor(NewTarget, "%ListFormat.prototype%", « [[InitializedListFormat]], [[Locale]], [[Type]], [[Style]], [[Templates]] »).
let prototype =
Expand All @@ -154,14 +164,10 @@ impl BuiltInConstructor for ListFormat {
context.root_shape(),
prototype,
ObjectData::list_format(Self {
formatter: context
.icu()
.provider()
.try_new_list_formatter(&DataLocale::from(&locale), typ, style)
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?,
locale,
typ,
style,
native: formatter,
}),
);

Expand Down Expand Up @@ -225,7 +231,7 @@ impl ListFormat {

// 4. Return ! FormatList(lf, stringList).
Ok(lf
.formatter
.native
.format_to_string(strings.into_iter().map(|s| s.to_std_string_escaped()))
.into())
}
Expand Down Expand Up @@ -349,7 +355,7 @@ impl ListFormat {

// 1. Let parts be ! CreatePartsFromList(listFormat, list).
let mut parts = PartsCollector(Vec::new());
lf.formatter
lf.native
.format(strings)
.write_to_parts(&mut parts)
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;
Expand Down
10 changes: 7 additions & 3 deletions boa_engine/src/builtins/intl/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
//! Boa's implementation of ECMAScript's global `Intl` object.
//!
//! `Intl` is a built-in object that has properties and methods for i18n. It's not a function object.
//! The `Intl` namespace object contains several constructors as well as functionality common to the
//! internationalization constructors and other language sensitive functions. Collectively, they
//! comprise the ECMAScript Internationalization API, which provides language sensitive string
//! comparison, number formatting, date and time formatting, and more.
//!
//! More information:
//! - [ECMAScript reference][spec]
//! - [MDN documentation][mdn]
//!
//!
//! [spec]: https://tc39.es/ecma402/#intl-object
#![allow(clippy::string_lit_as_bytes)]
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl
use crate::{
builtins::{Array, BuiltInBuilder, BuiltInObject, IntrinsicObject},
Expand Down
21 changes: 16 additions & 5 deletions boa_engine/src/builtins/intl/plural_rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,22 @@ impl BuiltInConstructor for PluralRules {
context.icu(),
);

let native = context
.icu()
.provider()
.try_new_plural_rules(&DataLocale::from(&locale), rule_type)
.map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;
let native = match rule_type {
PluralRuleType::Cardinal => NativePluralRules::try_new_cardinal_unstable(
&context.icu().provider(),
&DataLocale::from(&locale),
),
PluralRuleType::Ordinal => NativePluralRules::try_new_ordinal_unstable(
&context.icu().provider(),
&DataLocale::from(&locale),
),
_ => {
return Err(JsNativeError::typ()
.with_message("unimplemented plural rule type")
.into())
}
}
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;

let proto = get_prototype_from_constructor(
new_target,
Expand Down
33 changes: 20 additions & 13 deletions boa_engine/src/builtins/intl/segmenter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use std::ops::Range;
use boa_macros::utf16;
use boa_profiler::Profiler;
use icu_locid::Locale;
use icu_segmenter::provider::WordBreakDataV1Marker;
use icu_segmenter::{
provider::WordBreakDataV1Marker, GraphemeClusterSegmenter, SentenceSegmenter, WordSegmenter,
};

use crate::{
builtins::{
Expand Down Expand Up @@ -41,9 +43,9 @@ pub struct Segmenter {

#[derive(Debug)]
pub(crate) enum NativeSegmenter {
Grapheme(Box<icu_segmenter::GraphemeClusterSegmenter>),
Word(Box<icu_segmenter::WordSegmenter>),
Sentence(Box<icu_segmenter::SentenceSegmenter>),
Grapheme(Box<GraphemeClusterSegmenter>),
Word(Box<WordSegmenter>),
Sentence(Box<SentenceSegmenter>),
}

impl NativeSegmenter {
Expand Down Expand Up @@ -149,16 +151,21 @@ impl BuiltInConstructor for Segmenter {
.unwrap_or_default();
// 13. Set segmenter.[[SegmenterGranularity]] to granularity.

let kind = context
.icu()
.provider()
.try_new_segmenter(granularity)
.map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;
let native = match granularity {
Granularity::Grapheme => {
GraphemeClusterSegmenter::try_new_unstable(&context.icu().provider())
.map(|s| NativeSegmenter::Grapheme(Box::new(s)))
}

Granularity::Word => WordSegmenter::try_new_auto_unstable(&context.icu().provider())
.map(|s| NativeSegmenter::Word(Box::new(s))),

Granularity::Sentence => SentenceSegmenter::try_new_unstable(&context.icu().provider())
.map(|s| NativeSegmenter::Sentence(Box::new(s))),
}
.map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;

let segmenter = Self {
locale,
native: kind,
};
let segmenter = Self { locale, native };

// 2. Let internalSlotsList be « [[InitializedSegmenter]], [[Locale]], [[SegmenterGranularity]] ».
// 3. Let segmenter be ? OrdinaryCreateFromConstructor(NewTarget, "%Segmenter.prototype%", internalSlotsList).
Expand Down
34 changes: 12 additions & 22 deletions boa_engine/src/builtins/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ impl IntrinsicObject for String {
.method(Self::at, "at", 1);

#[cfg(feature = "annex-b")]
{
let builder = {
builder
.property(
utf16!("trimLeft"),
Expand All @@ -173,10 +173,8 @@ impl IntrinsicObject for String {
.method(Self::strike, "strike", 0)
.method(Self::sub, "sub", 0)
.method(Self::sup, "sup", 0)
.build();
}
};

#[cfg(not(feature = "annex-b"))]
builder.build();
}

Expand Down Expand Up @@ -1742,7 +1740,7 @@ impl String {
use super::intl::locale::{
best_available_locale, canonicalize_locale_list, default_locale,
};
use icu_casemapping::{provider::CaseMappingV1Marker, CaseMapping};
use icu_casemap::provider::CaseMapV1Marker;
use icu_locid::LanguageIdentifier;

// 1. Let O be ? RequireObjectCoercible(this value).
Expand Down Expand Up @@ -1770,25 +1768,22 @@ impl String {
// 5. Let availableLocales be a List with language tags that includes the languages for which the Unicode Character Database contains language sensitive case mappings. Implementations may add additional language tags if they support case mapping for additional locales.
// 6. Let locale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
// 7. If locale is undefined, set locale to "und".
let lang =
best_available_locale::<CaseMappingV1Marker>(lang, &context.icu().provider())
.unwrap_or(LanguageIdentifier::UND);
let lang = best_available_locale::<CaseMapV1Marker>(lang, &context.icu().provider())
.unwrap_or(LanguageIdentifier::UND);

let casemapper =
CaseMapping::try_new_with_locale(&context.icu().provider(), &lang.into())
.map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;
let casemapper = context.icu().case_mapper();

// 8. Let codePoints be StringToCodePoints(S).
let result = string.map_valid_segments(|segment| {
if UPPER {
// 10. Else,
// a. Assert: targetCase is upper.
// b. Let newCodePoints be a List whose elements are the result of an uppercase transformation of codePoints according to an implementation-derived algorithm using locale or the Unicode Default Case Conversion algorithm.
casemapper.to_full_uppercase(&segment)
casemapper.uppercase_to_string(&segment, &lang)
} else {
// 9. If targetCase is lower, then
// a. Let newCodePoints be a List whose elements are the result of a lowercase transformation of codePoints according to an implementation-derived algorithm using locale or the Unicode Default Case Conversion algorithm.
casemapper.to_full_lowercase(&segment)
casemapper.lowercase_to_string(&segment, &lang)
}
});

Expand Down Expand Up @@ -2148,15 +2143,10 @@ impl String {
{
use once_cell::sync::Lazy;
static NORMALIZERS: Lazy<StringNormalizers> = Lazy::new(|| {
let provider = &boa_icu_provider::minimal();
let nfc = ComposingNormalizer::try_new_nfc_unstable(provider)
.expect("minimal data should always be updated");
let nfkc = ComposingNormalizer::try_new_nfkc_unstable(provider)
.expect("minimal data should always be updated");
let nfd = DecomposingNormalizer::try_new_nfd_unstable(provider)
.expect("minimal data should always be updated");
let nfkd = DecomposingNormalizer::try_new_nfkd_unstable(provider)
.expect("minimal data should always be updated");
let nfc = ComposingNormalizer::new_nfc(provider);
let nfkc = ComposingNormalizer::new_nfkc(provider);
let nfd = DecomposingNormalizer::new_nfd(provider);
let nfkd = DecomposingNormalizer::new_nfkd(provider);

StringNormalizers {
nfc,
Expand Down
Loading

0 comments on commit 408eaa4

Please sign in to comment.