diff --git a/core/engine/src/string.rs b/core/engine/src/string.rs index cfddb9328ce..920ee786254 100644 --- a/core/engine/src/string.rs +++ b/core/engine/src/string.rs @@ -7,6 +7,15 @@ #[doc(inline)] pub use boa_string::*; +static_assertions::const_assert_eq!( + // SAFETY: + // The compiler raises an error if `transmute` does not meet its requirements. + unsafe { std::mem::transmute::<std::cell::Cell<usize>, usize>(std::cell::Cell::new(0usize)) }, + // SAFETY: + // The compiler raises an error if `transmute` does not meet its requirements. + unsafe { std::mem::transmute::<Option<&usize>, usize>(None) } +); + /// Utility macro to create a [`JsString`]. /// /// # Examples @@ -54,9 +63,40 @@ macro_rules! js_string { () => { $crate::string::JsString::default() }; - ($s:literal) => { - $crate::string::JsString::from($crate::js_str!($s)) - }; + ($s:literal) => {{ + if $s.is_ascii() { + use $crate::string::JsStr; + + #[allow(clippy::items_after_statements)] + // Create a static `JsStr` that references an ASCII literal. + static ORIGINAL_JS_STR: JsStr<'static> = JsStr::latin1($s.as_bytes()); + + #[allow(clippy::items_after_statements)] + // Use `[Option<&usize>; 2]`, which has the same size as the primitive `RawJsString`, + // to represent `RawJsString` since `Cell<usize>` cannot be used in a `static` + // and `RawJsString` is private. + // With `Null Pointer Optimization` we could use `None` + // to represent `Cell(0usize)` to mark it as being created from ASCII literal. + static DUMMY_RAW_JS_STRING: &[Option<&usize>; 2] = &[ + // SAFETY: + // Reference of static variable is always valid to cast into a non-null pointer, + // and the primitive size of `RawJsString` is twice as large as `usize`. + Some(unsafe { &*std::ptr::addr_of!(ORIGINAL_JS_STR).cast::<usize>() }), + None, + ]; + #[allow(trivial_casts)] + // SAFETY: + // Reference of static variable is always valid to cast into a non-null pointer, + // size of `[Option<&usize>; 2]` is equal to the primitive size of `RawJsString`.
+ unsafe { + $crate::string::JsString::from_opaque_ptr( + std::ptr::from_ref(DUMMY_RAW_JS_STRING) as *mut _ + ) + } + } else { + $crate::string::JsString::from($crate::js_str!($s)) + } + }}; ($s:expr) => { $crate::string::JsString::from($s) }; @@ -92,6 +132,9 @@ mod tests { #[test] fn refcount() { let x = js_string!("Hello world"); + assert_eq!(x.refcount(), None); + + let x = js_string!("你好"); assert_eq!(x.refcount(), Some(1)); { diff --git a/core/string/src/lib.rs b/core/string/src/lib.rs index 01a571cbab0..92d049e04f9 100644 --- a/core/string/src/lib.rs +++ b/core/string/src/lib.rs @@ -152,14 +152,30 @@ impl CodePoint { /// The raw representation of a [`JsString`] in the heap. #[repr(C)] struct RawJsString { + /// A field that holds either **`flags_and_len`** or a **`pointer`**. + /// + /// ## `flags_and_len`: + /// ```text + /// ┌───────────────────────────────────────────────────┐ + /// │ length((usize::BITS - 1) bits) │ flag(1 bit) │ + /// └───────────────────────────────────────────────────┘ + /// `````` /// Contains the flags and Latin1/UTF-16 length. /// /// The latin1 flag is stored in the bottom bit. - flags_and_len: usize, + /// + /// ## `pointer`: + /// A pointer to a static `JsStr` that references an ASCII literal. + flags_and_len_or_ptr: usize, /// The number of references to the string. /// /// When this reaches `0` the string is deallocated. + /// + /// Since the reference count of a `RawJsString` created from `try_allocate_inner` + /// will only reach `0` in `drop`, + /// we can set the reference count of a `RawJsString` created from an ASCII literal to `0` as a marker; + /// see details in the `js_string` macro. refcount: Cell<usize>, /// An empty array which is used to get the offset of string data. @@ -170,12 +186,32 @@ impl RawJsString { const LATIN1_BITFLAG: usize = 1 << 0; const BITFLAG_COUNT: usize = 1; + /// This should be called to check if it is from an ASCII literal + /// before modifying the reference count, to avoid dropping a static value, + /// which might cause UB.
+ fn is_ascii_literal(&self) -> bool { + self.refcount.get() == 0 + } + + /// Returns the `JsStr` of an ASCII literal by dereferencing `flags_and_len_or_ptr` as a pointer. + /// # Safety + /// + /// The caller must ensure that this `RawJsString` was created from an ASCII literal + /// so that pointer casting and dereferencing are valid. + unsafe fn ascii_literal_js_str(&self) -> JsStr<'static> { + // SAFETY: + // + // The caller guarantees that the `RawJsString` was created from an ASCII literal, + // so that pointer casting and dereferencing are valid. + unsafe { *(self.flags_and_len_or_ptr as *const _) } + } + const fn is_latin1(&self) -> bool { - (self.flags_and_len & Self::LATIN1_BITFLAG) != 0 + (self.flags_and_len_or_ptr & Self::LATIN1_BITFLAG) != 0 } const fn len(&self) -> usize { - self.flags_and_len >> Self::BITFLAG_COUNT + self.flags_and_len_or_ptr >> Self::BITFLAG_COUNT } const fn encode_flags_and_len(len: usize, latin1: bool) -> usize { @@ -221,6 +257,21 @@ impl<'a> IntoIterator for &'a JsString { } impl JsString { + /// Create a [`JsString`] from a raw opaque pointer. + /// # Safety + /// + /// The caller must ensure the pointer is valid and that the data it points to + /// has the same size and alignment as `RawJsString`. + #[must_use] + pub const unsafe fn from_opaque_ptr(src: *mut ()) -> Self { + JsString { + // SAFETY: + // The caller must ensure the pointer is valid and points to data + // with the same size and alignment as `RawJsString`. + ptr: unsafe { Tagged::from_ptr(src.cast()) }, + } + } + /// Create an iterator over the [`JsString`]. #[inline] #[must_use] @@ -250,19 +301,17 @@ impl JsString { // // - The lifetime of `&Self::Target` is shorter than the lifetime of `self`, as seen // by its signature, so this doesn't outlive `self`. + // + // - A `RawJsString` created from an ASCII literal holds a `JsStr` with a `'static` lifetime.
unsafe { - let h = h.as_ptr(); - - if (*h).is_latin1() { - JsStr::latin1(std::slice::from_raw_parts( - addr_of!((*h).data).cast(), - (*h).len(), - )) + let h = h.as_ref(); + if h.is_ascii_literal() { + return h.ascii_literal_js_str(); + } + if h.is_latin1() { + JsStr::latin1(std::slice::from_raw_parts(addr_of!(h.data).cast(), h.len())) } else { - JsStr::utf16(std::slice::from_raw_parts( - addr_of!((*h).data).cast(), - (*h).len(), - )) + JsStr::utf16(std::slice::from_raw_parts(addr_of!(h.data).cast(), h.len())) } } } @@ -665,7 +714,7 @@ impl JsString { unsafe { // Write the first part, the `RawJsString`. inner.as_ptr().write(RawJsString { - flags_and_len: RawJsString::encode_flags_and_len(str_len, latin1), + flags_and_len_or_ptr: RawJsString::encode_flags_and_len(str_len, latin1), refcount: Cell::new(1), data: [0; 0], }); @@ -749,8 +798,12 @@ impl JsString { // - The lifetime of `&Self::Target` is shorter than the lifetime of `self`, as seen // by its signature, so this doesn't outlive `self`. unsafe { - let h = h.as_ptr(); - (*h).len() + let h = h.as_ref(); + if h.is_ascii_literal() { + h.ascii_literal_js_str().len() + } else { + h.len() + } } } UnwrappedTagged::Tag(index) => { @@ -860,9 +913,13 @@ impl JsString { UnwrappedTagged::Ptr(inner) => { // SAFETY: The reference count of `JsString` guarantees that `inner` is always valid. let inner = unsafe { inner.as_ref() }; - Some(inner.refcount.get()) + if inner.is_ascii_literal() { + None + } else { + Some(inner.refcount.get()) + } } - UnwrappedTagged::Tag(_inner) => None, + UnwrappedTagged::Tag(_) => None, } } } @@ -873,11 +930,15 @@ impl Clone for JsString { if let UnwrappedTagged::Ptr(inner) = self.ptr.unwrap() { // SAFETY: The reference count of `JsString` guarantees that `raw` is always valid. let inner = unsafe { inner.as_ref() }; - let strong = inner.refcount.get().wrapping_add(1); - if strong == 0 { - abort() + + // Do not increase reference count when it is created from ASCII literal. 
+ if !inner.is_ascii_literal() { + let strong = inner.refcount.get().wrapping_add(1); + if strong == 0 { + abort() + } + inner.refcount.set(strong); } - inner.refcount.set(strong); } Self { ptr: self.ptr } } @@ -898,6 +959,12 @@ impl Drop for JsString { // SAFETY: The reference count of `JsString` guarantees that `raw` is always valid. let inner = unsafe { raw.as_ref() }; + + // Do not drop `JsString` created from ASCII literal. + if inner.is_ascii_literal() { + return; + } + inner.refcount.set(inner.refcount.get() - 1); if inner.refcount.get() != 0 { return;