Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add way to create JsString from ASCII literal without heap allocation #3922

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 46 additions & 3 deletions core/engine/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
#[doc(inline)]
pub use boa_string::*;

// Compile-time check that `Cell::new(0usize)` and `None::<&'static usize>` have the same
// bit pattern when reinterpreted as `usize`. The `js_string!` macro relies on this: it
// writes `None` where a `Cell(0usize)` reference count is expected.
static_assertions::const_assert_eq!(
// SAFETY:
// `transmute` is rejected at compile time unless the source and destination types
// have the same size, so this cannot silently reinterpret mismatched layouts.
unsafe { std::mem::transmute::<std::cell::Cell<usize>, usize>(std::cell::Cell::new(0usize)) },
// SAFETY:
// `transmute` is rejected at compile time unless the source and destination types
// have the same size, so this cannot silently reinterpret mismatched layouts.
unsafe { std::mem::transmute::<Option<&'static usize>, usize>(None) }
);

/// Utility macro to create a [`JsString`].
///
/// # Examples
Expand Down Expand Up @@ -54,9 +63,40 @@ macro_rules! js_string {
() => {
$crate::string::JsString::default()
};
($s:literal) => {
$crate::string::JsString::from($crate::js_str!($s))
};
($s:literal) => {{
// ASCII literals are backed by `static` data and wrapped through `from_opaque_ptr`;
// any other literal goes through the regular `JsString::from` conversion.
if $s.is_ascii() {
use $crate::string::JsStr;

#[allow(clippy::items_after_statements)]
// A static `JsStr` that references the ASCII literal.
static ORIGINAL_JS_STR: JsStr<'static> = JsStr::latin1($s.as_bytes());

#[allow(clippy::items_after_statements)]
// `[Option<&usize>; 2]` has the same size as the primitive `RawJsString` and stands in
// for it here, since a `Cell` cannot be constructed in a static
// and `RawJsString` is private.
// The first slot holds the address of `ORIGINAL_JS_STR`. For the second slot, the
// null pointer optimization lets `None` represent `Cell(0usize)`,
// which marks the string as being created from an ASCII literal.
static DUMMY_RAW_JS_STRING: &[Option<&usize>; 2] = &[
// SAFETY:
// A reference to a static variable is always valid to cast into a non-null pointer,
// and the primitive size of `RawJsString` is twice as large as `usize`.
Some(unsafe { &*std::ptr::addr_of!(ORIGINAL_JS_STR).cast::<usize>() }),
None,
];
#[allow(trivial_casts)]
// SAFETY:
// A reference to a static variable is always valid to cast into a non-null pointer,
// and the size of `[Option<&usize>; 2]` is equal to the primitive size of `RawJsString`.
unsafe {
$crate::string::JsString::from_opaque_ptr(
std::ptr::from_ref(DUMMY_RAW_JS_STRING) as *mut _
)
}
} else {
$crate::string::JsString::from($crate::js_str!($s))
}
}};
($s:expr) => {
$crate::string::JsString::from($s)
};
Expand Down Expand Up @@ -92,6 +132,9 @@ mod tests {
#[test]
fn refcount() {
let x = js_string!("Hello world");
assert_eq!(x.refcount(), None);

let x = js_string!("你好");
assert_eq!(x.refcount(), Some(1));

{
Expand Down
113 changes: 90 additions & 23 deletions core/string/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,30 @@ impl CodePoint {
/// The raw representation of a [`JsString`] in the heap.
#[repr(C)]
struct RawJsString {
/// A field represented for **`flag_and_len`** or **`pointer`**.
///
/// ## `flag_and_len`:
/// ```text
/// ┌───────────────────────────────────────────────────┐
/// │ length((usize::BITS - 1) bits) │ flag(1 bit) │
/// └───────────────────────────────────────────────────┘
/// ``````
/// Contains the flags and Latin1/UTF-16 length.
///
/// The latin1 flag is stored in the bottom bit.
flags_and_len: usize,
///
/// ## `pointer`:
/// A pointer to a static `JsStr` that references an ASCII literal.
flags_and_len_or_ptr: usize,

/// The number of references to the string.
///
/// When this reaches `0` the string is deallocated.
///
/// Since reference count of `RawJsString` created from `try_allocate_inner`
/// will only reach `0` in `drop`,
/// we can set reference count of `RawJsString` created from an ASCII literal to `0` as a mark,
/// see detail in `js_string` macro.
refcount: Cell<usize>,

/// An empty array which is used to get the offset of string data.
Expand All @@ -170,12 +186,32 @@ impl RawJsString {
const LATIN1_BITFLAG: usize = 1 << 0;
const BITFLAG_COUNT: usize = 1;

/// Returns `true` if this `RawJsString` was created from an ASCII literal,
/// which is marked by a reference count of `0`.
///
/// This should be called before modifying the reference count,
/// to avoid dropping a static value, which might cause UB.
fn is_ascii_literal(&self) -> bool {
self.refcount.get() == 0
}

/// Returns the `JsStr` of an ASCII literal by treating `flags_and_len_or_ptr`
/// as a pointer and dereferencing it.
///
/// # Safety
///
/// Caller must ensure that this `RawJsString` was created from an ASCII literal
/// so that the pointer cast and dereference are valid.
unsafe fn ascii_literal_js_str(&self) -> JsStr<'static> {
// SAFETY:
//
// The caller guarantees that this `RawJsString` was created from an ASCII literal,
// so the pointer cast and dereference are valid.
unsafe { *(self.flags_and_len_or_ptr as *const _) }
}

const fn is_latin1(&self) -> bool {
(self.flags_and_len & Self::LATIN1_BITFLAG) != 0
(self.flags_and_len_or_ptr & Self::LATIN1_BITFLAG) != 0
}

const fn len(&self) -> usize {
self.flags_and_len >> Self::BITFLAG_COUNT
self.flags_and_len_or_ptr >> Self::BITFLAG_COUNT
}

const fn encode_flags_and_len(len: usize, latin1: bool) -> usize {
Expand Down Expand Up @@ -221,6 +257,21 @@ impl<'a> IntoIterator for &'a JsString {
}

impl JsString {
/// Creates a [`JsString`] from a raw opaque pointer.
///
/// # Safety
///
/// Caller must ensure the pointer is valid and that the data it points to
/// has the same size and alignment as `RawJsString`.
#[must_use]
pub const unsafe fn from_opaque_ptr(src: *mut ()) -> Self {
JsString {
// SAFETY:
// The caller guarantees that the pointer is valid and points to data
// with the same size and alignment as `RawJsString`.
ptr: unsafe { Tagged::from_ptr(src.cast()) },
}
}

/// Create an iterator over the [`JsString`].
#[inline]
#[must_use]
Expand Down Expand Up @@ -250,19 +301,17 @@ impl JsString {
//
// - The lifetime of `&Self::Target` is shorter than the lifetime of `self`, as seen
// by its signature, so this doesn't outlive `self`.
//
// - The `RawJsString` created from ASCII literal has a static lifetime `JsStr`.
unsafe {
let h = h.as_ptr();

if (*h).is_latin1() {
JsStr::latin1(std::slice::from_raw_parts(
addr_of!((*h).data).cast(),
(*h).len(),
))
let h = h.as_ref();
if h.is_ascii_literal() {
return h.ascii_literal_js_str();
}
if h.is_latin1() {
JsStr::latin1(std::slice::from_raw_parts(addr_of!(h.data).cast(), h.len()))
} else {
JsStr::utf16(std::slice::from_raw_parts(
addr_of!((*h).data).cast(),
(*h).len(),
))
JsStr::utf16(std::slice::from_raw_parts(addr_of!(h.data).cast(), h.len()))
}
}
}
Expand Down Expand Up @@ -665,7 +714,7 @@ impl JsString {
unsafe {
// Write the first part, the `RawJsString`.
inner.as_ptr().write(RawJsString {
flags_and_len: RawJsString::encode_flags_and_len(str_len, latin1),
flags_and_len_or_ptr: RawJsString::encode_flags_and_len(str_len, latin1),
refcount: Cell::new(1),
data: [0; 0],
});
Expand Down Expand Up @@ -749,8 +798,12 @@ impl JsString {
// - The lifetime of `&Self::Target` is shorter than the lifetime of `self`, as seen
// by its signature, so this doesn't outlive `self`.
unsafe {
let h = h.as_ptr();
(*h).len()
let h = h.as_ref();
if h.is_ascii_literal() {
h.ascii_literal_js_str().len()
} else {
h.len()
}
}
}
UnwrappedTagged::Tag(index) => {
Expand Down Expand Up @@ -860,9 +913,13 @@ impl JsString {
UnwrappedTagged::Ptr(inner) => {
// SAFETY: The reference count of `JsString` guarantees that `inner` is always valid.
let inner = unsafe { inner.as_ref() };
Some(inner.refcount.get())
if inner.is_ascii_literal() {
None
} else {
Some(inner.refcount.get())
}
}
UnwrappedTagged::Tag(_inner) => None,
UnwrappedTagged::Tag(_) => None,
}
}
}
Expand All @@ -873,11 +930,15 @@ impl Clone for JsString {
if let UnwrappedTagged::Ptr(inner) = self.ptr.unwrap() {
// SAFETY: The reference count of `JsString` guarantees that `raw` is always valid.
let inner = unsafe { inner.as_ref() };
let strong = inner.refcount.get().wrapping_add(1);
if strong == 0 {
abort()

// Do not increase reference count when it is created from ASCII literal.
if !inner.is_ascii_literal() {
let strong = inner.refcount.get().wrapping_add(1);
if strong == 0 {
abort()
}
inner.refcount.set(strong);
}
inner.refcount.set(strong);
}
Self { ptr: self.ptr }
}
Expand All @@ -898,6 +959,12 @@ impl Drop for JsString {

// SAFETY: The reference count of `JsString` guarantees that `raw` is always valid.
let inner = unsafe { raw.as_ref() };

// Do not drop `JsString` created from ASCII literal.
if inner.is_ascii_literal() {
return;
}

inner.refcount.set(inner.refcount.get() - 1);
if inner.refcount.get() != 0 {
return;
Expand Down
Loading