From cda37a465b44bc7506052579764e5c48fc770d7a Mon Sep 17 00:00:00 2001 From: Bodil Stokke <17880+bodil@users.noreply.github.com> Date: Wed, 23 Feb 2022 20:00:04 +0000 Subject: [PATCH 1/3] Use a boxed string with a controlled layout to avoid bad assumptions. --- .github/workflows/ci.yml | 22 ++ CHANGELOG.md | 19 ++ README.md | 42 +--- proptest-regressions/test.txt | 2 + src/arbitrary.rs | 4 + src/boxed.rs | 183 +++++++++++++--- src/casts.rs | 14 +- src/config.rs | 20 +- src/inline.rs | 113 ++++------ src/iter.rs | 143 ++++--------- src/lib.rs | 392 ++++++---------------------------- src/marker_byte.rs | 33 +-- src/ops.rs | 271 +++++++++++++++++++++++ src/proptest.rs | 4 + src/serde.rs | 4 + src/test.rs | 5 - 16 files changed, 644 insertions(+), 627 deletions(-) create mode 100644 src/ops.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 605e7d4..75afa13 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,6 +51,27 @@ jobs: command: test args: --all-features + nostd: + name: no_std build + runs-on: ubuntu-latest + strategy: + matrix: + rust: + - stable + - nightly + - 1.56.0 # lowest supported version + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ matrix.rust }} + override: true + - uses: actions-rs/cargo@v1 + with: + command: build + args: --no-default-features + fmt: name: Rustfmt runs-on: ubuntu-latest @@ -88,6 +109,7 @@ jobs: name: Clippy-${{ matrix.rust }} token: ${{ secrets.GITHUB_TOKEN }} args: --all-features + # miri: # name: Miri # runs-on: ubuntu-latest diff --git a/CHANGELOG.md b/CHANGELOG.md index 815f625..60e4d65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
+## [Unreleased] + +### CHANGED + +- `smartstring` now implements its own boxed string type rather than deferring directly to + `String`, so it no longer makes assumptions it shouldn't be making about the layout of the + `String` struct. This also allows us to organise the boxed struct in a way that will let us rely + only on our basic assumption that heap memory is word aligned on both big and little endian + architectures. The most immediate consequence of this is that `smartstring` will now compile on + 32-bit big endian architectures such as `mips`. + + In short: `smartstring` no longer relies on undefined behaviour, and should be safe to use + anywhere. + +- The above means that the boxed `SmartString` is no longer pointer compatible with `String`, so + if you were relying on that despite the documentation telling you not to, you'll really have to + stop it now. Converting between `SmartString` and `String` using `From` and `Into` traits is + still efficient and allocation free. + ## [0.2.10] - 2022-02-20 ### CHANGED diff --git a/README.md b/README.md index 1776193..5a5486f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,5 @@ # smartstring -[![Travis CI](https://travis-ci.org/bodil/smartstring.svg?branch=master&status=passed)](https://travis-ci.org/github/bodil/smartstring) - Compact inlined strings. ## tl;dr @@ -18,42 +16,9 @@ beyond the inline capacity. This has the advantage of avoiding heap allocations well as improving performance thanks to keeping the strings on the stack. This is all accomplished without the need for an external discriminant, so a `SmartString` is -exactly the same size as a `String` on the stack, regardless of whether it's inlined or not, and -when not inlined it's pointer compatible with `String`, meaning that you can safely coerce a -`SmartString` to a `String` using `std::mem::replace()` or `pointer::cast()` and go on using it as -if it had never been a `SmartString`. 
(But please don't do that, there's an `Into` -implementation that's much safer.) - -## Supported architectures -`smartstring` currently doesn't run on 32-bit big endian architectures like `powerpc`, so its use -in any crates that intend to run on those architectures should ideally be gated behind a -[platform specific dependency](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#platform-specific-dependencies) -in your `Cargo.toml`, like so: -```toml -[target.'cfg(not(all(target_endian = "big", target_pointer_width = "32")))'.dependencies] -smartstring = "0.2" -``` - -This will ensure that `cargo` does not try to build `smartstring` on these unsupported -architectures, which will otherwise [always fail](https://github.com/bodil/smartstring/blob/v0.2.9/src/config.rs#L91-L93). - -## Caveat - -The way `smartstring` gets by without a discriminant is dependent on the memory layout of the -`std::string::String` struct, which isn't something the Rust compiler and standard library make any -guarantees about. `smartstring` makes an assumption about how it's been laid out, which has held -basically since rustc came into existence, but is nonetheless not a safe assumption to make, and if -the layout ever changes, `smartstring` will stop working properly (at least on little-endian -architectures, the assumptions made on big-endian archs will hold regardless of the actual memory -layout). Its test suite does comprehensive validation of these assumptions, and as long as the -[CI build](https://travis-ci.org/github/bodil/smartstring) is passing for any given rustc version, -you can be sure it will do its job properly on all tested architectures. You can also check out the -`smartstring` source tree yourself and run `cargo test` to validate it for your particular -configuration. 
- As an extra precaution, some runtime checks are made as well, so that if the memory layout -assumption no longer holds, `smartstring` will not work correctly, but there should be no security -implications and it should crash early. +exactly the same size as a `String` on the stack, regardless of whether it's inlined or not. +Converting a heap allocated `SmartString` into a `String` and vice versa is also a zero cost +operation, as one will reuse the allocated memory of the other. ## Documentation @@ -71,5 +36,4 @@ was not distributed with this file, You can obtain one at &String; - fn string_mut(&mut self) -> &mut String; - fn into_string(self) -> String; +use crate::{ops::GenericString, MAX_INLINE}; - fn cmp_with_str(&self, other: &str) -> Ordering; - fn cmp_with_self(&self, other: &Self) -> Ordering; - fn eq_with_str(&self, other: &str) -> bool; - fn eq_with_self(&self, other: &Self) -> bool; +#[cfg(not(target_endian = "big"))] // little-endian (default) layout: `ptr` is the first field +#[repr(C)] +pub(crate) struct BoxedString { + ptr: NonNull<u8>, + cap: usize, + len: usize, +} + +#[cfg(target_endian = "big")] // big-endian layout: same fields in reverse order, so `ptr` is last +#[repr(C)] +pub(crate) struct BoxedString { + len: usize, + cap: usize, + ptr: NonNull<u8>, +} - fn len(&self) -> usize { - self.string().len() +impl GenericString for BoxedString { + fn set_size(&mut self, size: usize) { + self.len = size; + debug_assert!(self.len <= self.cap); + } + + fn as_mut_capacity_slice(&mut self) -> &mut [u8] { + #[allow(unsafe_code)] + unsafe { + core::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.capacity()) + } } } -impl BoxedString for String { - #[inline(always)] - fn string(&self) -> &String { - self +impl BoxedString { + const MINIMAL_CAPACITY: usize = MAX_INLINE * 2; + + fn layout_for(cap: usize) -> Layout { + let layout = Layout::array::<u8>(cap).unwrap(); + assert!( + layout.size() <= isize::MAX as usize, + "allocation too large!"
+ ); + layout + } + + fn alloc(cap: usize) -> NonNull { + let layout = Self::layout_for(cap); + #[allow(unsafe_code)] + let ptr = unsafe { alloc::alloc::alloc(layout) }; + match NonNull::new(ptr) { + Some(ptr) => ptr, + None => alloc::alloc::handle_alloc_error(layout), + } + } + + fn realloc(&mut self, cap: usize) { + let layout = Self::layout_for(cap); + let old_layout = Self::layout_for(self.cap); + let old_ptr = self.ptr.as_ptr(); + #[allow(unsafe_code)] + let ptr = unsafe { alloc::alloc::realloc(old_ptr, old_layout, layout.size()) }; + self.ptr = match NonNull::new(ptr) { + Some(ptr) => ptr, + None => alloc::alloc::handle_alloc_error(layout), + }; + self.cap = cap; } - #[inline(always)] - fn string_mut(&mut self) -> &mut String { - self + pub(crate) fn ensure_capacity(&mut self, target_cap: usize) { + let mut cap = self.cap; + while cap < target_cap { + cap *= 2; + } + self.realloc(cap) } - fn into_string(self) -> String { - self + pub(crate) fn new(cap: usize) -> Self { + let cap = cap.max(Self::MINIMAL_CAPACITY); + Self { + cap, + len: 0, + ptr: Self::alloc(cap), + } } - #[inline(always)] - fn cmp_with_str(&self, other: &str) -> Ordering { - self.as_str().cmp(other) + pub(crate) fn from_str(cap: usize, src: &str) -> Self { + let mut out = Self::new(cap); + out.len = src.len(); + out.as_mut_capacity_slice()[..src.len()].copy_from_slice(src.as_bytes()); + out } - #[inline(always)] - fn cmp_with_self(&self, other: &Self) -> Ordering { - self.cmp(other) + pub(crate) fn capacity(&self) -> usize { + self.cap } - #[inline(always)] - fn eq_with_str(&self, other: &str) -> bool { - self == other + pub(crate) fn shrink_to_fit(&mut self) { + self.realloc(self.len); } +} + +impl Drop for BoxedString { + fn drop(&mut self) { + #[allow(unsafe_code)] + unsafe { + alloc::alloc::dealloc(self.ptr.as_ptr(), Self::layout_for(self.cap)) + } + } +} + +impl Clone for BoxedString { + fn clone(&self) -> Self { + Self::from_str(self.capacity(), self.deref()) + } +} + +impl Deref for 
BoxedString { + type Target = str; + + fn deref(&self) -> &Self::Target { + #[allow(unsafe_code)] + unsafe { + core::str::from_utf8_unchecked(core::slice::from_raw_parts(self.ptr.as_ptr(), self.len)) + } + } +} + +impl DerefMut for BoxedString { + fn deref_mut(&mut self) -> &mut Self::Target { + #[allow(unsafe_code)] + unsafe { + core::str::from_utf8_unchecked_mut(core::slice::from_raw_parts_mut( + self.ptr.as_ptr(), + self.len, + )) + } + } +} + +impl From for BoxedString { + fn from(mut s: String) -> Self { + if s.is_empty() { + Self::new(s.capacity()) + } else { + // TODO: Use String::into_raw_parts when stabilised, meanwhile let's get unsafe + let len = s.len(); + let cap = s.capacity(); + #[allow(unsafe_code)] + let ptr = unsafe { NonNull::new_unchecked(s.as_mut_ptr()) }; + forget(s); + Self { cap, len, ptr } + } + } +} - #[inline(always)] - fn eq_with_self(&self, other: &Self) -> bool { - self == other +impl Into for BoxedString { + fn into(self) -> String { + #[allow(unsafe_code)] + let s = unsafe { String::from_raw_parts(self.ptr.as_ptr(), self.len(), self.capacity()) }; + forget(self); + s } } diff --git a/src/casts.rs b/src/casts.rs index 53f5866..047f851 100644 --- a/src/casts.rs +++ b/src/casts.rs @@ -2,19 +2,19 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-use crate::{inline::InlineString, SmartStringMode}; +use crate::{boxed::BoxedString, inline::InlineString}; -pub(crate) enum StringCast<'a, Mode: SmartStringMode> { - Boxed(&'a Mode::BoxedString), +pub(crate) enum StringCast<'a> { + Boxed(&'a BoxedString), Inline(&'a InlineString), } -pub(crate) enum StringCastMut<'a, Mode: SmartStringMode> { - Boxed(&'a mut Mode::BoxedString), +pub(crate) enum StringCastMut<'a> { + Boxed(&'a mut BoxedString), Inline(&'a mut InlineString), } -pub(crate) enum StringCastInto { - Boxed(Mode::BoxedString), +pub(crate) enum StringCastInto { + Boxed(BoxedString), Inline(InlineString), } diff --git a/src/config.rs b/src/config.rs index c5c2c59..5135444 100644 --- a/src/config.rs +++ b/src/config.rs @@ -5,7 +5,7 @@ use crate::{boxed::BoxedString, inline::InlineString, SmartString}; use alloc::string::String; use core::mem::{align_of, size_of}; -use static_assertions::{assert_cfg, assert_eq_size, const_assert, const_assert_eq}; +use static_assertions::{assert_eq_size, const_assert, const_assert_eq}; /// A compact string representation equal to [`String`] in size with guaranteed inlining. /// @@ -40,8 +40,6 @@ pub struct LazyCompact; /// /// See [`LazyCompact`] and [`Compact`]. pub trait SmartStringMode { - /// The boxed string type for this layout. - type BoxedString: BoxedString + From; /// The inline string type for this layout. 
type InlineArray: AsRef<[u8]> + AsMut<[u8]> + Clone + Copy; /// A constant to decide whether to turn a wrapped string back into an inlined @@ -51,13 +49,11 @@ pub trait SmartStringMode { } impl SmartStringMode for Compact { - type BoxedString = String; type InlineArray = [u8; size_of::() - 1]; const DEALLOC: bool = true; } impl SmartStringMode for LazyCompact { - type BoxedString = String; type InlineArray = [u8; size_of::() - 1]; const DEALLOC: bool = false; } @@ -70,14 +66,8 @@ pub const MAX_INLINE: usize = size_of::() - 1; const_assert!(MAX_INLINE < 128); // Assert that all the structs are of the expected size. -assert_eq_size!( - ::BoxedString, - SmartString -); -assert_eq_size!( - ::BoxedString, - SmartString -); +assert_eq_size!(BoxedString, SmartString); +assert_eq_size!(BoxedString, SmartString); assert_eq_size!(InlineString, SmartString); assert_eq_size!(InlineString, SmartString); @@ -87,7 +77,3 @@ assert_eq_size!(String, SmartString); // Assert that `SmartString` is aligned correctly. const_assert_eq!(align_of::(), align_of::>()); const_assert_eq!(align_of::(), align_of::>()); - -// This hack isn't going to work out very well on 32-bit big endian archs, -// so let's not compile on those. -assert_cfg!(not(all(target_endian = "big", target_pointer_width = "32"))); diff --git a/src/inline.rs b/src/inline.rs index 7bf0f57..be06258 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -2,18 +2,26 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-use crate::{config::MAX_INLINE, marker_byte::Marker}; +use crate::{config::MAX_INLINE, marker_byte::Marker, ops::GenericString}; use core::{ - slice::{from_raw_parts, from_raw_parts_mut}, + ops::{Deref, DerefMut}, str::{from_utf8_unchecked, from_utf8_unchecked_mut}, }; +#[cfg(not(target_endian = "big"))] // little-endian (default) layout: `marker` is the first byte #[repr(C)] pub(crate) struct InlineString { pub(crate) marker: Marker, pub(crate) data: [u8; MAX_INLINE], } +#[cfg(target_endian = "big")] // big-endian layout: `marker` is the last byte +#[repr(C)] +pub(crate) struct InlineString { + pub(crate) data: [u8; MAX_INLINE], + pub(crate) marker: Marker, +} + impl Clone for InlineString { fn clone(&self) -> Self { unreachable!("InlineString should be copy!") @@ -22,92 +30,59 @@ impl Clone for InlineString { impl Copy for InlineString {} -impl InlineString { - pub(crate) const fn new() -> Self { - Self { - marker: Marker::empty(), - data: [0; MAX_INLINE], - } - } +impl Deref for InlineString { + type Target = str; - pub(crate) fn set_size(&mut self, size: usize) { - self.marker.set_data(size as u8); + fn deref(&self) -> &Self::Target { + #[allow(unsafe_code)] + unsafe { + from_utf8_unchecked(&self.data[..self.len()]) + } } +} - pub(crate) fn len(&self) -> usize { - let len = self.marker.data() as usize; - // Panic immediately if inline length is too high, which suggests - // assumptions made about `String`'s memory layout are invalid.
- assert!(len <= MAX_INLINE); - len +impl DerefMut for InlineString { + fn deref_mut(&mut self) -> &mut Self::Target { + let len = self.len(); + #[allow(unsafe_code)] + unsafe { + from_utf8_unchecked_mut(&mut self.data[..len]) + } } +} - pub(crate) fn as_slice(&self) -> &[u8] { - self.data.as_ref() +impl GenericString for InlineString { + fn set_size(&mut self, size: usize) { + self.marker.set_data(size as u8); } - pub(crate) fn as_mut_slice(&mut self) -> &mut [u8] { + fn as_mut_capacity_slice(&mut self) -> &mut [u8] { self.data.as_mut() } +} - pub(crate) fn as_str(&self) -> &str { - unsafe { - let data = from_raw_parts(self.data.as_ref().as_ptr(), self.len()); - from_utf8_unchecked(data) - } - } - - pub(crate) fn as_mut_str(&mut self) -> &mut str { - unsafe { - let data = from_raw_parts_mut(self.data.as_mut().as_mut_ptr(), self.len()); - from_utf8_unchecked_mut(data) - } - } - - pub(crate) fn insert_bytes(&mut self, index: usize, bytes: &[u8]) { - assert!(self.as_str().is_char_boundary(index)); - if bytes.is_empty() { - return; - } - let len = self.len(); - unsafe { - let ptr = self.data.as_mut().as_mut_ptr(); - if index != len { - ptr.add(index + bytes.len()) - .copy_from(&self.data.as_ref()[index], len - index); - } - ptr.add(index) - .copy_from_nonoverlapping(bytes.as_ptr(), bytes.len()); +impl InlineString { + pub(crate) const fn new() -> Self { + Self { + marker: Marker::empty(), + data: [0; MAX_INLINE], } - self.set_size(len + bytes.len()); } - pub(crate) fn remove_bytes(&mut self, start: usize, end: usize) { - let len = self.len(); - assert!(start <= end); - assert!(end <= len); - assert!(self.as_str().is_char_boundary(start)); - assert!(self.as_str().is_char_boundary(end)); - if start == end { - return; - } - if end < len { - unsafe { - let ptr = self.data.as_mut().as_mut_ptr(); - ptr.add(start).copy_from(ptr.add(end), len - end); - } - } - self.set_size(len - (end - start)); + pub(crate) fn len(&self) -> usize { + let len = self.marker.data() as usize; + 
debug_assert!(len <= MAX_INLINE); + len } } -impl From<&'_ [u8]> for InlineString { - fn from(bytes: &[u8]) -> Self { - let len = bytes.len(); +impl From<&str> for InlineString { + fn from(string: &str) -> Self { + let len = string.len(); debug_assert!(len <= MAX_INLINE); let mut out = Self::new(); out.marker = Marker::new_inline(len as u8); - out.data.as_mut()[..len].copy_from_slice(bytes); + out.data.as_mut()[..len].copy_from_slice(string.as_bytes()); out } } diff --git a/src/iter.rs b/src/iter.rs index ceab41a..c86ea94 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -1,5 +1,8 @@ -use crate::{bounds_for, boxed::BoxedString, inline::InlineString, SmartString, SmartStringMode}; -use alloc::string::Drain as StringDrain; +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +use crate::{ops::bounds_for, SmartString, SmartStringMode}; use core::{ fmt::{Debug, Error, Formatter}, iter::FusedIterator, @@ -8,140 +11,70 @@ use core::{ }; /// A draining iterator for a [`SmartString`]. 
-pub struct Drain<'a, Mode>(DrainCast<'a, Mode>) -where - Mode: SmartStringMode; - -enum DrainCast<'a, Mode> -where - Mode: SmartStringMode, -{ - Boxed { - string: *mut SmartString, - iter: Option>, - }, - Inline { - string: *mut InlineString, - start: usize, - end: usize, - iter: Chars<'a>, - }, +pub struct Drain<'a, Mode: SmartStringMode> { + string: *mut SmartString, + start: usize, + end: usize, + iter: Chars<'a>, } -impl<'a, Mode> Drain<'a, Mode> -where - Mode: SmartStringMode, -{ +impl<'a, Mode: SmartStringMode> Drain<'a, Mode> { pub(crate) fn new(string: &'a mut SmartString, range: R) -> Self where R: RangeBounds, { let string_ptr: *mut _ = string; - Drain(match string.cast_mut() { - crate::casts::StringCastMut::Boxed(boxed) => DrainCast::Boxed { - string: string_ptr, - iter: Some(boxed.string_mut().drain(range)), - }, - crate::casts::StringCastMut::Inline(inline) => { - let len = inline.len(); - let (start, end) = bounds_for(&range, len); - let string_ptr: *mut _ = inline; - let iter = inline.as_str()[start..end].chars(); - DrainCast::Inline { - string: string_ptr, - start, - end, - iter, - } - } - }) + let len = string.len(); + let (start, end) = bounds_for(&range, len); + assert!(start <= end); + assert!(end <= len); + assert!(string.as_str().is_char_boundary(start)); + assert!(string.as_str().is_char_boundary(end)); + + let iter = string.as_str()[start..end].chars(); + Drain { + string: string_ptr, + start, + end, + iter, + } } } -impl<'a, Mode> Drop for Drain<'a, Mode> -where - Mode: SmartStringMode, -{ +impl<'a, Mode: SmartStringMode> Drop for Drain<'a, Mode> { fn drop(&mut self) { - match self.0 { - DrainCast::Boxed { - string, - ref mut iter, - } => unsafe { - iter.take(); - (*string).try_demote(); - }, - DrainCast::Inline { - string, start, end, .. 
- } => { - unsafe { (*string).remove_bytes(start, end) }; - } - } + #[allow(unsafe_code)] + let string = unsafe { &mut *self.string }; + debug_assert!(string.as_str().is_char_boundary(self.start)); + debug_assert!(string.as_str().is_char_boundary(self.end)); + string.replace_range(self.start..self.end, ""); } } -impl<'a, Mode> Iterator for Drain<'a, Mode> -where - Mode: SmartStringMode, -{ +impl<'a, Mode: SmartStringMode> Iterator for Drain<'a, Mode> { type Item = char; #[inline] fn next(&mut self) -> Option { - match &mut self.0 { - DrainCast::Boxed { - iter: Some(iter), .. - } => iter.next(), - DrainCast::Boxed { iter: None, .. } => unreachable!(), - DrainCast::Inline { iter, .. } => iter.next(), - } + self.iter.next() } #[inline] fn size_hint(&self) -> (usize, Option) { - match &self.0 { - DrainCast::Boxed { - iter: Some(iter), .. - } => iter.size_hint(), - DrainCast::Boxed { iter: None, .. } => unreachable!(), - DrainCast::Inline { iter, .. } => iter.size_hint(), - } - } - - #[inline] - fn last(mut self) -> Option { - match &mut self.0 { - DrainCast::Boxed { - iter: Some(iter), .. - } => iter.next_back(), - DrainCast::Boxed { iter: None, .. } => unreachable!(), - DrainCast::Inline { iter, .. } => iter.next_back(), - } + self.iter.size_hint() } } -impl<'a, Mode> DoubleEndedIterator for Drain<'a, Mode> -where - Mode: SmartStringMode, -{ +impl<'a, Mode: SmartStringMode> DoubleEndedIterator for Drain<'a, Mode> { #[inline] fn next_back(&mut self) -> Option { - match &mut self.0 { - DrainCast::Boxed { - iter: Some(iter), .. - } => iter.next_back(), - DrainCast::Boxed { iter: None, .. } => unreachable!(), - DrainCast::Inline { iter, .. 
} => iter.next_back(), - } + self.iter.next_back() } } -impl<'a, Mode> FusedIterator for Drain<'a, Mode> where Mode: SmartStringMode {} +impl<'a, Mode: SmartStringMode> FusedIterator for Drain<'a, Mode> {} -impl<'a, Mode> Debug for Drain<'a, Mode> -where - Mode: SmartStringMode, -{ +impl<'a, Mode: SmartStringMode> Debug for Drain<'a, Mode> { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { f.pad("Drain { ... }") } diff --git a/src/lib.rs b/src/lib.rs index 4d26e90..fb7b52a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,18 +25,6 @@ //! //! ## How To Use It? //! -//! Before using [`SmartString`], you should always call -//! [`smartstring::validate()`][validate], to make sure [`SmartString`] is safe to use. -//! Ideally, you should call it at the start of your `main()` function, and -//! from your test suite. [`SmartString`] will also attempt to crash early if -//! inconsistencies are detected, but this isn't foolproof. -//! -//! ```rust -//! fn main() { -//! smartstring::validate(); -//! } -//! ``` -//! //! [`SmartString`] has the exact same API as [`String`], //! all the clever bits happen automatically behind the scenes, so you could just: //! @@ -63,15 +51,6 @@ //! contents. Likewise, if the string's length should drop below its inline //! capacity again, it deallocates the string and moves its contents inline. //! -//! Given that we use the knowledge that a certain bit in the memory layout -//! of [`String`] will always be unset as a discriminant, you would be -//! able to call [`std::mem::transmute::()`][std::mem::transmute] on a boxed -//! smart string and start using it as a normal [`String`] immediately - -//! there's no pointer tagging or similar trickery going on here. -//! (But please don't do that, there's an efficient [`Into`][IntoString] -//! implementation that does the exact same thing with no need to go unsafe -//! in your own code.) -//! //! 
In [`Compact`] mode, it is aggressive about inlining strings, meaning that if you modify a heap allocated //! string such that it becomes short enough for inlining, it will be inlined immediately //! and the allocated [`String`] will be dropped. This may cause multiple @@ -95,24 +74,6 @@ //! memory efficient in these cases. There will always be a slight overhead on all //! operations on boxed strings, compared to [`String`]. //! -//! ## Caveat -//! -//! The way `smartstring` gets by without a discriminant is dependent on the memory layout of the -//! [`std::string::String`] struct, which isn't something the Rust compiler and standard library make any -//! guarantees about. `smartstring` makes an assumption about how it's been laid out, which has held -//! basically since rustc came into existence, but is nonetheless not a safe assumption to make, and if -//! the layout ever changes, `smartstring` will stop working properly (at least on little-endian -//! architectures, the assumptions made on big-endian archs will hold regardless of the actual memory -//! layout). Its test suite does comprehensive validation of these assumptions, and as long as the -//! [CI build](https://travis-ci.org/github/bodil/smartstring) is passing for any given rustc version, -//! you can be sure it will do its job properly on all tested architectures. More directly, as mentioned -//! above, you should always call [`smartstring::validate()`][validate] before using [`SmartString`] to -//! remove any doubt. -//! -//! As an extra precaution, some runtime checks are made as well, so that if the memory layout -//! assumption no longer holds, `smartstring` will not work correctly, but there should be no security -//! implications and it should crash early. -//! //! ## Feature Flags //! //! `smartstring` comes with optional support for the following crates through Cargo @@ -135,6 +96,8 @@ //! [Deserialize]: https://docs.rs/serde/latest/serde/trait.Deserialize.html //! 
[Arbitrary]: https://docs.rs/arbitrary/latest/arbitrary/trait.Arbitrary.html +// Ensure all unsafe blocks get flagged for manual validation. +#![deny(unsafe_code)] #![forbid(rust_2018_idioms)] #![deny(nonstandard_style)] #![warn(unreachable_pub, missing_debug_implementations, missing_docs)] @@ -156,7 +119,7 @@ use core::{ marker::PhantomData, mem::{forget, MaybeUninit}, ops::{ - Add, Bound, Deref, DerefMut, Index, IndexMut, Range, RangeBounds, RangeFrom, RangeFull, + Add, Deref, DerefMut, Index, IndexMut, Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, }, ptr::drop_in_place, @@ -170,7 +133,7 @@ mod config; pub use config::{Compact, LazyCompact, SmartStringMode, MAX_INLINE}; mod marker_byte; -use marker_byte::{Discriminant, Marker}; +use marker_byte::Discriminant; mod inline; use inline::InlineString; @@ -184,6 +147,9 @@ use casts::{StringCast, StringCastInto, StringCastMut}; mod iter; pub use iter::Drain; +mod ops; +use ops::{string_op_grow, string_op_shrink}; + #[cfg(feature = "serde")] mod serde; @@ -193,50 +159,6 @@ mod arbitrary; #[cfg(feature = "proptest")] pub mod proptest; -/// Validate the crate's assumptions about [`String`] memory layout. -/// -/// Because [`SmartString`] makes some assumptions about how [`String`] is -/// laid out in memory that rustc does not actually guarantee, you should -/// always run this function in your target environment before using -/// [`SmartString`], ideally at the start of your application's `main()` -/// function, so your application will crash as early as possible without -/// any security risks. You should also run it as part of your application's -/// test suite, to catch problems before runtime. -/// -/// If the assumptions don't hold, this function will panic. 
-/// -/// # Example -/// -/// ``` -/// fn main() { -/// smartstring::validate(); -/// } -/// ``` -pub fn validate() { - let mut s = String::with_capacity(5); - s.push_str("lol"); - assert_eq!(3, s.len(), "SmartString memory layout check failed"); - assert_eq!(5, s.capacity(), "SmartString memory layout check failed"); - let ptr: *const String = &s; - let ptr: *const usize = ptr.cast(); - let first_bytes = unsafe { *ptr }; - assert_ne!(3, first_bytes, "SmartString memory layout check failed"); - assert_ne!(5, first_bytes, "SmartString memory layout check failed"); - let first_byte = unsafe { *(ptr as *const u8) }; - #[cfg(target_endian = "little")] - assert_eq!( - 0, - first_byte & 0x01, - "SmartString memory layout check failed" - ); - #[cfg(target_endian = "big")] - assert_eq!( - 0, - first_byte & 0x80, - "SmartString memory layout check failed" - ); -} - /// Convenient type aliases. pub mod alias { use super::*; @@ -277,7 +199,10 @@ pub struct SmartString { impl Drop for SmartString { fn drop(&mut self) { if let StringCastMut::Boxed(string) = self.cast_mut() { - unsafe { drop_in_place(string) }; + #[allow(unsafe_code)] + unsafe { + drop_in_place(string) + }; } } } @@ -286,10 +211,10 @@ impl Clone for SmartString { /// Clone a [`SmartString`]. /// /// If the string is inlined, this is a [`Copy`] operation. Otherwise, - /// [`String::clone()`][String::clone] is invoked. + /// a string with the same capacity as the source is allocated. 
fn clone(&self) -> Self { match self.cast() { - StringCast::Boxed(string) => Self::from_boxed(string.string().clone().into()), + StringCast::Boxed(string) => Self::from_boxed(string.clone()), StringCast::Inline(string) => Self::from_inline(*string), } } @@ -301,8 +226,8 @@ impl Deref for SmartString { #[inline(always)] fn deref(&self) -> &Self::Target { match self.cast() { - StringCast::Boxed(string) => string.string().as_str(), - StringCast::Inline(string) => string.as_str(), + StringCast::Boxed(string) => string.deref(), + StringCast::Inline(string) => string.deref(), } } } @@ -311,8 +236,8 @@ impl DerefMut for SmartString { #[inline(always)] fn deref_mut(&mut self) -> &mut Self::Target { match self.cast_mut() { - StringCastMut::Boxed(string) => string.string_mut().as_mut_str(), - StringCastMut::Inline(string) => string.as_mut_str(), + StringCastMut::Boxed(string) => string.deref_mut(), + StringCastMut::Inline(string) => string.deref_mut(), } } } @@ -354,13 +279,16 @@ impl SmartString { Self::from_inline(InlineString::new()) } - fn from_boxed(boxed: Mode::BoxedString) -> Self { + fn from_boxed(boxed: BoxedString) -> Self { let mut out = Self { data: MaybeUninit::uninit(), mode: PhantomData, }; - let data_ptr: *mut Mode::BoxedString = out.data.as_mut_ptr().cast(); - unsafe { data_ptr.write(boxed) }; + let data_ptr: *mut BoxedString = out.data.as_mut_ptr().cast(); + #[allow(unsafe_code)] + unsafe { + data_ptr.write(boxed) + }; out } @@ -372,18 +300,20 @@ impl SmartString { } fn discriminant(&self) -> Discriminant { - let ptr: *const Marker = self.data.as_ptr().cast(); - unsafe { *ptr }.discriminant() + #[allow(unsafe_code)] + unsafe { self.data.assume_init() }.marker.discriminant() } - fn cast(&self) -> StringCast<'_, Mode> { + fn cast(&self) -> StringCast<'_> { + #[allow(unsafe_code)] match self.discriminant() { Discriminant::Inline => StringCast::Inline(unsafe { &*self.data.as_ptr() }), Discriminant::Boxed => StringCast::Boxed(unsafe { &*self.data.as_ptr().cast() 
}), } } - fn cast_mut(&mut self) -> StringCastMut<'_, Mode> { + fn cast_mut(&mut self) -> StringCastMut<'_> { + #[allow(unsafe_code)] match self.discriminant() { Discriminant::Inline => StringCastMut::Inline(unsafe { &mut *self.data.as_mut_ptr() }), Discriminant::Boxed => { @@ -392,11 +322,12 @@ impl SmartString { } } - fn cast_into(mut self) -> StringCastInto { + fn cast_into(mut self) -> StringCastInto { + #[allow(unsafe_code)] match self.discriminant() { Discriminant::Inline => StringCastInto::Inline(unsafe { self.data.assume_init() }), Discriminant::Boxed => StringCastInto::Boxed(unsafe { - let boxed_ptr: *mut Mode::BoxedString = self.data.as_mut_ptr().cast(); + let boxed_ptr: *mut BoxedString = self.data.as_mut_ptr().cast(); let string = boxed_ptr.read(); forget(self); string @@ -404,11 +335,13 @@ impl SmartString { } } - fn promote_from(&mut self, string: String) { + fn promote_from(&mut self, string: BoxedString) { debug_assert!(self.discriminant() == Discriminant::Inline); - let string: Mode::BoxedString = string.into(); - let data: *mut Mode::BoxedString = self.data.as_mut_ptr().cast(); - unsafe { data.write(string) }; + let data: *mut BoxedString = self.data.as_mut_ptr().cast(); + #[allow(unsafe_code)] + unsafe { + data.write(string) + }; } /// Attempt to inline the string if it's currently heap allocated. @@ -428,11 +361,12 @@ impl SmartString { if string.len() > MAX_INLINE { false } else { - let inlined = string.string().as_bytes().into(); + let s: &str = string.deref(); + let inlined = s.into(); + #[allow(unsafe_code)] unsafe { drop_in_place(string); - let data = &mut self.data.as_mut_ptr(); - data.write(inlined); + self.data.as_mut_ptr().write(inlined); } true } @@ -471,46 +405,6 @@ impl SmartString { self.deref_mut() } - /// Push a character to the end of the string. 
- pub fn push(&mut self, ch: char) { - match self.cast_mut() { - StringCastMut::Boxed(string) => string.string_mut().push(ch), - StringCastMut::Inline(string) => { - let len = string.len(); - let new_len = len + ch.len_utf8(); - if new_len > MAX_INLINE { - let mut new_str = String::with_capacity(new_len); - new_str.push_str(string.as_str()); - new_str.push(ch); - self.promote_from(new_str); - } else { - let written = ch.encode_utf8(&mut string.as_mut_slice()[len..]).len(); - string.set_size(len + written); - } - } - } - } - - /// Copy a string slice onto the end of the string. - pub fn push_str(&mut self, string: &str) { - let len = self.len(); - match self.cast_mut() { - StringCastMut::Boxed(this) => this.string_mut().push_str(string), - StringCastMut::Inline(this) => { - let new_len = len + string.len(); - if new_len > MAX_INLINE { - let mut new_str = String::with_capacity(new_len); - new_str.push_str(this.as_str()); - new_str.push_str(string); - self.promote_from(new_str); - } else { - this.as_mut_slice()[len..len + string.len()].copy_from_slice(string.as_bytes()); - this.set_size(len + string.len()); - } - } - } - } - /// Return the currently allocated capacity of the string. /// /// Note that if this is a boxed string, it returns [`String::capacity()`][String::capacity], @@ -521,12 +415,22 @@ impl SmartString { /// it will be reallocated with a default capacity. pub fn capacity(&self) -> usize { if let StringCast::Boxed(string) = self.cast() { - string.string().capacity() + string.capacity() } else { MAX_INLINE } } + /// Push a character to the end of the string. + pub fn push(&mut self, ch: char) { + string_op_grow!(ops::Push, self, ch) + } + + /// Copy a string slice onto the end of the string. + pub fn push_str(&mut self, string: &str) { + string_op_grow!(ops::PushStr, self, string) + } + /// Shrink the capacity of the string to fit its contents exactly. /// /// This has no effect on inline strings, which always have a fixed capacity. 
@@ -539,7 +443,7 @@ impl SmartString { pub fn shrink_to_fit(&mut self) { if let StringCastMut::Boxed(string) = self.cast_mut() { if string.len() > MAX_INLINE { - string.string_mut().shrink_to_fit(); + string.shrink_to_fit(); } } self.really_try_demote(); @@ -550,99 +454,33 @@ impl SmartString { /// If `new_len` is larger than the string's current length, this does nothing. /// If `new_len` isn't on a UTF-8 character boundary, this method panics. pub fn truncate(&mut self, new_len: usize) { - match self.cast_mut() { - StringCastMut::Boxed(string) => string.string_mut().truncate(new_len), - StringCastMut::Inline(string) => { - if new_len < string.len() { - assert!(string.as_str().is_char_boundary(new_len)); - string.set_size(new_len); - } - return; - } - } - self.try_demote(); + string_op_shrink!(ops::Truncate, self, new_len) } /// Pop a `char` off the end of the string. pub fn pop(&mut self) -> Option { - let result = match self.cast_mut() { - StringCastMut::Boxed(string) => string.string_mut().pop()?, - StringCastMut::Inline(string) => { - let ch = string.as_str().chars().rev().next()?; - string.set_size(string.len() - ch.len_utf8()); - return Some(ch); - } - }; - self.try_demote(); - Some(result) + string_op_shrink!(ops::Pop, self) } /// Remove a `char` from the string at the given index. /// /// If the index doesn't fall on a UTF-8 character boundary, this method panics. 
pub fn remove(&mut self, index: usize) -> char { - let result = match self.cast_mut() { - StringCastMut::Boxed(string) => string.string_mut().remove(index), - StringCastMut::Inline(string) => { - let ch = match string.as_str()[index..].chars().next() { - Some(ch) => ch, - None => panic!("cannot remove a char from the end of a string"), - }; - let next = index + ch.len_utf8(); - let len = string.len(); - let tail_len = len - next; - if tail_len > 0 { - unsafe { - (&mut string.as_mut_slice()[index] as *mut u8) - .copy_from(&string.as_slice()[next], tail_len); - } - } - string.set_size(len - (next - index)); - return ch; - } - }; - self.try_demote(); - result + string_op_shrink!(ops::Remove, self, index) } /// Insert a `char` into the string at the given index. /// /// If the index doesn't fall on a UTF-8 character boundary, this method panics. pub fn insert(&mut self, index: usize, ch: char) { - match self.cast_mut() { - StringCastMut::Boxed(string) => { - string.string_mut().insert(index, ch); - } - StringCastMut::Inline(string) if string.len() + ch.len_utf8() <= MAX_INLINE => { - let mut buffer = [0; 4]; - let buffer = ch.encode_utf8(&mut buffer).as_bytes(); - string.insert_bytes(index, buffer); - } - _ => { - let mut string = self.to_string(); - string.insert(index, ch); - self.promote_from(string); - } - } + string_op_grow!(ops::Insert, self, index, ch) } /// Insert a string slice into the string at the given index. /// /// If the index doesn't fall on a UTF-8 character boundary, this method panics. 
pub fn insert_str(&mut self, index: usize, string: &str) { - match self.cast_mut() { - StringCastMut::Boxed(this) => { - this.string_mut().insert_str(index, string); - } - StringCastMut::Inline(this) if this.len() + string.len() <= MAX_INLINE => { - this.insert_bytes(index, string.as_bytes()); - } - _ => { - let mut this = self.to_string(); - this.insert_str(index, string); - self.promote_from(this); - } - } + string_op_grow!(ops::InsertStr, self, index, string) } /// Split the string into two at the given index. @@ -652,18 +490,7 @@ impl SmartString { /// /// If the index doesn't fall on a UTF-8 character boundary, this method panics. pub fn split_off(&mut self, index: usize) -> Self { - let result = match self.cast_mut() { - StringCastMut::Boxed(string) => string.string_mut().split_off(index), - StringCastMut::Inline(string) => { - let s = string.as_str(); - assert!(s.is_char_boundary(index)); - let result = s[index..].into(); - string.set_size(index); - return result; - } - }; - self.try_demote(); - result.into() + string_op_shrink!(ops::SplitOff, self, index) } /// Clear the string. @@ -674,45 +501,11 @@ impl SmartString { } /// Filter out `char`s not matching a predicate. 
- pub fn retain(&mut self, mut f: F) + pub fn retain(&mut self, f: F) where F: FnMut(char) -> bool, { - match self.cast_mut() { - StringCastMut::Boxed(string) => { - string.string_mut().retain(f); - } - StringCastMut::Inline(string) => { - let len = string.len(); - let mut del_bytes = 0; - let mut index = 0; - - while index < len { - let ch = unsafe { - string - .as_mut_str() - .get_unchecked(index..len) - .chars() - .next() - .unwrap() - }; - let ch_len = ch.len_utf8(); - - if !f(ch) { - del_bytes += ch_len; - } else if del_bytes > 0 { - let ptr = string.as_mut_slice().as_mut_ptr(); - unsafe { ptr.add(index - del_bytes).copy_from(ptr.add(index), ch_len) }; - } - index += ch_len; - } - if del_bytes > 0 { - string.set_size(len - del_bytes); - } - return; - } - } - self.try_demote(); + string_op_shrink!(ops::Retain, self, f) } /// Construct a draining iterator over a given range. @@ -731,58 +524,11 @@ impl SmartString { where R: RangeBounds, { - match self.cast_mut() { - StringCastMut::Boxed(string) => { - string.string_mut().replace_range(range, replace_with); - } - StringCastMut::Inline(string) => { - let len = string.len(); - let (start, end) = bounds_for(&range, len); - assert!(end >= start); - assert!(end <= len); - assert!(string.as_str().is_char_boundary(start)); - assert!(string.as_str().is_char_boundary(end)); - let replaced_len = end - start; - let replace_len = replace_with.len(); - if (len - replaced_len) + replace_len > MAX_INLINE { - let mut string = string.as_str().to_string(); - string.replace_range(range, replace_with); - self.promote_from(string); - } else { - let new_end = start + replace_len; - let end_size = len - end; - let ptr = string.as_mut_slice().as_mut_ptr(); - unsafe { - ptr.add(end).copy_to(ptr.add(new_end), end_size); - ptr.add(start) - .copy_from(replace_with.as_bytes().as_ptr(), replace_len); - } - string.set_size(start + replace_len + end_size); - } - return; - } - } + string_op_grow!(ops::ReplaceRange, self, &range, replace_with); 
self.try_demote(); } } -fn bounds_for(range: &R, max_len: usize) -> (usize, usize) -where - R: RangeBounds, -{ - let start = match range.start_bound() { - Bound::Included(&n) => n, - Bound::Excluded(&n) => n.checked_add(1).unwrap(), - Bound::Unbounded => 0, - }; - let end = match range.end_bound() { - Bound::Included(&n) => n.checked_add(1).unwrap(), - Bound::Excluded(&n) => n, - Bound::Unbounded => max_len, - }; - (start, end) -} - impl Default for SmartString { fn default() -> Self { Self::new() @@ -902,7 +648,7 @@ impl From<&'_ str> for SmartString { if string.len() > MAX_INLINE { Self::from_boxed(string.to_string().into()) } else { - Self::from_inline(string.as_bytes().into()) + Self::from_inline(string.into()) } } } @@ -912,7 +658,7 @@ impl From<&'_ mut str> for SmartString { if string.len() > MAX_INLINE { Self::from_boxed(string.to_string().into()) } else { - Self::from_inline(string.as_bytes().into()) + Self::from_inline(string.deref().into()) } } } @@ -922,7 +668,7 @@ impl From<&'_ String> for SmartString { if string.len() > MAX_INLINE { Self::from_boxed(string.clone().into()) } else { - Self::from_inline(string.as_bytes().into()) + Self::from_inline(string.deref().into()) } } } @@ -932,7 +678,7 @@ impl From for SmartString { if string.len() > MAX_INLINE { Self::from_boxed(string.into()) } else { - Self::from_inline(string.as_bytes().into()) + Self::from_inline(string.deref().into()) } } } @@ -1125,8 +871,8 @@ impl Into for SmartString { /// [String]: https://doc.rust-lang.org/std/string/struct.String.html fn into(self) -> String { match self.cast_into() { - StringCastInto::Boxed(string) => string.into_string(), - StringCastInto::Inline(string) => string.as_str().to_string(), + StringCastInto::Boxed(string) => string.into(), + StringCastInto::Inline(string) => string.to_string(), } } } diff --git a/src/marker_byte.rs b/src/marker_byte.rs index 38ac612..b958a4e 100644 --- a/src/marker_byte.rs +++ b/src/marker_byte.rs @@ -2,12 +2,6 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. -// We need to know the endianness of the platform to know how to deal with pointers. -#[cfg(target_endian = "big")] -const UPSIDE_DOWN_LAND: bool = false; -#[cfg(target_endian = "little")] -const UPSIDE_DOWN_LAND: bool = true; - #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub(crate) enum Discriminant { Boxed, @@ -39,12 +33,7 @@ pub(crate) struct Marker(u8); impl Marker { #[inline(always)] const fn assemble(discriminant: Discriminant, data: u8) -> u8 { - let data = data; - if UPSIDE_DOWN_LAND { - data << 1 | discriminant.bit() - } else { - discriminant.bit() << 7 | data - } + data << 1 | discriminant.bit() } #[inline(always)] @@ -53,31 +42,19 @@ impl Marker { } #[inline(always)] - pub(crate) const unsafe fn new_inline_unsafe(data: u8) -> Self { - Self(Self::assemble(Discriminant::Inline, data)) - } - - pub(crate) fn new_inline(data: u8) -> Self { + pub(crate) const fn new_inline(data: u8) -> Self { debug_assert!(data < 0x80); - unsafe { Self::new_inline_unsafe(data) } + Self(Self::assemble(Discriminant::Inline, data)) } #[inline(always)] pub(crate) const fn discriminant(self) -> Discriminant { - Discriminant::from_bit(if UPSIDE_DOWN_LAND { - self.0 & 0x01 != 0 - } else { - self.0 & 0x80 != 0 - }) + Discriminant::from_bit(self.0 & 0x01 != 0) } #[inline(always)] pub(crate) const fn data(self) -> u8 { - if UPSIDE_DOWN_LAND { - self.0 >> 1 - } else { - self.0 & 0x7f - } + self.0 >> 1 } #[inline(always)] diff --git a/src/ops.rs b/src/ops.rs new file mode 100644 index 0000000..f662264 --- /dev/null +++ b/src/ops.rs @@ -0,0 +1,271 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Generic string ops. +//! +//! `string_op_grow` is for ops which may grow but not shrink the target +//! 
string, and should have a `cap` method which will return the new +//! minimum required capacity. +//! +//! `string_op_shrink` is for ops which may shrink but not grow the target +//! string. They don't need a `cap` method, and will try to demote the +//! string as appropriate after calling `op`. + +use core::{ + marker::PhantomData, + ops::{Bound, Deref, DerefMut, RangeBounds}, +}; + +pub(crate) trait GenericString: Deref + DerefMut { + fn set_size(&mut self, size: usize); + fn as_mut_capacity_slice(&mut self) -> &mut [u8]; +} + +macro_rules! string_op_grow { + ($action:ty, $target:ident, $($arg:expr),*) => { + match $target.cast_mut() { + StringCastMut::Boxed(this) => { + this.ensure_capacity(<$action>::cap(this, $($arg),*)); + <$action>::op(this, $($arg),*) + } + StringCastMut::Inline(this) => { + let new_size = <$action>::cap(this,$($arg),*); + if new_size > MAX_INLINE { + let mut new_str = BoxedString::from_str(new_size, this); + let result = <$action>::op(&mut new_str, $($arg),*); + $target.promote_from(new_str); + result + } else { + <$action>::op(this, $($arg),*) + } + } + } + }; +} +pub(crate) use string_op_grow; + +macro_rules!
string_op_shrink { + ($action:ty, $target:ident, $($arg:expr),*) => {{ + let result = match $target.cast_mut() { + StringCastMut::Boxed(this) => { + <$action>::op(this, $($arg),*) + } + StringCastMut::Inline(this) => { + <$action>::op(this, $($arg),*) + } + }; + $target.try_demote(); + result + }}; + + ($action:ty, $target:ident) => { + string_op_shrink!($action, $target,) + } +} +pub(crate) use string_op_shrink; + +use crate::{SmartString, SmartStringMode}; + +pub(crate) fn bounds_for(range: &R, max_len: usize) -> (usize, usize) +where + R: RangeBounds, +{ + let start = match range.start_bound() { + Bound::Included(&n) => n, + Bound::Excluded(&n) => n.checked_add(1).unwrap(), + Bound::Unbounded => 0, + }; + let end = match range.end_bound() { + Bound::Included(&n) => n.checked_add(1).unwrap(), + Bound::Excluded(&n) => n, + Bound::Unbounded => max_len, + }; + (start, end) +} + +fn insert_bytes(this: &mut S, index: usize, src: &[u8]) { + let len = this.len(); + let src_len = src.len(); + let tail_index = index + src_len; + if src_len > 0 { + let buf = this.as_mut_capacity_slice(); + buf.copy_within(index..len, tail_index); + buf[index..tail_index].copy_from_slice(src); + this.set_size(len + src_len); + } +} + +pub(crate) struct PushStr; +impl PushStr { + pub(crate) fn cap(this: &S, string: &str) -> usize { + this.len() + string.len() + } + + pub(crate) fn op(this: &mut S, string: &str) { + let len = this.len(); + let new_len = len + string.len(); + this.as_mut_capacity_slice()[len..new_len].copy_from_slice(string.as_bytes()); + this.set_size(new_len); + } +} + +pub(crate) struct Push; +impl Push { + pub(crate) fn cap(this: &S, ch: char) -> usize { + this.len() + ch.len_utf8() + } + + pub(crate) fn op(this: &mut S, ch: char) { + let len = this.len(); + let written = ch + .encode_utf8(&mut this.as_mut_capacity_slice()[len..]) + .len(); + this.set_size(len + written); + } +} + +pub(crate) struct Truncate; +impl Truncate { + pub(crate) fn op(this: &mut S, new_len: 
usize) { + if new_len < this.len() { + assert!(this.deref().is_char_boundary(new_len)); + this.set_size(new_len) + } + } +} + +pub(crate) struct Pop; +impl Pop { + pub(crate) fn op(this: &mut S) -> Option { + let ch = this.deref().chars().rev().next()?; + this.set_size(this.len() - ch.len_utf8()); + Some(ch) + } +} + +pub(crate) struct Remove; +impl Remove { + pub(crate) fn op(this: &mut S, index: usize) -> char { + let ch = match this.deref()[index..].chars().next() { + Some(ch) => ch, + None => panic!("cannot remove a char from the end of a string"), + }; + let next = index + ch.len_utf8(); + let len = this.len(); + let tail_len = len - next; + if tail_len > 0 { + this.as_mut_capacity_slice().copy_within(next..len, index); + } + this.set_size(len - (next - index)); + ch + } +} + +pub(crate) struct Insert; +impl Insert { + pub(crate) fn cap(this: &S, index: usize, ch: char) -> usize { + assert!(this.deref().is_char_boundary(index)); + this.len() + ch.len_utf8() + } + + pub(crate) fn op(this: &mut S, index: usize, ch: char) { + let mut buffer = [0; 4]; + let buffer = ch.encode_utf8(&mut buffer).as_bytes(); + insert_bytes(this, index, buffer); + } +} + +pub(crate) struct InsertStr; +impl InsertStr { + pub(crate) fn cap(this: &S, index: usize, string: &str) -> usize { + assert!(this.deref().is_char_boundary(index)); + this.len() + string.len() + } + + pub(crate) fn op(this: &mut S, index: usize, string: &str) { + insert_bytes(this, index, string.as_bytes()); + } +} + +pub(crate) struct SplitOff(PhantomData); +impl SplitOff { + pub(crate) fn op(this: &mut S, index: usize) -> SmartString { + assert!(this.deref().is_char_boundary(index)); + let result = this.deref()[index..].into(); + this.set_size(index); + result + } +} + +pub(crate) struct Retain; +impl Retain { + pub(crate) fn op(this: &mut S, mut f: F) + where + F: FnMut(char) -> bool, + S: GenericString, + { + let len = this.len(); + let mut del_bytes = 0; + let mut index = 0; + + while index < len { + let ch = 
this + .deref_mut() + .get(index..len) + .unwrap() + .chars() + .next() + .unwrap(); + let ch_len = ch.len_utf8(); + + if !f(ch) { + del_bytes += ch_len; + } else if del_bytes > 0 { + this.as_mut_capacity_slice() + .copy_within(index..index + ch_len, index - del_bytes); + } + index += ch_len; + } + + if del_bytes > 0 { + this.set_size(len - del_bytes); + } + } +} + +pub(crate) struct ReplaceRange; +impl ReplaceRange { + pub(crate) fn cap(this: &S, range: &R, replace_with: &str) -> usize + where + R: RangeBounds, + S: GenericString, + { + let len = this.len(); + let (start, end) = bounds_for(range, len); + assert!(end >= start); + assert!(end <= len); + assert!(this.deref().is_char_boundary(start)); + assert!(this.deref().is_char_boundary(end)); + let replace_len = replace_with.len(); + let end_size = len - end; + start + replace_len + end_size + } + + pub(crate) fn op(this: &mut S, range: &R, replace_with: &str) + where + R: RangeBounds, + S: GenericString, + { + let len = this.len(); + let (start, end) = bounds_for(range, len); + let replace_len = replace_with.len(); + let new_end = start + replace_len; + let end_size = len - end; + this.as_mut_capacity_slice().copy_within(end..len, new_end); + if replace_len > 0 { + this.as_mut_capacity_slice()[start..new_end].copy_from_slice(replace_with.as_bytes()); + } + this.set_size(start + replace_len + end_size); + } +} diff --git a/src/proptest.rs b/src/proptest.rs index 1379493..6784d37 100644 --- a/src/proptest.rs +++ b/src/proptest.rs @@ -1,3 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + //! `proptest` strategies (requires the `proptest` feature flag). 
use crate::{SmartString, SmartStringMode}; diff --git a/src/serde.rs b/src/serde.rs index 8ab0c36..44efee4 100644 --- a/src/serde.rs +++ b/src/serde.rs @@ -1,3 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + use crate::{SmartString, SmartStringMode}; use alloc::string::String; use core::{fmt, marker::PhantomData}; diff --git a/src/test.rs b/src/test.rs index 041a7ca..bb0587b 100644 --- a/src/test.rs +++ b/src/test.rs @@ -523,9 +523,4 @@ mod tests { SmartString::::from("\u{323}\u{323}\u{323}ω\u{323}\u{323}\u{323}㌣\u{e323}㤘"); s.remove(20); } - - #[test] - fn string_layout_consistency_check() { - crate::validate(); - } } From ec4d0da939324e13bd8b02171daed8461a3144f6 Mon Sep 17 00:00:00 2001 From: Bodil Stokke <17880+bodil@users.noreply.github.com> Date: Wed, 23 Feb 2022 20:17:49 +0000 Subject: [PATCH 2/3] Set MSRV to 1.57 so we can panic in const functions. 
--- .github/workflows/ci.yml | 6 +++--- CHANGELOG.md | 2 ++ Cargo.toml | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 75afa13..1c2d8df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: rust: - stable - nightly - - 1.56.0 # lowest supported version + - 1.57.0 # lowest supported version flags: - --all-features - --no-default-features @@ -38,7 +38,7 @@ jobs: rust: - stable - nightly - - 1.56.0 # lowest supported version + - 1.57.0 # lowest supported version steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 @@ -59,7 +59,7 @@ jobs: rust: - stable - nightly - - 1.56.0 # lowest supported version + - 1.57.0 # lowest supported version steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 60e4d65..ca719ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). stop it now. Converting between `SmartString` and `String` using `From` and `Into` traits is still efficient and allocation free. +- The minimum supported rustc version is now 1.57.0. + ## [0.2.10] - 2022-02-20 ### CHANGED diff --git a/Cargo.toml b/Cargo.toml index 18e78b7..74b0f6e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ readme = "./README.md" categories = ["data-structures"] keywords = ["cache-local", "cpu-cache", "small-string", "sso", "inline-string"] exclude = ["release.toml", "proptest-regressions/**"] -rust-version = "1.56" +rust-version = "1.57" [package.metadata.docs.rs] features = ["arbitrary", "proptest", "serde"] From 0b1aa692f73e703201da471f703e4b998a005a6e Mon Sep 17 00:00:00 2001 From: Bodil Stokke <17880+bodil@users.noreply.github.com> Date: Wed, 23 Feb 2022 20:25:00 +0000 Subject: [PATCH 3/3] Switch the Into impls over to From. 
--- src/boxed.rs | 10 +++++----- src/lib.rs | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/boxed.rs b/src/boxed.rs index b339df6..34d04a0 100644 --- a/src/boxed.rs +++ b/src/boxed.rs @@ -163,11 +163,11 @@ impl From for BoxedString { } } -impl Into for BoxedString { - fn into(self) -> String { +impl From for String { + fn from(s: BoxedString) -> Self { #[allow(unsafe_code)] - let s = unsafe { String::from_raw_parts(self.ptr.as_ptr(), self.len(), self.capacity()) }; - forget(self); - s + let out = unsafe { String::from_raw_parts(s.ptr.as_ptr(), s.len(), s.capacity()) }; + forget(s); + out } } diff --git a/src/lib.rs b/src/lib.rs index fb7b52a..c3741db 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -865,12 +865,12 @@ impl FromStr for SmartString { } } -impl Into for SmartString { +impl From> for String { /// Unwrap a boxed [`String`][String], or copy an inline string into a new [`String`][String]. /// /// [String]: https://doc.rust-lang.org/std/string/struct.String.html - fn into(self) -> String { - match self.cast_into() { + fn from(s: SmartString) -> Self { + match s.cast_into() { StringCastInto::Boxed(string) => string.into(), StringCastInto::Inline(string) => string.to_string(), }