Skip to content

Commit

Permalink
Auto merge of rust-lang#3122 - RalfJung:intrptrcast-clean, r=saethlin
Browse files Browse the repository at this point in the history
intptrcast: remove information about dead allocations

The discussion in rust-lang/miri#3103 convinced me we don't have to keep `int_to_ptr_map` around for dead allocations. But we should not make that depend on the GC, we can just tie it to when the allocation gets freed. That means everything still behaves deterministically, if anything weird happens (but it shouldn't).

r? `@saethlin`
Only the first and last commit contain logic changes, the 2nd commit just moves code around a bit.
  • Loading branch information
bors committed Oct 19, 2023
2 parents 5d62040 + ecaf828 commit 1e71277
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 104 deletions.
221 changes: 123 additions & 98 deletions src/tools/miri/src/intptrcast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ pub type GlobalState = RefCell<GlobalStateInner>;

#[derive(Clone, Debug)]
pub struct GlobalStateInner {
/// This is used as a map between the address of each allocation and its `AllocId`.
/// It is always sorted
/// This is used as a map between the address of each allocation and its `AllocId`. It is always
/// sorted. We cannot use a `HashMap` since we can be given an address that is offset from the
/// base address, and we need to find the `AllocId` it belongs to.
/// This is not the *full* inverse of `base_addr`; dead allocations have been removed.
int_to_ptr_map: Vec<(u64, AllocId)>,
/// The base address for each allocation. We cannot put that into
/// `AllocExtra` because function pointers also have a base address, and
Expand Down Expand Up @@ -62,10 +64,21 @@ impl GlobalStateInner {
}
}

impl<'mir, 'tcx> GlobalStateInner {
/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
/// of `align` that is larger or equal to `addr`
fn align_addr(addr: u64, align: u64) -> u64 {
match addr % align {
0 => addr,
rem => addr.checked_add(align).unwrap() - rem,
}
}

impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// Returns the exposed `AllocId` that corresponds to the specified addr,
// or `None` if the addr is out of bounds
fn alloc_id_from_addr(ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64) -> Option<AllocId> {
fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();
assert!(global_state.provenance_mode != ProvenanceMode::Strict);

Expand All @@ -82,94 +95,40 @@ impl<'mir, 'tcx> GlobalStateInner {
let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
// This never overflows because `addr >= glb`
let offset = addr - glb;
// If the offset exceeds the size of the allocation, don't use this `alloc_id`.
// We require this to be strict in-bounds of the allocation. This arm is only
// entered for addresses that are not the base address, so even zero-sized
// allocations will get recognized at their base address -- but all other
// allocations will *not* be recognized at their "end" address.
let size = ecx.get_alloc_info(alloc_id).0;
if offset <= size.bytes() { Some(alloc_id) } else { None }
if offset < size.bytes() { Some(alloc_id) } else { None }
}
}?;

// We only use this provenance if it has been exposed, *and* is still live.
// We only use this provenance if it has been exposed.
if global_state.exposed.contains(&alloc_id) {
let (_size, _align, kind) = ecx.get_alloc_info(alloc_id);
match kind {
AllocKind::LiveData | AllocKind::Function | AllocKind::VTable => {
return Some(alloc_id);
}
AllocKind::Dead => {}
}
}

None
}

pub fn expose_ptr(
ecx: &mut MiriInterpCx<'mir, 'tcx>,
alloc_id: AllocId,
tag: BorTag,
) -> InterpResult<'tcx> {
let global_state = ecx.machine.intptrcast.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode != ProvenanceMode::Strict {
trace!("Exposing allocation id {alloc_id:?}");
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
}
}
Ok(())
}

pub fn ptr_from_addr_cast(
ecx: &MiriInterpCx<'mir, 'tcx>,
addr: u64,
) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
trace!("Casting {:#x} to a pointer", addr);

// Potentially emit a warning.
let global_state = ecx.machine.intptrcast.borrow();
match global_state.provenance_mode {
ProvenanceMode::Default => {
// The first time this happens at a particular location, print a warning.
thread_local! {
// `Span` is non-`Send`, so we use a thread-local instead.
static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
}
PAST_WARNINGS.with_borrow_mut(|past_warnings| {
let first = past_warnings.is_empty();
if past_warnings.insert(ecx.cur_span()) {
// Newly inserted, so first time we see this span.
ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
}
});
}
ProvenanceMode::Strict => {
throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
}
ProvenanceMode::Permissive => {}
// This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
debug_assert!(!matches!(ecx.get_alloc_info(alloc_id).2, AllocKind::Dead));
Some(alloc_id)
} else {
None
}

// We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
// completely legal to do a cast and then `wrapping_offset` to another allocation and only
// *then* do a memory access. So the allocation that the pointer happens to point to on a
// cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
// *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
// allocation it might be referencing.
Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
}

fn alloc_base_addr(
ecx: &MiriInterpCx<'mir, 'tcx>,
alloc_id: AllocId,
) -> InterpResult<'tcx, u64> {
fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
let ecx = self.eval_context_ref();
let mut global_state = ecx.machine.intptrcast.borrow_mut();
let global_state = &mut *global_state;

Ok(match global_state.base_addr.entry(alloc_id) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
// There is nothing wrong with a raw pointer being cast to an integer only after
// it became dangling. Hence we allow dead allocations.
let (size, align, _kind) = ecx.get_alloc_info(alloc_id);
let (size, align, kind) = ecx.get_alloc_info(alloc_id);
// This is either called immediately after allocation (and then cached), or when
// adjusting `tcx` pointers (which never get freed). So assert that we are looking
// at a live allocation. This also ensures that we never re-assign an address to an
// allocation that previously had an address, but then was freed and the address
// information was removed.
assert!(!matches!(kind, AllocKind::Dead));

// This allocation does not have a base address yet, pick one.
// Leave some space to the previous allocation, to give it some chance to be less aligned.
Expand All @@ -183,7 +142,7 @@ impl<'mir, 'tcx> GlobalStateInner {
.next_base_addr
.checked_add(slack)
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
let base_addr = Self::align_addr(base_addr, align.bytes());
let base_addr = align_addr(base_addr, align.bytes());
entry.insert(base_addr);
trace!(
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
Expand All @@ -205,6 +164,7 @@ impl<'mir, 'tcx> GlobalStateInner {
if global_state.next_base_addr > ecx.target_usize_max() {
throw_exhaust!(AddressSpaceFull);
}
// Also maintain the opposite mapping in `int_to_ptr_map`.
// Given that `next_base_addr` increases in each allocation, pushing the
// corresponding tuple keeps `int_to_ptr_map` sorted
global_state.int_to_ptr_map.push((base_addr, alloc_id));
Expand All @@ -213,15 +173,71 @@ impl<'mir, 'tcx> GlobalStateInner {
}
})
}
}

impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let ecx = self.eval_context_mut();
let global_state = ecx.machine.intptrcast.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode != ProvenanceMode::Strict {
trace!("Exposing allocation id {alloc_id:?}");
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
}
}
Ok(())
}

fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
trace!("Casting {:#x} to a pointer", addr);

let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();

// Potentially emit a warning.
match global_state.provenance_mode {
ProvenanceMode::Default => {
// The first time this happens at a particular location, print a warning.
thread_local! {
// `Span` is non-`Send`, so we use a thread-local instead.
static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
}
PAST_WARNINGS.with_borrow_mut(|past_warnings| {
let first = past_warnings.is_empty();
if past_warnings.insert(ecx.cur_span()) {
// Newly inserted, so first time we see this span.
ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
}
});
}
ProvenanceMode::Strict => {
throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
}
ProvenanceMode::Permissive => {}
}

// We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
// completely legal to do a cast and then `wrapping_offset` to another allocation and only
// *then* do a memory access. So the allocation that the pointer happens to point to on a
// cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
// *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
// allocation it might be referencing.
Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
}

/// Convert a relative (tcx) pointer to a Miri pointer.
pub fn ptr_from_rel_ptr(
ecx: &MiriInterpCx<'mir, 'tcx>,
fn ptr_from_rel_ptr(
&self,
ptr: Pointer<AllocId>,
tag: BorTag,
) -> InterpResult<'tcx, Pointer<Provenance>> {
let ecx = self.eval_context_ref();

let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id)?;
let base_addr = ecx.addr_from_alloc_id(alloc_id)?;

// Add offset with the right kind of pointer-overflowing arithmetic.
let dl = ecx.data_layout();
Expand All @@ -231,22 +247,21 @@ impl<'mir, 'tcx> GlobalStateInner {

/// When a pointer is used for a memory access, this computes where in which allocation the
/// access is going.
pub fn ptr_get_alloc(
ecx: &MiriInterpCx<'mir, 'tcx>,
ptr: Pointer<Provenance>,
) -> Option<(AllocId, Size)> {
fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
let ecx = self.eval_context_ref();

let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)

let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
alloc_id
} else {
// A wildcard pointer.
GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())?
ecx.alloc_id_from_addr(addr.bytes())?
};

// This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr
// must have been called in the past.
let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id).unwrap();
// must have been called in the past, so we can just look up the address in the map.
let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap();

// Wrapping "addr - base_addr"
#[allow(clippy::cast_possible_wrap)] // we want to wrap here
Expand All @@ -256,14 +271,24 @@ impl<'mir, 'tcx> GlobalStateInner {
Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0),
))
}
}

/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
/// of `align` that is larger or equal to `addr`
fn align_addr(addr: u64, align: u64) -> u64 {
match addr % align {
0 => addr,
rem => addr.checked_add(align).unwrap() - rem,
}
impl GlobalStateInner {
pub fn free_alloc_id(&mut self, dead_id: AllocId) {
// We can *not* remove this from `base_addr`, since the interpreter design requires that we
// be able to retrieve an AllocId + offset for any memory access *before* we check if the
// access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
// access to determine the allocation ID and offset -- and there can still be pointers with
// `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
// `None` only if the pointer truly has no provenance (this ensures consistent error
// messages).
// However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
// can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
// returns a dead allocation.
self.int_to_ptr_map.retain(|&(_, id)| id != dead_id);
// We can also remove it from `exposed`, since this allocation can anyway not be returned by
// `alloc_id_from_addr` any more.
self.exposed.remove(&dead_id);
}
}

Expand All @@ -273,7 +298,7 @@ mod tests {

#[test]
fn test_align_addr() {
assert_eq!(GlobalStateInner::align_addr(37, 4), 40);
assert_eq!(GlobalStateInner::align_addr(44, 4), 44);
assert_eq!(align_addr(37, 4), 40);
assert_eq!(align_addr(44, 4), 44);
}
}
2 changes: 1 addition & 1 deletion src/tools/miri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ pub use crate::eval::{
create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
};
pub use crate::helpers::EvalContextExt as _;
pub use crate::intptrcast::ProvenanceMode;
pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode};
pub use crate::machine::{
AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind,
PrimitiveLayouts, Provenance, ProvenanceExtra,
Expand Down
10 changes: 5 additions & 5 deletions src/tools/miri/src/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
// Value does not matter, SB is disabled
BorTag::default()
};
intptrcast::GlobalStateInner::ptr_from_rel_ptr(ecx, ptr, tag)
ecx.ptr_from_rel_ptr(ptr, tag)
}

/// Called on `usize as ptr` casts.
Expand All @@ -1158,7 +1158,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ecx: &MiriInterpCx<'mir, 'tcx>,
addr: u64,
) -> InterpResult<'tcx, Pointer<Option<Self::Provenance>>> {
intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr)
ecx.ptr_from_addr_cast(addr)
}

/// Called on `ptr as usize` casts.
Expand All @@ -1169,8 +1169,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ptr: Pointer<Self::Provenance>,
) -> InterpResult<'tcx> {
match ptr.provenance {
Provenance::Concrete { alloc_id, tag } =>
intptrcast::GlobalStateInner::expose_ptr(ecx, alloc_id, tag),
Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
Provenance::Wildcard => {
// No need to do anything for wildcard pointers as
// their provenances have already been previously exposed.
Expand All @@ -1191,7 +1190,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ecx: &MiriInterpCx<'mir, 'tcx>,
ptr: Pointer<Self::Provenance>,
) -> Option<(AllocId, Size, Self::ProvenanceExtra)> {
let rel = intptrcast::GlobalStateInner::ptr_get_alloc(ecx, ptr);
let rel = ecx.ptr_get_alloc(ptr);

rel.map(|(alloc_id, size)| {
let tag = match ptr.provenance {
Expand Down Expand Up @@ -1263,6 +1262,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
{
*deallocated_at = Some(machine.current_span());
}
machine.intptrcast.get_mut().free_alloc_id(alloc_id);
Ok(())
}

Expand Down

0 comments on commit 1e71277

Please sign in to comment.