From f78523616ea4989148f60c0943bd2c6cae364339 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 2 Aug 2024 10:18:56 +0800 Subject: [PATCH 1/5] Extra assertions for mutators and epilogues (#1182) This commit adds assertions about the number of mutators and the list of mutators returned from the VM binding. Also asserts that epilogues are not silently dropped before they are executed. It could happen if a buggy VM binding returns a number of mutators that does not match the actual list of mutators. This should detect bugs like https://github.com/mmtk/mmtk-ruby/issues/84 --- src/plan/marksweep/global.rs | 4 +++ src/policy/immix/immixspace.rs | 8 +++++- src/policy/marksweepspace/native_ms/global.rs | 14 ++++++++++ src/scheduler/gc_work.rs | 28 +++++++++++++------ src/util/epilogue.rs | 13 +++++++++ src/util/mod.rs | 1 + 6 files changed, 59 insertions(+), 9 deletions(-) create mode 100644 src/util/epilogue.rs diff --git a/src/plan/marksweep/global.rs b/src/plan/marksweep/global.rs index fb6c271cab..4a19f77841 100644 --- a/src/plan/marksweep/global.rs +++ b/src/plan/marksweep/global.rs @@ -65,6 +65,10 @@ impl Plan for MarkSweep { self.common.release(tls, true); } + fn end_of_gc(&mut self, _tls: VMWorkerThread) { + self.ms.end_of_gc(); + } + fn collection_required(&self, space_full: bool, _space: Option>) -> bool { self.base().collection_required(self, space_full) } diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index 73756d2f90..e5be6bbcb4 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -9,7 +9,6 @@ use crate::policy::sft_map::SFTMap; use crate::policy::space::{CommonSpace, Space}; use crate::util::alloc::allocator::AllocatorContext; use crate::util::constants::LOG_BYTES_IN_PAGE; -use crate::util::copy::*; use crate::util::heap::chunk_map::*; use crate::util::heap::BlockPageResource; use crate::util::heap::PageResource; @@ -19,6 +18,7 @@ use crate::util::metadata::side_metadata::SideMetadataSpec; use crate::util::metadata::vo_bit; use crate::util::metadata::{self, MetadataSpec}; use crate::util::object_forwarding; +use crate::util::{copy::*, epilogue}; use crate::util::{Address, ObjectReference}; use crate::vm::*; use crate::{ @@ -979,6 +979,12 @@ impl FlushPageResource { } } +impl Drop for FlushPageResource { + fn drop(&mut self) { + epilogue::debug_assert_counter_zero(&self.counter, "FlushPageResource::counter"); + } +} + use crate::policy::copy_context::PolicyCopyContext; use crate::util::alloc::Allocator; use crate::util::alloc::ImmixAllocator; diff --git a/src/policy/marksweepspace/native_ms/global.rs b/src/policy/marksweepspace/native_ms/global.rs index 41773be244..b4699db30c 100644 --- a/src/policy/marksweepspace/native_ms/global.rs +++ b/src/policy/marksweepspace/native_ms/global.rs @@ -8,6 +8,7 @@ use crate::{ scheduler::{GCWorkScheduler, GCWorker, WorkBucketStage}, util::{ copy::CopySemantics, + epilogue, heap::{BlockPageResource, PageResource}, metadata::{self, side_metadata::SideMetadataSpec, MetadataSpec}, ObjectReference, @@ -373,6 +374,13 @@ impl MarkSweepSpace { self.scheduler.work_buckets[crate::scheduler::WorkBucketStage::Release].add(work_packet); } + pub fn end_of_gc(&mut self) { + epilogue::debug_assert_counter_zero( + &self.pending_release_packets, + "pending_release_packets", + ); + } + /// Release a block. 
pub fn release_block(&self, block: Block) { self.block_clear_metadata(block); @@ -581,3 +589,9 @@ impl RecycleBlocks { } } } + +impl Drop for RecycleBlocks { + fn drop(&mut self) { + epilogue::debug_assert_counter_zero(&self.counter, "RecycleBlocks::counter"); + } +} diff --git a/src/scheduler/gc_work.rs b/src/scheduler/gc_work.rs index 4335ceea16..ff2fb75511 100644 --- a/src/scheduler/gc_work.rs +++ b/src/scheduler/gc_work.rs @@ -60,11 +60,17 @@ impl GCWork for Prepare { plan_mut.prepare(worker.tls); if plan_mut.constraints().needs_prepare_mutator { - for mutator in ::VMActivePlan::mutators() { - mmtk.scheduler.work_buckets[WorkBucketStage::Prepare] - .add(PrepareMutator::::new(mutator)); - } + let prepare_mutator_packets = ::VMActivePlan::mutators() + .map(|mutator| Box::new(PrepareMutator::::new(mutator)) as _) + .collect::>(); + // Just in case the VM binding is inconsistent about the number of mutators and the actual mutator list. + debug_assert_eq!( + prepare_mutator_packets.len(), + ::VMActivePlan::number_of_mutators() + ); + mmtk.scheduler.work_buckets[WorkBucketStage::Prepare].bulk_add(prepare_mutator_packets); } + for w in &mmtk.scheduler.worker_group.workers_shared { let result = w.designated_work.push(Box::new(PrepareCollector)); debug_assert!(result.is_ok()); @@ -133,10 +139,16 @@ impl GCWork for Release { let plan_mut: &mut C::PlanType = unsafe { &mut *(self.plan as *const _ as *mut _) }; plan_mut.release(worker.tls); - for mutator in ::VMActivePlan::mutators() { - mmtk.scheduler.work_buckets[WorkBucketStage::Release] - .add(ReleaseMutator::::new(mutator)); - } + let release_mutator_packets = ::VMActivePlan::mutators() + .map(|mutator| Box::new(ReleaseMutator::::new(mutator)) as _) + .collect::>(); + // Just in case the VM binding is inconsistent about the number of mutators and the actual mutator list. + debug_assert_eq!( + release_mutator_packets.len(), + ::VMActivePlan::number_of_mutators() + ); + mmtk.scheduler.work_buckets[WorkBucketStage::Release].bulk_add(release_mutator_packets); + for w in &mmtk.scheduler.worker_group.workers_shared { let result = w.designated_work.push(Box::new(ReleaseCollector)); debug_assert!(result.is_ok()); diff --git a/src/util/epilogue.rs b/src/util/epilogue.rs new file mode 100644 index 0000000000..b97a88fc74 --- /dev/null +++ b/src/util/epilogue.rs @@ -0,0 +1,13 @@ +//! Utilities for implementing epilogues. +//! +//! Epilogues are operations that are done when several other operations are done. + +use std::sync::atomic::{AtomicUsize, Ordering}; + +/// A debugging method for detecting the case when the epilogue is never executed. +pub fn debug_assert_counter_zero(counter: &AtomicUsize, what: &'static str) { + let value = counter.load(Ordering::SeqCst); + if value != 0 { + panic!("{what} is still {value}."); + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 8036ad7ae1..54ccc3ba8e 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -44,6 +44,7 @@ pub mod test_util; /// An analysis framework for collecting data and profiling in GC. #[cfg(feature = "analysis")] pub(crate) mod analysis; +pub(crate) mod epilogue; /// Non-generic refs to generic types of ``. pub(crate) mod erase_vm; /// Finalization implementation. 
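For reference, the shape of the pattern that these `debug_assert_counter_zero` calls guard can be sketched outside MMTk as follows. This is a minimal self-contained illustration, not MMTk's actual `FlushPageResource` or `RecycleBlocks` types: a counter is initialized to the number of pending work packets, each packet decrements it, and the packet that brings it to zero runs the epilogue. The `Drop` assertion catches the case where the counter was initialized with a wrong count (for example, a mutator count from the VM binding that does not match the actual mutator list), in which case the epilogue would be silently dropped without ever running.

use std::sync::atomic::{AtomicUsize, Ordering};

/// Mirrors `epilogue::debug_assert_counter_zero` above.
fn debug_assert_counter_zero(counter: &AtomicUsize, what: &'static str) {
    let value = counter.load(Ordering::SeqCst);
    if value != 0 {
        panic!("{what} is still {value}.");
    }
}

/// A hypothetical epilogue that runs when the last of `n` tasks finishes.
struct Epilogue {
    counter: AtomicUsize,
}

impl Epilogue {
    fn new(n: usize) -> Self {
        Epilogue {
            counter: AtomicUsize::new(n),
        }
    }

    /// Each task calls this when it finishes. The task that brings the
    /// counter to zero executes the epilogue.
    fn task_done(&self) {
        if self.counter.fetch_sub(1, Ordering::SeqCst) == 1 {
            println!("all tasks done; running the epilogue");
        }
    }
}

impl Drop for Epilogue {
    fn drop(&mut self) {
        // If the counter was initialized to more tasks than actually ran,
        // it never reaches zero and the epilogue never executes. This
        // assertion turns that silent bug into a panic at drop time.
        debug_assert_counter_zero(&self.counter, "Epilogue::counter");
    }
}

fn main() {
    let epilogue = Epilogue::new(2);
    epilogue.task_done();
    epilogue.task_done(); // The counter reaches zero here; the epilogue runs.
    // `epilogue` is dropped with the counter at zero, so the assertion passes.
}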
From 8623b77452531c4dc611650dc2b8b0d3e9563259 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Tue, 6 Aug 2024 22:11:01 +0800 Subject: [PATCH 2/5] Refactor iterate_meta_bits (#1181) This PR refactors the mechanism for visiting (reading and/or updating) side metadata in bulk, specifically the function `SideMetadataSpec::iterate_meta_bits`. The function now uses a single `FnMut` callback instead of two `Fn` callbacks. It uses the enum type `BitByteRange` to distinguish whole byte ranges from bit ranges in a byte. This allows the user to capture variables mutably in the callback. This also removes the `Cell` used in `find_prev_non_zero_value_fast`. The function is made non-recursive to improve the performance. Some test cases are added to test for corner cases. The function is moved to a dedicated `ranges` module and renamed to `break_bit_range`, for several reasons: - It was a method of `SideMetadataSpec`, but it does not access any member of `SideMetadataSpec`. - It needs a non-trivial amount of testing to get corner cases correct, especially after refactoring into a non-recursive function. - Related types and functions can be added to the `ranges` module in the future. - Breaking a range of bytes into a range of aligned words and unaligned bytes in the beginning and the end. It will be used by finding VO bits from internal pointers and finding all VO bits in a region (for heap traversal). - Breaking a range of bytes at chunk boundaries. It will be used by `bulk_update_metadata`. --- Cargo.toml | 5 +- benches/main.rs | 51 +-- benches/mock_bench/mod.rs | 31 ++ benches/regular_bench/bulk_meta/bzero_bset.rs | 62 +++ benches/regular_bench/bulk_meta/mod.rs | 7 + benches/regular_bench/mod.rs | 7 + src/util/metadata/side_metadata/global.rs | 316 ++++++--------- src/util/metadata/side_metadata/mod.rs | 1 + src/util/metadata/side_metadata/ranges.rs | 369 ++++++++++++++++++ src/util/mod.rs | 2 + src/util/test_private/mod.rs | 35 ++ 11 files changed, 645 insertions(+), 241 deletions(-) create mode 100644 benches/regular_bench/bulk_meta/bzero_bset.rs create mode 100644 benches/regular_bench/bulk_meta/mod.rs create mode 100644 benches/regular_bench/mod.rs create mode 100644 src/util/metadata/side_metadata/ranges.rs create mode 100644 src/util/test_private/mod.rs diff --git a/Cargo.toml b/Cargo.toml index b089819007..45f528520c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,7 +73,10 @@ perf_counter = ["pfm"] # This feature is only used for tests with MockVM. # CI scripts run those tests with this feature. -mock_test = [] +mock_test = ["test_private"] + +# This feature will expose some private functions for testings or benchmarking. +test_private = [] # .github/scripts/ci-common.sh extracts features from the following part (including from comments). # So be careful when editing or adding stuff to the section below. diff --git a/benches/main.rs b/benches/main.rs index 6c735ce4e2..651299694f 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -2,42 +2,25 @@ use criterion::criterion_group; use criterion::criterion_main; use criterion::Criterion; -// As we can only initialize one MMTk instance, we have to run each benchmark in a separate process. -// So we only register one benchmark to criterion ('bench_main'), and based on the env var MMTK_BENCH, -// we pick the right benchmark to run. +#[cfg(all(feature = "mock_test", feature = "test_private"))] +pub mod mock_bench; -// The benchmark can be executed with the following command. The feature `mock_test` is required, as the tests use MockVM. 
-// MMTK_BENCH=alloc cargo bench --features mock_test
-// MMTK_BENCH=sft cargo bench --features mock_test
+#[cfg(all(not(feature = "mock_test"), feature = "test_private"))]
+pub mod regular_bench;
-// [Yi] I am not sure if these benchmarks are helpful any more after the MockVM refactoring. MockVM is really slow, as it
-// is accessed with a lock, and it dispatches every call to function pointers in a struct. These tests may use MockVM,
-// so they become slower as well. And the slowdown
-// from MockVM may hide the actual performance difference when we change the functions that are benchmarked.
-// We may want to improve the MockVM implementation so we can skip dispatching for benchmarking, or introduce another MockVM
-// implementation for benchmarking.
-// However, I will just keep these benchmarks here. If we find it not useful, and we do not plan to improve MockVM, we can delete
-// them.
-
-#[cfg(feature = "mock_test")]
-mod mock_bench;
-
-pub fn bench_main(_c: &mut Criterion) {
-    #[cfg(feature = "mock_test")]
-    match std::env::var("MMTK_BENCH") {
-        Ok(bench) => match bench.as_str() {
-            "alloc" => mock_bench::alloc::bench(_c),
-            "internal_pointer" => mock_bench::internal_pointer::bench(_c),
-            "sft" => mock_bench::sft::bench(_c),
-            _ => panic!("Unknown benchmark {:?}", bench),
-        },
-        Err(_) => panic!("Need to name a benchmark by the env var MMTK_BENCH"),
-    }
-
-    #[cfg(not(feature = "mock_test"))]
-    {
-        eprintln!("ERROR: Currently there are no benchmarks when the \"mock_test\" feature is not enabled.");
-        std::process::exit(1);
+pub fn bench_main(c: &mut Criterion) {
+    cfg_if::cfg_if! {
+        if #[cfg(feature = "mock_test")] {
+            // If the "mock_test" feature is enabled, we only run mock tests.
+            mock_bench::bench(c);
+        } else if #[cfg(feature = "test_private")] {
+            regular_bench::bench(c);
+        } else {
+            eprintln!("ERROR: Benchmarks in mmtk_core require the test_private feature (implied by mock_test) to run.");
+            eprintln!("       Rerun with `MMTK_BENCH=\"bench_name\" cargo bench --features mock_test` to run mock-test benchmarks.");
+            eprintln!("       Rerun with `cargo bench --features test_private -- bench_name` to run other benchmarks.");
+            std::process::exit(1);
+        }
     }
 }
diff --git a/benches/mock_bench/mod.rs b/benches/mock_bench/mod.rs
index f4ca1c4428..95466bfdc7 100644
--- a/benches/mock_bench/mod.rs
+++ b/benches/mock_bench/mod.rs
@@ -1,3 +1,34 @@
+use criterion::Criterion;
+
 pub mod alloc;
 pub mod internal_pointer;
 pub mod sft;
+
+// As we can only initialize one MMTk instance, we have to run each benchmark in a separate process.
+// So we only register one benchmark to criterion ('bench_main'), and based on the env var MMTK_BENCH,
+// we pick the right benchmark to run.
+
+// The benchmark can be executed with the following command. The feature `mock_test` is required, as the tests use MockVM.
+// MMTK_BENCH=alloc cargo bench --features mock_test
+// MMTK_BENCH=sft cargo bench --features mock_test
+
+// [Yi] I am not sure if these benchmarks are helpful any more after the MockVM refactoring. MockVM is really slow, as it
+// is accessed with a lock, and it dispatches every call to function pointers in a struct. These tests may use MockVM,
+// so they become slower as well. And the slowdown
+// from MockVM may hide the actual performance difference when we change the functions that are benchmarked.
+// We may want to improve the MockVM implementation so we can skip dispatching for benchmarking, or introduce another MockVM
+// implementation for benchmarking.
+// However, I will just keep these benchmarks here. If we find it not useful, and we do not plan to improve MockVM, we can delete
+// them.
+
+pub fn bench(c: &mut Criterion) {
+    match std::env::var("MMTK_BENCH") {
+        Ok(bench) => match bench.as_str() {
+            "alloc" => alloc::bench(c),
+            "internal_pointer" => internal_pointer::bench(c),
+            "sft" => sft::bench(c),
+            _ => panic!("Unknown benchmark {:?}", bench),
+        },
+        Err(_) => panic!("Need to name a benchmark by the env var MMTK_BENCH"),
+    }
+}
diff --git a/benches/regular_bench/bulk_meta/bzero_bset.rs b/benches/regular_bench/bulk_meta/bzero_bset.rs
new file mode 100644
index 0000000000..fc01e58b4c
--- /dev/null
+++ b/benches/regular_bench/bulk_meta/bzero_bset.rs
@@ -0,0 +1,62 @@
+//! Benchmarks for bulk zeroing and setting.
+
+use std::os::raw::c_void;
+
+use criterion::Criterion;
+use mmtk::util::{constants::LOG_BITS_IN_WORD, test_private, Address};
+
+fn allocate_aligned(size: usize) -> Address {
+    let ptr = unsafe {
+        std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align(size, size).unwrap())
+    };
+    Address::from_mut_ptr(ptr)
+}
+
+const LINE_BYTES: usize = 256usize; // Match an Immix line size.
+const BLOCK_BYTES: usize = 32768usize; // Match an Immix block size.
+
+// Assume one-bit-per-word metadata (matching VO bits).
+const LINE_META_BYTES: usize = LINE_BYTES >> LOG_BITS_IN_WORD;
+const BLOCK_META_BYTES: usize = BLOCK_BYTES >> LOG_BITS_IN_WORD;
+
+pub fn bench(c: &mut Criterion) {
+    c.bench_function("bzero_bset_line", |b| {
+        let start = allocate_aligned(LINE_META_BYTES);
+        let end = start + LINE_META_BYTES;
+
+        b.iter(|| {
+            test_private::set_meta_bits(start, 0, end, 0);
+            test_private::zero_meta_bits(start, 0, end, 0);
+        })
+    });
+
+    c.bench_function("bzero_bset_line_memset", |b| {
+        let start = allocate_aligned(LINE_META_BYTES);
+        let end = start + LINE_META_BYTES;
+
+        b.iter(|| unsafe {
+            libc::memset(start.as_mut_ref() as *mut c_void, 0xff, end - start);
+            libc::memset(start.as_mut_ref() as *mut c_void, 0x00, end - start);
+        })
+    });
+
+    c.bench_function("bzero_bset_block", |b| {
+        let start = allocate_aligned(BLOCK_META_BYTES);
+        let end = start + BLOCK_META_BYTES;
+
+        b.iter(|| {
+            test_private::set_meta_bits(start, 0, end, 0);
+            test_private::zero_meta_bits(start, 0, end, 0);
+        })
+    });
+
+    c.bench_function("bzero_bset_block_memset", |b| {
+        let start = allocate_aligned(BLOCK_META_BYTES);
+        let end = start + BLOCK_META_BYTES;
+
+        b.iter(|| unsafe {
+            libc::memset(start.as_mut_ref() as *mut c_void, 0xff, end - start);
+            libc::memset(start.as_mut_ref() as *mut c_void, 0x00, end - start);
+        })
+    });
+}
diff --git a/benches/regular_bench/bulk_meta/mod.rs b/benches/regular_bench/bulk_meta/mod.rs
new file mode 100644
index 0000000000..488258cd96
--- /dev/null
+++ b/benches/regular_bench/bulk_meta/mod.rs
@@ -0,0 +1,7 @@
+pub mod bzero_bset;
+
+pub use criterion::Criterion;
+
+pub fn bench(c: &mut Criterion) {
+    bzero_bset::bench(c);
+}
diff --git a/benches/regular_bench/mod.rs b/benches/regular_bench/mod.rs
new file mode 100644
index 0000000000..63b1f3a1a5
--- /dev/null
+++ b/benches/regular_bench/mod.rs
@@ -0,0 +1,7 @@
+pub use criterion::Criterion;
+
+mod bulk_meta;
+
+pub fn bench(c: &mut Criterion) {
+    bulk_meta::bench(c);
+}
diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs
index 39df588b41..23764525a7 100644
--- a/src/util/metadata/side_metadata/global.rs
+++ b/src/util/metadata/side_metadata/global.rs
@@ -8,6 +8,7 @@ use crate::util::metadata::metadata_val_traits::*;
 use
crate::util::metadata::vo_bit::VO_BIT_SIDE_METADATA_SPEC; use crate::util::Address; use num_traits::FromPrimitive; +use ranges::BitByteRange; use std::fmt; use std::io::Result; use std::sync::atomic::{AtomicU8, Ordering}; @@ -154,188 +155,77 @@ impl SideMetadataSpec { MMAPPER.is_mapped_address(meta_addr) } - /// This method is used for iterating side metadata for a data address range. As we cannot guarantee - /// that the data address range can be mapped to whole metadata bytes, we have to deal with cases that - /// we need to mask and zero certain bits in a metadata byte. The end address and the end bit are exclusive. - /// The end bit for update_bits could be 8, so overflowing needs to be taken care of. - /// - /// Returns true if we iterate through every bits in the range. Return false if we abort iteration early. - /// - /// Arguments: - /// * `forwards`: If true, we iterate forwards (from start/low address to end/high address). Otherwise, - /// we iterate backwards (from end/high address to start/low address). - /// * `visit_bytes`/`visit_bits`: The closures returns whether the itertion is early terminated. - pub(super) fn iterate_meta_bits( + /// This method is used for bulk zeroing side metadata for a data address range. + pub(crate) fn zero_meta_bits( meta_start_addr: Address, meta_start_bit: u8, meta_end_addr: Address, meta_end_bit: u8, - forwards: bool, - visit_bytes: &impl Fn(Address, Address) -> bool, - visit_bits: &impl Fn(Address, u8, u8) -> bool, - ) -> bool { - trace!( - "iterate_meta_bits: {} {}, {} {}", - meta_start_addr, - meta_start_bit, - meta_end_addr, - meta_end_bit - ); - // Start/end is the same, we don't need to do anything. - if meta_start_addr == meta_end_addr && meta_start_bit == meta_end_bit { - return false; - } - - // zeroing bytes - if meta_start_bit == 0 && meta_end_bit == 0 { - return visit_bytes(meta_start_addr, meta_end_addr); - } - - if meta_start_addr == meta_end_addr { - // Update bits in the same byte between start and end bit - visit_bits(meta_start_addr, meta_start_bit, meta_end_bit) - } else if meta_start_addr + 1usize == meta_end_addr && meta_end_bit == 0 { - // Update bits in the same byte after the start bit (between start bit and 8) - visit_bits(meta_start_addr, meta_start_bit, 8) - } else { - // Update each segments. - // Clippy wants to move this if block up as a else-if block. But I think this is logically more clear. So disable the clippy warning. 
- #[allow(clippy::collapsible_else_if)] - if forwards { - // update bits in the first byte - if Self::iterate_meta_bits( - meta_start_addr, - meta_start_bit, - meta_start_addr + 1usize, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bytes in the middle - if Self::iterate_meta_bits( - meta_start_addr + 1usize, - 0, - meta_end_addr, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bits in the last byte - if Self::iterate_meta_bits( - meta_end_addr, - 0, - meta_end_addr, - meta_end_bit, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - false - } else { - // update bits in the last byte - if Self::iterate_meta_bits( - meta_end_addr, - 0, - meta_end_addr, - meta_end_bit, - forwards, - visit_bytes, - visit_bits, - ) { - return true; - } - // update bytes in the middle - if Self::iterate_meta_bits( - meta_start_addr + 1usize, - 0, - meta_end_addr, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; + ) { + let mut visitor = |range| { + match range { + BitByteRange::Bytes { start, end } => { + memory::zero(start, end - start); + false } - // update bits in the first byte - if Self::iterate_meta_bits( - meta_start_addr, - meta_start_bit, - meta_start_addr + 1usize, - 0, - forwards, - visit_bytes, - visit_bits, - ) { - return true; + BitByteRange::BitsInByte { + addr, + bit_start, + bit_end, + } => { + // we are zeroing selected bit in one byte + // Get a mask that the bits we need to zero are set to zero, and the other bits are 1. + let mask: u8 = + u8::MAX.checked_shl(bit_end as u32).unwrap_or(0) | !(u8::MAX << bit_start); + unsafe { addr.as_ref::() }.fetch_and(mask, Ordering::SeqCst); + false } - false } - } - } - - /// This method is used for bulk zeroing side metadata for a data address range. - pub(super) fn zero_meta_bits( - meta_start_addr: Address, - meta_start_bit: u8, - meta_end_addr: Address, - meta_end_bit: u8, - ) { - let zero_bytes = |start: Address, end: Address| -> bool { - memory::zero(start, end - start); - false }; - let zero_bits = |addr: Address, start_bit: u8, end_bit: u8| -> bool { - // we are zeroing selected bits in one byte - let mask: u8 = - u8::MAX.checked_shl(end_bit.into()).unwrap_or(0) | !(u8::MAX << start_bit); // Get a mask that the bits we need to zero are set to zero, and the other bits are 1. - unsafe { addr.as_ref::() }.fetch_and(mask, Ordering::SeqCst); - false - }; - Self::iterate_meta_bits( + ranges::break_bit_range( meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit, true, - &zero_bytes, - &zero_bits, + &mut visitor, ); } /// This method is used for bulk setting side metadata for a data address range. - pub(super) fn set_meta_bits( + pub(crate) fn set_meta_bits( meta_start_addr: Address, meta_start_bit: u8, meta_end_addr: Address, meta_end_bit: u8, ) { - let set_bytes = |start: Address, end: Address| -> bool { - memory::set(start, 0xff, end - start); - false - }; - let set_bits = |addr: Address, start_bit: u8, end_bit: u8| -> bool { - // we are setting selected bits in one byte - let mask: u8 = - !(u8::MAX.checked_shl(end_bit.into()).unwrap_or(0)) & (u8::MAX << start_bit); // Get a mask that the bits we need to set are 1, and the other bits are 0. 
- unsafe { addr.as_ref::() }.fetch_or(mask, Ordering::SeqCst); - false + let mut visitor = |range| { + match range { + BitByteRange::Bytes { start, end } => { + memory::set(start, 0xff, end - start); + false + } + BitByteRange::BitsInByte { + addr, + bit_start, + bit_end, + } => { + // we are setting selected bits in one byte + // Get a mask that the bits we need to set are 1, and the other bits are 0. + let mask: u8 = !(u8::MAX.checked_shl(bit_end as u32).unwrap_or(0)) + & (u8::MAX << bit_start); + unsafe { addr.as_ref::() }.fetch_or(mask, Ordering::SeqCst); + false + } + } }; - Self::iterate_meta_bits( + ranges::break_bit_range( meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit, true, - &set_bytes, - &set_bits, + &mut visitor, ); } @@ -498,37 +388,44 @@ impl SideMetadataSpec { debug_assert_eq!(dst_meta_start_bit, src_meta_start_bit); - let copy_bytes = |dst_start: Address, dst_end: Address| -> bool { - unsafe { - let byte_offset = dst_start - dst_meta_start_addr; - let src_start = src_meta_start_addr + byte_offset; - let size = dst_end - dst_start; - std::ptr::copy::(src_start.to_ptr(), dst_start.to_mut_ptr(), size); - false + let mut visitor = |range| { + match range { + BitByteRange::Bytes { + start: dst_start, + end: dst_end, + } => unsafe { + let byte_offset = dst_start - dst_meta_start_addr; + let src_start = src_meta_start_addr + byte_offset; + let size = dst_end - dst_start; + std::ptr::copy::(src_start.to_ptr(), dst_start.to_mut_ptr(), size); + false + }, + BitByteRange::BitsInByte { + addr: dst, + bit_start, + bit_end, + } => { + let byte_offset = dst - dst_meta_start_addr; + let src = src_meta_start_addr + byte_offset; + // we are setting selected bits in one byte + let mask: u8 = !(u8::MAX.checked_shl(bit_end as u32).unwrap_or(0)) + & (u8::MAX << bit_start); // Get a mask that the bits we need to set are 1, and the other bits are 0. + let old_src = unsafe { src.as_ref::() }.load(Ordering::Relaxed); + let old_dst = unsafe { dst.as_ref::() }.load(Ordering::Relaxed); + let new = (old_src & mask) | (old_dst & !mask); + unsafe { dst.as_ref::() }.store(new, Ordering::Relaxed); + false + } } }; - let copy_bits = |dst: Address, start_bit: u8, end_bit: u8| -> bool { - let byte_offset = dst - dst_meta_start_addr; - let src = src_meta_start_addr + byte_offset; - // we are setting selected bits in one byte - let mask: u8 = - !(u8::MAX.checked_shl(end_bit.into()).unwrap_or(0)) & (u8::MAX << start_bit); // Get a mask that the bits we need to set are 1, and the other bits are 0. - let old_src = unsafe { src.as_ref::() }.load(Ordering::Relaxed); - let old_dst = unsafe { dst.as_ref::() }.load(Ordering::Relaxed); - let new = (old_src & mask) | (old_dst & !mask); - unsafe { dst.as_ref::() }.store(new, Ordering::Relaxed); - false - }; - - Self::iterate_meta_bits( + ranges::break_bit_range( dst_meta_start_addr, dst_meta_start_bit, dst_meta_end_addr, dst_meta_end_bit, true, - ©_bytes, - ©_bits, + &mut visitor, ); } @@ -1169,47 +1066,54 @@ impl SideMetadataSpec { // The result will be set by one of the following closures. // Use Cell so it doesn't need to be mutably borrowed by the two closures which Rust will complain. - let res = std::cell::Cell::new(None); - - let check_bytes_backwards = |start: Address, end: Address| -> bool { - match helpers::find_last_non_zero_bit_in_metadata_bytes(start, end) { - helpers::FindMetaBitResult::Found { addr, bit } => { - res.set(Some(contiguous_meta_address_to_address(self, addr, bit))); - // Return true to abort the search. We found the bit. 
- true + let mut res = None; + + let mut visitor = |range: BitByteRange| { + match range { + BitByteRange::Bytes { start, end } => { + match helpers::find_last_non_zero_bit_in_metadata_bytes(start, end) { + helpers::FindMetaBitResult::Found { addr, bit } => { + res = Some(contiguous_meta_address_to_address(self, addr, bit)); + // Return true to abort the search. We found the bit. + true + } + // If we see unmapped metadata, we don't need to search any more. + helpers::FindMetaBitResult::UnmappedMetadata => true, + // Return false to continue searching. + helpers::FindMetaBitResult::NotFound => false, + } } - // If we see unmapped metadata, we don't need to search any more. - helpers::FindMetaBitResult::UnmappedMetadata => true, - // Return false to continue searching. - helpers::FindMetaBitResult::NotFound => false, - } - }; - let check_bits_backwards = |addr: Address, start_bit: u8, end_bit: u8| -> bool { - match helpers::find_last_non_zero_bit_in_metadata_bits(addr, start_bit, end_bit) { - helpers::FindMetaBitResult::Found { addr, bit } => { - res.set(Some(contiguous_meta_address_to_address(self, addr, bit))); - // Return true to abort the search. We found the bit. - true + BitByteRange::BitsInByte { + addr, + bit_start, + bit_end, + } => { + match helpers::find_last_non_zero_bit_in_metadata_bits(addr, bit_start, bit_end) + { + helpers::FindMetaBitResult::Found { addr, bit } => { + res = Some(contiguous_meta_address_to_address(self, addr, bit)); + // Return true to abort the search. We found the bit. + true + } + // If we see unmapped metadata, we don't need to search any more. + helpers::FindMetaBitResult::UnmappedMetadata => true, + // Return false to continue searching. + helpers::FindMetaBitResult::NotFound => false, + } } - // If we see unmapped metadata, we don't need to search any more. - helpers::FindMetaBitResult::UnmappedMetadata => true, - // Return false to continue searching. - helpers::FindMetaBitResult::NotFound => false, } }; - Self::iterate_meta_bits( + ranges::break_bit_range( start_meta_addr, start_meta_shift, end_meta_addr, end_meta_shift, false, - &check_bytes_backwards, - &check_bits_backwards, + &mut visitor, ); - res.get() - .map(|addr| addr.align_down(1 << self.log_bytes_in_region)) + res.map(|addr| addr.align_down(1 << self.log_bytes_in_region)) } } diff --git a/src/util/metadata/side_metadata/mod.rs b/src/util/metadata/side_metadata/mod.rs index 406f91167d..612c69223f 100644 --- a/src/util/metadata/side_metadata/mod.rs +++ b/src/util/metadata/side_metadata/mod.rs @@ -7,6 +7,7 @@ mod helpers; mod helpers_32; mod global; +pub(crate) mod ranges; mod sanity; mod side_metadata_tests; pub(crate) mod spec_defs; diff --git a/src/util/metadata/side_metadata/ranges.rs b/src/util/metadata/side_metadata/ranges.rs new file mode 100644 index 0000000000..48d12a7068 --- /dev/null +++ b/src/util/metadata/side_metadata/ranges.rs @@ -0,0 +1,369 @@ +//! Data types for visiting metadata ranges at different granularities. +//! +//! Currently, the `break_bit_range` function can break a bit range into sub-ranges of whole bytes +//! and in-byte bits. +//! +//! TODO: +//! +//! - Add a function to break a byte range into sub-ranges of whole words and in-word bytes. +//! - And use it for searching side metadata for non-zero bits. +//! - Add a function to break a byte range at chunk boundaries. +//! - And use it for visiting discontiguous side metadata in bulk. + +use crate::util::Address; + +/// The type for bit offset in a byte. 
+pub type BitOffset = u8;
+
+/// A range of bytes or bits within a byte. It is the unit of visiting a contiguous bit range of a
+/// side metadata.
+///
+/// In general, a bit range of a bitmap starts with multiple bits in the byte, followed by many
+/// whole bytes, and ends with multiple bits in the last byte.
+///
+/// A range is never empty.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BitByteRange {
+    /// A range of whole bytes.
+    Bytes {
+        /// The starting address (inclusive) of the bytes.
+        start: Address,
+        /// The ending address (exclusive) of the bytes.
+        end: Address,
+    },
+    /// A range of bits within a byte.
+    BitsInByte {
+        /// The address of the byte.
+        addr: Address,
+        /// The starting bit index (inclusive), starting with zero from the low-order bit.
+        bit_start: BitOffset,
+        /// The ending bit index (exclusive), starting with zero from the low-order bit. This may
+        /// be 8 which means the range includes the highest bit. Be careful when shifting a `u8`
+        /// value because shifting an `u8` by 8 is considered an overflow in Rust.
+        bit_end: BitOffset,
+    },
+}
+
+/// Break a bit range into sub-ranges of whole bytes and in-byte bits.
+///
+/// This method is primarily used for iterating side metadata for a data address range. As we cannot
+/// guarantee that the data address range can be mapped to whole metadata bytes, we have to deal
+/// with visiting only a bit range in a metadata byte.
+///
+/// The bit range starts at the bit at index `start_bit` in the byte at address `start_addr`, and
+/// ends at (but does not include) the bit at index `end_bit` in the byte at address `end_addr`.
+///
+/// Arguments:
+/// * `forwards`: If true, we iterate forwards (from start/low address to end/high address).
+///   Otherwise, we iterate backwards (from end/high address to start/low address).
+/// * `visitor`: The callback that visits ranges of bits or bytes. It returns whether the
+///   iteration is early terminated.
+///
+/// Returns true if we iterate through every bit in the range. Returns false if we abort iteration
+/// early.
+pub fn break_bit_range<V>(
+    start_addr: Address,
+    start_bit: BitOffset,
+    end_addr: Address,
+    end_bit: BitOffset,
+    forwards: bool,
+    visitor: &mut V,
+) -> bool
+where
+    V: FnMut(BitByteRange) -> bool,
+{
+    // The start and the end are the same, we don't need to do anything.
+    if start_addr == end_addr && start_bit == end_bit {
+        return false;
+    }
+
+    // If the range is already byte-aligned, visit the entire range as whole bytes.
+    if start_bit == 0 && end_bit == 0 {
+        return visitor(BitByteRange::Bytes {
+            start: start_addr,
+            end: end_addr,
+        });
+    }
+
+    // If the start and the end are within the same byte,
+    // visit the bit range within the byte.
+    if start_addr == end_addr {
+        return visitor(BitByteRange::BitsInByte {
+            addr: start_addr,
+            bit_start: start_bit,
+            bit_end: end_bit,
+        });
+    }
+
+    // If the end is the 0th bit of the next byte of the start,
+    // visit the bit range from the start to the end (bit 8) of the same byte.
+    if start_addr + 1usize == end_addr && end_bit == 0 {
+        return visitor(BitByteRange::BitsInByte {
+            addr: start_addr,
+            bit_start: start_bit,
+            bit_end: 8_u8,
+        });
+    }
+
+    // Otherwise, the range spans over multiple bytes, and is bit-unaligned at either the start or
+    // the end. Try to break it into (at most) three sub-ranges.
+ + let start_aligned = start_bit == 0; + let end_aligned = end_bit == 0; + + // We cannot let multiple closures capture `visitor` mutably at the same time, so we pass the + // visitor in as `v` every time. + + // visit bits within the first byte + let visit_start = |v: &mut V| { + if !start_aligned { + v(BitByteRange::BitsInByte { + addr: start_addr, + bit_start: start_bit, + bit_end: 8_u8, + }) + } else { + // The start is already aligned. No sub-byte range at the start. + false + } + }; + + // visit whole bytes in the middle + let visit_middle = |v: &mut V| { + let start = if start_aligned { + start_addr + } else { + // If the start is not aligned, the whole-byte range starts after the first byte. + start_addr + 1usize + }; + let end = end_addr; + if start < end { + v(BitByteRange::Bytes { start, end }) + } else { + // There are no whole bytes in the middle. + false + } + }; + + // visit bits within the last byte + let visit_end = |v: &mut V| { + if !end_aligned { + v(BitByteRange::BitsInByte { + addr: end_addr, + bit_start: 0_u8, + bit_end: end_bit, + }) + } else { + // The end is aligned. No sub-byte range at the end. + false + } + }; + + // Visit the three segments forwards or backwards. + if forwards { + visit_start(visitor) || visit_middle(visitor) || visit_end(visitor) + } else { + visit_end(visitor) || visit_middle(visitor) || visit_start(visitor) + } +} + +#[cfg(test)] +mod tests { + use crate::util::constants::BITS_IN_BYTE; + + use super::*; + + fn mk_addr(addr: usize) -> Address { + unsafe { Address::from_usize(addr) } + } + + fn break_bit_range_wrapped( + start_addr: Address, + start_bit: usize, + end_addr: Address, + end_bit: usize, + ) -> Vec { + let mut vec = vec![]; + break_bit_range( + start_addr, + start_bit as u8, + end_addr, + end_bit as u8, + true, + &mut |range| { + vec.push(range); + false + }, + ); + vec + } + + #[test] + fn test_empty_range() { + let base = mk_addr(0x1000); + for bit in 0..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit, base, bit); + assert!( + result.is_empty(), + "Not empty. bit: {bit}, result: {result:?}" + ); + } + } + + #[test] + fn test_subbyte_range() { + let base = mk_addr(0x1000); + for bit0 in 0..BITS_IN_BYTE { + for bit1 in (bit0 + 1)..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit0, base, bit1); + assert_eq!( + result, + vec![BitByteRange::BitsInByte { + addr: base, + bit_start: bit0 as u8, + bit_end: bit1 as u8 + }], + "Not equal. bit0: {bit0}, bit1: {bit1}", + ); + } + } + } + + #[test] + fn test_end_byte_range() { + let base = mk_addr(0x1000); + for bit0 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit0, base + 1usize, 0); + assert_eq!( + result, + vec![BitByteRange::BitsInByte { + addr: base, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8 + }], + "Not equal. bit0: {bit0}", + ); + } + } + + #[test] + fn test_adjacent_grain_range() { + let base = mk_addr(0x1000); + for bit0 in 1..BITS_IN_BYTE { + for bit1 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base, bit0, base + 1usize, bit1); + assert_eq!( + result, + vec![ + BitByteRange::BitsInByte { + addr: base, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8, + }, + BitByteRange::BitsInByte { + addr: base + 1usize, + bit_start: 0, + bit_end: bit1 as u8, + }, + ], + "Not equal. 
bit0: {bit0}, bit1: {bit1}", + ); + } + } + } + + #[test] + fn test_left_and_whole_range() { + let base = mk_addr(0x1000); + for bit0 in 1..BITS_IN_BYTE { + for byte1 in 2usize..8 { + let result = break_bit_range_wrapped(base, bit0, base + byte1, 0); + assert_eq!( + result, + vec![ + BitByteRange::BitsInByte { + addr: base, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8, + }, + BitByteRange::Bytes { + start: base + 1usize, + end: base + byte1, + }, + ], + "Not equal. bit0: {bit0}, byte1: {byte1}", + ); + } + } + } + + #[test] + fn test_whole_and_right_range() { + let base = mk_addr(0x1000); + for byte0 in 1..8 { + for bit1 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base - byte0, 0, base, bit1); + assert_eq!( + result, + vec![ + BitByteRange::Bytes { + start: base - byte0, + end: base, + }, + BitByteRange::BitsInByte { + addr: base, + bit_start: 0, + bit_end: bit1 as u8, + }, + ], + "Not equal. byte0: {byte0}, bit1: {bit1}", + ); + } + } + } + + #[test] + fn test_whole_range() { + let base = mk_addr(0x1000); + let result = break_bit_range_wrapped(base, 0, base + 42usize, 0); + assert_eq!( + result, + vec![BitByteRange::Bytes { + start: base, + end: base + 42usize, + },], + ); + } + + #[test] + fn test_left_whole_right_range() { + let base0 = mk_addr(0x1000); + let base1 = mk_addr(0x2000); + + for bit0 in 1..BITS_IN_BYTE { + for bit1 in 1..BITS_IN_BYTE { + let result = break_bit_range_wrapped(base0 - 1usize, bit0, base1, bit1); + assert_eq!( + result, + vec![ + BitByteRange::BitsInByte { + addr: base0 - 1usize, + bit_start: bit0 as u8, + bit_end: BITS_IN_BYTE as u8, + }, + BitByteRange::Bytes { + start: base0, + end: base1, + }, + BitByteRange::BitsInByte { + addr: base1, + bit_start: 0, + bit_end: bit1 as u8, + }, + ], + "Not equal. bit0: {bit0}, bit1: {bit1}", + ); + } + } + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 54ccc3ba8e..bac77a0b88 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -36,6 +36,8 @@ pub mod metadata; pub mod opaque_pointer; /// MMTk command line options. pub mod options; +#[cfg(feature = "test_private")] +pub mod test_private; /// Test utilities. We need this module for `MockVM` in criterion benches, which does not include code with `cfg(test)`. #[cfg(any(test, feature = "mock_test"))] pub mod test_util; diff --git a/src/util/test_private/mod.rs b/src/util/test_private/mod.rs new file mode 100644 index 0000000000..783dbeeac8 --- /dev/null +++ b/src/util/test_private/mod.rs @@ -0,0 +1,35 @@ +//! This module exposes private items in mmtk-core for testing and benchmarking. They must not be +//! used in production. +//! +//! # Notes on inlining +//! +//! In mmtk-core, we refrain from inserting inlining hints manually. But we use `#[inline(always)]` +//! in this module explicitly because the functions here are simple wrappers of private functions, +//! and the compiler usually fails to make the right decision given that those functions are not +//! used often, and we don't compile the benchmarks using feedback-directed optimizations. + +use crate::util::metadata::side_metadata::SideMetadataSpec; + +use super::Address; + +/// Expose `zero_meta_bits` when running `cargo bench`. +#[inline(always)] +pub fn zero_meta_bits( + meta_start_addr: Address, + meta_start_bit: u8, + meta_end_addr: Address, + meta_end_bit: u8, +) { + SideMetadataSpec::zero_meta_bits(meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit) +} + +/// Expose `set_meta_bits` when running `cargo bench`. 
+#[inline(always)] +pub fn set_meta_bits( + meta_start_addr: Address, + meta_start_bit: u8, + meta_end_addr: Address, + meta_end_bit: u8, +) { + SideMetadataSpec::set_meta_bits(meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit) +} From 38b3fc32340bd70ba709535a78a770f9e0619e0e Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Wed, 7 Aug 2024 12:38:46 +0800 Subject: [PATCH 3/5] Parallelize and workaround CI style checks (#1184) We now do style checks in parallel to `cargo test` so that style problems will fail fast. Black-listed clippy 0.1.77 on x86_64-apple-darwin because it is known to crash randomly. Also added missing style checks for non-mock benchmarks. --- .github/scripts/ci-style.sh | 57 +++++++++++++++--------- .github/workflows/merge-check.yml | 5 ++- .github/workflows/minimal-tests-core.yml | 43 ++++++++++++++++++ benches/main.rs | 6 +-- 4 files changed, 86 insertions(+), 25 deletions(-) diff --git a/.github/scripts/ci-style.sh b/.github/scripts/ci-style.sh index c4e9f4123b..0482321b52 100755 --- a/.github/scripts/ci-style.sh +++ b/.github/scripts/ci-style.sh @@ -13,36 +13,51 @@ if [[ $CLIPPY_VERSION == "clippy 0.1.72"* ]]; then export CARGO_INCREMENTAL=0 fi +if [[ $CLIPPY_VERSION == "clippy 0.1.77"* && $CARGO_BUILD_TARGET == "x86_64-apple-darwin" ]]; then + export SKIP_CLIPPY=1 +fi + # --- Check main crate --- -# check base -cargo clippy -# check all features -for_all_features "cargo clippy" -# check release -for_all_features "cargo clippy --release" -# check for tests -for_all_features "cargo clippy --tests" - -# target-specific features -if [[ $arch == "x86_64" && $os == "linux" ]]; then - cargo clippy --features perf_counter - cargo clippy --release --features perf_counter - cargo clippy --tests --features perf_counter -fi +if [[ $SKIP_CLIPPY == 1 ]]; then + echo "Skipping clippy version $CLIPPY_VERSION on $CARGO_BUILD_TARGET" +else + # check base + cargo clippy + # check all features + for_all_features "cargo clippy" + # check release + for_all_features "cargo clippy --release" + # check for tests + for_all_features "cargo clippy --tests" -# mock tests -cargo clippy --features mock_test -cargo clippy --features mock_test --tests -cargo clippy --features mock_test --benches + # target-specific features + if [[ $arch == "x86_64" && $os == "linux" ]]; then + cargo clippy --features perf_counter + cargo clippy --release --features perf_counter + cargo clippy --tests --features perf_counter + fi + + # mock tests + cargo clippy --features mock_test + cargo clippy --features mock_test --tests + cargo clippy --features mock_test --benches + + # non-mock benchmarks + cargo clippy --features test_private --benches +fi # --- Check auxiliary crate --- style_check_auxiliary_crate() { crate_path=$1 - cargo clippy --manifest-path=$crate_path/Cargo.toml - cargo fmt --manifest-path=$crate_path/Cargo.toml -- --check + if [[ $SKIP_CLIPPY == 1 ]]; then + echo "Skipping clippy test for $crate_path" + else + cargo clippy --manifest-path=$crate_path/Cargo.toml + cargo fmt --manifest-path=$crate_path/Cargo.toml -- --check + fi } style_check_auxiliary_crate macros diff --git a/.github/workflows/merge-check.yml b/.github/workflows/merge-check.yml index 41297ccd5e..8dd53785e9 100644 --- a/.github/workflows/merge-check.yml +++ b/.github/workflows/merge-check.yml @@ -11,7 +11,7 @@ env: # Ignore some actions for the merge check: # - This action itself # - Public API check, doc broken link check: we allow them to fail. - # - Minimal tests for stable Rust: we allow them to fail. 
+  # - Minimal tests and style checks for stable Rust: we allow them to fail.
   # - Extended binding tests: it may take long to run. We don't want to wait for them.
   # Note: The action name for openjdk tests is different due to the reused workflow.
   IGNORED_ACTIONS: |
@@ -23,6 +23,9 @@ env:
       "minimal-tests-core/x86_64-unknown-linux-gnu/stable",
       "minimal-tests-core/i686-unknown-linux-gnu/stable",
       "minimal-tests-core/x86_64-apple-darwin/stable",
+      "style-check/x86_64-unknown-linux-gnu/stable",
+      "style-check/i686-unknown-linux-gnu/stable",
+      "style-check/x86_64-apple-darwin/stable",
       "extended-tests-openjdk / test",
       "extended-tests-v8",
       "extended-tests-jikesrvm",
diff --git a/.github/workflows/minimal-tests-core.yml b/.github/workflows/minimal-tests-core.yml
index f1f2f4c34b..3a72c531e7 100644
--- a/.github/workflows/minimal-tests-core.yml
+++ b/.github/workflows/minimal-tests-core.yml
@@ -79,6 +79,49 @@ jobs:
       - name: Test
         run: ./.github/scripts/ci-test.sh
 
+  style-check:
+    needs: setup-test-matrix
+    strategy:
+      fail-fast: false
+      matrix:
+        target:
+          - { os: ubuntu-22.04, triple: x86_64-unknown-linux-gnu }
+          - { os: ubuntu-22.04, triple: i686-unknown-linux-gnu }
+          - { os: macos-12, triple: x86_64-apple-darwin }
+        rust: ${{ fromJson(needs.setup-test-matrix.outputs.rust )}}
+
+    name: style-check/${{ matrix.target.triple }}/${{ matrix.rust }}
+    runs-on: ${{ matrix.target.os }}
+
+    env:
+      # This determines the default target which cargo-build, cargo-test, etc. use.
+      CARGO_BUILD_TARGET: "${{ matrix.target.triple }}"
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: |
+          # "rustup toolchain install" should always install the host toolchain,
+          # so we don't specify the triple.
+          rustup toolchain install ${{ matrix.rust }}
+          rustup override set ${{ matrix.rust }}
+
+          # Ensure we install the target support for the target we are testing for.
+          # This is especially important for i686-unknown-linux-gnu
+          # because it's different from the host.
+          rustup target add ${{ matrix.target.triple }}
+
+          rustup component add rustfmt clippy
+
+      # Show the Rust toolchain and target we are actually using
+      - run: rustup show
+      - run: cargo --version
+      - run: cargo rustc -- --print cfg
+
+      # Setup Environments
+      - name: Setup Environments
+        run: ./.github/scripts/ci-setup-${{ matrix.target.triple }}.sh
+
       # Style checks
       - name: Style checks
         run: ./.github/scripts/ci-style.sh
diff --git a/benches/main.rs b/benches/main.rs
index 651299694f..f68bc3c071 100644
--- a/benches/main.rs
+++ b/benches/main.rs
@@ -8,13 +8,13 @@ pub mod mock_bench;
 #[cfg(all(not(feature = "mock_test"), feature = "test_private"))]
 pub mod regular_bench;
 
-pub fn bench_main(c: &mut Criterion) {
+pub fn bench_main(_c: &mut Criterion) {
     cfg_if::cfg_if! {
         if #[cfg(feature = "mock_test")] {
             // If the "mock_test" feature is enabled, we only run mock tests.
-            mock_bench::bench(c);
+            mock_bench::bench(_c);
         } else if #[cfg(feature = "test_private")] {
-            regular_bench::bench(c);
+            regular_bench::bench(_c);
         } else {
             eprintln!("ERROR: Benchmarks in mmtk_core require the test_private feature (implied by mock_test) to run.");
             eprintln!("       Rerun with `MMTK_BENCH=\"bench_name\" cargo bench --features mock_test` to run mock-test benchmarks.");

From 25051ea59a7c64f332e13f629c2f2d8d83e6e7f7 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Thu, 8 Aug 2024 18:29:11 +0800
Subject: [PATCH 4/5] Heap traversal (#1174)

This PR adds a heap traversal API `MMTK::enumerate_objects` which enumerates
all objects in the MMTk heap at the time of calling.
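As an illustration, a VM binding might use the new API like this. This is a
sketch only: `MyVM` stands for the binding's `VMBinding` implementation,
`count_live_objects` is a hypothetical helper, and the binding must stop all
mutators (see the API documentation below) before calling it.

#[cfg(feature = "vo_bit")]
fn count_live_objects(mmtk: &mmtk::MMTK<MyVM>) -> usize {
    let mut count = 0;
    // The closure is called once for each object whose VO bit is set,
    // i.e. each object that has been allocated and not yet reclaimed.
    mmtk.enumerate_objects(|_object| {
        count += 1;
    });
    count
}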
We added `SideMetadataSpec::scan_non_zero_values` to support enumerating
objects by scanning the VO bit metadata. It can also be used for scanning
other side metadata when needed. Note, however, that it is inefficient (but
should work correctly) if the metadata is not contiguous or the metadata has
more than one bit per region. If there is any need for scanning such
metadata, we need further refactoring.
---
 Cargo.toml                                    |   1 +
 benches/regular_bench/bulk_meta/bscan.rs      |  87 ++++++++++++++
 benches/regular_bench/bulk_meta/mod.rs        |   2 +
 src/mmtk.rs                                   |  52 +++++++++
 src/policy/copyspace.rs                       |   7 +-
 src/policy/immix/block.rs                     |   7 ++
 src/policy/immix/immixspace.rs                |   7 +-
 src/policy/immortalspace.rs                   |   5 +
 src/policy/largeobjectspace.rs                |   5 +
 src/policy/lockfreeimmortalspace.rs           |   5 +
 src/policy/markcompactspace.rs                |   5 +
 src/policy/marksweepspace/malloc_ms/global.rs |   5 +
 src/policy/marksweepspace/native_ms/block.rs  |   7 ++
 src/policy/marksweepspace/native_ms/global.rs |   5 +
 src/policy/space.rs                           |  23 ++++
 src/policy/vmspace.rs                         |   8 ++
 src/util/metadata/side_metadata/global.rs     | 103 +++++++++++++++++
 src/util/metadata/side_metadata/helpers.rs    |  54 +++++++++
 src/util/metadata/side_metadata/mod.rs        |   2 +-
 src/util/metadata/vo_bit/mod.rs               |   2 +-
 src/util/mod.rs                               |   1 +
 src/util/object_enum.rs                       | 107 ++++++++++++++++++
 src/util/test_private/mod.rs                  |   1 +
 src/util/treadmill.rs                         |  13 +++
 .../mock_tests/mock_test_heap_traversal.rs    |  87 ++++++++++++++
 src/vm/tests/mock_tests/mod.rs                |   2 +
 26 files changed, 599 insertions(+), 4 deletions(-)
 create mode 100644 benches/regular_bench/bulk_meta/bscan.rs
 create mode 100644 src/util/object_enum.rs
 create mode 100644 src/vm/tests/mock_tests/mock_test_heap_traversal.rs

diff --git a/Cargo.toml b/Cargo.toml
index 45f528520c..3c2fc8f179 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -55,6 +55,7 @@ sysinfo = "0.30.9"
 [dev-dependencies]
 paste = "1.0.8"
 rand = "0.8.5"
+rand_chacha = "0.3.1"
 criterion = "0.4"
 
 [build-dependencies]
diff --git a/benches/regular_bench/bulk_meta/bscan.rs b/benches/regular_bench/bulk_meta/bscan.rs
new file mode 100644
index 0000000000..b8d57187ce
--- /dev/null
+++ b/benches/regular_bench/bulk_meta/bscan.rs
@@ -0,0 +1,87 @@
+//! Benchmarks for scanning side metadata for non-zero bits.
+
+use criterion::Criterion;
+use mmtk::util::{
+    constants::LOG_BITS_IN_WORD, test_private::scan_non_zero_bits_in_metadata_bytes, Address,
+};
+use rand::{seq::IteratorRandom, SeedableRng};
+use rand_chacha::ChaCha8Rng;
+
+fn allocate_aligned(size: usize) -> Address {
+    let ptr = unsafe {
+        std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align(size, size).unwrap())
+    };
+    Address::from_mut_ptr(ptr)
+}
+
+const BLOCK_BYTES: usize = 32768usize; // Match an Immix block size.
+
+// Assume one-bit-per-word metadata (matching VO bits).
+const BLOCK_META_BYTES: usize = BLOCK_BYTES >> LOG_BITS_IN_WORD;
+
+/// Set this many distinct bits in the bitmap.
+const NUM_OBJECTS: usize = 200;
+
+/// Get a deterministic seeded Rng.
+fn get_rng() -> ChaCha8Rng {
+    // Create an Rng from a seed and an explicit Rng type.
+    // Not secure at all, but completely deterministic and reproducible.
+    // The following seed is read from /dev/random
+    const SEED64: u64 = 0x4050cb1b5ab26c70;
+    ChaCha8Rng::seed_from_u64(SEED64)
+}
+
+/// A bitmap, with known location of each bit for assertion.
+struct PreparedBitmap {
+    start: Address,
+    end: Address,
+    set_bits: Vec<(Address, u8)>,
+}
+
+/// Make a bitmap of the desired size and set bits.
+fn make_standard_bitmap() -> PreparedBitmap {
+    let start = allocate_aligned(BLOCK_META_BYTES);
+    let end = start + BLOCK_META_BYTES;
+    let mut rng = get_rng();
+
+    let mut set_bits = (0..(BLOCK_BYTES >> LOG_BITS_IN_WORD))
+        .choose_multiple(&mut rng, NUM_OBJECTS)
+        .iter()
+        .map(|total_bit_offset| {
+            let word_offset = total_bit_offset >> LOG_BITS_IN_WORD;
+            let bit_offset = total_bit_offset & ((1 << LOG_BITS_IN_WORD) - 1);
+            (start + (word_offset << LOG_BITS_IN_WORD), bit_offset as u8)
+        })
+        .collect::<Vec<_>>();
+
+    set_bits.sort();
+
+    for (addr, bit) in set_bits.iter() {
+        let word = unsafe { addr.load::<usize>() };
+        let new_word = word | (1 << bit);
+        unsafe { addr.store::<usize>(new_word) };
+    }
+
+    PreparedBitmap {
+        start,
+        end,
+        set_bits,
+    }
+}
+
+pub fn bench(c: &mut Criterion) {
+    c.bench_function("bscan_block", |b| {
+        let bitmap = make_standard_bitmap();
+        let mut holder: Vec<(Address, u8)> = Vec::with_capacity(NUM_OBJECTS);
+
+        b.iter(|| {
+            holder.clear();
+            scan_non_zero_bits_in_metadata_bytes(bitmap.start, bitmap.end, &mut |addr, shift| {
+                holder.push((addr, shift));
+            });
+        });
+
+        assert_eq!(holder.len(), NUM_OBJECTS);
+        assert_eq!(holder, bitmap.set_bits);
+    });
+}
diff --git a/benches/regular_bench/bulk_meta/mod.rs b/benches/regular_bench/bulk_meta/mod.rs
index 488258cd96..2abc48b849 100644
--- a/benches/regular_bench/bulk_meta/mod.rs
+++ b/benches/regular_bench/bulk_meta/mod.rs
@@ -1,7 +1,9 @@
+pub mod bscan;
 pub mod bzero_bset;
 
 pub use criterion::Criterion;
 
 pub fn bench(c: &mut Criterion) {
+    bscan::bench(c);
     bzero_bset::bench(c);
 }
diff --git a/src/mmtk.rs b/src/mmtk.rs
index ed69f31e70..f5f7622130 100644
--- a/src/mmtk.rs
+++ b/src/mmtk.rs
@@ -6,6 +6,8 @@ use crate::plan::Plan;
 use crate::policy::sft_map::{create_sft_map, SFTMap};
 use crate::scheduler::GCWorkScheduler;
 
+#[cfg(feature = "vo_bit")]
+use crate::util::address::ObjectReference;
 #[cfg(feature = "analysis")]
 use crate::util::analysis::AnalysisManager;
 use crate::util::finalizable_processor::FinalizableProcessor;
@@ -467,4 +469,54 @@ impl<VM: VMBinding> MMTK<VM> {
     pub fn get_options(&self) -> &Options {
         &self.options
     }
+
+    /// Enumerate objects in all spaces in this MMTK instance.
+    ///
+    /// The call-back function `f` is called for every object that has the valid object bit (VO
+    /// bit), i.e. objects that are allocated in the heap of this MMTK instance, but have not been
+    /// reclaimed, yet.
+    ///
+    /// # Notes about object initialization and finalization
+    ///
+    /// When this function visits an object, it only guarantees that its VO bit must have been set.
+    /// It is not guaranteed that the object has been "fully initialized" in the sense of the
+    /// programming language the VM is implementing. For example, the object header and the type
+    /// information may not have been written.
+    ///
+    /// It will also visit objects that have been "finalized" in the sense of the programming
+    /// language the VM is implementing, as long as the object has not been reclaimed by the GC,
+    /// yet. Be careful. If the object header is destroyed, it may not be safe to access such
+    /// objects in the high-level language.
+    ///
+    /// # Interaction with allocation and GC
+    ///
+    /// This function does not mutate the heap. It is safe if multiple threads execute this
+    /// function concurrently during mutator time.
+    ///
+    /// It has *undefined behavior* if allocation or GC happens while this function is being
+    /// executed. The VM binding must ensure no threads are allocating and GC does not start while
+    /// executing this function.
One way to do this is stopping all mutators before calling this + /// function. + /// + /// Some high-level languages may provide an API that allows the user to allocate objects and + /// trigger GC while enumerating objects. One example is [`ObjectSpace::each_object`][os_eo] in + /// Ruby. The VM binding may use the callback of this function to save all visited object + /// references and let the user visit those references after this function returns. Make sure + /// those saved references are in the root set or in an object that will live through GCs before + /// the high-level language finishes visiting the saved object references. + /// + /// [os_eo]: https://docs.ruby-lang.org/en/master/ObjectSpace.html#method-c-each_object + #[cfg(feature = "vo_bit")] + pub fn enumerate_objects(&self, f: F) + where + F: FnMut(ObjectReference), + { + use crate::util::object_enum; + + let mut enumerator = object_enum::ClosureObjectEnumerator::<_, VM>::new(f); + let plan = self.get_plan(); + plan.for_each_space(&mut |space| { + space.enumerate_objects(&mut enumerator); + }) + } } diff --git a/src/policy/copyspace.rs b/src/policy/copyspace.rs index bf73255033..84e875191e 100644 --- a/src/policy/copyspace.rs +++ b/src/policy/copyspace.rs @@ -6,10 +6,11 @@ use crate::policy::sft::SFT; use crate::policy::space::{CommonSpace, Space}; use crate::scheduler::GCWorker; use crate::util::alloc::allocator::AllocatorContext; -use crate::util::copy::*; use crate::util::heap::{MonotonePageResource, PageResource}; use crate::util::metadata::{extract_side_metadata, MetadataSpec}; +use crate::util::object_enum::ObjectEnumerator; use crate::util::object_forwarding; +use crate::util::{copy::*, object_enum}; use crate::util::{Address, ObjectReference}; use crate::vm::*; use libc::{mprotect, PROT_EXEC, PROT_NONE, PROT_READ, PROT_WRITE}; @@ -133,6 +134,10 @@ impl Space for CopySpace { fn set_copy_for_sft_trace(&mut self, semantics: Option) { self.common.copy = semantics; } + + fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) { + object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr); + } } impl crate::policy::gc_work::PolicyTraceObject for CopySpace { diff --git a/src/policy/immix/block.rs b/src/policy/immix/block.rs index 8c772924c8..85ed76ddef 100644 --- a/src/policy/immix/block.rs +++ b/src/policy/immix/block.rs @@ -10,6 +10,7 @@ use crate::util::metadata::side_metadata::{MetadataByteArrayRef, SideMetadataSpe use crate::util::metadata::vo_bit; #[cfg(feature = "object_pinning")] use crate::util::metadata::MetadataSpec; +use crate::util::object_enum::BlockMayHaveObjects; use crate::util::Address; use crate::vm::*; use std::sync::atomic::Ordering; @@ -86,6 +87,12 @@ impl Region for Block { } } +impl BlockMayHaveObjects for Block { + fn may_have_objects(&self) -> bool { + self.get_state() != BlockState::Unallocated + } +} + impl Block { /// Log pages in block pub const LOG_PAGES: usize = Self::LOG_BYTES - LOG_BYTES_IN_PAGE as usize; diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index e5be6bbcb4..0e3d303202 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -17,8 +17,9 @@ use crate::util::metadata::side_metadata::SideMetadataSpec; #[cfg(feature = "vo_bit")] use crate::util::metadata::vo_bit; use crate::util::metadata::{self, MetadataSpec}; +use crate::util::object_enum::ObjectEnumerator; use crate::util::object_forwarding; -use crate::util::{copy::*, epilogue}; +use crate::util::{copy::*, epilogue, object_enum}; use 
diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs
index e5be6bbcb4..0e3d303202 100644
--- a/src/policy/immix/immixspace.rs
+++ b/src/policy/immix/immixspace.rs
@@ -17,8 +17,9 @@ use crate::util::metadata::side_metadata::SideMetadataSpec;
 #[cfg(feature = "vo_bit")]
 use crate::util::metadata::vo_bit;
 use crate::util::metadata::{self, MetadataSpec};
+use crate::util::object_enum::ObjectEnumerator;
 use crate::util::object_forwarding;
-use crate::util::{copy::*, epilogue};
+use crate::util::{copy::*, epilogue, object_enum};
 use crate::util::{Address, ObjectReference};
 use crate::vm::*;
 use crate::{
@@ -189,6 +190,10 @@ impl<VM: VMBinding> Space<VM> for ImmixSpace<VM> {
     fn set_copy_for_sft_trace(&mut self, _semantics: Option<CopySemantics>) {
         panic!("We do not use SFT to trace objects for Immix. set_copy_context() cannot be used.")
     }
+
+    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
+        object_enum::enumerate_blocks_from_chunk_map::<Block>(enumerator, &self.chunk_map);
+    }
 }
 
 impl<VM: VMBinding> crate::policy::gc_work::PolicyTraceObject<VM> for ImmixSpace<VM> {
diff --git a/src/policy/immortalspace.rs b/src/policy/immortalspace.rs
index 6d3e63922d..2ebb14e4d4 100644
--- a/src/policy/immortalspace.rs
+++ b/src/policy/immortalspace.rs
@@ -6,6 +6,7 @@ use crate::util::address::Address;
 
 use crate::util::heap::{MonotonePageResource, PageResource};
 use crate::util::metadata::mark_bit::MarkState;
+use crate::util::object_enum::{self, ObjectEnumerator};
 use crate::util::{metadata, ObjectReference};
 
 use crate::plan::{ObjectQueue, VectorObjectQueue};
@@ -112,6 +113,10 @@ impl<VM: VMBinding> Space<VM> for ImmortalSpace<VM> {
     fn release_multiple_pages(&mut self, _start: Address) {
         panic!("immortalspace only releases pages enmasse")
     }
+
+    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
+        object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr);
+    }
 }
 
 use crate::scheduler::GCWorker;
diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs
index f2b9ec34d1..9d948201bb 100644
--- a/src/policy/largeobjectspace.rs
+++ b/src/policy/largeobjectspace.rs
@@ -8,6 +8,7 @@ use crate::policy::space::{CommonSpace, Space};
 use crate::util::constants::BYTES_IN_PAGE;
 use crate::util::heap::{FreeListPageResource, PageResource};
 use crate::util::metadata;
+use crate::util::object_enum::ObjectEnumerator;
 use crate::util::opaque_pointer::*;
 use crate::util::treadmill::TreadMill;
 use crate::util::{Address, ObjectReference};
@@ -175,6 +176,10 @@ impl<VM: VMBinding> Space<VM> for LargeObjectSpace<VM> {
     fn release_multiple_pages(&mut self, start: Address) {
         self.pr.release_pages(start);
     }
+
+    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
+        self.treadmill.enumerate_objects(enumerator);
+    }
 }
 
 use crate::scheduler::GCWorker;
diff --git a/src/policy/lockfreeimmortalspace.rs b/src/policy/lockfreeimmortalspace.rs
index 80b65e288d..dee3205181 100644
--- a/src/policy/lockfreeimmortalspace.rs
+++ b/src/policy/lockfreeimmortalspace.rs
@@ -16,6 +16,7 @@ use crate::util::heap::VMRequest;
 use crate::util::memory::MmapStrategy;
 use crate::util::metadata::side_metadata::SideMetadataContext;
 use crate::util::metadata::side_metadata::SideMetadataSanity;
+use crate::util::object_enum::ObjectEnumerator;
 use crate::util::opaque_pointer::*;
 use crate::util::ObjectReference;
 use crate::vm::VMBinding;
@@ -166,6 +167,10 @@ impl<VM: VMBinding> Space<VM> for LockFreeImmortalSpace<VM> {
         side_metadata_sanity_checker
             .verify_metadata_context(std::any::type_name::<Self>(), &self.metadata)
     }
+
+    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
+        enumerator.visit_address_range(self.start, self.start + self.total_bytes);
+    }
 }
 
 use crate::plan::{ObjectQueue, VectorObjectQueue};
diff --git a/src/policy/markcompactspace.rs b/src/policy/markcompactspace.rs
index 9db05e56aa..bc0f5659c2 100644
--- a/src/policy/markcompactspace.rs
+++ b/src/policy/markcompactspace.rs
@@ -11,6 +11,7 @@ use crate::util::constants::LOG_BYTES_IN_WORD;
 use crate::util::copy::CopySemantics;
 use crate::util::heap::{MonotonePageResource, PageResource};
 use crate::util::metadata::{extract_side_metadata, vo_bit};
+use crate::util::object_enum::{self, ObjectEnumerator};
 use crate::util::{Address, ObjectReference};
 use crate::{vm::*, ObjectQueue};
 use atomic::Ordering;
@@ -131,6 +132,10 @@ impl<VM: VMBinding> Space<VM> for MarkCompactSpace<VM> {
     fn release_multiple_pages(&mut self, _start: Address) {
         panic!("markcompactspace only releases pages enmasse")
     }
+
+    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
+        object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, &self.pr);
+    }
 }
 
 impl<VM: VMBinding> crate::policy::gc_work::PolicyTraceObject<VM> for MarkCompactSpace<VM> {
diff --git a/src/policy/marksweepspace/malloc_ms/global.rs b/src/policy/marksweepspace/malloc_ms/global.rs
index d95aa8437b..dbe386db5c 100644
--- a/src/policy/marksweepspace/malloc_ms/global.rs
+++ b/src/policy/marksweepspace/malloc_ms/global.rs
@@ -15,6 +15,7 @@ use crate::util::metadata::side_metadata::{
     SideMetadataContext, SideMetadataSanity, SideMetadataSpec,
 };
 use crate::util::metadata::MetadataSpec;
+use crate::util::object_enum::ObjectEnumerator;
 use crate::util::opaque_pointer::*;
 use crate::util::Address;
 use crate::util::ObjectReference;
@@ -229,6 +230,10 @@ impl<VM: VMBinding> Space<VM> for MallocSpace<VM> {
         side_metadata_sanity_checker
             .verify_metadata_context(std::any::type_name::<Self>(), &self.metadata)
     }
+
+    fn enumerate_objects(&self, _enumerator: &mut dyn ObjectEnumerator) {
+        unimplemented!()
+    }
 }
 
 use crate::scheduler::GCWorker;
diff --git a/src/policy/marksweepspace/native_ms/block.rs b/src/policy/marksweepspace/native_ms/block.rs
index 6727430e9e..a150d974b5 100644
--- a/src/policy/marksweepspace/native_ms/block.rs
+++ b/src/policy/marksweepspace/native_ms/block.rs
@@ -7,6 +7,7 @@ use super::MarkSweepSpace;
 use crate::util::constants::LOG_BYTES_IN_PAGE;
 use crate::util::heap::chunk_map::*;
 use crate::util::linear_scan::Region;
+use crate::util::object_enum::BlockMayHaveObjects;
 use crate::vm::ObjectModel;
 use crate::{
     util::{
@@ -48,6 +49,12 @@ impl Region for Block {
     }
 }
 
+impl BlockMayHaveObjects for Block {
+    fn may_have_objects(&self) -> bool {
+        self.get_state() != BlockState::Unallocated
+    }
+}
+
 impl Block {
     /// Log pages in block
     pub const LOG_PAGES: usize = Self::LOG_BYTES - LOG_BYTES_IN_PAGE as usize;
diff --git a/src/policy/marksweepspace/native_ms/global.rs b/src/policy/marksweepspace/native_ms/global.rs
index b4699db30c..4a86dda6fa 100644
--- a/src/policy/marksweepspace/native_ms/global.rs
+++ b/src/policy/marksweepspace/native_ms/global.rs
@@ -11,6 +11,7 @@ use crate::{
         epilogue,
         heap::{BlockPageResource, PageResource},
         metadata::{self, side_metadata::SideMetadataSpec, MetadataSpec},
+        object_enum::{self, ObjectEnumerator},
         ObjectReference,
     },
     vm::{ActivePlan, VMBinding},
@@ -247,6 +248,10 @@ impl<VM: VMBinding> Space<VM> for MarkSweepSpace<VM> {
     fn release_multiple_pages(&mut self, _start: crate::util::Address) {
         todo!()
     }
+
+    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
+        object_enum::enumerate_blocks_from_chunk_map::<Block>(enumerator, &self.chunk_map);
+    }
 }
 
 impl<VM: VMBinding> crate::policy::gc_work::PolicyTraceObject<VM> for MarkSweepSpace<VM> {
diff --git a/src/policy/space.rs b/src/policy/space.rs
index 556e0c6bfe..0329c995b7 100644
--- a/src/policy/space.rs
+++ b/src/policy/space.rs
@@ -5,6 +5,7 @@ use crate::util::conversions::*;
 use crate::util::metadata::side_metadata::{
     SideMetadataContext, SideMetadataSanity, SideMetadataSpec,
 };
+use crate::util::object_enum::ObjectEnumerator;
 use crate::util::Address;
 use crate::util::ObjectReference;
 
@@ -348,6 +349,28 @@ pub trait Space<VM: VMBinding>: 'static + SFT + Sync + Downcast {
         side_metadata_sanity_checker
             .verify_metadata_context(std::any::type_name::<Self>(), &self.common().metadata)
     }
+
+    /// Enumerate objects in the current space.
+    ///
+    /// Implementers can use the `enumerator` to report
+    ///
+    /// -   individual objects within the space using `enumerator.visit_object`, and
+    /// -   ranges of addresses that may contain objects using `enumerator.visit_address_range`.
+    ///     The caller will then enumerate objects in the range using the VO bits metadata.
+    ///
+    /// Each object in the space shall be covered by one of the two methods above.
+    ///
+    /// # Implementation considerations
+    ///
+    /// **Skipping empty ranges**: When enumerating address ranges, spaces can skip ranges
+    /// (blocks, chunks, etc.) that are guaranteed not to contain objects.
+    ///
+    /// **Dynamic dispatch**: Because `Space` is a trait object type and `enumerator` is a `dyn`
+    /// reference, invoking methods of `enumerator` involves dynamic dispatch. But the overhead
+    /// is acceptable if we call it a block at a time, because scanning the VO bits will dominate
+    /// the execution time. For the LOS, it is cheaper to enumerate individual objects than to
+    /// scan the VO bits, because objects in the LOS are sparse.
+    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator);
 }
 
 /// Print the VM map for a space.
diff --git a/src/policy/vmspace.rs b/src/policy/vmspace.rs
index 5d38a77042..38fc6011da 100644
--- a/src/policy/vmspace.rs
+++ b/src/policy/vmspace.rs
@@ -11,6 +11,7 @@ use crate::util::heap::PageResource;
 use crate::util::metadata::mark_bit::MarkState;
 #[cfg(feature = "set_unlog_bits_vm_space")]
 use crate::util::metadata::MetadataSpec;
+use crate::util::object_enum::ObjectEnumerator;
 use crate::util::opaque_pointer::*;
 use crate::util::ObjectReference;
 use crate::vm::{ObjectModel, VMBinding};
@@ -145,6 +146,13 @@ impl<VM: VMBinding> Space<VM> for VMSpace<VM> {
         // mmapped by the runtime rather than us). So we use SFT here.
         SFT_MAP.get_checked(start).name() == self.name()
     }
+
+    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
+        let external_pages = self.pr.get_external_pages();
+        for ep in external_pages.iter() {
+            enumerator.visit_address_range(ep.start, ep.end);
+        }
+    }
 }
 
 use crate::scheduler::GCWorker;
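To illustrate the two reporting modes the trait documentation above describes, the body of `enumerate_objects` for a hypothetical space that manages one contiguous bump-allocated region plus a list of sparse large objects might look like the following sketch. This is not part of the patch, and `region_start`, `region_cursor`, and `large_objects` are made-up fields; the rest of the `Space` implementation is elided:

    fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
        // Mode 1: report an address range; the caller scans its VO bits.
        enumerator.visit_address_range(self.region_start, self.region_cursor);
        // Mode 2: report sparse objects individually; cheaper than scanning
        // VO bits over a mostly-empty range.
        for object in self.large_objects.iter() {
            enumerator.visit_object(*object);
        }
    }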
diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs
index 23764525a7..ad39a3d565 100644
--- a/src/util/metadata/side_metadata/global.rs
+++ b/src/util/metadata/side_metadata/global.rs
@@ -1115,6 +1115,109 @@ impl SideMetadataSpec {
         res.map(|addr| addr.align_down(1 << self.log_bytes_in_region))
     }
+
+    /// Search for data addresses that have non-zero values in the side metadata. This method is
+    /// primarily used for heap traversal by scanning the VO bits.
+    ///
+    /// This function searches the side metadata for the data address range from `data_start_addr`
+    /// (inclusive) to `data_end_addr` (exclusive). The data address range must be fully mapped.
+    ///
+    /// For each data region that has non-zero side metadata, `visit_data` is called with the
+    /// lowest address of that region. Note that it may not be the original address used to set
+    /// the metadata bits.
+    pub fn scan_non_zero_values<T: MetadataValue>(
+        &self,
+        data_start_addr: Address,
+        data_end_addr: Address,
+        visit_data: &mut impl FnMut(Address),
+    ) {
+        if self.uses_contiguous_side_metadata() && self.log_num_of_bits == 0 {
+            // Contiguous one-bit-per-region side metadata
+            // TODO: VO bits are one bit per word. But if we want to scan other metadata (such as
+            // the forwarding bits, which have two bits per word), we will need to refactor the
+            // algorithm of `scan_non_zero_values_fast`.
+            self.scan_non_zero_values_fast(data_start_addr, data_end_addr, visit_data);
+        } else {
+            // TODO: VO bits are always contiguous. But if we want to scan other metadata, such as
+            // side mark bits, we need to refactor `bulk_update_metadata` to support `FnMut`, too,
+            // and use it to apply `scan_non_zero_values_fast` to each contiguous side metadata
+            // range.
+            warn!(
+                "We are trying to search for non zero bits in a discontiguous side metadata \
+                 or the metadata has more than one bit per region. \
+                 The performance is slow, as MMTk does not optimize for this case."
+            );
+            self.scan_non_zero_values_simple::<T>(data_start_addr, data_end_addr, visit_data);
+        }
+    }
+
+    fn scan_non_zero_values_simple<T: MetadataValue>(
+        &self,
+        data_start_addr: Address,
+        data_end_addr: Address,
+        visit_data: &mut impl FnMut(Address),
+    ) {
+        let region_bytes = 1usize << self.log_bytes_in_region;
+
+        let mut cursor = data_start_addr;
+        while cursor < data_end_addr {
+            debug_assert!(cursor.is_mapped());
+
+            // If we find a non-zero value, just call back.
+            if !unsafe { self.load::<T>(cursor).is_zero() } {
+                visit_data(cursor);
+            }
+            cursor += region_bytes;
+        }
+    }
+
+    fn scan_non_zero_values_fast(
+        &self,
+        data_start_addr: Address,
+        data_end_addr: Address,
+        visit_data: &mut impl FnMut(Address),
+    ) {
+        debug_assert!(self.uses_contiguous_side_metadata());
+        debug_assert_eq!(self.log_num_of_bits, 0);
+
+        // Then figure out the start and end metadata address and bits.
+        let start_meta_addr = address_to_contiguous_meta_address(self, data_start_addr);
+        let start_meta_shift = meta_byte_lshift(self, data_start_addr);
+        let end_meta_addr = address_to_contiguous_meta_address(self, data_end_addr);
+        let end_meta_shift = meta_byte_lshift(self, data_end_addr);
+
+        let mut visitor = |range| {
+            match range {
+                BitByteRange::Bytes { start, end } => {
+                    helpers::scan_non_zero_bits_in_metadata_bytes(start, end, &mut |addr, bit| {
+                        visit_data(helpers::contiguous_meta_address_to_address(self, addr, bit));
+                    });
+                }
+                BitByteRange::BitsInByte {
+                    addr,
+                    bit_start,
+                    bit_end,
+                } => helpers::scan_non_zero_bits_in_metadata_bits(
+                    addr,
+                    bit_start,
+                    bit_end,
+                    &mut |addr, bit| {
+                        visit_data(helpers::contiguous_meta_address_to_address(self, addr, bit));
+                    },
+                ),
+            }
+            false
+        };
+
+        ranges::break_bit_range(
+            start_meta_addr,
+            start_meta_shift,
+            end_meta_addr,
+            end_meta_shift,
+            false,
+            &mut visitor,
+        );
+    }
 }
 
 impl fmt::Debug for SideMetadataSpec {
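Inside mmtk-core, the VO-bit enumeration added later in this patch boils down to a call of the following shape. This is a sketch, not part of the patch; `start` and `end` stand for a fully mapped data address range:

    VO_BIT.scan_non_zero_values::<u8>(start, end, &mut |region_start| {
        // `region_start` is the lowest address of a word whose VO bit is set.
        log::trace!("live object candidate at {}", region_start);
    });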
diff --git a/src/util/metadata/side_metadata/helpers.rs b/src/util/metadata/side_metadata/helpers.rs
index 845ad79666..9eb8823649 100644
--- a/src/util/metadata/side_metadata/helpers.rs
+++ b/src/util/metadata/side_metadata/helpers.rs
@@ -1,3 +1,4 @@
+use super::ranges::BitOffset;
 use super::SideMetadataSpec;
 use crate::util::constants::LOG_BYTES_IN_PAGE;
 use crate::util::constants::{BITS_IN_WORD, BYTES_IN_PAGE, LOG_BITS_IN_BYTE};
@@ -269,6 +270,59 @@ fn find_last_non_zero_bit_in_u8(byte_value: u8) -> Option<u8> {
     }
 }
 
+/// Scan a range of metadata bytes and call `visit_bit` for every non-zero bit.
+pub fn scan_non_zero_bits_in_metadata_bytes(
+    meta_start: Address,
+    meta_end: Address,
+    visit_bit: &mut impl FnMut(Address, BitOffset),
+) {
+    use crate::util::constants::BYTES_IN_ADDRESS;
+
+    // Visit bytes one by one until the cursor is word-aligned.
+    let mut cursor = meta_start;
+    while cursor < meta_end && !cursor.is_aligned_to(BYTES_IN_ADDRESS) {
+        let byte = unsafe { cursor.load::<u8>() };
+        scan_non_zero_bits_in_metadata_word(cursor, byte as usize, visit_bit);
+        cursor += 1usize;
+    }
+
+    // Visit whole words at a time.
+    while cursor + BYTES_IN_ADDRESS <= meta_end {
+        let word = unsafe { cursor.load::<usize>() };
+        scan_non_zero_bits_in_metadata_word(cursor, word, visit_bit);
+        cursor += BYTES_IN_ADDRESS;
+    }
+
+    // Visit the remaining bytes one by one.
+    while cursor < meta_end {
+        let byte = unsafe { cursor.load::<u8>() };
+        scan_non_zero_bits_in_metadata_word(cursor, byte as usize, visit_bit);
+        cursor += 1usize;
+    }
+}
+
+fn scan_non_zero_bits_in_metadata_word(
+    meta_addr: Address,
+    mut word: usize,
+    visit_bit: &mut impl FnMut(Address, BitOffset),
+) {
+    while word != 0 {
+        // Find the lowest set bit, visit it, then clear it.
+        let bit = word.trailing_zeros();
+        visit_bit(meta_addr, bit as u8);
+        word &= word - 1;
+    }
+}
+
+pub fn scan_non_zero_bits_in_metadata_bits(
+    meta_addr: Address,
+    bit_start: BitOffset,
+    bit_end: BitOffset,
+    visit_bit: &mut impl FnMut(Address, BitOffset),
+) {
+    let byte = unsafe { meta_addr.load::<u8>() };
+    for bit in bit_start..bit_end {
+        if byte & (1 << bit) != 0 {
+            visit_bit(meta_addr, bit);
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/util/metadata/side_metadata/mod.rs b/src/util/metadata/side_metadata/mod.rs
index 612c69223f..16111b42c1 100644
--- a/src/util/metadata/side_metadata/mod.rs
+++ b/src/util/metadata/side_metadata/mod.rs
@@ -2,7 +2,7 @@
 // For convenience, this module is public and the bindings may create and use side metadata for their purpose.
 
 mod constants;
-mod helpers;
+pub(crate) mod helpers;
 #[cfg(target_pointer_width = "32")]
 mod helpers_32;
diff --git a/src/util/metadata/vo_bit/mod.rs b/src/util/metadata/vo_bit/mod.rs
index e7b0f2551b..3ae0aee8d7 100644
--- a/src/util/metadata/vo_bit/mod.rs
+++ b/src/util/metadata/vo_bit/mod.rs
@@ -201,7 +201,7 @@ fn get_in_object_address_for_potential_object<VM: VMBinding>(potential_obj: Address) -> Address {
 }
 
 /// Get the object reference from an aligned address where VO bit is set.
-fn get_object_ref_for_vo_addr<VM: VMBinding>(vo_addr: Address) -> ObjectReference {
+pub(crate) fn get_object_ref_for_vo_addr<VM: VMBinding>(vo_addr: Address) -> ObjectReference {
     let addr = vo_addr.offset(-VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET);
     let aligned = addr.align_up(ObjectReference::ALIGNMENT);
     unsafe { ObjectReference::from_raw_address_unchecked(aligned) }
diff --git a/src/util/mod.rs b/src/util/mod.rs
index bac77a0b88..d22c29a2e3 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -53,6 +53,7 @@ pub(crate) mod erase_vm;
 pub(crate) mod finalizable_processor;
 /// Logger initialization
 pub(crate) mod logger;
+pub(crate) mod object_enum;
 /// Forwarding word in object copying.
 pub(crate) mod object_forwarding;
 /// Reference processing implementation.
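The word scan in `scan_non_zero_bits_in_metadata_word` above relies on two classic bit tricks: `trailing_zeros` finds the index of the lowest set bit, and `word & (word - 1)` clears that bit. A minimal standalone demonstration (not part of the patch):

    fn main() {
        let mut word: usize = 0b1010_0100;
        while word != 0 {
            let bit = word.trailing_zeros(); // prints 2, then 5, then 7
            println!("bit {bit} is set");
            word &= word - 1; // clear the lowest set bit
        }
    }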
diff --git a/src/util/object_enum.rs b/src/util/object_enum.rs
new file mode 100644
index 0000000000..61378f7a57
--- /dev/null
+++ b/src/util/object_enum.rs
@@ -0,0 +1,107 @@
+//! Helper types for object enumeration
+
+use std::marker::PhantomData;
+
+use crate::vm::VMBinding;
+
+use super::{
+    heap::{
+        chunk_map::{ChunkMap, ChunkState},
+        MonotonePageResource,
+    },
+    linear_scan::Region,
+    metadata::{side_metadata::spec_defs::VO_BIT, vo_bit},
+    Address, ObjectReference,
+};
+
+/// A trait for enumerating objects in spaces, used by [`Space::enumerate_objects`].
+///
+/// [`Space::enumerate_objects`]: crate::policy::space::Space::enumerate_objects
+pub trait ObjectEnumerator {
+    /// Visit a single object.
+    fn visit_object(&mut self, object: ObjectReference);
+    /// Visit an address range that may contain objects.
+    fn visit_address_range(&mut self, start: Address, end: Address);
+}
+
+/// An implementation of `ObjectEnumerator` that wraps a callback.
+pub struct ClosureObjectEnumerator<F, VM>
+where
+    F: FnMut(ObjectReference),
+    VM: VMBinding,
+{
+    object_callback: F,
+    phantom_data: PhantomData<VM>,
+}
+
+impl<F, VM> ClosureObjectEnumerator<F, VM>
+where
+    F: FnMut(ObjectReference),
+    VM: VMBinding,
+{
+    pub fn new(object_callback: F) -> Self {
+        Self {
+            object_callback,
+            phantom_data: PhantomData,
+        }
+    }
+}
+
+impl<F, VM> ObjectEnumerator for ClosureObjectEnumerator<F, VM>
+where
+    F: FnMut(ObjectReference),
+    VM: VMBinding,
+{
+    fn visit_object(&mut self, object: ObjectReference) {
+        (self.object_callback)(object);
+    }
+
+    fn visit_address_range(&mut self, start: Address, end: Address) {
+        VO_BIT.scan_non_zero_values::<u8>(start, end, &mut |address| {
+            let object = vo_bit::get_object_ref_for_vo_addr::<VM>(address);
+            (self.object_callback)(object);
+        })
+    }
+}
+
+/// Allow querying if a block may have objects. `MarkSweepSpace` and `ImmixSpace` use different
+/// `Block` types, and they have different block states. This trait lets both `Block` types provide
+/// the same `may_have_objects` method.
+pub(crate) trait BlockMayHaveObjects: Region {
+    /// Return `true` if the block may contain valid objects (objects with the VO bit set). Return
+    /// `false` otherwise.
+    ///
+    /// This function is used during object enumeration to filter out memory regions that do not
+    /// contain objects. Because object enumeration may happen at mutator time, and other mutators
+    /// may be allocating objects, this function only needs to reflect the state of the block at
+    /// the time of calling, and is allowed to conservatively return `true`.
+    fn may_have_objects(&self) -> bool;
+}
+
+pub(crate) fn enumerate_blocks_from_chunk_map<B>(
+    enumerator: &mut dyn ObjectEnumerator,
+    chunk_map: &ChunkMap,
+) where
+    B: BlockMayHaveObjects,
+{
+    for chunk in chunk_map.all_chunks() {
+        if chunk_map.get(chunk) == ChunkState::Allocated {
+            for block in chunk.iter_region::<B>() {
+                if block.may_have_objects() {
+                    enumerator.visit_address_range(block.start(), block.end());
+                }
+            }
+        }
+    }
+}
+
+pub(crate) fn enumerate_blocks_from_monotonic_page_resource<VM>(
+    enumerator: &mut dyn ObjectEnumerator,
+    pr: &MonotonePageResource<VM>,
+) where
+    VM: VMBinding,
+{
+    for (start, size) in pr.iterate_allocated_regions() {
+        enumerator.visit_address_range(start, start + size);
+    }
+}
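Besides `ClosureObjectEnumerator`, other enumerators can implement the same trait. A sketch of a trivial recording enumerator, hypothetical and not part of this patch, assuming `Address` and `ObjectReference` are in scope:

    struct VecEnumerator {
        objects: Vec<ObjectReference>,
        ranges: Vec<(Address, Address)>,
    }

    impl ObjectEnumerator for VecEnumerator {
        fn visit_object(&mut self, object: ObjectReference) {
            self.objects.push(object);
        }
        fn visit_address_range(&mut self, start: Address, end: Address) {
            // Record the range; a real enumerator would scan its VO bits here.
            self.ranges.push((start, end));
        }
    }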
diff --git a/src/util/test_private/mod.rs b/src/util/test_private/mod.rs
index 783dbeeac8..c3f4dfceae 100644
--- a/src/util/test_private/mod.rs
+++ b/src/util/test_private/mod.rs
@@ -8,6 +8,7 @@
 //! and the compiler usually fails to make the right decision given that those functions are not
 //! used often, and we don't compile the benchmarks using feedback-directed optimizations.
 
+pub use crate::util::metadata::side_metadata::helpers::scan_non_zero_bits_in_metadata_bytes;
 use crate::util::metadata::side_metadata::SideMetadataSpec;
 
 use super::Address;
diff --git a/src/util/treadmill.rs b/src/util/treadmill.rs
index e8cfe92d4f..96b5eb9e7d 100644
--- a/src/util/treadmill.rs
+++ b/src/util/treadmill.rs
@@ -4,6 +4,8 @@ use std::sync::Mutex;
 
 use crate::util::ObjectReference;
 
+use super::object_enum::ObjectEnumerator;
+
 pub struct TreadMill {
     from_space: Mutex<HashSet<ObjectReference>>,
     to_space: Mutex<HashSet<ObjectReference>>,
@@ -99,6 +101,17 @@ impl TreadMill {
             trace!("Flipped from_space and to_space");
         }
     }
+
+    pub(crate) fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
+        let mut visit_objects = |set: &Mutex<HashSet<ObjectReference>>| {
+            let set = set.lock().unwrap();
+            for object in set.iter() {
+                enumerator.visit_object(*object);
+            }
+        };
+        visit_objects(&self.alloc_nursery);
+        visit_objects(&self.to_space);
+    }
 }
 
 impl Default for TreadMill {
diff --git a/src/vm/tests/mock_tests/mock_test_heap_traversal.rs b/src/vm/tests/mock_tests/mock_test_heap_traversal.rs
new file mode 100644
index 0000000000..430c46ff2a
--- /dev/null
+++ b/src/vm/tests/mock_tests/mock_test_heap_traversal.rs
@@ -0,0 +1,87 @@
+// GITHUB-CI: MMTK_PLAN=NoGC,MarkSweep,MarkCompact,SemiSpace,Immix
+// GITHUB-CI: FEATURES=vo_bit
+
+// Note on the plans chosen for CI:
+// -  Those plans cover the MarkSweepSpace, MarkCompactSpace, CopySpace and ImmixSpace.
+//    Each plan other than NoGC also includes ImmortalSpace and LOS.
+// -  PageProtect consumes too much memory and the test will fail with the default heap size
+//    chosen by the MutatorFixture.
+
+use std::collections::HashSet;
+
+use constants::BYTES_IN_WORD;
+
+use super::mock_test_prelude::*;
+
+use crate::{util::*, AllocationSemantics, MMTK};
+
+lazy_static! {
+    static ref FIXTURE: Fixture<MutatorFixture> = Fixture::new();
+}
+
+pub fn get_all_objects(mmtk: &'static MMTK<MockVM>) -> HashSet<ObjectReference> {
+    let mut result = HashSet::new();
+    mmtk.enumerate_objects(|object| {
+        result.insert(object);
+    });
+    result
+}
+
+#[test]
+pub fn test_heap_traversal() {
+    with_mockvm(
+        default_setup,
+        || {
+            FIXTURE.with_fixture_mut(|fixture| {
+                let mmtk = fixture.mmtk();
+                let traversal0 = get_all_objects(mmtk);
+                assert!(traversal0.is_empty());
+
+                let mutator = &mut fixture.mutator;
+
+                let align = BYTES_IN_WORD;
+
+                let mut new_obj = |size: usize, semantics: AllocationSemantics| {
+                    let start = memory_manager::alloc(mutator, size, align, 0, semantics);
+                    let object = MockVM::object_start_to_ref(start);
+                    memory_manager::post_alloc(mutator, object, size, semantics);
+                    object
+                };
+
+                let mut known_objects = HashSet::new();
+
+                let mut new_and_assert = |size: usize, semantics: AllocationSemantics| {
+                    let object = new_obj(size, semantics);
+                    known_objects.insert(object);
+                    let traversal = get_all_objects(mmtk);
+                    assert_eq!(traversal, known_objects);
+                };
+
+                {
+                    use AllocationSemantics::*;
+
+                    // Add some single objects. Size doesn't matter.
+                    new_and_assert(40, Default);
+                    new_and_assert(64, Default);
+                    new_and_assert(96, Immortal);
+                    new_and_assert(131000, Los);
+
+                    // Allocate enough memory to fill up a 64KB Immix block
+                    for _ in 0..1000 {
+                        new_and_assert(112, Default);
+                    }
+                    // Allocate a few objects in the immortal space.
+                    for _ in 0..10 {
+                        new_and_assert(64, Immortal);
+                    }
+                    // And a few objects in the LOS.
+                    for _ in 0..3 {
+                        // The MutatorFixture only has 1MB of memory. Don't allocate too much.
+                        new_and_assert(65504, Los);
+                    }
+                }
+            });
+        },
+        no_cleanup,
+    )
+}
diff --git a/src/vm/tests/mock_tests/mod.rs b/src/vm/tests/mock_tests/mod.rs
index 751bc58c08..114d8f1859 100644
--- a/src/vm/tests/mock_tests/mod.rs
+++ b/src/vm/tests/mock_tests/mod.rs
@@ -35,6 +35,8 @@ mod mock_test_conservatism;
 #[cfg(target_os = "linux")]
 mod mock_test_handle_mmap_conflict;
 mod mock_test_handle_mmap_oom;
+#[cfg(feature = "vo_bit")]
+mod mock_test_heap_traversal;
 mod mock_test_init_fork;
 #[cfg(feature = "is_mmtk_object")]
 mod mock_test_internal_ptr_before_object_ref;

From 160b7702fccda133c9407234821ad35103623179 Mon Sep 17 00:00:00 2001
From: Yi Lin
Date: Fri, 9 Aug 2024 17:37:25 +1200
Subject: [PATCH 5/5] Bump version to v0.27 (#1185)

---
 CHANGELOG.md      | 27 +++++++++++++++++++++++++++
 Cargo.toml        |  4 ++--
 macros/Cargo.toml |  2 +-
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 75f479c763..308e14da96 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,30 @@
+0.27.0 (2024-08-09)
+===
+
+## What's Changed
+
+### Policy
+* Clear pin bits by @udesou in https://github.com/mmtk/mmtk-core/pull/1166
+* Set the unlog bits for objects in the bootimage by @k-sareen in https://github.com/mmtk/mmtk-core/pull/1168
+* Fix bug in line mark when `MARK_LINE_AT_SCAN_TIME=false` by @k-sareen in https://github.com/mmtk/mmtk-core/pull/1171
+
+### API
+* Require object reference to be aligned by @qinsoon in https://github.com/mmtk/mmtk-core/pull/1159
+* Internal pointer support by @qinsoon in https://github.com/mmtk/mmtk-core/pull/1165
+* Heap traversal by @wks in https://github.com/mmtk/mmtk-core/pull/1174
+
+### CI
+* Parallelize and workaround CI style checks by @wks in https://github.com/mmtk/mmtk-core/pull/1184
+
+### Misc
+* Extensible eBPF timeline attributes by @wks in https://github.com/mmtk/mmtk-core/pull/1162
+* Only map with executable permissions when using code space by @k-sareen in https://github.com/mmtk/mmtk-core/pull/1176
+* Fix style check for Rust 1.80 by @qinsoon in https://github.com/mmtk/mmtk-core/pull/1178
+* Extra assertions for mutators and epilogues by @wks in https://github.com/mmtk/mmtk-core/pull/1182
+* Refactor iterate_meta_bits by @wks in https://github.com/mmtk/mmtk-core/pull/1181
+
+**Full Changelog**: https://github.com/mmtk/mmtk-core/compare/v0.26.0...v0.27.0
+
 0.26.0 (2024-07-01)
 ===
 
diff --git a/Cargo.toml b/Cargo.toml
index 3c2fc8f179..b92291190f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "mmtk"
-version = "0.26.0"
+version = "0.27.0"
 authors = ["The MMTk Developers <>"]
 edition = "2021"
 license = "MIT OR Apache-2.0"
@@ -38,7 +38,7 @@ log = { version = "0.4", features = ["max_level_trace", "release_max_level_off"] }
 memoffset = "0.9"
 mimalloc-sys = { version = "0.1.6", optional = true }
 # MMTk macros - we have to specify a version here in order to publish the crate, even though we use the dependency from a local path.
-mmtk-macros = { version="0.26.0", path = "macros/" }
+mmtk-macros = { version="0.27.0", path = "macros/" }
 num_cpus = "1.8"
 num-traits = "0.2"
 pfm = { version = "0.1.1", optional = true }
diff --git a/macros/Cargo.toml b/macros/Cargo.toml
index 258b375b83..44b8848119 100644
--- a/macros/Cargo.toml
+++ b/macros/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "mmtk-macros"
 # the macro crate uses the same version as mmtk-core
-version = "0.26.0"
+version = "0.27.0"
 edition = "2021"
 license = "MIT OR Apache-2.0"
 description = "MMTk macros provides procedural macros used by mmtk-core."