Skip to content

Commit

Permalink
Add aarch64 cache coherency tests. These are confirmed to fail right …
Browse files Browse the repository at this point in the history
…now.
  • Loading branch information
CensoredUsername committed Sep 30, 2024
1 parent 6796c0b commit 73bb33b
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 3 deletions.
196 changes: 196 additions & 0 deletions testing/tests/aarch64_cache_coherency.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
// This file contains test cases designed to validate proper assembler cache invalidation.
// This is needed because aarch64's modified Harvard architecture has incoherent instruction and
// data caches. Therefore, it is necessary to explicitly command the cache hierarchy to flush the
// dcache to the coherent layers, invalidate the icache, and ensure no stale data is left in the
// instruction pipeline. Test cases in this file are designed to break if this isn't handled properly.
#![allow(unused_imports)]

extern crate dynasmrt;

use dynasmrt::dynasm;
use dynasmrt::{DynasmApi, DynasmLabelApi};

// Assembles a function returning 0xABCD, runs it, patches it in place so it returns 0xCDEF,
// and runs it again from the same thread. The second run fails if the instructions that get
// executed are still the ones from before the patch.
#[cfg(target_arch="aarch64")]
fn test_cache_coherency_same_core() {
    let mut assembler = dynasmrt::aarch64::Assembler::new().unwrap();
    let executor = assembler.reader();

    // emit the initial version of the function
    let entry = assembler.offset();
    dynasm!(assembler
        ; .arch aarch64
        ; mov w0, 0xABCD
        ; ret
    );
    let code_end = assembler.offset();

    assembler.commit().unwrap();

    // locks the buffer, calls the function located at `entry`, and releases the lock again
    let run = || -> u32 {
        let guard = executor.lock();
        let func: extern "C" fn() -> u32 = unsafe { std::mem::transmute(guard.ptr(entry)) };
        func()
    };

    // the first run sees the code as originally assembled
    assert_eq!(run(), 0xABCD);

    // overwrite the function in place with a version returning a different constant
    assembler.alter(|patch| {
        patch.goto(entry);

        dynasm!(patch
            ; .arch aarch64
            ; mov w0, 0xCDEF
            ; ret
        );
        // the replacement has to end at the same offset the original code ended at
        patch.check_exact(code_end).unwrap();
    }).unwrap();

    // the second run has to observe the rewritten code
    assert_eq!(run(), 0xCDEF);
}

// Runs the same-core check 10000 times in a row; presumably the repetition is there to make
// an intermittent stale-code failure likely to show up.
#[cfg(target_arch="aarch64")]
#[test]
fn test_cache_coherency_same_core_loop() {
    (0 .. 10000).for_each(|_| test_cache_coherency_same_core());
}

// Races several threads that keep executing a generated function against a main thread that
// keeps rewriting that function in place. Every reader counts the calls that did not return the
// expected value, and the test fails if any such call was observed.
#[cfg(target_arch="aarch64")]
#[test]
fn test_cache_coherency_other_cores() {
    // spawn a bunch of threads, and have them all racing to execute some assembly
    // then modify the assembly, and see if we execute stale data
    let thread_count = 4;

    use std::sync::atomic::{AtomicU32, AtomicBool, Ordering};

    // Raises the wrapped stop flag when dropped. This guarantees the reader threads are told
    // to exit even when the controlling code unwinds from a panic; without it they would spin
    // forever and `std::thread::scope` would block on joining them, hanging the test.
    struct StopOnDrop<'a>(&'a AtomicBool);

    impl<'a> Drop for StopOnDrop<'a> {
        fn drop(&mut self) {
            self.0.store(true, Ordering::Release);
        }
    }

    // the code we'll generate tries to read one of these atomics with acquire ordering,
    // and always expects to read 0x12345678. At first it tries to read the first one, and
    // then we update it to read the second one, at which point we also change the second one
    // to hold the expected value, and invalidate the first one. If stale code is read
    // it will read the first value instead, which at that point should be updated to be invalid
    let first_value = AtomicU32::new(0x12345678);
    let second_value = AtomicU32::new(0xDEADC0DE);
    let rejoin_threads = AtomicBool::new(false);

    let mut ops = dynasmrt::aarch64::Assembler::new().unwrap();

    // emit a small data area holding the addresses of both atomics, so the code can
    // reach them with pc-relative `adr` instructions
    dynasm!(ops
        ; .arch aarch64
        ; .align 8
        ; -> first_addr:
        ; .qword first_value.as_ptr() as *mut u8 as _
        ; -> second_addr:
        ; .qword second_value.as_ptr() as *mut u8 as _
    );

    // function prologue: x1 / x2 point at the slots holding the two addresses.
    // this part is never rewritten
    let start = ops.offset();
    dynasm!(ops
        ; .arch aarch64
        ; adr x1, ->first_addr
        ; adr x2, ->second_addr
    );

    // everything from `edit` to `end` is what gets rewritten later: it picks which of the
    // two atomics is loaded (initially the first one) and returns its value in w0
    let edit = ops.offset();
    dynasm!(ops
        ; .arch aarch64
        ; ldr x0, [x1]
        ; ldar w0, [x0]
        ; ret
    );
    let end = ops.offset();

    ops.commit().unwrap();

    std::thread::scope(|scope| {

        // make sure the readers are released even if anything below panics; the scope only
        // returns once every spawned thread has finished
        let _stop_guard = StopOnDrop(&rejoin_threads);

        // start our racing threads
        let mut handles = Vec::new();
        for _ in 0 .. thread_count {

            // these get moved to each thread
            let reader = ops.reader();
            let rejoin_threads_borrow = &rejoin_threads;

            handles.push(scope.spawn(move || {

                let mut bad_results = 0usize;
                while !rejoin_threads_borrow.load(Ordering::Acquire) {

                    // the lock is held for the duration of the call and released at the end
                    // of each iteration
                    let buf = reader.lock();
                    // SAFETY: `start` is the offset of the function assembled above, which
                    // takes no arguments and returns its result in w0.
                    let callable: extern "C" fn() -> u32 = unsafe { std::mem::transmute(buf.ptr(start)) };

                    let value = callable();
                    if value != 0x12345678 {
                        bad_results += 1;
                    }
                }

                bad_results
            }));
        }

        // wait a bit
        std::thread::sleep(std::time::Duration::from_millis(1));

        // change the code back and forth to see if errors happen
        for _ in 0 .. 100 {
            ops.alter(|modifier| {
                modifier.goto(edit);

                // load through x2, i.e. read the second atomic
                dynasm!(modifier
                    ; .arch aarch64
                    ; ldr x0, [x2]
                    ; ldar w0, [x0]
                    ; ret
                );
                modifier.check_exact(end).unwrap();

                // also change the values: only the second atomic is valid now
                first_value.store(0xDEADBEEF, Ordering::Release);
                second_value.store(0x12345678, Ordering::Release);

            }).unwrap();

            // wait a bit more
            std::thread::sleep(std::time::Duration::from_millis(1));

            // change it back
            ops.alter(|modifier| {
                modifier.goto(edit);

                // load through x1, i.e. read the first atomic again
                dynasm!(modifier
                    ; .arch aarch64
                    ; ldr x0, [x1]
                    ; ldar w0, [x0]
                    ; ret
                );
                modifier.check_exact(end).unwrap();

                // also change the values: only the first atomic is valid now
                first_value.store(0x12345678, Ordering::Release);
                second_value.store(0xDEADBEEF, Ordering::Release);

            }).unwrap();

            // wait a bit more
            std::thread::sleep(std::time::Duration::from_millis(1));
        }

        // join our threads
        rejoin_threads.store(true, Ordering::Release);

        let errors: usize = handles.into_iter().map(|handle| handle.join().unwrap()).sum();

        assert_eq!(errors, 0, "racing threads read the wrong value");

    });
}
3 changes: 1 addition & 2 deletions testing/tests/aarch64_complex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use dynasmrt::components::LitPool;
macro_rules! my_dynasm {
($ops:ident $($t:tt)*) => {
dynasm!($ops
; .arch x64
; .arch aarch64
; .alias test, x1
$($t)*
)
Expand All @@ -35,7 +35,6 @@ fn complex() {

// interesting testcases
my_dynasm!(ops
; .arch aarch64
; aligned:
// no args
; nop
Expand Down
2 changes: 1 addition & 1 deletion testing/tests/x64_0_complex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ fn complex() {
// dynasm in expr position
match 1 {
0 => (),
_ => dynasm!(ops; inc rax)
_ => my_dynasm!(ops; inc rax)
}

// fixups
Expand Down

0 comments on commit 73bb33b

Please sign in to comment.