diff --git a/src/core/memory/mem.rs b/src/core/memory/mem.rs index 97b62b3..74b845c 100644 --- a/src/core/memory/mem.rs +++ b/src/core/memory/mem.rs @@ -1,5 +1,5 @@ use crate::core::cp15::TcmState; -use crate::core::emu::{get_cp15, get_regs, Emu}; +use crate::core::emu::{get_cp15, Emu}; use crate::core::memory::bios::{BiosArm7, BiosArm9}; use crate::core::memory::io_arm7::IoArm7; use crate::core::memory::io_arm9::IoArm9; @@ -205,7 +205,7 @@ impl Memory { if aligned_addr < cp15.itcm_size && cp15.itcm_state != TcmState::Disabled { self.tcm.write_itcm(aligned_addr, value); debug_println!("{:?} itcm write at {:x} with value {:x}", CPU, aligned_addr, value.into(),); - self.breakout_imm = self.jit.invalidate_block::<{ JitRegion::Itcm }>(aligned_addr, size_of::(), get_regs!(emu, CPU).pc); + self.jit.invalidate_block::<{ JitRegion::Itcm }>(aligned_addr, size_of::()); } } } @@ -216,12 +216,12 @@ impl Memory { }, regions::MAIN_MEMORY_OFFSET => { self.main.write(addr_offset, value); - self.breakout_imm = self.jit.invalidate_block::<{ JitRegion::Main }>(aligned_addr, size_of::(), get_regs!(emu, CPU).pc); + self.jit.invalidate_block::<{ JitRegion::Main }>(aligned_addr, size_of::()); } regions::SHARED_WRAM_OFFSET => { self.wram.write::(addr_offset, value); if CPU == ARM7 { - self.breakout_imm = self.jit.invalidate_block::<{ JitRegion::Wram }>(aligned_addr, size_of::(), get_regs!(emu, CPU).pc); + self.jit.invalidate_block::<{ JitRegion::Wram }>(aligned_addr, size_of::()); } } regions::IO_PORTS_OFFSET => match CPU { @@ -243,7 +243,7 @@ impl Memory { regions::VRAM_OFFSET => { self.vram.write::(addr_offset, value); if CPU == ARM7 { - self.breakout_imm = self.jit.invalidate_block::<{ JitRegion::VramArm7 }>(aligned_addr, size_of::(), get_regs!(emu, CPU).pc); + self.jit.invalidate_block::<{ JitRegion::VramArm7 }>(aligned_addr, size_of::()); } } regions::OAM_OFFSET => self.oam.write(addr_offset, value), diff --git a/src/jit/jit_memory.rs b/src/jit/jit_memory.rs index bffd866..b903a6f 100644 --- a/src/jit/jit_memory.rs +++ b/src/jit/jit_memory.rs @@ -10,11 +10,11 @@ use lazy_static::lazy_static; use paste::paste; use std::intrinsics::unlikely; use std::marker::ConstParamTy; -use std::ptr; +use std::{ptr, slice}; use CpuType::{ARM7, ARM9}; const JIT_MEMORY_SIZE: usize = 16 * 1024 * 1024; -const JIT_LIVE_RANGE_PAGE_SIZE_SHIFT: u32 = 8; +pub const JIT_LIVE_RANGE_PAGE_SIZE_SHIFT: u32 = 8; const JIT_LIVE_RANGE_PAGE_SIZE: u32 = 1 << JIT_LIVE_RANGE_PAGE_SIZE_SHIFT; #[derive(ConstParamTy, Eq, PartialEq)] @@ -93,13 +93,13 @@ create_jit_blocks!( ); #[derive(Default)] -struct JitLiveRanges { - itcm: HeapMemU32<{ (regions::INSTRUCTION_TCM_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, - main: HeapMemU32<{ (regions::MAIN_MEMORY_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, - wram: HeapMemU32<{ ((regions::SHARED_WRAM_SIZE + regions::ARM7_WRAM_SIZE) / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, - vram_arm7: HeapMemU32<{ (vram::ARM7_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, - arm9_bios: HeapMemU32<{ (regions::ARM9_BIOS_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, - arm7_bios: HeapMemU32<{ (regions::ARM7_BIOS_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, +pub struct JitLiveRanges { + pub itcm: HeapMemU32<{ (regions::INSTRUCTION_TCM_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, + pub main: HeapMemU32<{ (regions::MAIN_MEMORY_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, + pub wram: HeapMemU32<{ ((regions::SHARED_WRAM_SIZE + regions::ARM7_WRAM_SIZE) / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, + pub vram_arm7: HeapMemU32<{ (vram::ARM7_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, + pub arm9_bios: HeapMemU32<{ (regions::ARM9_BIOS_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, + pub arm7_bios: HeapMemU32<{ (regions::ARM7_BIOS_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 32) as usize }>, } #[cfg(target_os = "linux")] @@ -118,12 +118,13 @@ pub struct JitMemory { impl JitMemory { pub fn new() -> Self { let jit_entries = JitEntries::new(); - let jit_memory_map = JitMemoryMap::new(&jit_entries); + let jit_live_ranges = JitLiveRanges::default(); + let jit_memory_map = JitMemoryMap::new(&jit_entries, &jit_live_ranges); JitMemory { mem: Mmap::executable("code", JIT_MEMORY_SIZE).unwrap(), mem_offset: 0, jit_entries, - jit_live_ranges: JitLiveRanges::default(), + jit_live_ranges, jit_memory_map, } } @@ -172,12 +173,14 @@ impl JitMemory { let entries_index = (guest_pc >> 1) as usize; let entries_index = entries_index % $entries.len(); $entries[entries_index] = JitEntry(jit_entry_addr); + assert_eq!(ptr::addr_of!($entries[entries_index]), self.jit_memory_map.get_jit_entry::(guest_pc)); // >> 5 for u32 (each bit represents a page) let live_ranges_index = ((guest_pc >> JIT_LIVE_RANGE_PAGE_SIZE_SHIFT) >> 5) as usize; let live_ranges_index = live_ranges_index % $live_ranges.len(); let live_ranges_bit = (guest_pc >> JIT_LIVE_RANGE_PAGE_SIZE_SHIFT) & 31; $live_ranges[live_ranges_index] |= 1 << live_ranges_bit; + assert_eq!(ptr::addr_of!($live_ranges[live_ranges_index]), self.jit_memory_map.get_live_range::(guest_pc)); jit_entry_addr }}; @@ -220,35 +223,23 @@ impl JitMemory { unsafe { (*self.jit_memory_map.get_jit_entry::(guest_pc)).0 } } - pub fn invalidate_block(&mut self, guest_addr: u32, size: usize, guest_pc: u32) -> bool { - let mut should_breakout = false; - + pub fn invalidate_block(&mut self, guest_addr: u32, size: usize) { macro_rules! invalidate { - ($guest_addr:expr, $live_range:ident, [$(($entries:ident, $default_entry:expr)),+]) => {{ - let live_ranges_index = (($guest_addr >> JIT_LIVE_RANGE_PAGE_SIZE_SHIFT) >> 5) as usize; - let live_ranges_index = live_ranges_index % self.jit_live_ranges.$live_range.len(); + ($guest_addr:expr, $live_range:ident, $cpu:expr, [$(($cpu_entry:expr, $entries:ident)),+]) => {{ + let live_range = unsafe { self.jit_memory_map.get_live_range::<{ $cpu }>($guest_addr).as_mut_unchecked() }; let live_ranges_bit = ($guest_addr >> JIT_LIVE_RANGE_PAGE_SIZE_SHIFT) & 31; - - if unlikely(self.jit_live_ranges.$live_range[live_ranges_index] & (1 << live_ranges_bit) != 0) { - self.jit_live_ranges.$live_range[live_ranges_index] &= !(1 << live_ranges_bit); - - let guest_pc_index = ((guest_pc >> JIT_LIVE_RANGE_PAGE_SIZE_SHIFT) >> 5) as usize; - let guest_pc_index = guest_pc_index % self.jit_live_ranges.$live_range.len(); - let guest_pc_bit = (guest_pc >> JIT_LIVE_RANGE_PAGE_SIZE_SHIFT) & 31; - - should_breakout |= live_ranges_index == guest_pc_index && live_ranges_bit == guest_pc_bit; + if unlikely(*live_range & (1 << live_ranges_bit) != 0) { + *live_range &= !(1 << live_ranges_bit); let guest_addr_start = $guest_addr & !(JIT_LIVE_RANGE_PAGE_SIZE - 1); - let guest_addr_end = guest_addr_start + JIT_LIVE_RANGE_PAGE_SIZE; - $( - { - let entries_index_start = (guest_addr_start >> 1) as usize; - let entries_index_start = entries_index_start % self.jit_entries.$entries.len(); - let entries_index_end = (guest_addr_end >> 1) as usize; - let entries_index_end = entries_index_end % self.jit_entries.$entries.len(); - self.jit_entries.$entries[entries_index_start..entries_index_end].fill($default_entry); - } + let jit_entry_start = self.jit_memory_map.get_jit_entry::<{ $cpu_entry }>(guest_addr_start); + unsafe { slice::from_raw_parts_mut(jit_entry_start, JIT_LIVE_RANGE_PAGE_SIZE as usize).fill( + match $cpu_entry { + ARM9 => DEFAULT_JIT_ENTRY_ARM9, + ARM7 => DEFAULT_JIT_ENTRY_ARM7, + } + ) } )* } }}; @@ -256,24 +247,22 @@ impl JitMemory { match REGION { JitRegion::Itcm => { - invalidate!(guest_addr, itcm, [(itcm, DEFAULT_JIT_ENTRY_ARM9)]); - invalidate!(guest_addr + size as u32 - 1, itcm, [(itcm, DEFAULT_JIT_ENTRY_ARM9)]); + invalidate!(guest_addr, itcm, ARM9, [(ARM9, itcm)]); + invalidate!(guest_addr + size as u32 - 1, itcm, ARM9, [(ARM9, itcm)]); } JitRegion::Main => { - invalidate!(guest_addr, main, [(main_arm9, DEFAULT_JIT_ENTRY_ARM9), (main_arm7, DEFAULT_JIT_ENTRY_ARM7)]); - invalidate!(guest_addr + size as u32 - 1, main, [(main_arm9, DEFAULT_JIT_ENTRY_ARM9), (main_arm7, DEFAULT_JIT_ENTRY_ARM7)]); + invalidate!(guest_addr, main, ARM9, [(ARM9, main_arm9), (ARM7, main_arm7)]); + invalidate!(guest_addr + size as u32 - 1, main, ARM9, [(ARM9, main_arm9), (ARM7, main_arm7)]); } JitRegion::Wram => { - invalidate!(guest_addr, wram, [(wram, DEFAULT_JIT_ENTRY_ARM7)]); - invalidate!(guest_addr + size as u32 - 1, wram, [(wram, DEFAULT_JIT_ENTRY_ARM7)]); + invalidate!(guest_addr, wram, ARM7, [(ARM7, wram)]); + invalidate!(guest_addr + size as u32 - 1, wram, ARM7, [(ARM7, wram)]); } JitRegion::VramArm7 => { - invalidate!(guest_addr, vram_arm7, [(vram_arm7, DEFAULT_JIT_ENTRY_ARM7)]); - invalidate!(guest_addr + size as u32 - 1, vram_arm7, [(vram_arm7, DEFAULT_JIT_ENTRY_ARM7)]); + invalidate!(guest_addr, vram_arm7, ARM7, [(ARM7, vram_arm7)]); + invalidate!(guest_addr + size as u32 - 1, vram_arm7, ARM7, [(ARM7, vram_arm7)]); } } - - should_breakout } pub fn invalidate_wram(&mut self) { diff --git a/src/jit/jit_memory_map.rs b/src/jit/jit_memory_map.rs index f5177f4..d5050ec 100644 --- a/src/jit/jit_memory_map.rs +++ b/src/jit/jit_memory_map.rs @@ -1,22 +1,27 @@ use crate::core::memory::regions; use crate::core::CpuType; -use crate::jit::jit_memory::{JitEntries, JitEntry}; +use crate::jit::jit_memory::{JitEntries, JitEntry, JitLiveRanges, JIT_LIVE_RANGE_PAGE_SIZE_SHIFT}; use crate::utils::HeapMemU32; const BLOCK_SHIFT: usize = 13; const BLOCK_SIZE: usize = 1 << BLOCK_SHIFT; const SIZE: usize = (1 << 31) / BLOCK_SIZE; +const LIVE_RANGES_SIZE: usize = 1 << (32 - JIT_LIVE_RANGE_PAGE_SIZE_SHIFT - 5); pub struct JitMemoryMap { map_arm9: HeapMemU32, map_arm7: HeapMemU32, + live_ranges_map_arm9: HeapMemU32, + live_ranges_map_arm7: HeapMemU32, } impl JitMemoryMap { - pub fn new(entries: &JitEntries) -> Self { + pub fn new(entries: &JitEntries, live_ranges: &JitLiveRanges) -> Self { let mut instance = JitMemoryMap { map_arm9: HeapMemU32::new(), map_arm7: HeapMemU32::new(), + live_ranges_map_arm9: HeapMemU32::new(), + live_ranges_map_arm7: HeapMemU32::new(), }; for i in 0..SIZE { @@ -47,14 +52,42 @@ impl JitMemoryMap { } } + for i in 0..LIVE_RANGES_SIZE { + let addr = i << (JIT_LIVE_RANGE_PAGE_SIZE_SHIFT + 5); + let arm9_ptr = &mut instance.live_ranges_map_arm9[i]; + let arm7_ptr = &mut instance.live_ranges_map_arm7[i]; + + macro_rules! get_ptr { + ($live_ranges:expr) => {{ + (unsafe { $live_ranges.as_ptr().add(i % $live_ranges.len()) } as u32) + }}; + } + + match (addr as u32) & 0xFF000000 { + 0 => { + *arm9_ptr = get_ptr!(live_ranges.itcm); + *arm7_ptr = get_ptr!(live_ranges.arm7_bios); + } + regions::INSTRUCTION_TCM_MIRROR_OFFSET => *arm9_ptr = get_ptr!(live_ranges.itcm), + regions::MAIN_MEMORY_OFFSET => { + *arm9_ptr = get_ptr!(live_ranges.main); + *arm7_ptr = get_ptr!(live_ranges.main); + } + regions::SHARED_WRAM_OFFSET => *arm7_ptr = get_ptr!(live_ranges.wram), + regions::VRAM_OFFSET => *arm7_ptr = get_ptr!(live_ranges.vram_arm7), + 0xFF000000 => *arm9_ptr = get_ptr!(live_ranges.arm9_bios), + _ => {} + } + } + instance } - pub fn get_jit_entry(&self, addr: u32) -> *const JitEntry { + pub fn get_jit_entry(&self, addr: u32) -> *mut JitEntry { let addr = addr >> 1; macro_rules! get_jit_entry { ($map:expr) => {{ - unsafe { ($map[(addr >> BLOCK_SHIFT) as usize] as *const JitEntry).add((addr as usize) & (BLOCK_SIZE - 1)) } + unsafe { ($map[(addr >> BLOCK_SHIFT) as usize] as *mut JitEntry).add((addr as usize) & (BLOCK_SIZE - 1)) } }}; } match CPU { @@ -62,4 +95,11 @@ impl JitMemoryMap { CpuType::ARM7 => get_jit_entry!(self.map_arm7), } } + + pub fn get_live_range(&self, addr: u32) -> *mut u32 { + match CPU { + CpuType::ARM9 => self.live_ranges_map_arm9[(addr >> (JIT_LIVE_RANGE_PAGE_SIZE_SHIFT + 5)) as usize] as _, + CpuType::ARM7 => self.live_ranges_map_arm7[(addr >> (JIT_LIVE_RANGE_PAGE_SIZE_SHIFT + 5)) as usize] as _, + } + } } diff --git a/src/main.rs b/src/main.rs index e59507c..ddbf567 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,7 @@ #![feature(isqrt)] #![feature(naked_functions)] #![feature(new_zeroed_alloc)] +#![feature(ptr_as_ref_unchecked)] #![feature(seek_stream_len)] #![feature(stmt_expr_attributes)] @@ -255,7 +256,9 @@ pub fn main() { // #[export_name = "sceUserMainThreadStackSize"] // pub static SCE_USER_MAIN_THREAD_STACK_SIZE: u32 = 4 * 1024 * 1024; // Instead just create a new thread with stack size set - set_thread_prio_affinity(ThreadPriority::Low, ThreadAffinity::Core1); + if cfg!(target_os = "vita") { + set_thread_prio_affinity(ThreadPriority::Low, ThreadAffinity::Core0); + } thread::Builder::new() .name("actual_main".to_string()) .stack_size(4 * 1024 * 1024) // We reserve 2MB for jit registers @@ -267,7 +270,9 @@ pub fn main() { // Must be pub for vita pub fn actual_main() { - set_thread_prio_affinity(ThreadPriority::High, ThreadAffinity::Core0); + if cfg!(target_os = "vita") { + set_thread_prio_affinity(ThreadPriority::High, ThreadAffinity::Core1); + } if DEBUG_LOG { std::env::set_var("RUST_BACKTRACE", "full"); @@ -295,7 +300,7 @@ pub fn actual_main() { thread::Builder::new() .name("audio".to_owned()) .spawn(move || { - set_thread_prio_affinity(ThreadPriority::Default, ThreadAffinity::Core1); + set_thread_prio_affinity(ThreadPriority::Default, ThreadAffinity::Core0); let mut audio_buffer = HeapMemU32::<{ PRESENTER_AUDIO_BUF_SIZE }>::new(); loop { sound_sampler.consume(audio_buffer.deref_mut()); diff --git a/src/utils.rs b/src/utils.rs index 4dc2e15..ab8287d 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -188,6 +188,7 @@ pub enum ThreadPriority { High, } +#[derive(Copy, Clone)] #[repr(u8)] pub enum ThreadAffinity { Core0 = 0, @@ -197,7 +198,9 @@ pub enum ThreadAffinity { #[cfg(target_os = "linux")] pub fn set_thread_prio_affinity(_: ThreadPriority, affinity: ThreadAffinity) { - affinity::set_thread_affinity(&[affinity as usize]).unwrap(); + if (affinity as usize) < affinity::get_core_num() { + affinity::set_thread_affinity(&[affinity as usize]).unwrap(); + } } #[cfg(target_os = "vita")]