Skip to content

Commit

Permalink
Implement return stack
Browse files Browse the repository at this point in the history
  • Loading branch information
Grarak committed Oct 6, 2024
1 parent b1337f6 commit b80f0dc
Show file tree
Hide file tree
Showing 12 changed files with 292 additions and 102 deletions.
5 changes: 2 additions & 3 deletions src/core/graphics/gpu_3d/registers_3d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,6 @@ impl Gpu3DRegisters {
let mut param_count = FIFO_PARAM_COUNTS[entry.cmd as usize];
if param_count > 1 {
if param_count as usize > self.cmd_fifo.len() {
refresh_state(self);
break;
}

Expand Down Expand Up @@ -567,10 +566,10 @@ impl Gpu3DRegisters {
if self.cmd_pipe_size as usize > self.cmd_fifo.len() {
self.cmd_pipe_size = self.cmd_fifo.len() as u8;
}

refresh_state(self);
}

refresh_state(self);

if !self.is_cmd_fifo_full() {
get_cpu_regs_mut!(emu, ARM9).unhalt(1);
}
Expand Down
10 changes: 10 additions & 0 deletions src/jit/assembler/arm/branch_assembler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ impl B {
u4::new(cond as u8),
))
}

/// Builds the 32-bit encoding produced by the `B` bitfield for a `bl` instruction.
///
/// * `imm`  - signed branch offset; only its low 24 bits end up in the encoding.
/// * `cond` - condition code, stored in the 4-bit field.
///
/// The remaining fields are the constants `1` (1-bit field) and `0b101` (3-bit field).
/// NOTE(review): presumably these are the link bit and the ARM branch opcode bits —
/// confirm against the `B` bitfield definition.
pub fn bl(imm: i32, cond: Cond) -> u32 {
    // `(imm << 8) >> 8` sign-extends from bit 23 (arithmetic shift on `i32`);
    // the mask then keeps exactly the low 24 bits so the value fits the `u24` field.
    let offset_bits = (((imm << 8) >> 8) & 0xFFFFFF) as u32;

    let encoded = B::new(
        u24::new(offset_bits),
        u1::new(1),
        u3::new(0b101),
        u4::new(cond as u8),
    );
    u32::from(encoded)
}
}

#[bitsize(32)]
Expand Down
10 changes: 10 additions & 0 deletions src/jit/assembler/block_asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,18 +249,23 @@ impl<'a> BlockAsm<'a> {
/// Byte-sized read: forwards `op0`, `op1` and `op2` unchanged to `transfer_read`
/// with `MemoryAmount::Byte`.
///
/// NOTE(review): the `false` flag presumably selects an unsigned (zero-extending)
/// load — confirm against `transfer_read`.
pub fn load_u8(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
self.transfer_read(op0, op1, op2, false, MemoryAmount::Byte)
}

/// Byte-sized write: forwards `op0`, `op1` and `op2` unchanged to `transfer_write`
/// with `MemoryAmount::Byte`.
///
/// NOTE(review): the meaning of the `false` flag is defined by `transfer_write`,
/// which is not visible here — confirm (presumably "not signed").
pub fn store_u8(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
self.transfer_write(op0, op1, op2, false, MemoryAmount::Byte)
}

/// Half-word read: forwards `op0`, `op1` and `op2` unchanged to `transfer_read`
/// with `MemoryAmount::Half`.
///
/// NOTE(review): the `false` flag presumably selects an unsigned (zero-extending)
/// load — confirm against `transfer_read`.
pub fn load_u16(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
self.transfer_read(op0, op1, op2, false, MemoryAmount::Half)
}

/// Half-word write: forwards `op0`, `op1` and `op2` unchanged to `transfer_write`
/// with `MemoryAmount::Half`.
///
/// NOTE(review): the meaning of the `false` flag is defined by `transfer_write`,
/// which is not visible here — confirm (presumably "not signed").
pub fn store_u16(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
self.transfer_write(op0, op1, op2, false, MemoryAmount::Half)
}

/// Word-sized read: forwards `op0`, `op1` and `op2` unchanged to `transfer_read`
/// with `MemoryAmount::Word`.
///
/// NOTE(review): the meaning of the `false` flag is defined by `transfer_read`,
/// which is not visible here — confirm (presumably "not signed").
pub fn load_u32(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
self.transfer_read(op0, op1, op2, false, MemoryAmount::Word)
}

/// Word-sized write: forwards `op0`, `op1` and `op2` unchanged to `transfer_write`
/// with `MemoryAmount::Word`.
///
/// NOTE(review): the meaning of the `false` flag is defined by `transfer_write`,
/// which is not visible here — confirm (presumably "not signed").
pub fn store_u32(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
self.transfer_write(op0, op1, op2, false, MemoryAmount::Word)
}
Expand Down Expand Up @@ -401,6 +406,11 @@ impl<'a> BlockAsm<'a> {
self.buf.insts.push(BlockInst::Epilogue);
}

/// Emits an `add` that advances the fixed `SP` register by `ANY_REG_LIMIT` 4-byte
/// slots, then appends a `BlockInst::Epilogue` to the instruction buffer.
///
/// NOTE(review): presumably the SP adjustment discards the current block's
/// register spill area so the epilogue unwinds the previous block's frame —
/// confirm against the prologue's stack layout.
pub fn epilogue_previous_block(&mut self) {
    // Size in bytes of the stack region skipped before the epilogue runs.
    let stack_adjust = ANY_REG_LIMIT as u32 * 4;
    self.add(BlockReg::Fixed(Reg::SP), BlockReg::Fixed(Reg::SP), stack_adjust);

    self.buf.insts.push(BlockInst::Epilogue);
}

/// Zero-argument call: forwards `func` to `call_internal` with all four optional
/// operands set to `None` and the trailing flag set to `true`.
///
/// NOTE(review): the trailing `true` presumably marks the call as returning
/// (cf. the `has_return` field of `BlockInst::Call`) — confirm against
/// `call_internal`.
pub fn call(&mut self, func: impl Into<BlockOperand>) {
self.call_internal(func, None::<BlockOperand>, None::<BlockOperand>, None::<BlockOperand>, None::<BlockOperand>, true)
}
Expand Down
5 changes: 3 additions & 2 deletions src/jit/assembler/block_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ impl BlockInst {
(block_reg_set!(Some(*thread_regs_addr_reg)), outputs)
}

BlockInst::Call { func_reg, args, .. } => {
BlockInst::Call { func_reg, args, has_return } => {
let mut inputs = BlockRegSet::new();
inputs += *func_reg;
for arg in args {
Expand All @@ -180,7 +180,8 @@ impl BlockInst {
Some(BlockReg::Fixed(Reg::R2)),
Some(BlockReg::Fixed(Reg::R3)),
Some(BlockReg::Fixed(Reg::R12)),
Some(BlockReg::Fixed(Reg::CPSR))
Some(BlockReg::Fixed(Reg::CPSR)),
if *has_return { Some(BlockReg::Fixed(Reg::LR)) } else { None }
),
)
}
Expand Down
2 changes: 1 addition & 1 deletion src/jit/disassembler/thumb/branch_instructions_thumb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ mod branch_thumb_ops {
#[inline]
pub fn blx_reg_t(opcode: u16, op: Op) -> InstInfoThumb {
let op0 = Reg::from(((opcode >> 3) & 0xF) as u8);
InstInfoThumb::new(opcode, op, Operands::new_1(Operand::reg(op0)), reg_reserve!(op0), reg_reserve!(Reg::CPSR), 1)
InstInfoThumb::new(opcode, op, Operands::new_1(Operand::reg(op0)), reg_reserve!(op0), reg_reserve!(Reg::LR, Reg::CPSR), 1)
}

#[inline]
Expand Down
41 changes: 31 additions & 10 deletions src/jit/emitter/emit.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::core::CpuType;
use crate::core::CpuType::ARM7;
use crate::jit::assembler::block_asm::BlockAsm;
use crate::jit::assembler::BlockReg;
use crate::jit::assembler::{BlockLabel, BlockReg};
use crate::jit::inst_threag_regs_handler::{register_restore_spsr, restore_thumb_after_restore_spsr, set_pc_arm_mode};
use crate::jit::jit_asm::{JitAsm, JitRuntimeData};
use crate::jit::op::Op;
Expand Down Expand Up @@ -60,6 +60,16 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
block_asm.call(restore_thumb_after_restore_spsr::<CPU> as *const ());
}

if (op.is_mov() && self.jit_buf.current_inst().src_regs.is_reserved(Reg::LR) && !self.jit_buf.current_inst().out_regs.is_reserved(Reg::CPSR))
|| (op.is_multiple_mem_transfer() && *self.jit_buf.current_inst().operands()[0].as_reg_no_shift().unwrap() == Reg::SP)
|| (op.is_single_mem_transfer() && self.jit_buf.current_inst().src_regs.is_reserved(Reg::SP))
{
let guest_pc_reg = block_asm.new_reg();
block_asm.load_u32(guest_pc_reg, block_asm.thread_regs_addr_reg, Reg::PC as u32 * 4);
self.emit_branch_return_stack_common(block_asm, guest_pc_reg);
block_asm.free_reg(guest_pc_reg);
}

self.emit_branch_out_metadata(block_asm);
block_asm.epilogue();
}
Expand All @@ -73,7 +83,7 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {

let accumulated_cycles_reg = block_asm.new_reg();
block_asm.load_u16(accumulated_cycles_reg, runtime_data_addr_reg, JitRuntimeData::get_accumulated_cycles_offset() as u32);

// +2 for branching
block_asm.add(
result_accumulated_cycles_reg,
Expand Down Expand Up @@ -127,10 +137,11 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
self._emit_branch_out_metadata(block_asm, true, true)
}

pub fn emit_flush_cycles<ContinueFn: Fn(&mut Self, &mut BlockAsm, BlockReg), BreakoutFn: Fn(&mut Self, &mut BlockAsm)>(
pub fn emit_flush_cycles<ContinueFn: Fn(&mut Self, &mut BlockAsm, BlockReg, BlockLabel), BreakoutFn: Fn(&mut Self, &mut BlockAsm)>(
&mut self,
block_asm: &mut BlockAsm,
target_pre_cycle_count_sum: u16,
target_pre_cycle_count_sum: Option<u16>,
add_continue_label: bool,
continue_fn: ContinueFn,
breakout_fn: BreakoutFn,
) {
Expand All @@ -140,7 +151,7 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
let result_accumulated_cycles_reg = block_asm.new_reg();
self.emit_count_cycles(block_asm, runtime_data_addr_reg, result_accumulated_cycles_reg);

const MAX_LOOP_CYCLE_COUNT: u32 = 255;
const MAX_LOOP_CYCLE_COUNT: u32 = 127;
block_asm.cmp(
result_accumulated_cycles_reg,
match CPU {
Expand All @@ -149,18 +160,28 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
},
);

let continue_label = if add_continue_label { Some(block_asm.new_label()) } else { None };
let breakout_label = block_asm.new_label();
block_asm.branch(breakout_label, Cond::HS);

let target_pre_cycle_count_sum_reg = block_asm.new_reg();
block_asm.mov(target_pre_cycle_count_sum_reg, target_pre_cycle_count_sum as u32);
block_asm.store_u16(target_pre_cycle_count_sum_reg, runtime_data_addr_reg, JitRuntimeData::get_pre_cycle_count_sum_offset() as u32);
continue_fn(self, block_asm, runtime_data_addr_reg);
if let Some(target_pre_cycle_count_sum) = target_pre_cycle_count_sum {
let target_pre_cycle_count_sum_reg = block_asm.new_reg();
block_asm.mov(target_pre_cycle_count_sum_reg, target_pre_cycle_count_sum as u32);
block_asm.store_u16(target_pre_cycle_count_sum_reg, runtime_data_addr_reg, JitRuntimeData::get_pre_cycle_count_sum_offset() as u32);
block_asm.free_reg(target_pre_cycle_count_sum_reg);
}
continue_fn(self, block_asm, runtime_data_addr_reg, breakout_label);
if add_continue_label {
block_asm.branch(continue_label.unwrap(), Cond::AL);
}

block_asm.label(breakout_label);
breakout_fn(self, block_asm);

block_asm.free_reg(target_pre_cycle_count_sum_reg);
if add_continue_label {
block_asm.label(continue_label.unwrap());
}

block_asm.free_reg(result_accumulated_cycles_reg);
block_asm.free_reg(runtime_data_addr_reg);
}
Expand Down
Loading

0 comments on commit b80f0dc

Please sign in to comment.