From f44917243c3b24ae408acd8ef25a7627944917ca Mon Sep 17 00:00:00 2001 From: Javier Alvarez Date: Wed, 22 May 2024 10:59:07 +0200 Subject: [PATCH] Add profiling feature To avoid heavy inlining and provide more context on where time is being spent. --- Cargo.toml | 3 +++ ppu/Cargo.toml | 3 +++ ppu/src/lib.rs | 13 ++++++++++++- ppu/src/oam.rs | 2 ++ ppu/src/regs.rs | 2 ++ ppu/src/vram.rs | 6 ++++++ sm83/Cargo.toml | 3 +++ sm83/src/core.rs | 30 ++++++++++++++++++++++++++++++ sm83/src/decoder.rs | 2 ++ 9 files changed, 63 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e5d262b..6b46481 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,6 @@ members = [ exclude = [ "rusty-date", ] + +[profile.release] +debug = true diff --git a/ppu/Cargo.toml b/ppu/Cargo.toml index c8bdd6d..4351103 100644 --- a/ppu/Cargo.toml +++ b/ppu/Cargo.toml @@ -11,6 +11,9 @@ categories = ["embedded", "gaming"] keywords = ["embedded", "gameboy", "playdate"] readme = "../README.md" +[features] +profile = [] + [dependencies] sm83 = { path = "../sm83", version = "0.1.0" } tock-registers = "0.9.0" diff --git a/ppu/src/lib.rs b/ppu/src/lib.rs index b386ac4..9f5393a 100644 --- a/ppu/src/lib.rs +++ b/ppu/src/lib.rs @@ -112,6 +112,7 @@ const OBJ_OFFSET_X: usize = 8; static_assertions::const_assert_eq!(70224, LINE_LENGTH * NUM_LINES); +#[cfg_attr(feature = "profile", inline(never))] fn mode_for_current_cycle_count(line_cycles: Cycles, line: usize) -> Mode { // This is a simplified implementation that considers a fixed time mode 3. let line_cycles: usize = line_cycles.into(); @@ -159,7 +160,8 @@ impl Ppu { self.mode } - pub fn update_line_and_cycles(&mut self, cycles: Cycles) { + #[cfg_attr(feature = "profile", inline(never))] + fn update_line_and_cycles(&mut self, cycles: Cycles) { self.cycles = self.cycles + cycles; if self.cycles >= Cycles::new(LINE_LENGTH) { self.cycles = self.cycles - Cycles::new(LINE_LENGTH); @@ -171,6 +173,7 @@ impl Ppu { } /// Runs the PPU for the given number of cycles and then returns the PPU state + #[cfg_attr(feature = "profile", inline(never))] pub fn step( &mut self, cycles: Cycles, @@ -193,6 +196,7 @@ impl Ppu { &self.framebuffer } + #[cfg_attr(feature = "profile", inline(never))] fn update_lcd_irq(&mut self) -> Interrupts { let lyc_eq_ly = self.regs.status.read(STAT::LYC_INT_SELECT) != 0 && self.regs.status.read(STAT::LYC_EQ_LY) != 0; @@ -217,6 +221,7 @@ impl Ppu { } } + #[cfg_attr(feature = "profile", inline(never))] fn oam_scan(&mut self) { self.selected_oam_entries = heapless::Vec::new(); @@ -238,6 +243,7 @@ impl Ppu { .collect(); } + #[cfg_attr(feature = "profile", inline(never))] fn step_inner(&mut self, new_mode: Mode, render: bool) -> (Interrupts, PpuResult) { const NO_IRQ: Interrupts = Interrupts::new(); if self.mode == new_mode { @@ -265,6 +271,7 @@ impl Ppu { (NO_IRQ, PpuResult::InProgress(self.mode)) } + #[cfg_attr(feature = "profile", inline(never))] fn draw_line_background(&self, line: &mut [PaletteIndex; DISPLAY_WIDTH]) -> Palette { let bg_win_enable = self.regs.lcdc.read(regs::LCDC::BG_AND_WINDOW_ENABLE) != 0; if !bg_win_enable { @@ -319,6 +326,7 @@ impl Ppu { self.regs.bg_palette } + #[cfg_attr(feature = "profile", inline(never))] fn draw_line_window(&self, line: &mut [PaletteIndex; DISPLAY_WIDTH]) { let bg_win_enable = self.regs.lcdc.read(regs::LCDC::BG_AND_WINDOW_ENABLE) != 0; if !bg_win_enable { @@ -374,6 +382,7 @@ impl Ppu { .for_each(|(dest, palette_index)| *dest = palette_index); } + #[cfg_attr(feature = "profile", inline(never))] fn draw_line_objects( &self, bg_line: &[PaletteIndex; DISPLAY_WIDTH], @@ -445,6 +454,7 @@ impl Ppu { } } + #[cfg_attr(feature = "profile", inline(never))] fn draw_line(&mut self) { if self.regs.lcdc.read(regs::LCDC::ENABLE) == 0 { return; @@ -478,6 +488,7 @@ impl Ppu { } } + #[cfg_attr(feature = "profile", inline(never))] fn update_registers(&mut self) { let line = self.line as u8; self.regs.ly = line; diff --git a/ppu/src/oam.rs b/ppu/src/oam.rs index 1fefad8..e927b6c 100644 --- a/ppu/src/oam.rs +++ b/ppu/src/oam.rs @@ -131,11 +131,13 @@ impl Oam { &*self.objects } + #[cfg_attr(feature = "profile", inline(never))] pub fn read(&self, address: sm83::memory::Address) -> u8 { let (object_idx, object_member_offset) = Self::cpu_addr_to_object_addr(address); self.objects[object_idx].read(object_member_offset) } + #[cfg_attr(feature = "profile", inline(never))] pub fn write(&mut self, address: sm83::memory::Address, value: u8) { let (object_idx, object_member_offset) = Self::cpu_addr_to_object_addr(address); self.objects[object_idx].write(object_member_offset, value); diff --git a/ppu/src/regs.rs b/ppu/src/regs.rs index 5538540..5c080c2 100644 --- a/ppu/src/regs.rs +++ b/ppu/src/regs.rs @@ -159,6 +159,7 @@ impl Registers { .set((self.status.get() & RO_BITS) | (new_val & !RO_BITS)); } + #[cfg_attr(feature = "profile", inline(never))] pub fn read(&self, address: sm83::memory::Address) -> u8 { match address { 0xFF40 => self.lcdc.get(), @@ -179,6 +180,7 @@ impl Registers { } } + #[cfg_attr(feature = "profile", inline(never))] pub fn write(&mut self, address: sm83::memory::Address, value: u8) { match address { 0xFF40 => self.lcdc.set(value), diff --git a/ppu/src/vram.rs b/ppu/src/vram.rs index e04f66b..ecf4f7f 100644 --- a/ppu/src/vram.rs +++ b/ppu/src/vram.rs @@ -211,6 +211,7 @@ impl TileMap { }) } + #[cfg_attr(feature = "profile", inline(never))] pub fn line(&self, line: usize) -> &[TileIndex; TILE_MAP_WIDTH] { let line = (line / TILE_HEIGHT) % TILE_MAP_HEIGHT; &self.0[line] @@ -288,6 +289,7 @@ impl Vram { (tile_map_idx, tile_map_address) } + #[cfg_attr(feature = "profile", inline(never))] pub(crate) fn get_bg_tile_map(&self, map: crate::regs::LCDC::BG_TILE_MAP::Value) -> &TileMap { match map { crate::regs::LCDC::BG_TILE_MAP::Value::HighMap => &self.0.tile_maps[1], @@ -295,6 +297,7 @@ impl Vram { } } + #[cfg_attr(feature = "profile", inline(never))] pub(crate) fn get_win_tile_map( &self, map: crate::regs::LCDC::WINDOW_TILE_MAP::Value, @@ -305,6 +308,7 @@ impl Vram { } } + #[cfg_attr(feature = "profile", inline(never))] pub(crate) fn get_tile( &self, index: TileIndex, @@ -319,6 +323,7 @@ impl Vram { self.0.tile_blocks[block].get_tile(index) } + #[cfg_attr(feature = "profile", inline(never))] pub fn read(&self, address: sm83::memory::Address) -> u8 { if address < 0x9800 { let (blk_idx, blk_address) = Self::vram_address_to_block_address(address); @@ -329,6 +334,7 @@ impl Vram { } } + #[cfg_attr(feature = "profile", inline(never))] pub fn write(&mut self, address: sm83::memory::Address, value: u8) { if address < 0x9800 { let (blk_idx, blk_address) = Self::vram_address_to_block_address(address); diff --git a/sm83/Cargo.toml b/sm83/Cargo.toml index 81271e1..938de3d 100644 --- a/sm83/Cargo.toml +++ b/sm83/Cargo.toml @@ -11,6 +11,9 @@ categories = ["embedded", "gaming"] keywords = ["embedded", "gameboy", "playdate"] readme = "../README.md" +[features] +profile = [] + [dependencies] sm83_decoder_macros = { path = "../sm83_decoder_macros" } diff --git a/sm83/src/core.rs b/sm83/src/core.rs index e6b80b9..d8c81f8 100644 --- a/sm83/src/core.rs +++ b/sm83/src/core.rs @@ -134,6 +134,7 @@ const fn carry_bit32(a: u32, b: u32, c: u32, bit: usize) -> bool { (xor & (1 << bit)) != 0 } +#[cfg_attr(feature = "profile", inline(never))] fn add(a: u8, b: u8, carry: bool) -> (u8, Flags) { let a = a as u16; let b = b as u16; @@ -149,6 +150,7 @@ fn add(a: u8, b: u8, carry: bool) -> (u8, Flags) { (result as u8, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn add16(a: u16, b: u16, flags: Flags) -> (u16, Flags) { let a = a as u32; let b = b as u32; @@ -162,6 +164,7 @@ const fn add16(a: u16, b: u16, flags: Flags) -> (u16, Flags) { (result as u16, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn sub(a: u8, b: u8, carry: bool) -> (u8, Flags) { let a = a as u16; let b = b as u16; @@ -178,6 +181,7 @@ const fn sub(a: u8, b: u8, carry: bool) -> (u8, Flags) { (result as u8, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn and(a: u8, b: u8) -> (u8, Flags) { let result = a & b; @@ -186,18 +190,21 @@ const fn and(a: u8, b: u8) -> (u8, Flags) { (result, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn or(a: u8, b: u8) -> (u8, Flags) { let result = a | b; let flags = Flags::new().with(Flag::Z, result == 0); (result, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn xor(a: u8, b: u8) -> (u8, Flags) { let result = a ^ b; let flags = Flags::new().with(Flag::Z, result == 0); (result, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn daa(a: u8, flags: Flags) -> (u8, Flags) { let mut result = a; let mut carry = false; @@ -233,6 +240,7 @@ const fn daa(a: u8, flags: Flags) -> (u8, Flags) { // Some variants of this instruction (rlca) always set Z to 0, but others actually compute the // result +#[cfg_attr(feature = "profile", inline(never))] const fn rlc(value: u8, real_z: bool) -> (u8, Flags) { let carry = (value & 0x80) != 0; let mut shifted = value << 1; @@ -247,6 +255,7 @@ const fn rlc(value: u8, real_z: bool) -> (u8, Flags) { // Some variants of this instruction (rrca) always set Z to 0, but others actually compute the // result +#[cfg_attr(feature = "profile", inline(never))] const fn rrc(value: u8, real_z: bool) -> (u8, Flags) { let carry = (value & 0x01) != 0; let mut shifted = value >> 1; @@ -259,6 +268,7 @@ const fn rrc(value: u8, real_z: bool) -> (u8, Flags) { (shifted, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn rl(value: u8, old_carry: bool, real_z: bool) -> (u8, Flags) { let mut shifted = value << 1; if old_carry { @@ -271,6 +281,7 @@ const fn rl(value: u8, old_carry: bool, real_z: bool) -> (u8, Flags) { (shifted, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn rr(value: u8, old_carry: bool, real_z: bool) -> (u8, Flags) { let mut shifted = value >> 1; if old_carry { @@ -283,6 +294,7 @@ const fn rr(value: u8, old_carry: bool, real_z: bool) -> (u8, Flags) { (shifted, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn sla(value: u8) -> (u8, Flags) { let shifted = value << 1; let new_carry = (value & 0x80) != 0; @@ -292,6 +304,7 @@ const fn sla(value: u8) -> (u8, Flags) { (shifted, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn sra(value: u8) -> (u8, Flags) { let negative = (value & 0x80) != 0; let new_carry = (value & 0x01) != 0; @@ -302,12 +315,14 @@ const fn sra(value: u8) -> (u8, Flags) { (shifted, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn swap(value: u8) -> (u8, Flags) { let swapped = (value >> 4) | (value << 4); let flags = Flags::new().with(Flag::Z, swapped == 0); (swapped, flags) } +#[cfg_attr(feature = "profile", inline(never))] const fn srl(value: u8) -> (u8, Flags) { let new_carry = (value & 0x01) != 0; let shifted = value >> 1; @@ -321,6 +336,7 @@ const fn bit_mask(bit: Bit) -> u8 { 1 << (bit as u8) } +#[cfg_attr(feature = "profile", inline(never))] const fn bit(bit_idx: Bit, value: u8, flags: Flags) -> Flags { let bit = bit_mask(bit_idx); let z_flag = (bit & value) == 0; @@ -330,10 +346,12 @@ const fn bit(bit_idx: Bit, value: u8, flags: Flags) -> Flags { .with(Flag::Z, z_flag) } +#[cfg_attr(feature = "profile", inline(never))] const fn res(bit_idx: Bit, value: u8) -> u8 { value & !bit_mask(bit_idx) } +#[cfg_attr(feature = "profile", inline(never))] const fn set(bit_idx: Bit, value: u8) -> u8 { value | bit_mask(bit_idx) } @@ -538,6 +556,7 @@ impl Cpu { *lo = (value & 0xff) as u8; } + #[cfg_attr(feature = "profile", inline(never))] fn step_pc(&mut self) -> u16 { let regs = self.get_mut_regs(); let pc = regs.pc_reg; @@ -545,11 +564,13 @@ impl Cpu { pc } + #[cfg_attr(feature = "profile", inline(never))] fn read_8_bit_immediate(&mut self, memory: &mut T) -> u8 { let pc = self.step_pc(); memory.read(pc) } + #[cfg_attr(feature = "profile", inline(never))] fn read_16_bit_immediate(&mut self, memory: &mut T) -> u16 { let pc = self.step_pc(); let lo = memory.read(pc); @@ -567,6 +588,7 @@ impl Cpu { } } + #[cfg_attr(feature = "profile", inline(never))] fn stack_push(&mut self, memory: &mut T, value: u16) { let sp = self.get_reg_pair(RegisterPair::SP); let pos = sp.wrapping_sub(1); @@ -576,6 +598,7 @@ impl Cpu { self.set_reg_pair(RegisterPair::SP, pos); } + #[cfg_attr(feature = "profile", inline(never))] fn stack_pop(&mut self, memory: &mut T) -> u16 { let sp = self.get_reg_pair(RegisterPair::SP); let pos = sp; @@ -587,6 +610,7 @@ impl Cpu { (lo as u16) | ((hi as u16) << 8) } + #[cfg_attr(feature = "profile", inline(never))] fn fetch_and_decode(&mut self, memory: &mut T) -> OpCode { let pc = self.step_pc(); let insn = memory.read(pc); @@ -602,6 +626,7 @@ impl Cpu { } /// Executes a single CPU instruction and returns from the function. + #[cfg_attr(feature = "profile", inline(never))] pub fn step(&mut self, memory: &mut T, interrupts: Interrupts) -> ExitReason { if self.halted && !interrupts.has_any() { return ExitReason::Halt(Cycles::new(4)); @@ -624,6 +649,7 @@ impl Cpu { } } + #[cfg_attr(feature = "profile", inline(never))] fn load_8bit_with_addressing_mode( &mut self, memory: &mut T, @@ -652,6 +678,7 @@ impl Cpu { } } + #[cfg_attr(feature = "profile", inline(never))] fn store_8bit_with_addressing_mode( &mut self, memory: &mut T, @@ -687,6 +714,7 @@ impl Cpu { } } + #[cfg_attr(feature = "profile", inline(never))] fn load_16bit_with_addressing_mode( &mut self, memory: &mut T, @@ -699,6 +727,7 @@ impl Cpu { } } + #[cfg_attr(feature = "profile", inline(never))] fn store_16bit_with_addressing_mode( &mut self, memory: &mut T, @@ -720,6 +749,7 @@ impl Cpu { } } + #[cfg_attr(feature = "profile", inline(never))] fn execute(&mut self, memory: &mut T, opcode: OpCode) -> ExitReason { let cycles = match opcode { OpCode::Prefix => { diff --git a/sm83/src/decoder.rs b/sm83/src/decoder.rs index a47eceb..2a35fd1 100644 --- a/sm83/src/decoder.rs +++ b/sm83/src/decoder.rs @@ -325,11 +325,13 @@ mod generated { /// Decodes a single instruction. May return an OpCode::Prefix value, which indicates that this /// instruction is prefixed, and `decode_prefixed` must be invoked with the next byte in the stream +#[cfg_attr(feature = "profile", inline(never))] pub fn decode(byte: u8) -> OpCode { generated::DECODER_TABLE[byte as usize] } /// Decodes a prefixed instruction by looking at the byte after the 0xCB prefix byte. +#[cfg_attr(feature = "profile", inline(never))] pub fn decode_prefixed(byte: u8) -> OpCode { generated::PREFIXED_TABLE[byte as usize] }