diff --git a/Cargo.toml b/Cargo.toml index 8f58f904..b623d733 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ fixed = ["dep:fixed"] [dependencies] voladdress = "1.3.0" bitfrob = "1" -bracer = "0.1.2" +bracer = "0.3.1" critical-section = { version = "1.1.2", features = [ "restore-state-bool", ], optional = true } diff --git a/examples/mode3_pong_example_game.rs b/examples/mode3_pong_example_game.rs index 67ec9cb2..9710f891 100644 --- a/examples/mode3_pong_example_game.rs +++ b/examples/mode3_pong_example_game.rs @@ -1,12 +1,12 @@ -/* -* Made by Evan Goemer -* Discord: @evangoemer -*/ +/* + * Made by Evan Goemer + * Discord: @evangoemer + */ #![no_std] #![no_main] -use gba::{prelude::*, mem_fns::__aeabi_memset}; +use gba::{mem_fns::__aeabi_memset, prelude::*}; const SCREEN_WIDTH: u16 = 240; const SCREEN_HEIGHT: u16 = 160; @@ -16,91 +16,87 @@ const PADDLE_HEIGHT: u16 = 20; const BALL_SIZE: u16 = 2; struct Paddle { - x: u16, - y: u16, + x: u16, + y: u16, } struct Ball { - x: u16, - y: u16, - dx: i16, - dy: i16, + x: u16, + y: u16, + dx: i16, + dy: i16, } impl Paddle { - fn new(x: u16, y: u16) -> Self { - Self { - x, - y, - } + fn new(x: u16, y: u16) -> Self { + Self { x, y } + } + + fn update(&mut self) { + let keys = KEYINPUT.read(); + if keys.up() && self.y > 1 { + self.y -= 1; } - fn update(&mut self) { - let keys = KEYINPUT.read(); - if keys.up() && self.y > 1 { - self.y -= 1; - } - - if keys.down() && self.y + PADDLE_HEIGHT + 1 < SCREEN_HEIGHT { - self.y += 1; - } + if keys.down() && self.y + PADDLE_HEIGHT + 1 < SCREEN_HEIGHT { + self.y += 1; } + } } impl Ball { - fn new(x: u16, y: u16) -> Self { - Self { x, y, dx: 1, dy: 1 } + fn new(x: u16, y: u16) -> Self { + Self { x, y, dx: 1, dy: 1 } + } + + fn update(&mut self, paddle1: &Paddle, paddle2: &Paddle) { + if self.y <= 0 || self.y + BALL_SIZE >= SCREEN_HEIGHT { + self.dy = -self.dy; + } + + if self.x + BALL_SIZE >= paddle1.x + && self.x <= paddle1.x + PADDLE_WIDTH + && self.y + BALL_SIZE >= paddle1.y + && self.y <= paddle1.y + PADDLE_HEIGHT + { + self.dx = -self.dx; + self.dy = -self.dy; + } + + if self.x + BALL_SIZE >= paddle2.x + && self.x <= paddle2.x + PADDLE_WIDTH + && self.y + BALL_SIZE >= paddle2.y + && self.y <= paddle2.y + PADDLE_HEIGHT + { + self.dx = -self.dx; + self.dy = -self.dy; } - fn update(&mut self, paddle1: &Paddle, paddle2: &Paddle) { - if self.y <= 0 || self.y + BALL_SIZE >= SCREEN_HEIGHT { - self.dy = -self.dy; - } - - if self.x + BALL_SIZE >= paddle1.x - && self.x <= paddle1.x + PADDLE_WIDTH - && self.y + BALL_SIZE >= paddle1.y - && self.y <= paddle1.y + PADDLE_HEIGHT - { - self.dx = -self.dx; - self.dy = -self.dy; - } - - if self.x + BALL_SIZE >= paddle2.x - && self.x <= paddle2.x + PADDLE_WIDTH - && self.y + BALL_SIZE >= paddle2.y - && self.y <= paddle2.y + PADDLE_HEIGHT - { - self.dx = -self.dx; - self.dy = -self.dy; - } - - - if self.x + BALL_SIZE <= 1 + BALL_SIZE { - self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; - self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; - self.dx = 1; - self.dy = 1; - } - - if self.x >= SCREEN_WIDTH - BALL_SIZE - 1 { - self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; - self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; - self.dx = -1; - self.dy = 1; - } - self.x = (self.x as i16 + self.dx) as u16; - self.y = (self.y as i16 + self.dy) as u16; + if self.x + BALL_SIZE <= 1 + BALL_SIZE { + self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; + self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; + self.dx = 1; + self.dy = 1; } + + if self.x >= SCREEN_WIDTH - BALL_SIZE - 1 { + self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; + self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; + self.dx = -1; + self.dy = 1; + } + self.x = (self.x as i16 + self.dx) as u16; + self.y = (self.y as i16 + self.dy) as u16; + } } static SPRITE_POSITIONS: [GbaCell; 6] = [ - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), ]; #[panic_handler] @@ -110,50 +106,72 @@ fn panic_handler(_: &core::panic::PanicInfo) -> ! { #[no_mangle] fn main() -> ! { - DISPCNT.write( - DisplayControl::new().with_video_mode(VideoMode::_3).with_show_bg2(true), - ); - - RUST_IRQ_HANDLER.write(Some(draw_sprites)); - DISPSTAT.write(DisplayStatus::new().with_irq_vblank(true)); - IE.write(IrqBits::VBLANK); - IME.write(true); - - let mut left_paddle = Paddle::new(10, SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2); - let mut right_paddle = Paddle::new(SCREEN_WIDTH as u16 - 10 - PADDLE_WIDTH, SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2); - let mut ball = Ball::new(SCREEN_WIDTH as u16 / 2, SCREEN_HEIGHT as u16 / 2); - - loop { - left_paddle.update(); - right_paddle.update(); - ball.update(&left_paddle, &right_paddle); - - SPRITE_POSITIONS[0].write(left_paddle.x); - SPRITE_POSITIONS[1].write(left_paddle.y); - SPRITE_POSITIONS[2].write(right_paddle.x); - SPRITE_POSITIONS[3].write(right_paddle.y); - SPRITE_POSITIONS[4].write(ball.x); - SPRITE_POSITIONS[5].write(ball.y); - - VBlankIntrWait(); - } + DISPCNT.write( + DisplayControl::new().with_video_mode(VideoMode::_3).with_show_bg2(true), + ); + + RUST_IRQ_HANDLER.write(Some(draw_sprites)); + DISPSTAT.write(DisplayStatus::new().with_irq_vblank(true)); + IE.write(IrqBits::VBLANK); + IME.write(true); + + let mut left_paddle = + Paddle::new(10, SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2); + let mut right_paddle = Paddle::new( + SCREEN_WIDTH as u16 - 10 - PADDLE_WIDTH, + SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2, + ); + let mut ball = Ball::new(SCREEN_WIDTH as u16 / 2, SCREEN_HEIGHT as u16 / 2); + + loop { + left_paddle.update(); + right_paddle.update(); + ball.update(&left_paddle, &right_paddle); + + SPRITE_POSITIONS[0].write(left_paddle.x); + SPRITE_POSITIONS[1].write(left_paddle.y); + SPRITE_POSITIONS[2].write(right_paddle.x); + SPRITE_POSITIONS[3].write(right_paddle.y); + SPRITE_POSITIONS[4].write(ball.x); + SPRITE_POSITIONS[5].write(ball.y); + + VBlankIntrWait(); + } } extern "C" fn draw_sprites(_bits: IrqBits) { - unsafe { - let p = VIDEO3_VRAM.as_usize() as *mut u8; - __aeabi_memset(p, 240*160*2, 0) - } - - draw_rect(SPRITE_POSITIONS[0].read(), SPRITE_POSITIONS[1].read(), PADDLE_WIDTH, PADDLE_HEIGHT, Color::WHITE); - draw_rect(SPRITE_POSITIONS[2].read(), SPRITE_POSITIONS[3].read(), PADDLE_WIDTH, PADDLE_HEIGHT, Color::WHITE); - draw_rect(SPRITE_POSITIONS[4].read(), SPRITE_POSITIONS[5].read(), BALL_SIZE, BALL_SIZE, Color::WHITE); + unsafe { + let p = VIDEO3_VRAM.as_usize() as *mut u8; + __aeabi_memset(p, 240 * 160 * 2, 0) + } + + draw_rect( + SPRITE_POSITIONS[0].read(), + SPRITE_POSITIONS[1].read(), + PADDLE_WIDTH, + PADDLE_HEIGHT, + Color::WHITE, + ); + draw_rect( + SPRITE_POSITIONS[2].read(), + SPRITE_POSITIONS[3].read(), + PADDLE_WIDTH, + PADDLE_HEIGHT, + Color::WHITE, + ); + draw_rect( + SPRITE_POSITIONS[4].read(), + SPRITE_POSITIONS[5].read(), + BALL_SIZE, + BALL_SIZE, + Color::WHITE, + ); } fn draw_rect(x: u16, y: u16, width: u16, height: u16, color: Color) { - for i in 0..width { - for j in 0..height { - VIDEO3_VRAM.index((x + i) as usize, (y + j) as usize).write(color); - } + for i in 0..width { + for j in 0..height { + VIDEO3_VRAM.index((x + i) as usize, (y + j) as usize).write(color); } + } } diff --git a/src/asm_runtime.rs b/src/asm_runtime.rs index 63f2749b..d0b0a0db 100644 --- a/src/asm_runtime.rs +++ b/src/asm_runtime.rs @@ -14,7 +14,7 @@ use crate::{ gba_cell::GbaCell, interrupts::IrqFn, mgba::MGBA_LOGGING_ENABLE_REQUEST, - mmio::{DMA3_SRC, IME, MGBA_LOG_ENABLE}, + mmio::{DMA3_SRC, IME, MGBA_LOG_ENABLE, WAITCNT}, }; /// The function pointer that the assembly runtime calls when an interrupt @@ -26,321 +26,133 @@ const DMA_32_BIT_MEMCPY: DmaControl = const DMA3_OFFSET: usize = DMA3_SRC.as_usize() - 0x0400_0000; const IME_OFFSET: usize = IME.as_usize() - 0x0400_0000; - -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".text.gba_rom_header"] -unsafe extern "C" fn __start() -> ! { - core::arch::asm!( - "b 1f", - ".space 0xE0", - "1:", /* post header */ - "mov r12, #{mmio_base}", - "add r0, r12, #{waitcnt_offset}", - "ldr r1, ={waitcnt_setting}", - "strh r1, [r0]", - - /* iwram copy */ - "ldr r4, =__iwram_word_copy_count", - bracer::when!("r4" != "#0" [label_id=1] { - "add r3, r12, #{dma3_offset}", - "mov r5, #{dma3_setting}", - "ldr r0, =__iwram_start", - "ldr r2, =__iwram_position_in_rom", - "str r2, [r3]", /* source */ - "str r0, [r3, #4]", /* destination */ - "strh r4, [r3, #8]", /* word count */ - "strh r5, [r3, #10]", /* set control bits */ - }), - - /* ewram copy */ - "ldr r4, =__ewram_word_copy_count", - bracer::when!("r4" != "#0" [label_id=1] { - "add r3, r12, #{dma3_offset}", - "mov r5, #{dma3_setting}", - "ldr r0, =__ewram_start", - "ldr r2, =__ewram_position_in_rom", - "str r2, [r3]", /* source */ - "str r0, [r3, #4]", /* destination */ - "strh r4, [r3, #8]", /* word count */ - "strh r5, [r3, #10]", /* set control bits */ - }), - - /* bss zero */ - "ldr r4, =__bss_word_clear_count", - bracer::when!("r4" != "#0" [label_id=1] { - "ldr r0, =__bss_start", - "mov r2, #0", - "2:", - "str r2, [r0], #4", - "subs r4, r4, #1", - "bne 2b", - }), - - /* assign the runtime irq handler */ - "ldr r1, ={runtime_irq_handler}", - "str r1, [r12, #-4]", - - /* ask for mGBA logging to be enabled. This should be harmless if we're not using mgba. */ - "ldr r0, ={mgba_log_enable}", - "ldr r1, ={mgba_logging_enable_request}", - "str r1, [r0]", - - /* call to rust main */ - "ldr r0, =main", - "bx r0", - // main shouldn't return, but if it does just SoftReset - "swi #0", - mmio_base = const 0x0400_0000, - waitcnt_offset = const 0x204, - waitcnt_setting = const 0x4317 /*sram8,r0:3.1,r1:4.2,r2:8.2,no_phi,prefetch*/, - dma3_offset = const DMA3_OFFSET, - dma3_setting = const DMA_32_BIT_MEMCPY.to_u16(), - runtime_irq_handler = sym runtime_irq_handler, - mgba_log_enable = const MGBA_LOG_ENABLE.as_usize(), - mgba_logging_enable_request = const MGBA_LOGGING_ENABLE_REQUEST, - options(noreturn) - ) +const WAITCNT_OFFSET: usize = WAITCNT.as_usize() - 0x0400_0000; + +core::arch::global_asm! { + bracer::put_fn_in_section!(".text.gba_rom_header"), + ".global __start", + "__start:", + + // space for the header + "b 1f", + ".space 0xE0", + "1:", /* post header */ + + // set the waitstate control to the GBATEK suggested setting. + "mov r12, #{mmio_base}", + "add r0, r12, #{waitcnt_offset}", + "ldr r1, ={waitcnt_setting}", + "strh r1, [r0]", + + // Initialize IWRAM + "ldr r4, =__iwram_word_copy_count", + bracer::when!(("r4" != "#0")[1] { + "add r3, r12, #{dma3_offset}", + "mov r5, #{dma3_setting}", + "ldr r0, =__iwram_start", + "ldr r2, =__iwram_position_in_rom", + "str r2, [r3]", /* source */ + "str r0, [r3, #4]", /* destination */ + "strh r4, [r3, #8]", /* word count */ + "strh r5, [r3, #10]", /* set control bits */ + }), + + // Initialize EWRAM + "ldr r4, =__ewram_word_copy_count", + bracer::when!(("r4" != "#0")[1] { + "add r3, r12, #{dma3_offset}", + "mov r5, #{dma3_setting}", + "ldr r0, =__ewram_start", + "ldr r2, =__ewram_position_in_rom", + "str r2, [r3]", /* source */ + "str r0, [r3, #4]", /* destination */ + "strh r4, [r3, #8]", /* word count */ + "strh r5, [r3, #10]", /* set control bits */ + }), + + // Zero the BSS region + "ldr r4, =__bss_word_clear_count", + bracer::when!(("r4" != "#0")[1] { + "ldr r0, =__bss_start", + "mov r2, #0", + "2:", + "str r2, [r0], #4", + "subs r4, r4, #1", + "bne 2b", + }), + + // Tell the BIOS where our runtime's handler is. + "ldr r1, =__runtime_irq_handler", + "str r1, [r12, #-4]", + + // Enable mGBA logging, which is harmless when not in mGBA + "ldr r0, ={mgba_log_enable}", + "ldr r1, ={mgba_logging_enable_request}", + "str r1, [r0]", + + // Call the `main` function (defined by the user's program) + "ldr r0, =main", + "bx r0", + + // `main` shouldn't return, but if it does just SoftReset + "swi #0", + + // Define Our Constants + mmio_base = const 0x0400_0000, + waitcnt_offset = const WAITCNT_OFFSET, + waitcnt_setting = const 0x4317 /*sram8,r0:3.1,r1:4.2,r2:8.2,no_phi,prefetch*/, + dma3_offset = const DMA3_OFFSET, + dma3_setting = const DMA_32_BIT_MEMCPY.to_u16(), + mgba_log_enable = const MGBA_LOG_ENABLE.as_usize(), + mgba_logging_enable_request = const MGBA_LOGGING_ENABLE_REQUEST, } -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.runtime.irq.handler"] -unsafe extern "C" fn runtime_irq_handler() { +core::arch::global_asm! { + bracer::put_fn_in_section!(".text.gba_rom_header"), + ".global __runtime_irq_handler", // On Entry: r0 = 0x0400_0000 (mmio_base) - core::arch::asm!( - /* swap IME off, user can turn it back on if they want */ - "add r12, r0, #{ime_offset}", - "mov r3, #0", - "swp r3, r3, [r12]", - - /* Read/Update IE and IF */ - "ldr r0, [r12, #-8]", - "and r0, r0, r0, LSR #16", - "strh r0, [r12, #-6]", - - /* Read/Update BIOS_IF */ - "sub r2, r12, #(0x208+8)", - "ldrh r1, [r2]", - "orr r1, r1, r0", - "strh r1, [r2]", - - /* Call the Rust fn pointer (if set), using System mode */ - "ldr r1, ={RUST_IRQ_HANDLER}", - "ldr r1, [r1]", - bracer::when!("r1" != "#0" [label_id=9] { - bracer::with_spsr_held_in!("r2", { - bracer::set_cpu_control!(System, irq_masked: false, fiq_masked: false), - - // Note(Lokathor): We are *SKIPPING* the part where we ensure that the - // System stack pointer is aligned to 8 during the call to the rust - // function. This is *technically* against the AAPCS ABI, but the GBA's - // ARMv4T CPU does not even support any instructions that require an - // alignment of 8. By not bothering to align the stack, we save about 5 - // cycles total. Which is neat, but if this were on the DS (which has an - // ARMv5TE CPU) you'd want to ensure the aligned stack. - - bracer::with_pushed_registers!("{{r2, r3, r12, lr}}", { - bracer::adr_lr_then_bx_to!(reg="r1", label_id=1) - }), - - bracer::set_cpu_control!(Supervisor, irq_masked: true, fiq_masked: false), - }), - }), - - /* Restore initial IME setting and return */ - "swp r3, r3, [r12]", - "bx lr", - ime_offset = const IME_OFFSET, - RUST_IRQ_HANDLER = sym RUST_IRQ_HANDLER, - options(noreturn) - ) -} - -// For now, the division fns can just keep living here. - -/// Returns 0 in `r0`, while placing the `numerator` into `r1`. -/// -/// This is written in that slightly strange way so that `div` function and -/// `divmod` functions can share the same code path. -/// -/// See: [__aeabi_idiv0][aeabi-division-by-zero] -/// -/// [aeabi-division-by-zero]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#division-by-zero -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -// this should literally never get called for real, so we leave it in ROM -extern "C" fn __aeabi_idiv0(numerator: i32) -> i32 { - unsafe { - core::arch::asm!( - // this comment stops rustfmt from making this a one-liner - "mov r1, r0", - "mov r0, #0", - "bx lr", - options(noreturn) - ) - } -} - -/// Returns `u32 / u32` -/// -/// This implementation is *not* the fastest possible division, but it is -/// extremely compact. -/// -/// See: [__aeabi_uidiv][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uidiv"] -extern "C" fn __aeabi_uidiv(numerator: u32, denominator: u32) -> u32 { - // Note(Lokathor): Other code in this module relies on being able to call this - // function without affecting r12, so any future implementations of this code - // **must not** destroy r12. - unsafe { - core::arch::asm!( - // Check for divide by 0 - "cmp r1, #0", - "beq {__aeabi_idiv0}", - // r3(shifted_denom) = denom - "mov r3, r1", - // while shifted_denom < (num>>1): shifted_denom =<< 1; - "cmp r3, r0, lsr #1", - "2:", - "lslls r3, r3, #1", - "cmp r3, r0, lsr #1", - "bls 2b", - // r0=quot(init 0), r1=denom, r2=num, r3=shifted_denom - "mov r2, r0", - "mov r0, #0", - // subtraction loop - "3:", - "cmp r2, r3", - "subcs r2, r2, r3", - "adc r0, r0, r0", - "mov r3, r3, lsr #1", - "cmp r3, r1", - "bcs 3b", - "bx lr", - __aeabi_idiv0 = sym __aeabi_idiv0, - options(noreturn) - ) - } -} - -/// Returns `i32 / i32` -/// -/// See: [__aeabi_idiv][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.idiv"] -extern "C" fn __aeabi_idiv(numerator: i32, denominator: i32) -> u32 { - unsafe { - core::arch::asm!( - // determine if `numerator` and `denominator` are the same sign - "eor r12, r1, r0", - // convert both values to their unsigned absolute value. - "cmp r0, #0", - "rsblt r0, r0, #0", - "cmp r1, #0", - "rsclt r1, r1, #0", - bracer::with_pushed_registers!("{{lr}}", { - // divide them using `u32` division (this will check for divide by 0) - "bl {__aeabi_uidiv}", - }), - // if they started as different signs, flip the output's sign. - "cmp r12, #0", - "rsblt r0, r0, #0", - "bx lr", - __aeabi_uidiv = sym __aeabi_uidiv, - options(noreturn) - ) - } -} - -/// Returns `(u32 / u32, u32 % u32)` in `(r0, r1)`. -/// -/// The `u64` return value is a mild lie that gets Rust to grab up both the `r0` -/// and `r1` values when the function returns. If you transmute the return value -/// into `[u32; 2]` then you can separate the two parts of the return value, and -/// it will have no runtime cost. -/// -/// See: [__aeabi_uidivmod][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uidivmod"] -extern "C" fn __aeabi_uidivmod(numerator: u32, denominator: u32) -> u64 { - unsafe { - core::arch::asm!( - // We need to save *both* input args until after the uidiv call. One of - // them can be saved in `r12` because we know our uidiv doesn't actually - // touch `r12`, while the other will be pushed onto the stack along with - // `lr`. Since the function's output will be in `r0`, we push/pop `r1`. - "mov r12, r0", - bracer::with_pushed_registers!("{{r1, lr}}", { - "bl {__aeabi_uidiv}", - }), - // Now r0 holds the `quot`, and we use it along with the input args to - // calculate the `rem`. - "mul r2, r0, r1", - "sub r1, r12, r2", - "bx lr", - __aeabi_uidiv = sym __aeabi_uidiv, - options(noreturn) - ) - } -} - -/// Returns `(i32 / i32, i32 % i32)` in `(r0, r1)`. -/// -/// The `u64` return value is a mild lie that gets Rust to grab up both the `r0` -/// and `r1` values when the function returns. If you transmute the return value -/// into `[i32; 2]` then you can separate the two parts of the return value, and -/// it will have no runtime cost. -/// -/// See: [__aeabi_idivmod][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.idivmod"] -extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 { - unsafe { - core::arch::asm!( - bracer::with_pushed_registers!("{{r4, r5, lr}}", { - // store old numerator then make it the unsigned absolute - "movs r4, r0", - "rsblt r0, r0, #0", - // store old denominator then make it the unsigned absolute - "movs r5, r1", - "rsblt r1, r1, #0", - // divmod using unsigned. - "bl {__aeabi_uidivmod}", - // if signs started opposite, quot becomes negative - "eors r12, r4, r5", - "rsblt r0, r0, #0", - // if numerator started negative, rem is negative - "cmp r4, #0", - "rsblt r1, r1, #0", - }), - "bx lr", - __aeabi_uidivmod = sym __aeabi_uidivmod, - options(noreturn) - ) - } + "__runtime_irq_handler:", + + /* swap IME off, user can turn it back on if they want */ + "add r12, r0, #{ime_offset}", + "mov r3, #0", + "swp r3, r3, [r12]", + + /* Read/Update IE and IF */ + "ldr r0, [r12, #-8]", + "and r0, r0, r0, LSR #16", + "strh r0, [r12, #-6]", + + /* Read/Update BIOS_IF */ + "sub r2, r12, #(0x208+8)", + "ldrh r1, [r2]", + "orr r1, r1, r0", + "strh r1, [r2]", + + /* Call the Rust fn pointer (if set), using System mode */ + "ldr r1, ={RUST_IRQ_HANDLER}", + "ldr r1, [r1]", + bracer::when!(("r1" != "#0")[9] { + // Note(Lokathor): We are *SKIPPING* the part where we ensure that the + // System stack pointer is aligned to 8 during the call to the rust + // function. This is *technically* against the AAPCS ABI, but the GBA's + // ARMv4T CPU does not even support any instructions that require an + // alignment of 8. By not bothering to align the stack, we save about 5 + // cycles total. Which is neat, but if this were on the DS (which has an + // ARMv5TE CPU) you'd want to ensure the aligned stack. + + bracer::a32_read_spsr_to!("r2"), + bracer::a32_set_cpu_control!(System, irq_masked= false, fiq_masked= false), + "push {{r2, r3, r12, lr}}", + bracer::a32_fake_blx!("r1"), + "pop {{r2, r3, r12, lr}}", + bracer::a32_set_cpu_control!(Supervisor, irq_masked= true, fiq_masked= false), + bracer::a32_write_spsr_from!("r2") + }), + + /* Restore initial IME setting and return */ + "swp r3, r3, [r12]", + "bx lr", + ime_offset = const IME_OFFSET, + RUST_IRQ_HANDLER = sym RUST_IRQ_HANDLER, } diff --git a/src/div_fns.bak b/src/div_fns.bak new file mode 100644 index 00000000..da9face4 --- /dev/null +++ b/src/div_fns.bak @@ -0,0 +1,184 @@ + +/// Returns 0 in `r0`, while placing the `numerator` into `r1`. +/// +/// This is written in that slightly strange way so that `div` function and +/// `divmod` functions can share the same code path. +/// +/// See: [__aeabi_idiv0][aeabi-division-by-zero] +/// +/// [aeabi-division-by-zero]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#division-by-zero +#[naked] +#[no_mangle] +#[instruction_set(arm::a32)] +// this should literally never get called for real, so we leave it in ROM +extern "C" fn __aeabi_idiv0(numerator: i32) -> i32 { + unsafe { + core::arch::asm!( + // this comment stops rustfmt from making this a one-liner + "mov r1, r0", + "mov r0, #0", + "bx lr", + options(noreturn) + ) + } +} + +/// Returns `u32 / u32` +/// +/// This implementation is *not* the fastest possible division, but it is +/// extremely compact. +/// +/// See: [__aeabi_uidiv][aeabi-integer-32-32-division] +/// +/// [aeabi-integer-32-32-division]: +/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions +#[naked] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.aeabi.uidiv"] +extern "C" fn __aeabi_uidiv(numerator: u32, denominator: u32) -> u32 { + // Note(Lokathor): Other code in this module relies on being able to call this + // function without affecting r12, so any future implementations of this code + // **must not** destroy r12. + unsafe { + core::arch::asm!( + // Check for divide by 0 + "cmp r1, #0", + "beq {__aeabi_idiv0}", + // r3(shifted_denom) = denom + "mov r3, r1", + // while shifted_denom < (num>>1): shifted_denom =<< 1; + "cmp r3, r0, lsr #1", + "2:", + "lslls r3, r3, #1", + "cmp r3, r0, lsr #1", + "bls 2b", + // r0=quot(init 0), r1=denom, r2=num, r3=shifted_denom + "mov r2, r0", + "mov r0, #0", + // subtraction loop + "3:", + "cmp r2, r3", + "subcs r2, r2, r3", + "adc r0, r0, r0", + "mov r3, r3, lsr #1", + "cmp r3, r1", + "bcs 3b", + "bx lr", + __aeabi_idiv0 = sym __aeabi_idiv0, + options(noreturn) + ) + } +} + +/// Returns `i32 / i32` +/// +/// See: [__aeabi_idiv][aeabi-integer-32-32-division] +/// +/// [aeabi-integer-32-32-division]: +/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions +#[naked] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.aeabi.idiv"] +extern "C" fn __aeabi_idiv(numerator: i32, denominator: i32) -> u32 { + unsafe { + core::arch::asm!( + // determine if `numerator` and `denominator` are the same sign + "eor r12, r1, r0", + // convert both values to their unsigned absolute value. + "cmp r0, #0", + "rsblt r0, r0, #0", + "cmp r1, #0", + "rsclt r1, r1, #0", + bracer::with_pushed_registers!("{{lr}}", { + // divide them using `u32` division (this will check for divide by 0) + "bl {__aeabi_uidiv}", + }), + // if they started as different signs, flip the output's sign. + "cmp r12, #0", + "rsblt r0, r0, #0", + "bx lr", + __aeabi_uidiv = sym __aeabi_uidiv, + options(noreturn) + ) + } +} + +/// Returns `(u32 / u32, u32 % u32)` in `(r0, r1)`. +/// +/// The `u64` return value is a mild lie that gets Rust to grab up both the `r0` +/// and `r1` values when the function returns. If you transmute the return value +/// into `[u32; 2]` then you can separate the two parts of the return value, and +/// it will have no runtime cost. +/// +/// See: [__aeabi_uidivmod][aeabi-integer-32-32-division] +/// +/// [aeabi-integer-32-32-division]: +/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions +#[naked] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.aeabi.uidivmod"] +extern "C" fn __aeabi_uidivmod(numerator: u32, denominator: u32) -> u64 { + unsafe { + core::arch::asm!( + // We need to save *both* input args until after the uidiv call. One of + // them can be saved in `r12` because we know our uidiv doesn't actually + // touch `r12`, while the other will be pushed onto the stack along with + // `lr`. Since the function's output will be in `r0`, we push/pop `r1`. + "mov r12, r0", + bracer::with_pushed_registers!("{{r1, lr}}", { + "bl {__aeabi_uidiv}", + }), + // Now r0 holds the `quot`, and we use it along with the input args to + // calculate the `rem`. + "mul r2, r0, r1", + "sub r1, r12, r2", + "bx lr", + __aeabi_uidiv = sym __aeabi_uidiv, + options(noreturn) + ) + } +} + +/// Returns `(i32 / i32, i32 % i32)` in `(r0, r1)`. +/// +/// The `u64` return value is a mild lie that gets Rust to grab up both the `r0` +/// and `r1` values when the function returns. If you transmute the return value +/// into `[i32; 2]` then you can separate the two parts of the return value, and +/// it will have no runtime cost. +/// +/// See: [__aeabi_idivmod][aeabi-integer-32-32-division] +/// +/// [aeabi-integer-32-32-division]: +/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions +#[naked] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.aeabi.idivmod"] +extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 { + unsafe { + core::arch::asm!( + bracer::with_pushed_registers!("{{r4, r5, lr}}", { + // store old numerator then make it the unsigned absolute + "movs r4, r0", + "rsblt r0, r0, #0", + // store old denominator then make it the unsigned absolute + "movs r5, r1", + "rsblt r1, r1, #0", + // divmod using unsigned. + "bl {__aeabi_uidivmod}", + // if signs started opposite, quot becomes negative + "eors r12, r4, r5", + "rsblt r0, r0, #0", + // if numerator started negative, rem is negative + "cmp r4, #0", + "rsblt r1, r1, #0", + }), + "bx lr", + __aeabi_uidivmod = sym __aeabi_uidivmod, + options(noreturn) + ) + } +} diff --git a/src/lib.rs b/src/lib.rs index 7388f286..d6837d9d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,4 @@ #![no_std] -#![feature(naked_functions)] #![warn(clippy::missing_inline_in_public_items)] #![allow(clippy::let_and_return)] #![allow(clippy::result_unit_err)] @@ -104,8 +103,8 @@ pub mod fixed; pub mod gba_cell; pub mod interrupts; pub mod keys; -#[cfg(feature = "on_gba")] -pub mod mem_fns; +// #[cfg(feature = "on_gba")] +// pub mod mem_fns; #[cfg(feature = "on_gba")] pub mod mgba; #[cfg(feature = "on_gba")] diff --git a/src/mem_fns.rs b/src/mem_fns.bak similarity index 100% rename from src/mem_fns.rs rename to src/mem_fns.bak