From f9cc5c72210a0da13238c47b9af3318a5f9e2255 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 3 Jun 2024 17:50:27 -0600 Subject: [PATCH] Move USER_IRQ_HANDLER to the irq module --- examples/basic_keyinput.rs | 3 +- examples/objects.rs | 3 +- examples/paddle_ball.rs | 3 +- examples/timer.rs | 3 +- src/{asm_runtime.rs => asm.rs} | 81 +++++++++++++++++++++++++++------- src/irq.rs | 7 +++ src/lib.rs | 2 +- src/mem/copy.rs | 13 ++++-- src/mem/mod.rs | 2 + src/mem/set.rs | 49 ++++++++++++++++++++ src/video.rs | 41 ++--------------- 11 files changed, 140 insertions(+), 67 deletions(-) rename src/{asm_runtime.rs => asm.rs} (75%) create mode 100644 src/mem/set.rs diff --git a/examples/basic_keyinput.rs b/examples/basic_keyinput.rs index bc584be..ec51351 100644 --- a/examples/basic_keyinput.rs +++ b/examples/basic_keyinput.rs @@ -5,9 +5,8 @@ use core::ptr::{addr_of, addr_of_mut}; use gba::{ - asm_runtime::USER_IRQ_HANDLER, bios::VBlankIntrWait, - irq::{IrqBits, IE, IME}, + irq::{IrqBits, IE, IME, USER_IRQ_HANDLER}, keys::KEYINPUT, mgba::{MgbaLogLevel, MgbaLogger}, video::{ diff --git a/examples/objects.rs b/examples/objects.rs index 96dfdfa..dc96266 100644 --- a/examples/objects.rs +++ b/examples/objects.rs @@ -2,10 +2,9 @@ #![no_main] use gba::{ - asm_runtime::USER_IRQ_HANDLER, bios::VBlankIntrWait, gba_cell::GbaCell, - irq::{IrqBits, IE, IME}, + irq::{IrqBits, IE, IME, USER_IRQ_HANDLER}, keys::KEYINPUT, sample_art::{decompress_cga_face_to_vram_4bpp, Cga}, video::{ diff --git a/examples/paddle_ball.rs b/examples/paddle_ball.rs index 416f19e..5d02409 100644 --- a/examples/paddle_ball.rs +++ b/examples/paddle_ball.rs @@ -4,10 +4,9 @@ //! Made by Evan Goemer, Discord: @evangoemer use gba::{ - asm_runtime::USER_IRQ_HANDLER, bios::VBlankIntrWait, gba_cell::GbaCell, - irq::{IrqBits, IE, IME}, + irq::{IrqBits, IE, IME, USER_IRQ_HANDLER}, keys::{KeyInput, KEYINPUT}, video::{Color, DisplayControl, DisplayStatus, Mode3, DISPCNT, DISPSTAT}, }; diff --git a/examples/timer.rs b/examples/timer.rs index b4bed9a..43a1320 100644 --- a/examples/timer.rs +++ b/examples/timer.rs @@ -2,10 +2,9 @@ #![no_main] use gba::{ - asm_runtime::USER_IRQ_HANDLER, bios::VBlankIntrWait, gba_cell::GbaCell, - irq::{IrqBits, IE, IME}, + irq::{IrqBits, IE, IME, USER_IRQ_HANDLER}, timers::{CpusPerTick, TimerControl, TIMER0_CONTROL}, video::{ Color, DisplayControl, DisplayStatus, BACKDROP_COLOR, DISPCNT, DISPSTAT, diff --git a/src/asm_runtime.rs b/src/asm.rs similarity index 75% rename from src/asm_runtime.rs rename to src/asm.rs index 3d12d21..6311759 100644 --- a/src/asm_runtime.rs +++ b/src/asm.rs @@ -1,17 +1,28 @@ #![allow(unused_macros)] -//! Assembly runtime and support functions for the GBA. - -// Note(Lokathor): Functions here will *definitely* panic without the `on_gba` -// cargo feature enabled, and so they should all have the `track_caller` -// attribute set whenever the `on_gba` feature is *disabled* - -use crate::gba_cell::GbaCell; +//! Assembly support. +//! +//! ## Startup Code +//! +//! This module includes the `_start` function as well as the default +//! `_asm_runtime_irq_handler` that it sets. Both of which are defined within +//! [`global_asm!`][inline_asm] blocks. They are not intended to be called +//! directly from Rust, and so foreign function definitions for them are not +//! exposed to Rust in this module. +//! +//! [inline_asm]: +//! https://doc.rust-lang.org/nightly/reference/inline-assembly.html +//! +//! ## Assembly Helpers +//! +//! This module also includes a number of functions to allow you to force the +//! generation of particular assembly instructions that Rust and/or LLVM does +//! not otherwise make easy to generate. use bracer::*; /// Inserts a `nop` instruction. -#[inline(always)] +#[inline] #[cfg_attr(not(feature = "on_gba"), track_caller)] pub fn nop() { on_gba_or_unimplemented! { @@ -29,7 +40,6 @@ pub fn nop() { /// This both reads and writes `ptr`, so all the usual rules of that apply. #[inline] #[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] -#[cfg_attr(not(feature = "on_gba"), track_caller)] pub unsafe fn swp(mut ptr: *mut u32, x: u32) -> u32 { on_gba_or_unimplemented! { let output: u32; @@ -55,7 +65,6 @@ pub unsafe fn swp(mut ptr: *mut u32, x: u32) -> u32 { /// This both reads and writes `ptr`, so all the usual rules of that apply. #[inline] #[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] -#[cfg_attr(not(feature = "on_gba"), track_caller)] pub unsafe fn swpb(mut ptr: *mut u8, x: u8) -> u8 { on_gba_or_unimplemented! { let output: u8; @@ -75,6 +84,50 @@ pub unsafe fn swpb(mut ptr: *mut u8, x: u8) -> u8 { } } +/// Loads a `u16` pointer offset by `bytes` +/// +/// ## Safety +/// This is similar to `ptr.byte_add(bytes).read()`, and thus has all the same +/// safety requirements. +#[inline] +#[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] +pub unsafe fn a32_load_u16_reg_offset(ptr: *mut u16, bytes: usize) -> u16 { + on_gba_or_unimplemented! { + let output: u16; + unsafe { + core::arch::asm! { + "ldrh {output}, [{ptr}, {bytes}]", + output = lateout(reg) output, + ptr = in(reg) ptr, + bytes = in(reg) bytes, + } + } + output + } +} + +/// Loads an `i16` pointer offset by `bytes` +/// +/// ## Safety +/// This is similar to `ptr.byte_add(bytes).read()`, and thus has all the same +/// safety requirements. +#[inline] +#[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] +pub unsafe fn a32_load_i16_reg_offset(ptr: *mut i16, bytes: usize) -> i16 { + on_gba_or_unimplemented! { + let output: i16; + unsafe { + core::arch::asm! { + "ldrsh {output}, [{ptr}, {bytes}]", + output = lateout(reg) output, + ptr = in(reg) ptr, + bytes = in(reg) bytes, + } + } + output + } +} + // Proc-macros can't see the target being built for, so we use this declarative // macro to determine if we're on a thumb target (and need to force our asm into // a32 mode) or if we're not on thumb (and our asm can pass through untouched). @@ -211,11 +264,5 @@ core::arch::global_asm! { // return to the BIOS "bx lr", }, - USER_IRQ_HANDLER = sym USER_IRQ_HANDLER, + USER_IRQ_HANDLER = sym crate::irq::USER_IRQ_HANDLER, } - -/// The user-provided interrupt request handler function. -#[cfg(feature = "on_gba")] -pub static USER_IRQ_HANDLER: GbaCell< - Option, -> = GbaCell::new(None); diff --git a/src/irq.rs b/src/irq.rs index 5c7bef4..6d19704 100644 --- a/src/irq.rs +++ b/src/irq.rs @@ -1,6 +1,13 @@ //! Hardware interrupt handling use super::*; +use crate::gba_cell::GbaCell; + +/// The user-provided interrupt request handler function. +#[cfg(feature = "on_gba")] +pub static USER_IRQ_HANDLER: GbaCell< + Option, +> = GbaCell::new(None); /// Interrupt bit flags. #[derive(Clone, Copy, Default)] diff --git a/src/lib.rs b/src/lib.rs index 5b88588..bb993a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,7 +51,7 @@ macro_rules! on_gba_or_unimplemented { } } -pub mod asm_runtime; +pub mod asm; pub mod bios; pub mod dma; pub mod gba_cell; diff --git a/src/mem/copy.rs b/src/mem/copy.rs index 4c3e47f..e9b9c27 100644 --- a/src/mem/copy.rs +++ b/src/mem/copy.rs @@ -32,11 +32,16 @@ pub unsafe extern "C" fn copy_u8_unchecked( }); } -/// Copies `[u32; 8]` sized blocks, to `dest` from `src` +/// Copies `[u32; 8]` sized chunks, to `dest` from `src` /// -/// Particularly, this is the size of one [`Tile4`][crate::video::Tile4], half a -/// [`Tile8`][crate::video::Tile8], or one complete palbank of -/// [`Color`][crate::video::Color] values. +/// Particularly, this helps with: +/// * [`Tile4`][crate::video::Tile4] (one loop per tile). +/// * [`Tile8`][crate::video::Tile8] (two loops per tile). +/// * A palbank of [`Color`][crate::video::Color] values (one loop per palbank). +/// * A text mode screenblock (64 loops per screenblock). +/// +/// This will, in general, be slightly faster than a generic `memcpy`, but +/// slightly slower than using DMA. /// /// ## Safety /// * As with all copying routines, the source must be readable for the size you diff --git a/src/mem/mod.rs b/src/mem/mod.rs index 88e7163..c41d797 100644 --- a/src/mem/mod.rs +++ b/src/mem/mod.rs @@ -1,5 +1,7 @@ //! Low-level memory manipulation functions. mod copy; +mod set; pub use copy::*; +pub use set::*; diff --git a/src/mem/set.rs b/src/mem/set.rs new file mode 100644 index 0000000..1f66771 --- /dev/null +++ b/src/mem/set.rs @@ -0,0 +1,49 @@ +/// Sets `word` in blocks of 80 per loop. +/// +/// This is intended for clearing VRAM to a particular color when using +/// background modes 3, 4, and 5. +/// * To clear the Mode 3 bitmap, pass `240` as the count. +/// * To clear a Mode 4 frame pass `120`. +/// * To clear a Mode 5 frame pass `128`. +#[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] +#[cfg_attr(feature = "on_gba", link_section = ".iwram.set_u32x80_unchecked")] +pub unsafe extern "C" fn set_u32x80_unchecked( + dest: *mut u32, word: u32, count: usize, +) { + on_gba_or_unimplemented!(unsafe { + core::arch::asm!( + // Note(Lokathor): Same loop logic as `copy_u8_unchecked`, we're just + // processing bigger chunks of data at a time, and also setting rather + // than copying. + "1:", + "subs {count}, {count}, #1", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "bgt 1b", + + // The assembler will give us a warning (that we can't easily disable) + // if the reg_list for `stm` doesn't give the registers in order from + // low to high, so we just manually pick registers. The count register + // and the pointer register can be anything else. + in("r1") word, + in("r3") word, + in("r4") word, + in("r5") word, + in("r7") word, + in("r8") word, + in("r12") word, + in("lr") word, + dest = inout(reg) dest => _, + count = inout(reg) count => _, + options(nostack), + ) + }); +} diff --git a/src/video.rs b/src/video.rs index 0855bbf..d90fc37 100644 --- a/src/video.rs +++ b/src/video.rs @@ -3,6 +3,7 @@ use bitfrob::{ u16_get_bit, u16_with_bit, u16_with_region, u16_with_value, u8x2, }; +use mem::set_u32x80_unchecked; use voladdress::{VolBlock, VolGrid2d, VolGrid2dStrided, VolSeries}; use super::*; @@ -350,44 +351,10 @@ impl Mode3 { pub const BYTES_TOTAL: usize = Self::BYTES_PER_ROW * Self::HEIGHT_USIZE; /// Clears the entire bitmap to a color of your choosing. - #[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] - #[cfg_attr(feature = "on_gba", link_section = ".iwram.mode3.clear_to")] + #[inline] pub fn clear_to(color: Color) { - on_gba_or_unimplemented!(unsafe { - let x: u32 = color.0 as u32 | ((color.0 as u32) << 16); - // now we spam out that `u32`, 10 stm per loop, 8 times per stm. - core::arch::asm!( - "1:", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "stm {ptr}!, {{r0-r5,r7-r8}}", - "subs {count}, {count}, #1", - "bne 1b", - - // The assembler will give us a warning (that we can't easily disable) - // if the reg_list for `stm` doesn't give the registers in order from - // low to high, so we just manually pick registers. The count register - // and the pointer register can be anything else. - in("r0") x, - in("r1") x, - in("r2") x, - in("r3") x, - in("r4") x, - in("r5") x, - in("r7") x, - in("r8") x, - count = inout(reg) 240 => _, - ptr = inout(reg) MODE3_VRAM.as_usize() => _, - options(nostack), - ) - }); + let x: u32 = color.0 as u32 | ((color.0 as u32) << 16); + unsafe { set_u32x80_unchecked(MODE3_VRAM.as_usize() as _, x, 240) }; } /// Fills the given rectangle, clipped to the bounds of the bitmap.