Skip to content

Commit

Permalink
Improve update_cleared_playfield_tiles performance (#61)
Browse files Browse the repository at this point in the history
Re-implements update_cleared_playfield_tiles() to walk over the playfield with a single pointer instead of keeping indices. Also optimizes the math and logic as much as I think is possible while keeping it only in C.

This brings the entire sweep from 141747 to 113467 cycles (~20%, or roughly 28000 fewer cycles) in one test scenario. This probably saves a frame or two during the sweep.

Performance while iterating over a completely unmarked playfield improves by about 10%.

Performance while iterating over a playfield with 44 marked tiles improves by about 27%.

The more tiles which are marked, the more noticeable the improvement would be.
  • Loading branch information
boingoing authored Jul 1, 2023
1 parent b11c89a commit 6256fad
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 39 deletions.
2 changes: 1 addition & 1 deletion src/constants/tiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#ifndef __JEZNES_CONSTANTS_TILES_H__
#define __JEZNES_CONSTANTS_TILES_H__

#define MAX_TILE_UPDATES_PER_FRAME 40
#define MAX_TILE_UPDATES_PER_FRAME 41

#define TILE_INDEX_PLAYFIELD_UNCLEARED 0x0
#define TILE_INDEX_PLAYFIELD_CLEARED 0x3
Expand Down
8 changes: 7 additions & 1 deletion src/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,19 @@
// Note: Make sure that functions in the same stack don't reuse local temps
// by abstracting the usage into these macros.

#define set_temp_ptr(p) (temp_ptr_1 = (p))
#define set_temp_ptr(p) (temp_ptr_1 = (unsigned char*)(p))
#define get_temp_ptr(type) ((type*)temp_ptr_1)

#define get_playfield_index() (temp_int_3)
#define set_playfield_index(a) (temp_int_3 = (a))
#define inc_playfield_index() (++temp_int_3)

#define get_should_initialize_clear_sweep() (temp_byte_6)
#define set_should_initialize_clear_sweep(a) (temp_byte_6 = (a))

#define get_temp_ppu_address() (temp_int_2)
#define set_temp_ppu_address(a) (temp_int_2 = (a))

#define get_game_over_mode() (temp_byte_6)
#define set_game_over_mode(a) (temp_byte_6 = (a))

Expand Down
11 changes: 4 additions & 7 deletions src/flood_fill.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,10 @@
#include "types.h"
#include "zeropage.h"

void compute_playfield_mark_bit_one_ball(unsigned char ball_index) {
// Set cur to starting playfield tile
set_current_position(balls[ball_index].nearest_playfield_tile);

// If the playfield tile where |ball_index| is located has already been
// marked, another ball is in the same region of the playfield as
// |ball_index|. There's no point in remarking the region.
void compute_playfield_mark_bit_one_region(void) {
// If the playfield tile at |get_current_position()| is marked, the region
// containing the tile has already been marked. There's no point in remarking
// the region.
if (!inside(get_current_position())) {
return;
}
Expand Down
15 changes: 8 additions & 7 deletions src/flood_fill.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,13 @@ const unsigned char turn_left_table[] = {
? playfield_index_move_right_up(get_current_position()) \
: playfield_index_move_left_down(get_current_position()))

// Uses a constant-memory usage implementation of the painters algorithm to
// walk the playfield starting at the playfield tile where |ball_index| is
// currently located. Each reachable playfield tile is marked until we run
// out of unmarked playfield tiles to walk to.
// When this function returns, the region in which |ball_index| is bound will
// be made up entirely of marked playfield tiles.
void __fastcall__ compute_playfield_mark_bit_one_ball(unsigned char ball_index);
// Uses a constant-memory implementation of the painter's algorithm to walk
// the playfield starting at the playfield tile returned via
// |get_current_position()|. Be sure to set the current position via
// |set_current_position()| before calling this function. Each reachable
// playfield tile is marked until we run out of unmarked playfield tiles to walk
// to. When this function returns, the region in which |get_current_position()|
// is located will be made up entirely of marked playfield tiles.
void compute_playfield_mark_bit_one_region(void);

#endif // __JEZNES_FLOOD_FILL_H__
66 changes: 44 additions & 22 deletions src/jeznes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1406,17 +1406,18 @@ void update_nearest_tile(void) {

void line_completed(void) {
for (temp_byte_9 = 0; temp_byte_9 < get_ball_count(); ++temp_byte_9) {
compute_playfield_mark_bit_one_ball(temp_byte_9);
set_current_position(balls[temp_byte_9].nearest_playfield_tile);
compute_playfield_mark_bit_one_region();
}

// Grant score for clearing a line segment.
add_score_for_cleared_line();

// Reset |playfield_index|, set the game state to updating the playfield,
// which will cause us to call update_cleared_playfield_tiles() from the
// Set the game state to updating the playfield and re-initialize the sweep.
// This will cause us to call update_cleared_playfield_tiles() from the
// beginning next frame. If we need to call it again after that, we will call
// it in restartable mode.
set_playfield_index(0);
set_should_initialize_clear_sweep(TRUE);
game_state = GAME_STATE_UPDATING_PLAYFIELD;
}

Expand All @@ -1442,52 +1443,73 @@ void set_playfield_tile(unsigned int tile_index,
// all uncleared tiles have been updated. Note: This function can potentially
// queue more vram updates than are allowed during the next v-blank.
// For that reason, it is restartable.
// The current playfield_index needs to be reset to zero once at the
// beginning of the operation. Otherwise, calling this function will
// continue from where it left off last time. It returns TRUE when all
// vram updates are queued and FALSE if there are additional vram updates
// pending.
//
// scratch:
// temp_byte_3
// If get_should_initialize_clear_sweep() is TRUE, we will reset the
// pointers used to walk over the playfield. Otherwise, calling this
// function will continue from where it left off last time. It returns
// TRUE when all vram updates are queued and FALSE if there are additional
// vram updates pending.
unsigned char update_cleared_playfield_tiles(void) {
// If this is the first sweep over the playfield, we need to init the
// counters.
if (get_should_initialize_clear_sweep() == TRUE) {
// Keep pointer to the playfield in-memory structure.
set_temp_ptr(playfield);
// First ppu address of the playfield tiles.
set_temp_ppu_address(get_ppu_addr(0, playfield_pixel_coord_x[0],
playfield_pixel_coord_y[0]));
// Turn off the initialization flag for subsequent sweeps.
set_should_initialize_clear_sweep(FALSE);
}

// Reset per-sweep cleared counter.
temp_byte_3 = 0;

// Look over all tiles in the playfield and for each uncleared, unmarked tile
// change it to cleared.
for (; get_playfield_index() < PLAYFIELD_WIDTH * PLAYFIELD_HEIGHT;
inc_playfield_index()) {
temp_byte_4 = playfield[get_playfield_index()];
for (; get_temp_ptr(unsigned char) !=
(unsigned char*)(playfield + PLAYFIELD_WIDTH * PLAYFIELD_HEIGHT);
++temp_ptr_1) {
set_playfield_tile_value(*get_temp_ptr(unsigned char));

// Skip tiles which are not uncleared. These are walls or cleared tiles and
// we don't care if they're marked.
// TODO(boingoing): What about PLAYFIELD_LINE tiles from the other player?
if (get_playfield_tile_type_from_byte(temp_byte_4) != PLAYFIELD_UNCLEARED) {
if (get_playfield_tile_type_from_byte(get_playfield_tile_value()) !=
PLAYFIELD_UNCLEARED) {
continue;
}

// If the tile was marked, we aren't supposed to clear it. Mark implies
// there is a ball inside the same region.
if (get_playfield_is_marked_flag_from_byte(temp_byte_4)) {
if (get_playfield_is_marked_flag_from_byte(get_playfield_tile_value())) {
// While we're here... let's remove all the mark bits from uncleared
// tiles. We won't revisit this tile index during this sweep of the
// playfield.
unset_playfield_is_marked_flag(get_playfield_index());
*get_temp_ptr(unsigned char) &= ~(PLAYFIELD_BITMASK_MARK);
continue;
}

// Unmarked, uncleared playfield tile. Let's reset it to cleared and track
// the count for this sweep as well as all-time for the level.
++temp_byte_3;
++cleared_tile_count;
set_playfield_tile(get_playfield_index(), PLAYFIELD_WALL,
TILE_INDEX_PLAYFIELD_CLEARED);

// Update the playfield in-memory structure.
*get_temp_ptr(unsigned char) = PLAYFIELD_WALL;

// Calculate the ppu addr for the current tile and set the bg tile graphic.
one_vram_buffer(TILE_INDEX_PLAYFIELD_CLEARED,
get_temp_ppu_address() + temp_ptr_1 - playfield);

// We can only queue about 40 tile updates per v-blank.
if (temp_byte_3 >= MAX_TILE_UPDATES_PER_FRAME) {
if (temp_byte_3 == MAX_TILE_UPDATES_PER_FRAME) {
add_score_for_cleared_tiles(temp_byte_3);
cleared_tile_count += temp_byte_3;
++temp_ptr_1;
return FALSE;
}
}

add_score_for_cleared_tiles(temp_byte_3);
cleared_tile_count += temp_byte_3;
return TRUE;
}
13 changes: 13 additions & 0 deletions src/screens/playfield.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,19 @@
#define playfield_tile_from_pixel_coords(x, y) \
(((x) >> 3) + (((y) >> 3) << 5) - PLAYFIELD_FIRST_TILE_INDEX)

// Lookup table translating a playfield x-coordinate index in [0,31] into the
// matching x-coordinate in pixel-space. Entry |i| holds the value i * 8.
const unsigned char playfield_pixel_coord_x[] = {
    0,   8,   16,  24,  32,  40,  48,  56,
    64,  72,  80,  88,  96,  104, 112, 120,
    128, 136, 144, 152, 160, 168, 176, 184,
    192, 200, 208, 216, 224, 232, 240, 248,
};

// Lookup table translating a playfield y-coordinate index in [0,21] into the
// matching y-coordinate in pixel-space. Entry |i| holds the value (i + 1) * 8.
const unsigned char playfield_pixel_coord_y[] = {
    8,   16,  24,  32,  40,  48,  56,  64,
    72,  80,  88,  96,  104, 112, 120, 128,
    136, 144, 152, 160, 168, 176,
};

// Calculate the playfield tile position in (x,y) of the playfield tile |i|.
#define playfield_index_x(i) ((i) % 32)
#define playfield_index_y(i) ((i) >> 5)
Expand Down
2 changes: 1 addition & 1 deletion src/zeropage.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,6 @@ unsigned int temp_int_1;
unsigned int temp_int_2;
unsigned int temp_int_3;

void* temp_ptr_1;
unsigned char* temp_ptr_1;

#endif // __JEZNES_ZEROPAGE_H__

0 comments on commit 6256fad

Please sign in to comment.