Skip to content

Commit

Permalink
Merge pull request #374 from linebender/cpu_shader
Browse files Browse the repository at this point in the history
Add CPU shaders
  • Loading branch information
raphlinus authored Oct 11, 2023
2 parents 4b646dc + 3a43538 commit 9bdbb10
Show file tree
Hide file tree
Showing 26 changed files with 2,110 additions and 49 deletions.
7 changes: 5 additions & 2 deletions crates/encoding/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,11 @@ pub struct Path {
pub struct Tile {
/// Accumulated backdrop at the left edge of the tile.
pub backdrop: i32,
/// Index of first path segment.
pub segments: u32,
/// An enum that holds either the count of the number of path
/// segments in this tile, or an index to the beginning of an
/// allocated slice of `PathSegment` objects. In the latter case,
/// the bits are inverted.
pub segment_count_or_ix: u32,
}

/// Encoder for path segments.
Expand Down
1 change: 0 additions & 1 deletion shader/draw_leaf.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ fn main(
// let x1 = f32(bbox.x1);
// let y1 = f32(bbox.y1);
// let bbox_f = vec4(x0, y0, x1, y1);
let fill_mode = u32(bbox.linewidth >= 0.0);
var transform = Transform();
var linewidth = bbox.linewidth;
if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT ||
Expand Down
89 changes: 70 additions & 19 deletions src/cpu_dispatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
//! Support for CPU implementations of compute shaders.
use std::{
cell::{RefCell, RefMut},
ops::Deref,
cell::{Ref, RefCell, RefMut},
ops::{Deref, DerefMut},
};

use bytemuck::Pod;

#[derive(Clone, Copy)]
pub enum CpuBinding<'a> {
Buffer(&'a [u8]),
Expand All @@ -16,39 +18,88 @@ pub enum CpuBinding<'a> {
Texture(&'a CpuTexture),
}

pub enum CpuBufGuard<'a> {
Slice(&'a [u8]),
Interior(RefMut<'a, Vec<u8>>),
/// A read-only, typed view of a bound buffer.
///
/// The two variants cover the two ways the underlying storage can be held;
/// either way the guard dereferences to `T`.
pub enum TypedBufGuard<'a, T: ?Sized> {
/// A plain shared reference to the value.
Slice(&'a T),
/// A shared borrow handed out by a `RefCell`; the borrow is held for as
/// long as this guard is alive.
Interior(Ref<'a, T>),
}

/// A writable, typed view of a bound buffer.
///
/// The two variants cover the two ways the underlying storage can be held;
/// either way the guard dereferences (mutably) to `T`.
pub enum TypedBufGuardMut<'a, T: ?Sized> {
/// A plain exclusive reference to the value.
Slice(&'a mut T),
/// An exclusive borrow handed out by a `RefCell`; the borrow is held for
/// as long as this guard is alive.
Interior(RefMut<'a, T>),
}

impl<'a> Deref for CpuBufGuard<'a> {
type Target = [u8];
impl<'a, T: ?Sized> Deref for TypedBufGuard<'a, T> {
type Target = T;

fn deref(&self) -> &Self::Target {
match self {
CpuBufGuard::Slice(s) => s,
CpuBufGuard::Interior(r) => r,
TypedBufGuard::Slice(s) => s,
TypedBufGuard::Interior(r) => r,
}
}
}

impl<'a> CpuBufGuard<'a> {
/// Get a mutable reference to the buffer.
///
/// Panics if the underlying resource is read-only.
pub fn as_mut(&mut self) -> &mut [u8] {
impl<'a, T: ?Sized> Deref for TypedBufGuardMut<'a, T> {
type Target = T;

fn deref(&self) -> &Self::Target {
match self {
CpuBufGuard::Interior(r) => &mut *r,
_ => panic!("tried to borrow immutable buffer as mutable"),
TypedBufGuardMut::Slice(s) => s,
TypedBufGuardMut::Interior(r) => r,
}
}
}

impl<'a, T: ?Sized> DerefMut for TypedBufGuardMut<'a, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
match self {
TypedBufGuardMut::Slice(s) => s,
TypedBufGuardMut::Interior(r) => r,
}
}
}

impl<'a> CpuBinding<'a> {
pub fn as_buf(&self) -> CpuBufGuard {
pub fn as_typed<T: Pod>(&self) -> TypedBufGuard<T> {
match self {
CpuBinding::Buffer(b) => TypedBufGuard::Slice(bytemuck::from_bytes(b)),
CpuBinding::BufferRW(b) => {
TypedBufGuard::Interior(Ref::map(b.borrow(), |buf| bytemuck::from_bytes(buf)))
}
_ => panic!("resource type mismatch"),
}
}

pub fn as_typed_mut<T: Pod>(&self) -> TypedBufGuardMut<T> {
match self {
CpuBinding::Buffer(_) => panic!("can't borrow external buffer mutably"),
CpuBinding::BufferRW(b) => {
TypedBufGuardMut::Interior(RefMut::map(b.borrow_mut(), |buf| {
bytemuck::from_bytes_mut(buf)
}))
}
_ => panic!("resource type mismatch"),
}
}

pub fn as_slice<T: Pod>(&self) -> TypedBufGuard<[T]> {
match self {
CpuBinding::Buffer(b) => TypedBufGuard::Slice(bytemuck::cast_slice(b)),
CpuBinding::BufferRW(b) => {
TypedBufGuard::Interior(Ref::map(b.borrow(), |buf| bytemuck::cast_slice(buf)))
}
_ => panic!("resource type mismatch"),
}
}

pub fn as_slice_mut<T: Pod>(&self) -> TypedBufGuardMut<[T]> {
match self {
CpuBinding::Buffer(b) => CpuBufGuard::Slice(b),
CpuBinding::BufferRW(b) => CpuBufGuard::Interior(b.borrow_mut()),
CpuBinding::Buffer(_) => panic!("can't borrow external buffer mutably"),
CpuBinding::BufferRW(b) => {
TypedBufGuardMut::Interior(RefMut::map(b.borrow_mut(), |buf| {
bytemuck::cast_slice_mut(buf)
}))
}
_ => panic!("resource type mismatch"),
}
}
Expand Down
30 changes: 30 additions & 0 deletions src/cpu_shader/backdrop.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright 2023 The Vello authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

use vello_encoding::{ConfigUniform, Path, Tile};

use crate::cpu_dispatch::CpuBinding;

/// Turns the per-tile backdrop deltas of every path into running totals.
///
/// For each of the first `config.layout.n_draw_objects` paths, the tiles
/// covered by the path's bounding box are walked row by row. Within a row
/// each tile's `backdrop` is replaced by the sum of its own value and the
/// values of all tiles to its left in that row (an inclusive prefix sum).
///
/// `path.bbox` is read as `[x0, y0, x1, y1]` and `path.tiles` as the index
/// of the first tile of the path's row-major tile rectangle.
fn backdrop_main(config: &ConfigUniform, paths: &[Path], tiles: &mut [Tile]) {
    let n_draw_objects = config.layout.n_draw_objects;
    for path_ix in 0..n_draw_objects {
        let path = &paths[path_ix as usize];
        let cols = path.bbox[2] - path.bbox[0];
        let rows = path.bbox[3] - path.bbox[1];
        let first_tile = path.tiles;
        for row in 0..rows {
            let row_start = first_tile + row * cols;
            // The running total restarts at the left edge of every row.
            let mut running = 0;
            for col in 0..cols {
                let tile = &mut tiles[(row_start + col) as usize];
                running += tile.backdrop;
                tile.backdrop = running;
            }
        }
    }
}

pub fn backdrop(_n_wg: u32, resources: &[CpuBinding]) {
let config = resources[0].as_typed();
let paths = resources[1].as_slice();
let mut tiles = resources[2].as_slice_mut();
backdrop_main(&config, &paths, &mut tiles);
}
21 changes: 21 additions & 0 deletions src/cpu_shader/bbox_clear.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright 2023 The Vello authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

use vello_encoding::{ConfigUniform, PathBbox};

use crate::cpu_dispatch::CpuBinding;

/// Resets the first `config.layout.n_paths` path bounding boxes to an
/// "inverted" box: the minimum corner (`x0`, `y0`) is set to the largest
/// 32-bit signed value and the maximum corner (`x1`, `y1`) to the smallest.
///
/// # Panics
///
/// Panics if `path_bboxes` holds fewer than `n_paths` entries. The check is
/// made once, before any entry is modified, so the buffer is never left
/// partially reset.
fn bbox_clear_main(config: &ConfigUniform, path_bboxes: &mut [PathBbox]) {
    let n_paths = config.layout.n_paths as usize;
    // Slice once so the bounds check is not repeated for every field write.
    for bbox in &mut path_bboxes[..n_paths] {
        bbox.x0 = 0x7fff_ffff;
        bbox.y0 = 0x7fff_ffff;
        bbox.x1 = -0x8000_0000;
        bbox.y1 = -0x8000_0000;
    }
}

pub fn bbox_clear(_n_wg: u32, resources: &[CpuBinding]) {
let config = resources[0].as_typed();
let mut path_bboxes = resources[1].as_slice_mut();
bbox_clear_main(&config, &mut path_bboxes);
}
128 changes: 128 additions & 0 deletions src/cpu_shader/binning.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright 2023 The Vello authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

use vello_encoding::{BinHeader, BumpAllocators, ConfigUniform, DrawMonoid, PathBbox};

use crate::cpu_dispatch::CpuBinding;

/// Number of elements handled per workgroup; also the length of the
/// per-workgroup count, bounding-box and chunk-offset arrays.
const WG_SIZE: usize = 256;
/// Tile width (presumably in pixels -- TODO confirm against the shader config).
const TILE_WIDTH: usize = 16;
/// Tile height (presumably in pixels -- TODO confirm against the shader config).
const TILE_HEIGHT: usize = 16;
/// Number of tiles per bin horizontally.
const N_TILE_X: usize = 16;
/// Number of tiles per bin vertically.
const N_TILE_Y: usize = 16;
/// Scale factor that converts a bounding-box x coordinate to a bin column.
const SX: f32 = 1.0 / ((N_TILE_X * TILE_WIDTH) as f32);
/// Scale factor that converts a bounding-box y coordinate to a bin row.
const SY: f32 = 1.0 / ((N_TILE_Y * TILE_HEIGHT) as f32);

/// Intersects two axis-aligned boxes given as `[x0, y0, x1, y1]`.
///
/// The result takes the larger of the two minimum corners and the smaller
/// of the two maximum corners. When the inputs do not overlap the result is
/// "inverted" (`x0 > x1` and/or `y0 > y1`); callers must test for that.
fn bbox_intersect(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    let [a_x0, a_y0, a_x1, a_y1] = a;
    let [b_x0, b_y0, b_x1, b_y1] = b;
    [
        a_x0.max(b_x0),
        a_y0.max(b_y0),
        a_x1.min(b_x1),
        a_y1.min(b_y1),
    ]
}

/// CPU implementation of the binning stage.
///
/// Work is processed in groups of `WG_SIZE` elements, one group per
/// workgroup index in `0..n_wg`. Each group runs three passes:
///
/// 1. For every element, intersect its path bounding box with its clip
///    bounding box (if any), store the result in `intersected_bbox`,
///    convert it to a clamped rectangle of bins, and count how many
///    elements touch each bin.
/// 2. For every bin slot, reserve `count` entries from `bump.binning` and
///    record the reservation in `bin_header`.
/// 3. For every element, append its index to the reserved range of each bin
///    it touches, inside `bin_data` starting at `config.layout.bin_data_start`.
///
/// Per-group arrays are indexed by bin (`y * width_in_bins + x`), so the
/// total number of bins must not exceed `WG_SIZE`; otherwise indexing panics.
fn binning_main(
    n_wg: u32,
    config: &ConfigUniform,
    draw_monoids: &[DrawMonoid],
    path_bbox_buf: &[PathBbox],
    clip_bbox_buf: &[[f32; 4]],
    intersected_bbox: &mut [[f32; 4]],
    bump: &mut BumpAllocators,
    bin_data: &mut [u32],
    bin_header: &mut [BinHeader],
) {
    let n_draw_objects = config.layout.n_draw_objects as usize;
    let bin_data_start = config.layout.bin_data_start;
    for wg in 0..n_wg as usize {
        let mut counts = [0; WG_SIZE];
        let mut bboxes = [[0, 0, 0, 0]; WG_SIZE];
        // Round the tile dimensions up to whole bins.
        let width_in_bins =
            ((config.width_in_tiles + N_TILE_X as u32 - 1) / N_TILE_X as u32) as i32;
        let height_in_bins =
            ((config.height_in_tiles + N_TILE_Y as u32 - 1) / N_TILE_Y as u32) as i32;

        // Pass 1: compute each element's bin rectangle and count bin coverage.
        for local_ix in 0..WG_SIZE {
            let element_ix = wg * WG_SIZE + local_ix;
            // Elements past the end of the scene, and elements whose
            // intersected box is empty, keep an all-zero (empty) rectangle.
            let mut bin_rect: [i32; 4] = [0; 4];
            if element_ix < n_draw_objects {
                let draw_monoid = &draw_monoids[element_ix];
                // A clip index of zero means "no clip"; use a huge box then.
                let clip_bbox = if draw_monoid.clip_ix > 0 {
                    assert!(draw_monoid.clip_ix - 1 < config.layout.n_clips);
                    clip_bbox_buf[draw_monoid.clip_ix as usize - 1]
                } else {
                    [-1e9, -1e9, 1e9, 1e9]
                };
                let path_bbox = &path_bbox_buf[draw_monoid.path_ix as usize];
                let bbox = bbox_intersect(
                    clip_bbox,
                    [
                        path_bbox.x0 as f32,
                        path_bbox.y0 as f32,
                        path_bbox.x1 as f32,
                        path_bbox.y1 as f32,
                    ],
                );
                intersected_bbox[element_ix] = bbox;
                let [left, top, right, bottom] = bbox;
                if left < right && top < bottom {
                    bin_rect = [
                        (left * SX).floor() as i32,
                        (top * SY).floor() as i32,
                        (right * SX).ceil() as i32,
                        (bottom * SY).ceil() as i32,
                    ];
                }
            }
            let x0 = bin_rect[0].clamp(0, width_in_bins);
            let y0 = bin_rect[1].clamp(0, height_in_bins);
            let x1 = bin_rect[2].clamp(0, width_in_bins);
            let y1 = bin_rect[3].clamp(0, height_in_bins);
            for y in y0..y1 {
                for x in x0..x1 {
                    counts[(y * width_in_bins + x) as usize] += 1;
                }
            }
            bboxes[local_ix] = [x0, y0, x1, y1];
        }

        // Pass 2: reserve output space for every bin and publish its header.
        let mut chunk_offset = [0; WG_SIZE];
        for (local_ix, &count) in counts.iter().enumerate() {
            let offset = bump.binning;
            chunk_offset[local_ix] = offset;
            bump.binning += count;
            bin_header[wg * WG_SIZE + local_ix] = BinHeader {
                element_count: count,
                chunk_offset: offset,
            };
        }

        // Pass 3: write each element index into every bin it covers, using
        // `chunk_offset` as a per-bin write cursor.
        for (local_ix, &[x0, y0, x1, y1]) in bboxes.iter().enumerate() {
            let element_ix = wg * WG_SIZE + local_ix;
            for y in y0..y1 {
                for x in x0..x1 {
                    let bin_ix = (y * width_in_bins + x) as usize;
                    let ix = bin_data_start + chunk_offset[bin_ix];
                    bin_data[ix as usize] = element_ix as u32;
                    chunk_offset[bin_ix] += 1;
                }
            }
        }
    }
}

pub fn binning(n_wg: u32, resources: &[CpuBinding]) {
let config = resources[0].as_typed();
let draw_monoids = resources[1].as_slice();
let path_bbox_buf = resources[2].as_slice();
let clip_bbox_buf = resources[3].as_slice();
let mut intersected_bbox = resources[4].as_slice_mut();
let mut bump = resources[5].as_typed_mut();
let mut bin_data = resources[6].as_slice_mut();
let mut bin_header = resources[7].as_slice_mut();
binning_main(
n_wg,
&config,
&draw_monoids,
&path_bbox_buf,
&clip_bbox_buf,
&mut intersected_bbox,
&mut bump,
&mut bin_data,
&mut bin_header,
);
}
Loading

0 comments on commit 9bdbb10

Please sign in to comment.