From 0bc53dbc6cdcb64b6c840d0c33e37227ee0852a9 Mon Sep 17 00:00:00 2001
From: Kevin Reid <kpreid@switchb.org>
Date: Sat, 2 Nov 2024 10:27:50 -0700
Subject: [PATCH] =?UTF-8?q?gpu:=20Don=E2=80=99t=20allocate=20an=20atlas=20?=
 =?UTF-8?q?texture=20that=20is=20bigger=20than=20the=20device=20limits.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We still don’t check if the actual memory allocation failed, though.

Also check against the maximum buffer size, because Chrome seems to
care even though the spec doesn’t say it should. I tried to reproduce
this failure independently with plain JavaScript WebGPU, but could not.
The test program I used (based on the console errors I was seeing, which
complained of a buffer size limit exceeded *inside writeTexture*) was:

    const adapter = await navigator.gpu.requestAdapter();
    if (!adapter) {
        console.error("no adapter");
    }
    const device = await adapter.requestDevice();
    if (!device) {
        console.error("no device");
    }
    const queue = device.queue;
    console.log("got device");

    const chunkSize = 16;
    const totalSize = 512;
    const chunks = totalSize / chunkSize;
    console.log("chunks =", chunkSize);

    const texture = device.createTexture({
        size: { width: totalSize, height: totalSize, depthOrArrayLayers: totalSize },
        dimension: "3d",
        format: "rgba8unorm",
        usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.RENDER_ATTACHMENT
    });

    for (let rep = 0; rep < 2; rep++) {
        console.log("repetition", rep);
        for (let x = 0; x < chunks; x++)
        for (let y = 0; y < chunks; y++)
        for (let z = 0; z < chunks; z++) {
            queue.writeTexture(
                {
                    texture,
                    origin: [x * chunkSize, y * chunkSize, z * chunkSize]
                },
                new Uint8Array(chunkSize * chunkSize * chunkSize * 4),
                { offset: 0, bytesPerRow: chunkSize * 4, rowsPerImage: chunkSize },
                [chunkSize, chunkSize, chunkSize],
            );
        }
        queue.submit([]);
        await null;
    }
    console.log("done");
---
 all-is-cubes-gpu/src/common/octree_alloc.rs   | 16 ++++---
 all-is-cubes-gpu/src/in_wgpu.rs               |  2 +-
 all-is-cubes-gpu/src/in_wgpu/block_texture.rs | 45 ++++++++++++++++---
 .../src/in_wgpu/shader_testing.rs             |  3 +-
 fuzz/fuzz_targets/fuzz_octree.rs              |  6 +--
 5 files changed, 55 insertions(+), 17 deletions(-)
diff --git a/all-is-cubes-gpu/src/common/octree_alloc.rs b/all-is-cubes-gpu/src/common/octree_alloc.rs
index dc7d43014..c70160bf9 100644
--- a/all-is-cubes-gpu/src/common/octree_alloc.rs
+++ b/all-is-cubes-gpu/src/common/octree_alloc.rs
@@ -82,9 +82,15 @@ impl<A> Alloctree<A> {
 
     /// Allocates a region of the given size, growing the overall bounds if needed.
     ///
-    /// Returns `None` if the tree cannot grow further.
-    pub fn allocate_with_growth(&mut self, request: GridAab) -> Option<AlloctreeHandle<A>> {
-        if !fits(request, Self::MAX_SIZE_EXPONENT) {
+    /// Returns `None` if the tree cannot grow further, or if growth is required but would exceed
+    /// `grow_to_at_most_size_exponent`.
+    pub fn allocate_with_growth(
+        &mut self,
+        request: GridAab,
+        mut grow_to_at_most_size_exponent: u8,
+    ) -> Option<AlloctreeHandle<A>> {
+        grow_to_at_most_size_exponent = grow_to_at_most_size_exponent.min(Self::MAX_SIZE_EXPONENT);
+        if !fits(request, grow_to_at_most_size_exponent) {
             // Too big, can never fit even with growth.
             return None;
         }
@@ -104,7 +110,7 @@ impl<A> Alloctree<A> {
             .max(requested_size_exponent)
             .checked_add(1)?;
 
-        if new_size_exponent <= Self::MAX_SIZE_EXPONENT {
+        if new_size_exponent <= grow_to_at_most_size_exponent {
             // Grow the allocatable region and try again.
             self.grow_to(new_size_exponent);
 
@@ -490,7 +496,7 @@ mod tests {
         assert_eq!(t.allocate(GridAab::ORIGIN_CUBE), None, "initially full");
 
         // Allocation with growth succeeds
-        t.allocate_with_growth(GridAab::ORIGIN_CUBE)
+        t.allocate_with_growth(GridAab::ORIGIN_CUBE, Alloctree::<Cube>::MAX_SIZE_EXPONENT)
             .expect("second allocation should succeed");
         assert_eq!(t.bounds().map(i32::from), GridAab::for_block(R16).into());
     }
diff --git a/all-is-cubes-gpu/src/in_wgpu.rs b/all-is-cubes-gpu/src/in_wgpu.rs
index 4c8f9d7bb..715ccc1f2 100644
--- a/all-is-cubes-gpu/src/in_wgpu.rs
+++ b/all-is-cubes-gpu/src/in_wgpu.rs
@@ -405,7 +405,7 @@ impl<I: time::Instant> EverythingRenderer<I> {
             postprocess::create_postprocess_bind_group_layout(&device);
 
         let pipelines = Pipelines::new(&device, &shaders, &fb, cameras.graphics_options_source());
-        let block_texture = AtlasAllocator::new("EverythingRenderer");
+        let block_texture = AtlasAllocator::new("EverythingRenderer", &device.limits());
 
         let mut new_self = EverythingRenderer {
             staging_belt: wgpu::util::StagingBelt::new(
diff --git a/all-is-cubes-gpu/src/in_wgpu/block_texture.rs b/all-is-cubes-gpu/src/in_wgpu/block_texture.rs
index 4f4273087..54bc961e5 100644
--- a/all-is-cubes-gpu/src/in_wgpu/block_texture.rs
+++ b/all-is-cubes-gpu/src/in_wgpu/block_texture.rs
@@ -108,6 +108,9 @@ struct AllocatorBacking {
     /// Tracks which regions of the texture are free or allocated.
     alloctree: Alloctree<AtlasTexel>,
 
+    /// log2 of the maximum texture size we may consider growing to.
+    maximum_texture_size_exponent: u8,
+
     /// Whether flush needs to do anything.
     dirty: bool,
 
@@ -144,12 +147,13 @@ struct GpuTexture {
 // Implementations
 
 impl AtlasAllocator {
-    pub fn new(label_prefix: &str) -> Self {
+    pub fn new(label_prefix: &str, limits: &wgpu::Limits) -> Self {
         Self {
-            reflectance_backing: AllocatorBacking::new(label_prefix, Channels::Reflectance),
+            reflectance_backing: AllocatorBacking::new(label_prefix, Channels::Reflectance, limits),
             reflectance_and_emission_backing: AllocatorBacking::new(
                 label_prefix,
                 Channels::ReflectanceEmission,
+                limits,
             ),
         }
     }
@@ -191,12 +195,13 @@ impl texture::Allocator for AtlasAllocator {
             Channels::Reflectance => &self.reflectance_backing,
             Channels::ReflectanceEmission => &self.reflectance_and_emission_backing,
         };
-        let mut backing_guard = backing_arc.lock().unwrap();
+        let backing_guard = &mut *backing_arc.lock().unwrap();
 
         // If alloctree grows, the next flush() will take care of reallocating the texture.
-        let handle = backing_guard
-            .alloctree
-            .allocate_with_growth(requested_bounds)?;
+        let handle = backing_guard.alloctree.allocate_with_growth(
+            requested_bounds,
+            backing_guard.maximum_texture_size_exponent,
+        )?;
         let allocated_bounds = handle.allocation;
 
         let result = AtlasTile {
@@ -282,10 +287,21 @@ impl texture::Tile for AtlasTile {
 }
 
 impl AllocatorBacking {
-    fn new(label_prefix: &str, channels: Channels) -> Arc<Mutex<Self>> {
+    fn new(label_prefix: &str, channels: Channels, limits: &wgpu::Limits) -> Arc<Mutex<Self>> {
+        let maximum_texture_size_exponent = limits
+            .max_texture_dimension_3d
+            // Kludge: Chrome WebGPU fails if the buffer size is exceeded, as of 129.0.6668.101,
+            // even though we’re not making any such buffer, only a texture.
+            // Could not reproduce standalone.
+            .min(u32::try_from(cube_root_u64(limits.max_buffer_size / 4)).unwrap_or(u32::MAX))
+            .ilog2()
+            .try_into()
+            .unwrap_or(u8::MAX);
+
         Arc::new(Mutex::new(AllocatorBacking {
             // Default size of 2⁵ = 32 holding up to 8 × 16³ block textures.
             alloctree: Alloctree::new(5),
+            maximum_texture_size_exponent,
             dirty: false,
             in_use: Vec::new(),
             channels,
@@ -303,6 +319,10 @@ impl AllocatorBacking {
         let backing = &mut *backing_lock_guard;
 
         let needed_texture_size = size3d_to_extent(backing.alloctree.bounds().size());
+        // Note: We have the Alloctree ensure that it does not exceed the device’s texture size
+        // limit, so needed_texture_size will not be too big.
+        // However, a failed texture allocation is not handled; handling it would require
+        // using an error scope and being able to recover asynchronously from the failed attempt.
 
         // If we have textures already, check if they are the right size.
         let old_textures: Option<Msw<Group<_>>> = if matches!(
@@ -554,3 +574,14 @@ impl Drop for TileBacking {
 fn zero_box(volume: usize) -> Box<[[u8; 4]]> {
     vec![[0, 0, 0, 0]; volume].into_boxed_slice()
 }
+
+/// Compute the cube root of `value`, rounded down; the result is exact for every `u64`.
+/// Integer arithmetic only: `64f64.powf(1.0 / 3.0)` can yield 3.999…, which truncates to 3.
+fn cube_root_u64(value: u64) -> u64 {
+    // A candidate is acceptable if its cube neither overflows `u64` nor exceeds `value`.
+    let fits = |root: u64| root.checked_pow(3).is_some_and(|cube| cube <= value);
+    // The root of any `u64` is below 2²², so decide its 22 bits from the highest one down.
+    (0..22).rev().map(|bit| 1u64 << bit).fold(0, |root, mask| {
+        if fits(root | mask) { root | mask } else { root }
+    })
+}
diff --git a/all-is-cubes-gpu/src/in_wgpu/shader_testing.rs b/all-is-cubes-gpu/src/in_wgpu/shader_testing.rs
index fe5956d0c..dd5632d60 100644
--- a/all-is-cubes-gpu/src/in_wgpu/shader_testing.rs
+++ b/all-is-cubes-gpu/src/in_wgpu/shader_testing.rs
@@ -132,7 +132,8 @@ where
     });
 
     // Placeholder space data for the bind group
-    let texture_allocator = in_wgpu::block_texture::AtlasAllocator::new("shader test space");
+    let texture_allocator =
+        in_wgpu::block_texture::AtlasAllocator::new("shader test space", &device.limits());
     let (texture_view, _) = texture_allocator.flush::<time::NoTime>(&device, &queue);
     let space_bind_group = in_wgpu::space::create_space_bind_group(
         "shader test space",
diff --git a/fuzz/fuzz_targets/fuzz_octree.rs b/fuzz/fuzz_targets/fuzz_octree.rs
index b667da4c9..9ccef5300 100644
--- a/fuzz/fuzz_targets/fuzz_octree.rs
+++ b/fuzz/fuzz_targets/fuzz_octree.rs
@@ -15,7 +15,7 @@ struct FuzzOctree {
 #[derive(Arbitrary, Debug)]
 enum Operation {
     Allocate(GridAab),
-    AllocateGrow(GridAab),
+    AllocateGrow(GridAab, u8),
     Free(usize),
 }
 
@@ -31,8 +31,8 @@ fuzz_target!(|input: FuzzOctree| {
                     handles.push(handle);
                 }
             }
-            Operation::AllocateGrow(request) => {
-                let result = t.allocate_with_growth(request);
+            Operation::AllocateGrow(request, max_growth) => {
+                let result = t.allocate_with_growth(request, max_growth);
                 if let Some(handle) = result {
                     handles.push(handle);
                 }