From cae02c643e758cef84970e9bfe2a8f1abe24cedf Mon Sep 17 00:00:00 2001
From: Frank Yellin <fy@fyellin.com>
Date: Thu, 12 Sep 2024 18:00:56 +0300
Subject: [PATCH 1/4] Initial implementation of:     overrideable constands    
 eliding entry_point when only one.

---
 tests/test_set_override.py        | 130 ++++++++++++++++++++++++++++++
 wgpu/backends/wgpu_native/_api.py |  72 ++++++++++++-----
 2 files changed, 184 insertions(+), 18 deletions(-)
 create mode 100644 tests/test_set_override.py
diff --git a/tests/test_set_override.py b/tests/test_set_override.py
new file mode 100644
index 00000000..b896bc87
--- /dev/null
+++ b/tests/test_set_override.py
@@ -0,0 +1,130 @@
+import numpy as np
+import pytest
+
+import wgpu.utils
+from tests.testutils import can_use_wgpu_lib, run_tests
+from wgpu import TextureFormat
+
+if not can_use_wgpu_lib:
+    pytest.skip("Skipping tests that need the wgpu lib", allow_module_level=True)
+
+
+"""
+This code is an amazingly slow way of adding together two 10-element arrays of 32-bit
+integers defined by push constants and store them into an output buffer.
+
+
+The source code assumes the topology is POINT-LIST, so that each call to vertexMain
+corresponds with one call to fragmentMain. We draw exactly one point.
+"""
+
+SHADER_SOURCE = """
+    @id(0) override a: i32 = 1;
+    @id(1) override b: u32 = 2u;
+    @id(2) override c: f32 = 3.0;
+    @id(3) override d: bool = true;
+
+    // Put the results here
+    @group(0) @binding(0) var<storage, read_write> data: array<u32>;
+
+    struct VertexOutput {
+        @location(0) a: i32,
+        @location(1) b: u32,
+        @location(2) c: f32,
+        @location(3) d: u32,
+        @builtin(position) position: vec4f,
+    }
+
+    @vertex
+    fn vertexMain(@builtin(vertex_index) index: u32) -> VertexOutput {
+        return VertexOutput(a, b, c, u32(d), vec4f(0, 0, 0, 1));
+    }
+
+    @fragment
+    fn fragmentMain(output: VertexOutput) -> @location(0) vec4f {
+        data[0] = u32(output.a);
+        data[1] = u32(output.b);
+        data[2] = u32(output.c);
+        data[3] = u32(output.d);
+        data[4] = u32(a);
+        data[5] = u32(b);
+        data[6] = u32(c);
+        data[7] = u32(d);
+        return vec4f();
+    }
+    
+    @compute @workgroup_size(1)
+    fn computeMain() {
+        data[0] = u32(a);
+        data[1] = u32(b);
+        data[2] = u32(c);
+        data[3] = u32(d);
+    }
+    
+"""
+
+BIND_GROUP_ENTRIES = [
+    {"binding": 0, "visibility": "FRAGMENT|COMPUTE", "buffer": {"type": "storage"}},
+]
+
+
+def test_override_constants():
+    device = wgpu.utils.get_default_device()
+    output_texture = device.create_texture(
+        # Actual size is immaterial.  Could just be 1x1
+        size=[128, 128],
+        format=TextureFormat.rgba8unorm,
+        usage="RENDER_ATTACHMENT|COPY_SRC",
+    )
+    shader = device.create_shader_module(code=SHADER_SOURCE)
+    bind_group_layout = device.create_bind_group_layout(entries=BIND_GROUP_ENTRIES)
+    render_pipeline_layout = device.create_pipeline_layout(
+        bind_group_layouts=[bind_group_layout],
+    )
+    pipeline = device.create_render_pipeline(
+        layout=render_pipeline_layout,
+        vertex={
+            "module": shader,
+            "constants": {0: -2, "1": 10, "2": 20.0, "3": 23},
+        },
+        fragment={
+            "module": shader,
+            "targets": [{"format": output_texture.format}],
+            "constants": {"a": -5, "b": 20, "c": 30.0, "d": True},
+        },
+        primitive={
+            "topology": "point-list",
+        },
+    )
+    render_pass_descriptor = {
+        "color_attachments": [
+            {
+                "clear_value": (0, 0, 0, 0),  # only first value matters
+                "load_op": "clear",
+                "store_op": "store",
+                "view": output_texture.create_view(),
+            }
+        ],
+    }
+    output_buffer = device.create_buffer(size=8 * 4, usage="STORAGE|COPY_SRC")
+    bind_group = device.create_bind_group(
+        layout=pipeline.get_bind_group_layout(0),
+        entries=[
+            {"binding": 0, "resource": {"buffer": output_buffer}},
+        ],
+    )
+
+    encoder = device.create_command_encoder()
+    this_pass = encoder.begin_render_pass(**render_pass_descriptor)
+    this_pass.set_pipeline(pipeline)
+    this_pass.set_bind_group(0, bind_group)
+    this_pass.draw(1)
+    this_pass.end()
+    device.queue.submit([encoder.finish()])
+    info_view = device.queue.read_buffer(output_buffer)
+    result = np.frombuffer(info_view, dtype=np.uint32)
+    print(result)
+
+
+if __name__ == "__main__":
+    run_tests(globals())
diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py
index d3252c92..21e6653b 100644
--- a/wgpu/backends/wgpu_native/_api.py
+++ b/wgpu/backends/wgpu_native/_api.py
@@ -19,7 +19,7 @@
 import logging
 import ctypes.util
 from weakref import WeakKeyDictionary
-from typing import List, Dict, Union
+from typing import List, Dict, Optional, Union
 
 from ... import classes, flags, enums, structs
 from ..._coreutils import str_flag_to_int
@@ -173,6 +173,31 @@ def _tuple_from_color(rgba):
     return _tuple_from_tuple_or_dict(rgba, "rgba")
 
 
+def _get_override_constant_entries(constants):
+    if not constants:
+        return 0, ffi.NULL
+    key_value_pairs = []
+    for key, value in constants.items():
+        assert isinstance(key, (str, int))
+        assert isinstance(value, (int, float, bool))
+        c_constant_entry = new_struct(
+            "WGPUConstantEntry",
+            key=to_c_string(str(key)),
+            value=float(value),
+        )
+        key_value_pairs.append(c_constant_entry)
+    c_constants = ffi.new("WGPUConstantEntry[]", key_value_pairs)
+    return len(constants), c_constants
+
+
+def to_c_string(string: str):
+    return ffi.new("char []", string.encode())
+
+
+def to_c_string_or_null(string: Optional[str]):
+    return ffi.NULL if string is None else ffi.new("char []", string.encode())
+
+
 _empty_label = ffi.new("char []", b"")
 
 
@@ -181,7 +206,7 @@ def to_c_label(label):
     if not label:
         return _empty_label
     else:
-        return ffi.new("char []", label.encode())
+        return to_c_string(label)
 
 
 def feature_flag_to_feature_names(flag):
@@ -861,7 +886,7 @@ def _request_device(
 
         c_trace_path = ffi.NULL
         if trace_path:  # no-cover
-            c_trace_path = ffi.new("char []", trace_path.encode())
+            c_trace_path = to_c_string(trace_path)
 
         # H: chain: WGPUChainedStruct, tracePath: char *
         extras = new_struct_p(
@@ -1403,15 +1428,15 @@ def create_shader_module(
                         # H: name: char *, value: char *
                         new_struct(
                             "WGPUShaderDefine",
-                            name=ffi.new("char []", "gl_VertexID".encode()),
-                            value=ffi.new("char []", "gl_VertexIndex".encode()),
+                            name=ffi.new("char []", b"gl_VertexID"),
+                            value=ffi.new("char []", b"gl_VertexIndex"),
                         )
                     )
                 c_defines = ffi.new("WGPUShaderDefine []", defines)
                 # H: chain: WGPUChainedStruct, stage: WGPUShaderStage, code: char *, defineCount: int, defines: WGPUShaderDefine *
                 source_struct = new_struct_p(
                     "WGPUShaderModuleGLSLDescriptor *",
-                    code=ffi.new("char []", code.encode()),
+                    code=to_c_string(code),
                     stage=c_stage,
                     defineCount=len(defines),
                     defines=c_defines,
@@ -1424,7 +1449,7 @@ def create_shader_module(
                 # H: chain: WGPUChainedStruct, code: char *
                 source_struct = new_struct_p(
                     "WGPUShaderModuleWGSLDescriptor *",
-                    code=ffi.new("char []", code.encode()),
+                    code=to_c_string(code),
                     # not used: chain
                 )
                 source_struct[0].chain.next = ffi.NULL
@@ -1476,14 +1501,17 @@ def create_compute_pipeline(
         compute: "structs.ProgrammableStage",
     ):
         check_struct("ProgrammableStage", compute)
+        constant_count, c_constants = _get_override_constant_entries(
+            compute.get("constants")
+        )
         # H: nextInChain: WGPUChainedStruct *, module: WGPUShaderModule, entryPoint: char *, constantCount: int, constants: WGPUConstantEntry *
         c_compute_stage = new_struct(
             "WGPUProgrammableStageDescriptor",
             module=compute["module"]._internal,
-            entryPoint=ffi.new("char []", compute["entry_point"].encode()),
+            entryPoint=to_c_string_or_null(compute.get("entry_point")),
+            constantCount=constant_count,
+            constants=c_constants,
             # not used: nextInChain
-            # not used: constantCount
-            # not used: constants
         )
 
         if isinstance(layout, GPUPipelineLayout):
@@ -1535,6 +1563,8 @@ def create_render_pipeline(
         check_struct("DepthStencilState", depth_stencil)
         check_struct("MultisampleState", multisample)
         check_struct("PrimitiveState", primitive)
+        if fragment:
+            check_struct("FragmentState", fragment)
 
         c_vertex_buffer_layout_list = []
         for buffer_des in vertex.get("buffers", ()):
@@ -1561,16 +1591,19 @@ def create_render_pipeline(
         c_vertex_buffer_descriptors_array = ffi.new(
             "WGPUVertexBufferLayout []", c_vertex_buffer_layout_list
         )
+        constant_count, c_constants = _get_override_constant_entries(
+            vertex.get("constants")
+        )
         # H: nextInChain: WGPUChainedStruct *, module: WGPUShaderModule, entryPoint: char *, constantCount: int, constants: WGPUConstantEntry *, bufferCount: int, buffers: WGPUVertexBufferLayout *
         c_vertex_state = new_struct(
             "WGPUVertexState",
             module=vertex["module"]._internal,
-            entryPoint=ffi.new("char []", vertex["entry_point"].encode()),
+            entryPoint=to_c_string_or_null(vertex.get("entry_point")),
             buffers=c_vertex_buffer_descriptors_array,
             bufferCount=len(c_vertex_buffer_layout_list),
+            constantCount=constant_count,
+            constants=c_constants,
             # not used: nextInChain
-            # not used: constantCount
-            # not used: constants
         )
 
         # H: nextInChain: WGPUChainedStruct *, topology: WGPUPrimitiveTopology, stripIndexFormat: WGPUIndexFormat, frontFace: WGPUFrontFace, cullMode: WGPUCullMode
@@ -1671,16 +1704,19 @@ def create_render_pipeline(
                 "WGPUColorTargetState []", c_color_targets_list
             )
             check_struct("FragmentState", fragment)
+            constant_count, c_constants = _get_override_constant_entries(
+                vertex.get("constants")
+            )
             # H: nextInChain: WGPUChainedStruct *, module: WGPUShaderModule, entryPoint: char *, constantCount: int, constants: WGPUConstantEntry *, targetCount: int, targets: WGPUColorTargetState *
             c_fragment_state = new_struct_p(
                 "WGPUFragmentState *",
                 module=fragment["module"]._internal,
-                entryPoint=ffi.new("char []", fragment["entry_point"].encode()),
+                entryPoint=to_c_string_or_null(fragment.get("entry_point")),
                 targets=c_color_targets_array,
                 targetCount=len(c_color_targets_list),
+                constantCount=constant_count,
+                constants=c_constants,
                 # not used: nextInChain
-                # not used: constantCount
-                # not used: constants
             )
 
         if isinstance(layout, GPUPipelineLayout):
@@ -2270,7 +2306,7 @@ def set_bind_group(
 class GPUDebugCommandsMixin(classes.GPUDebugCommandsMixin):
     # whole class is likely going to solved better: https://github.com/pygfx/wgpu-py/pull/546
     def push_debug_group(self, group_label):
-        c_group_label = ffi.new("char []", group_label.encode())
+        c_group_label = to_c_string(group_label)
         # H: void wgpuCommandEncoderPushDebugGroup(WGPUCommandEncoder commandEncoder, char const * groupLabel)
         # H: void wgpuComputePassEncoderPushDebugGroup(WGPUComputePassEncoder computePassEncoder, char const * groupLabel)
         # H: void wgpuRenderPassEncoderPushDebugGroup(WGPURenderPassEncoder renderPassEncoder, char const * groupLabel)
@@ -2287,7 +2323,7 @@ def pop_debug_group(self):
         function(self._internal)
 
     def insert_debug_marker(self, marker_label):
-        c_marker_label = ffi.new("char []", marker_label.encode())
+        c_marker_label = to_c_string(marker_label)
         # H: void wgpuCommandEncoderInsertDebugMarker(WGPUCommandEncoder commandEncoder, char const * markerLabel)
         # H: void wgpuComputePassEncoderInsertDebugMarker(WGPUComputePassEncoder computePassEncoder, char const * markerLabel)
         # H: void wgpuRenderPassEncoderInsertDebugMarker(WGPURenderPassEncoder renderPassEncoder, char const * markerLabel)

From 1a5b86d23eb66026999e530e751a060393c6605f Mon Sep 17 00:00:00 2001
From: Frank Yellin <fy@fyellin.com>
Date: Thu, 12 Sep 2024 19:33:30 +0300
Subject: [PATCH 2/4] Fix lint and codegen issues.

---
 tests/test_set_override.py        | 4 ++--
 wgpu/backends/wgpu_native/_api.py | 2 ++
 wgpu/resources/codegen_report.md  | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_set_override.py b/tests/test_set_override.py
index b896bc87..4f81a7b6 100644
--- a/tests/test_set_override.py
+++ b/tests/test_set_override.py
@@ -52,7 +52,7 @@
         data[7] = u32(d);
         return vec4f();
     }
-    
+
     @compute @workgroup_size(1)
     fn computeMain() {
         data[0] = u32(a);
@@ -60,7 +60,7 @@
         data[2] = u32(c);
         data[3] = u32(d);
     }
-    
+
 """
 
 BIND_GROUP_ENTRIES = [
diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py
index 21e6653b..c87246f8 100644
--- a/wgpu/backends/wgpu_native/_api.py
+++ b/wgpu/backends/wgpu_native/_api.py
@@ -180,10 +180,12 @@ def _get_override_constant_entries(constants):
     for key, value in constants.items():
         assert isinstance(key, (str, int))
         assert isinstance(value, (int, float, bool))
+        # H: nextInChain: WGPUChainedStruct *, key: char *, value: float
         c_constant_entry = new_struct(
             "WGPUConstantEntry",
             key=to_c_string(str(key)),
             value=float(value),
+            # not used: nextInChain
         )
         key_value_pairs.append(c_constant_entry)
     c_constants = ffi.new("WGPUConstantEntry[]", key_value_pairs)
diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md
index ce53a383..549d2d7b 100644
--- a/wgpu/resources/codegen_report.md
+++ b/wgpu/resources/codegen_report.md
@@ -37,4 +37,4 @@
 * Wrote 236 enum mappings and 47 struct-field mappings to wgpu_native/_mappings.py
 * Validated 132 C function calls
 * Not using 73 C functions
-* Validated 78 C structs
+* Validated 79 C structs

From ecd2dd8dec95b034d3be7473988825a56cabc9ce Mon Sep 17 00:00:00 2001
From: Frank Yellin <fy@fyellin.com>
Date: Fri, 13 Sep 2024 13:47:47 +0300
Subject: [PATCH 3/4] More test info. Fix leak

---
 tests/test_set_override.py        | 157 ++++++++++++++++++++----------
 wgpu/backends/wgpu_native/_api.py |  39 ++++----
 2 files changed, 122 insertions(+), 74 deletions(-)

diff --git a/tests/test_set_override.py b/tests/test_set_override.py
index 4f81a7b6..0e6edb23 100644
--- a/tests/test_set_override.py
+++ b/tests/test_set_override.py
@@ -1,9 +1,11 @@
 import numpy as np
 import pytest
+import numpy.testing as npt
+
 
 import wgpu.utils
 from tests.testutils import can_use_wgpu_lib, run_tests
-from wgpu import TextureFormat
+from wgpu import GPUValidationError, TextureFormat
 
 if not can_use_wgpu_lib:
     pytest.skip("Skipping tests that need the wgpu lib", allow_module_level=True)
@@ -19,48 +21,57 @@
 """
 
 SHADER_SOURCE = """
-    @id(0) override a: i32 = 1;
-    @id(1) override b: u32 = 2u;
-    @id(2) override c: f32 = 3.0;
-    @id(3) override d: bool = true;
+    override a: i32 = 1;
+    override b: u32 = 2u;
+    override c: f32 = 3.0;
+    override d: bool;   // We must specify this!
+    @id(1) override aa: i32 = 10;
+    @id(2) override bb: u32 = 20u;
+    @id(3) override cc: f32 = 30.0;
+    @id(4) override dd: bool = false;
 
     // Put the results here
     @group(0) @binding(0) var<storage, read_write> data: array<u32>;
 
     struct VertexOutput {
-        @location(0) a: i32,
-        @location(1) b: u32,
-        @location(2) c: f32,
-        @location(3) d: u32,
+        @location(0) results1: vec4u,
+        @location(1) results2: vec4u,
         @builtin(position) position: vec4f,
     }
 
     @vertex
-    fn vertexMain(@builtin(vertex_index) index: u32) -> VertexOutput {
-        return VertexOutput(a, b, c, u32(d), vec4f(0, 0, 0, 1));
+    fn vertex(@builtin(vertex_index) index: u32) -> VertexOutput {
+        var output: VertexOutput;
+        output.position = vec4f(0, 0, 0, 1);
+        output.results1 = vec4u(u32(a), u32(b), u32(c), u32(d));
+        output.results2 = vec4u(u32(aa), u32(bb), u32(cc), u32(dd));
+        return output;
     }
 
     @fragment
-    fn fragmentMain(output: VertexOutput) -> @location(0) vec4f {
-        data[0] = u32(output.a);
-        data[1] = u32(output.b);
-        data[2] = u32(output.c);
-        data[3] = u32(output.d);
-        data[4] = u32(a);
-        data[5] = u32(b);
-        data[6] = u32(c);
-        data[7] = u32(d);
+    fn fragment(output: VertexOutput) -> @location(0) vec4f {
+        var i: u32;
+        let results1 = vec4u(u32(a), u32(b), u32(c), u32(d));
+        let results2 = vec4u(u32(aa), u32(bb), u32(cc), u32(dd));
+        write_results(results1, results2);
+
+        // write_results(output.results1, output.results2);
         return vec4f();
     }
 
     @compute @workgroup_size(1)
     fn computeMain() {
-        data[0] = u32(a);
-        data[1] = u32(b);
-        data[2] = u32(c);
-        data[3] = u32(d);
+        let results1 = vec4u(u32(a), u32(b), u32(c), u32(d));
+        let results2 = vec4u(u32(aa), u32(bb), u32(cc), u32(dd));
+        write_results(results1, results2);
+    }
+    
+    fn write_results(results1: vec4u, results2: vec4u) {
+        for (var i = 0; i < 4; i++) { 
+            data[i] = results1[i];
+            data[i+4] = results2[i];
+        }
     }
-
 """
 
 BIND_GROUP_ENTRIES = [
@@ -68,7 +79,13 @@
 ]
 
 
-def test_override_constants():
+def run_override_constant_test(
+    use_render=True,
+    *,
+    compute_constants=None,
+    vertex_constants=None,
+    fragment_constants=None
+):
     device = wgpu.utils.get_default_device()
     output_texture = device.create_texture(
         # Actual size is immaterial.  Could just be 1x1
@@ -81,31 +98,42 @@ def test_override_constants():
     render_pipeline_layout = device.create_pipeline_layout(
         bind_group_layouts=[bind_group_layout],
     )
-    pipeline = device.create_render_pipeline(
-        layout=render_pipeline_layout,
-        vertex={
-            "module": shader,
-            "constants": {0: -2, "1": 10, "2": 20.0, "3": 23},
-        },
-        fragment={
-            "module": shader,
-            "targets": [{"format": output_texture.format}],
-            "constants": {"a": -5, "b": 20, "c": 30.0, "d": True},
-        },
-        primitive={
-            "topology": "point-list",
-        },
-    )
-    render_pass_descriptor = {
-        "color_attachments": [
-            {
-                "clear_value": (0, 0, 0, 0),  # only first value matters
-                "load_op": "clear",
-                "store_op": "store",
-                "view": output_texture.create_view(),
-            }
-        ],
-    }
+    if use_render:
+        pipeline = device.create_render_pipeline(
+            layout=render_pipeline_layout,
+            vertex={
+                "module": shader,
+                "constants": vertex_constants,
+            },
+            fragment={
+                "module": shader,
+                "targets": [{"format": output_texture.format}],
+                "constants": fragment_constants,
+            },
+            primitive={
+                "topology": "point-list",
+            },
+        )
+        render_pass_descriptor = {
+            "color_attachments": [
+                {
+                    "clear_value": (0, 0, 0, 0),  # only first value matters
+                    "load_op": "clear",
+                    "store_op": "store",
+                    "view": output_texture.create_view(),
+                }
+            ],
+        }
+    else:
+        pipeline = device.create_compute_pipeline(
+            layout=render_pipeline_layout,
+            compute={
+                "module": shader,
+                "constants": compute_constants,
+            },
+        )
+        render_pass_descriptor = {}
+
     output_buffer = device.create_buffer(size=8 * 4, usage="STORAGE|COPY_SRC")
     bind_group = device.create_bind_group(
         layout=pipeline.get_bind_group_layout(0),
@@ -115,15 +143,38 @@ def test_override_constants():
     )
 
     encoder = device.create_command_encoder()
-    this_pass = encoder.begin_render_pass(**render_pass_descriptor)
+    if use_render:
+        this_pass = encoder.begin_render_pass(**render_pass_descriptor)
+    else:
+        this_pass = encoder.begin_compute_pass()
     this_pass.set_pipeline(pipeline)
     this_pass.set_bind_group(0, bind_group)
-    this_pass.draw(1)
+    if use_render:
+        this_pass.draw(1)
+    else:
+        this_pass.dispatch_workgroups(1)
     this_pass.end()
     device.queue.submit([encoder.finish()])
     info_view = device.queue.read_buffer(output_buffer)
     result = np.frombuffer(info_view, dtype=np.uint32)
     print(result)
+    return list(result)
+
+
+def test_no_constants():
+    with pytest.raises(GPUValidationError):
+        run_override_constant_test(use_render=True)
+
+    with pytest.raises(GPUValidationError):
+        run_override_constant_test(use_render=False)
+
+
+def test_with_minimal_constants():
+    run_override_constant_test(
+        use_render=True, vertex_constants={"d": 1}, fragment_constants={"d": 0}
+    )
+    run_override_constant_test(use_render=False, compute_constants={"d": 1})
+    pytest.fail("hello")
 
 
 if __name__ == "__main__":
diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py
index c87246f8..c0b3b35f 100644
--- a/wgpu/backends/wgpu_native/_api.py
+++ b/wgpu/backends/wgpu_native/_api.py
@@ -173,10 +173,11 @@ def _tuple_from_color(rgba):
     return _tuple_from_tuple_or_dict(rgba, "rgba")
 
 
-def _get_override_constant_entries(constants):
+def _get_override_constant_entries(field):
+    constants = field.get("constants")
     if not constants:
-        return 0, ffi.NULL
-    key_value_pairs = []
+        return ffi.NULL, []
+    c_constant_entries = []
     for key, value in constants.items():
         assert isinstance(key, (str, int))
         assert isinstance(value, (int, float, bool))
@@ -187,9 +188,11 @@ def _get_override_constant_entries(constants):
             value=float(value),
             # not used: nextInChain
         )
-        key_value_pairs.append(c_constant_entry)
-    c_constants = ffi.new("WGPUConstantEntry[]", key_value_pairs)
-    return len(constants), c_constants
+        c_constant_entries.append(c_constant_entry)
+    # We need to return and hold onto c_constant_entries in order to prevent the C
+    # strings from being GC'ed.
+    c_constants = ffi.new("WGPUConstantEntry[]", c_constant_entries)
+    return c_constants, c_constant_entries
 
 
 def to_c_string(string: str):
@@ -1503,15 +1506,13 @@ def create_compute_pipeline(
         compute: "structs.ProgrammableStage",
     ):
         check_struct("ProgrammableStage", compute)
-        constant_count, c_constants = _get_override_constant_entries(
-            compute.get("constants")
-        )
+        c_constants, c_constant_entries = _get_override_constant_entries(compute)
         # H: nextInChain: WGPUChainedStruct *, module: WGPUShaderModule, entryPoint: char *, constantCount: int, constants: WGPUConstantEntry *
         c_compute_stage = new_struct(
             "WGPUProgrammableStageDescriptor",
             module=compute["module"]._internal,
             entryPoint=to_c_string_or_null(compute.get("entry_point")),
-            constantCount=constant_count,
+            constantCount=len(c_constant_entries),
             constants=c_constants,
             # not used: nextInChain
         )
@@ -1565,8 +1566,6 @@ def create_render_pipeline(
         check_struct("DepthStencilState", depth_stencil)
         check_struct("MultisampleState", multisample)
         check_struct("PrimitiveState", primitive)
-        if fragment:
-            check_struct("FragmentState", fragment)
 
         c_vertex_buffer_layout_list = []
         for buffer_des in vertex.get("buffers", ()):
@@ -1593,9 +1592,7 @@ def create_render_pipeline(
         c_vertex_buffer_descriptors_array = ffi.new(
             "WGPUVertexBufferLayout []", c_vertex_buffer_layout_list
         )
-        constant_count, c_constants = _get_override_constant_entries(
-            vertex.get("constants")
-        )
+        c_vertex_constants, c_vertex_entries = _get_override_constant_entries(vertex)
         # H: nextInChain: WGPUChainedStruct *, module: WGPUShaderModule, entryPoint: char *, constantCount: int, constants: WGPUConstantEntry *, bufferCount: int, buffers: WGPUVertexBufferLayout *
         c_vertex_state = new_struct(
             "WGPUVertexState",
@@ -1603,8 +1600,8 @@ def create_render_pipeline(
             entryPoint=to_c_string_or_null(vertex.get("entry_point")),
             buffers=c_vertex_buffer_descriptors_array,
             bufferCount=len(c_vertex_buffer_layout_list),
-            constantCount=constant_count,
-            constants=c_constants,
+            constantCount=len(c_vertex_entries),
+            constants=c_vertex_constants,
             # not used: nextInChain
         )
 
@@ -1706,8 +1703,8 @@ def create_render_pipeline(
                 "WGPUColorTargetState []", c_color_targets_list
             )
             check_struct("FragmentState", fragment)
-            constant_count, c_constants = _get_override_constant_entries(
-                vertex.get("constants")
+            c_fragment_constants, c_fragment_entries = _get_override_constant_entries(
+                fragment
             )
             # H: nextInChain: WGPUChainedStruct *, module: WGPUShaderModule, entryPoint: char *, constantCount: int, constants: WGPUConstantEntry *, targetCount: int, targets: WGPUColorTargetState *
             c_fragment_state = new_struct_p(
@@ -1716,8 +1713,8 @@ def create_render_pipeline(
                 entryPoint=to_c_string_or_null(fragment.get("entry_point")),
                 targets=c_color_targets_array,
                 targetCount=len(c_color_targets_list),
-                constantCount=constant_count,
-                constants=c_constants,
+                constantCount=len(c_fragment_entries),
+                constants=c_fragment_constants,
                 # not used: nextInChain
             )
 

From 478168eeed58b7673dc97062de7f431135da28a2 Mon Sep 17 00:00:00 2001
From: Frank Yellin <fy@fyellin.com>
Date: Tue, 17 Sep 2024 17:36:00 +0300
Subject: [PATCH 4/4] Finish up tests for override.

---
 tests/test_set_override.py       | 246 +++++++++++++++++--------------
 wgpu/resources/codegen_report.md |   2 +-
 2 files changed, 137 insertions(+), 111 deletions(-)

diff --git a/tests/test_set_override.py b/tests/test_set_override.py
index 0e6edb23..8e0fc138 100644
--- a/tests/test_set_override.py
+++ b/tests/test_set_override.py
@@ -1,41 +1,36 @@
-import numpy as np
 import pytest
-import numpy.testing as npt
-
 
 import wgpu.utils
 from tests.testutils import can_use_wgpu_lib, run_tests
-from wgpu import GPUValidationError, TextureFormat
+from wgpu import TextureFormat
 
 if not can_use_wgpu_lib:
     pytest.skip("Skipping tests that need the wgpu lib", allow_module_level=True)
 
 
 """
-This code is an amazingly slow way of adding together two 10-element arrays of 32-bit
-integers defined by push constants and store them into an output buffer.
+The vertex shader should be called exactly once, which then calls the fragment shader
+exactly once. Alternatively, we call the compute shader exactly once
 
+This copies the values of the four variables a, b, c, and d as seen by each of the shaders
+and writes it into a buffer.  We can then examine that buffer to see the values of the
+constants.
 
-The source code assumes the topology is POINT-LIST, so that each call to vertexMain
-corresponds with one call to fragmentMain. We draw exactly one point.
+This code is also showing that you no longer need to include the name of a shader when
+it is the only shader of that type.
 """
 
 SHADER_SOURCE = """
     override a: i32 = 1;
     override b: u32 = 2u;
-    override c: f32 = 3.0;
-    override d: bool;   // We must specify this!
-    @id(1) override aa: i32 = 10;
-    @id(2) override bb: u32 = 20u;
-    @id(3) override cc: f32 = 30.0;
-    @id(4) override dd: bool = false;
+    @id(1) override c: f32 = 3.0;
+    @id(2) override d: bool = false;
 
     // Put the results here
     @group(0) @binding(0) var<storage, read_write> data: array<u32>;
 
     struct VertexOutput {
-        @location(0) results1: vec4u,
-        @location(1) results2: vec4u,
+        @location(0) values: vec4u,
         @builtin(position) position: vec4f,
     }
 
@@ -43,33 +38,28 @@
     fn vertex(@builtin(vertex_index) index: u32) -> VertexOutput {
         var output: VertexOutput;
         output.position = vec4f(0, 0, 0, 1);
-        output.results1 = vec4u(u32(a), u32(b), u32(c), u32(d));
-        output.results2 = vec4u(u32(aa), u32(bb), u32(cc), u32(dd));
+        output.values = vec4u(u32(a), u32(b), u32(c), u32(d));
         return output;
     }
 
     @fragment
     fn fragment(output: VertexOutput) -> @location(0) vec4f {
-        var i: u32;
-        let results1 = vec4u(u32(a), u32(b), u32(c), u32(d));
-        let results2 = vec4u(u32(aa), u32(bb), u32(cc), u32(dd));
-        write_results(results1, results2);
-
-        // write_results(output.results1, output.results2);
+        let values1 = output.values;
+        let values2 = vec4u(u32(a), u32(b), u32(c), u32(d));
+        write_results(values1, values2);
         return vec4f();
     }
 
     @compute @workgroup_size(1)
     fn computeMain() {
-        let results1 = vec4u(u32(a), u32(b), u32(c), u32(d));
-        let results2 = vec4u(u32(aa), u32(bb), u32(cc), u32(dd));
-        write_results(results1, results2);
+        let results = vec4u(u32(a), u32(b), u32(c), u32(d));
+        write_results(results, results);
     }
-    
+
     fn write_results(results1: vec4u, results2: vec4u) {
-        for (var i = 0; i < 4; i++) { 
+        for (var i = 0; i < 4; i++) {
             data[i] = results1[i];
-            data[i+4] = results2[i];
+            data[i + 4] = results2[i];
         }
     }
 """
@@ -79,102 +69,138 @@
 ]
 
 
-def run_override_constant_test(
-    use_render=True,
-    *,
-    compute_constants=None,
-    vertex_constants=None,
-    fragment_constants=None
-):
-    device = wgpu.utils.get_default_device()
-    output_texture = device.create_texture(
-        # Actual size is immaterial.  Could just be 1x1
-        size=[128, 128],
-        format=TextureFormat.rgba8unorm,
-        usage="RENDER_ATTACHMENT|COPY_SRC",
-    )
-    shader = device.create_shader_module(code=SHADER_SOURCE)
-    bind_group_layout = device.create_bind_group_layout(entries=BIND_GROUP_ENTRIES)
-    render_pipeline_layout = device.create_pipeline_layout(
-        bind_group_layouts=[bind_group_layout],
-    )
-    if use_render:
-        pipeline = device.create_render_pipeline(
-            layout=render_pipeline_layout,
+class Runner:
+    def __init__(self):
+        self.device = device = wgpu.utils.get_default_device()
+        self.output_texture = device.create_texture(
+            # Actual size is immaterial.  Could just be 1x1
+            size=[128, 128],
+            format=TextureFormat.rgba8unorm,
+            usage="RENDER_ATTACHMENT|COPY_SRC",
+        )
+        self.shader = device.create_shader_module(code=SHADER_SOURCE)
+        bind_group_layout = device.create_bind_group_layout(entries=BIND_GROUP_ENTRIES)
+        self.render_pipeline_layout = device.create_pipeline_layout(
+            bind_group_layouts=[bind_group_layout],
+        )
+
+        self.output_buffer = device.create_buffer(size=8 * 4, usage="STORAGE|COPY_SRC")
+        self.bind_group = device.create_bind_group(
+            layout=bind_group_layout,
+            entries=[
+                {"binding": 0, "resource": {"buffer": self.output_buffer}},
+            ],
+        )
+
+        self.color_attachment = {
+            "clear_value": (0, 0, 0, 0),  # only first value matters
+            "load_op": "clear",
+            "store_op": "store",
+            "view": self.output_texture.create_view(),
+        }
+
+    def create_render_pipeline(self, vertex_constants, fragment_constants):
+        return self.device.create_render_pipeline(
+            layout=self.render_pipeline_layout,
             vertex={
-                "module": shader,
+                "module": self.shader,
                 "constants": vertex_constants,
             },
             fragment={
-                "module": shader,
-                "targets": [{"format": output_texture.format}],
+                "module": self.shader,
+                "targets": [{"format": self.output_texture.format}],
                 "constants": fragment_constants,
             },
             primitive={
                 "topology": "point-list",
             },
         )
-        render_pass_descriptor = {
-            "color_attachments": [
-                {
-                    "clear_value": (0, 0, 0, 0),  # only first value matters
-                    "load_op": "clear",
-                    "store_op": "store",
-                    "view": output_texture.create_view(),
-                }
-            ],
-        }
-    else:
-        pipeline = device.create_compute_pipeline(
-            layout=render_pipeline_layout,
+
+    def create_compute_pipeline(self, constants):
+        return self.device.create_compute_pipeline(
+            layout=self.render_pipeline_layout,
             compute={
-                "module": shader,
-                "constants": compute_constants,
+                "module": self.shader,
+                "constants": constants,
             },
         )
-        render_pass_descriptor = {}
-
-    output_buffer = device.create_buffer(size=8 * 4, usage="STORAGE|COPY_SRC")
-    bind_group = device.create_bind_group(
-        layout=pipeline.get_bind_group_layout(0),
-        entries=[
-            {"binding": 0, "resource": {"buffer": output_buffer}},
-        ],
+
+    def run_test(
+        self,
+        *,
+        render: bool = False,
+        compute: bool = False,
+        vertex_constants=None,
+        fragment_constants=None,
+        compute_constants=None
+    ):
+        assert render + compute == 1
+        device = self.device
+        encoder = device.create_command_encoder()
+        if render:
+            this_pass = encoder.begin_render_pass(
+                color_attachments=[self.color_attachment]
+            )
+            pipeline = self.create_render_pipeline(vertex_constants, fragment_constants)
+        else:
+            this_pass = encoder.begin_compute_pass()
+            pipeline = self.create_compute_pipeline(compute_constants)
+        this_pass.set_bind_group(0, self.bind_group)
+        this_pass.set_pipeline(pipeline)
+        if render:
+            this_pass.draw(1)
+        else:
+            this_pass.dispatch_workgroups(1)
+        this_pass.end()
+        device.queue.submit([encoder.finish()])
+        result = device.queue.read_buffer(self.output_buffer).cast("I").tolist()
+        if compute:
+            result = result[:4]
+        print(result)
+        return result
+
+
+@pytest.fixture(scope="module")
+def runner():
+    return Runner()
+
+
+def test_no_overridden_constants_render(runner):
+    assert runner.run_test(render=True) == [1, 2, 3, 0, 1, 2, 3, 0]
+
+
+def test_no_constants_compute(runner):
+    runner.run_test(compute=True) == [1, 2, 3, 0]
+
+
+def test_override_vertex_constants(runner):
+    # Note that setting "d" to any non-zero value is setting it to True
+    overrides = {"a": 21, "b": 22, 1: 23, 2: 24}
+    assert [21, 22, 23, 1, 1, 2, 3, 0] == runner.run_test(
+        render=True, vertex_constants=overrides
     )
 
-    encoder = device.create_command_encoder()
-    if use_render:
-        this_pass = encoder.begin_render_pass(**render_pass_descriptor)
-    else:
-        this_pass = encoder.begin_compute_pass()
-    this_pass.set_pipeline(pipeline)
-    this_pass.set_bind_group(0, bind_group)
-    if use_render:
-        this_pass.draw(1)
-    else:
-        this_pass.dispatch_workgroups(1)
-    this_pass.end()
-    device.queue.submit([encoder.finish()])
-    info_view = device.queue.read_buffer(output_buffer)
-    result = np.frombuffer(info_view, dtype=np.uint32)
-    print(result)
-    return list(result)
-
-
-def test_no_constants():
-    with pytest.raises(GPUValidationError):
-        run_override_constant_test(use_render=True)
-
-    with pytest.raises(GPUValidationError):
-        run_override_constant_test(use_render=False)
-
-
-def test_with_minimal_constants():
-    run_override_constant_test(
-        use_render=True, vertex_constants={"d": 1}, fragment_constants={"d": 0}
+
+def test_override_fragment_constants(runner):
+    # Note that setting "d" to any non-zero value is setting it to True
+    overrides = {"a": 21, "b": 22, 1: 23, 2: -1}
+    assert [1, 2, 3, 0, 21, 22, 23, 1] == runner.run_test(
+        render=True, fragment_constants=overrides
+    )
+
+
+def test_override_compute_constants(runner):
+    # Note that setting "d" to any non-zero value is setting it to True
+    overrides = {"a": 21, "b": 22, 1: 23, 2: 24}
+    assert [21, 22, 23, 1] == runner.run_test(compute=True, compute_constants=overrides)
+
+
+def test_numbered_constants_must_be_overridden_by_number(runner):
+    overrides = {"c": 23, "d": 24}
+    # This does absolutely nothing.  It doesn't even error.
+    assert [1, 2, 3, 0, 1, 2, 3, 0] == runner.run_test(
+        render=True, vertex_constants=overrides, fragment_constants=overrides
     )
-    run_override_constant_test(use_render=False, compute_constants={"d": 1})
-    pytest.fail("hello")
 
 
 if __name__ == "__main__":
diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md
index a61155dc..09cd37c4 100644
--- a/wgpu/resources/codegen_report.md
+++ b/wgpu/resources/codegen_report.md
@@ -37,4 +37,4 @@
 * Wrote 236 enum mappings and 47 struct-field mappings to wgpu_native/_mappings.py
 * Validated 131 C function calls
 * Not using 72 C functions
-* Validated 80 C structs
+* Validated 81 C structs