From 8b3c69358c2b0e5499d8b4fabac50ee0449d6c73 Mon Sep 17 00:00:00 2001 From: Frank Yellin Date: Sat, 14 Sep 2024 17:02:56 +0300 Subject: [PATCH] First pass for multi_draw --- docs/backends.rst | 26 ++++++++ tests/test_wgpu_vertex_instance.py | 100 ++++++++++++++++------------ wgpu/backends/wgpu_native/extras.py | 4 ++ 3 files changed, 86 insertions(+), 44 deletions(-) diff --git a/docs/backends.rst b/docs/backends.rst index ec3bafcd..419068e8 100644 --- a/docs/backends.rst +++ b/docs/backends.rst @@ -59,6 +59,32 @@ The wgpu_native backend provides a few extra functionalities: :return: Device :rtype: wgpu.GPUDevice +There are two functions that allow you to perform multiple draw calls at once. +Both require that you enable the feature "multi-draw-indirect". + +Typically, these calls do not reduce work or increase parallelism on the GPU. Rather +they reduce driver overhead on the CPU. + +.. py:function:: wgpu.backends.wgpu_native.multi_draw_indirect(render_pass_encoder, buffer, *, offset=0, count): + This is equivalent to + for i in range(count): + render_pass_encoder.draw_indirect(buffer, offset + i * 16) + + :param render_pass_encoder: The current render pass encoder. + :param buffer: The indirect buffer containing the arguments. + :param offset: The byte offset in the indirect buffer containing the first argument. + :param count: The number of draw operations to perform. + +.. py:function:: wgpu.backends.wgpu_native.multi_draw_indexed_indirect(render_pass_encoder, buffer, *, offset=0, count): + This is equivalent to + for i in range(count): + render_pass_encoder.draw_indexed_indirect(buffer, offset + i * 20) + + :param render_pass_encoder: The current render pass encoder. + :param buffer: The indirect buffer containing the arguments. + :param offset: The byte offset in the indirect buffer containing the first argument. + :param count: The number of draw operations to perform. 
+ The js_webgpu backend --------------------- diff --git a/tests/test_wgpu_vertex_instance.py b/tests/test_wgpu_vertex_instance.py index 57e7d313..046a4b5f 100644 --- a/tests/test_wgpu_vertex_instance.py +++ b/tests/test_wgpu_vertex_instance.py @@ -56,20 +56,26 @@ class Runner: - def __init__(self, device): - self.device = device - self.output_texture = device.create_texture( + def __init__(self, use_multidraw_if_available: bool = True): + adapter = wgpu.gpu.request_adapter(power_preference="high-performance") + features = [] + if use_multidraw_if_available and "multi-draw-indirect" in adapter.features: + features.append("multi-draw-indirect") + self.device = adapter.request_device(required_features=features) + self.output_texture = self.device.create_texture( # Actual size is immaterial. Could just be 1x1 size=[128, 128], format=TextureFormat.rgba8unorm, usage="RENDER_ATTACHMENT|COPY_SRC", ) - shader = device.create_shader_module(code=SHADER_SOURCE) - bind_group_layout = device.create_bind_group_layout(entries=BIND_GROUP_ENTRIES) - render_pipeline_layout = device.create_pipeline_layout( + shader = self.device.create_shader_module(code=SHADER_SOURCE) + bind_group_layout = self.device.create_bind_group_layout( + entries=BIND_GROUP_ENTRIES + ) + render_pipeline_layout = self.device.create_pipeline_layout( bind_group_layouts=[bind_group_layout] ) - self.pipeline = device.create_render_pipeline( + self.pipeline = self.device.create_render_pipeline( layout=render_pipeline_layout, vertex={ "module": shader, @@ -85,19 +91,29 @@ def __init__(self, device): }, ) - self.vertex_call_buffer = device.create_buffer( + self.vertex_call_buffer = self.device.create_buffer( size=MAX_INFO * 2 * 4, usage="STORAGE|COPY_SRC" ) - self.counter_buffer = device.create_buffer( + self.counter_buffer = self.device.create_buffer( size=4, usage="STORAGE|COPY_SRC|COPY_DST" ) - self.bind_group = device.create_bind_group( + self.bind_group = self.device.create_bind_group( 
layout=self.pipeline.get_bind_group_layout(0), entries=[ {"binding": 0, "resource": {"buffer": self.vertex_call_buffer}}, {"binding": 1, "resource": {"buffer": self.counter_buffer}}, ], ) + self.render_pass_descriptor = { + "color_attachments": [ + { + "clear_value": (0, 0, 0, 0), # only first value matters + "load_op": "clear", + "store_op": "store", + "view": self.output_texture.create_view(), + } + ], + } def create_render_bundle_encoder(self, draw_function): render_bundle_encoder = self.device.create_render_bundle_encoder( @@ -109,20 +125,9 @@ def create_render_bundle_encoder(self, draw_function): return render_bundle_encoder.finish() def run_function(self, expected_result, draw_function): - render_pass_descriptor = { - "color_attachments": [ - { - "clear_value": (0, 0, 0, 0), # only first value matters - "load_op": "clear", - "store_op": "store", - "view": self.output_texture.create_view(), - } - ], - } - encoder = self.device.create_command_encoder() encoder.clear_buffer(self.counter_buffer) - this_pass = encoder.begin_render_pass(**render_pass_descriptor) + this_pass = encoder.begin_render_pass(**self.render_pass_descriptor) this_pass.set_pipeline(self.pipeline) this_pass.set_bind_group(0, self.bind_group) draw_function(this_pass) @@ -146,18 +151,8 @@ def run_functions(self, expected_result, functions): self.run_function(expected_result, function) -def get_device(features): - adapter = wgpu.gpu.request_adapter(power_preference="high-performance") - try: - device = adapter.request_device(required_features=features) - except RuntimeError: - pytest.skip("Features needed for this test are not available") - return device - - def test_draw_no_index(): - device = get_device(features=["multi-draw-indirect"]) - runner = Runner(device) + runner = Runner() # vertex_count, index_count, first_vertex, first_index draw_args1 = [2, 3, 100, 10] @@ -165,7 +160,7 @@ def test_draw_no_index(): expected_result = set(itertools.product((100, 101), (10, 11, 12))) | {(30, 50)} 
draw_data_info = np.uint32([0, 0] + draw_args1 + draw_args2) - draw_data_buffer = device.create_buffer_with_data( + draw_data_buffer = runner.device.create_buffer_with_data( data=draw_data_info, usage="INDIRECT" ) @@ -177,16 +172,23 @@ def draw_indirect(encoder): encoder.draw_indirect(draw_data_buffer, 8) encoder.draw_indirect(draw_data_buffer, 8 + 16) - render_bundle_encoder = runner.create_render_bundle_encoder(draw_direct) + def draw_mixed(encoder): + encoder.draw(*draw_args1) + encoder.draw_indirect(draw_data_buffer, 8 + 16) + + def draw_indirect_multi(encoder): + multi_draw_indirect(encoder, draw_data_buffer, offset=8, count=2) + + render_bundle_encoder = runner.create_render_bundle_encoder(draw_mixed) + has_multi_draw_indirect = "multi-draw-indirect" in runner.device.features runner.run_functions( expected_result, [ draw_direct, draw_indirect, - lambda encoder: multi_draw_indirect( - encoder, draw_data_buffer, offset=8, count=2 - ), + draw_mixed, + *([draw_indirect_multi] if has_multi_draw_indirect else []), lambda encoder: encoder.execute_bundles([render_bundle_encoder]), lambda encoder: encoder.execute_bundles([render_bundle_encoder]), ], @@ -194,8 +196,7 @@ def draw_indirect(encoder): def test_draw_indexed(): - device = get_device(features=["multi-draw-indirect"]) - runner = Runner(device) + runner = Runner() # index_count, instance_count, first_index, base_vertex, first_intance draw_args1 = (4, 2, 1, 100, 1000) @@ -205,10 +206,14 @@ def test_draw_indexed(): expected_result.add((219, 2000)) index_buffer_data = np.uint32(index_buffer_data) - index_buffer = device.create_buffer_with_data(data=index_buffer_data, usage="INDEX") + index_buffer = runner.device.create_buffer_with_data( + data=index_buffer_data, usage="INDEX" + ) draw_data = np.uint32([0, 0] + list(draw_args1) + list(draw_args2)) - draw_data_buffer = device.create_buffer_with_data(data=draw_data, usage="INDIRECT") + draw_data_buffer = runner.device.create_buffer_with_data( + data=draw_data, 
usage="INDIRECT" + ) def draw_direct(encoder): encoder.set_index_buffer(index_buffer, "uint32") @@ -220,18 +225,25 @@ def draw_indirect(encoder): encoder.draw_indexed_indirect(draw_data_buffer, 8) encoder.draw_indexed_indirect(draw_data_buffer, 8 + 20) + def draw_mixed(encoder): + encoder.set_index_buffer(index_buffer, "uint32") + encoder.draw_indexed(*draw_args1) + encoder.draw_indexed_indirect(draw_data_buffer, 8 + 20) + def draw_indirect_multi(encoder): encoder.set_index_buffer(index_buffer, "uint32") multi_draw_indexed_indirect(encoder, draw_data_buffer, offset=8, count=2) - render_bundle_encoder = runner.create_render_bundle_encoder(draw_indirect) + render_bundle_encoder = runner.create_render_bundle_encoder(draw_mixed) + has_multi_draw_indirect = "multi-draw-indirect" in runner.device.features runner.run_functions( expected_result, [ draw_direct, draw_indirect, - draw_indirect_multi, + draw_mixed, + *([draw_indirect_multi] if has_multi_draw_indirect else []), lambda encoder: encoder.execute_bundles([render_bundle_encoder]), lambda encoder: encoder.execute_bundles([render_bundle_encoder]), ], diff --git a/wgpu/backends/wgpu_native/extras.py b/wgpu/backends/wgpu_native/extras.py index 6b83a6c9..bdaf466b 100644 --- a/wgpu/backends/wgpu_native/extras.py +++ b/wgpu/backends/wgpu_native/extras.py @@ -40,6 +40,8 @@ def multi_draw_indirect(render_pass_encoder, buffer, *, offset=0, count): This is equvalent to for i in range(count): render_pass_encoder.draw(buffer, offset + i * 16) + + You must enable the feature "multi-draw-indirect" to use this function. """ render_pass_encoder._multi_draw_indirect(buffer, offset, count) @@ -50,5 +52,7 @@ def multi_draw_indexed_indirect(render_pass_encoder, buffer, *, offset=0, count) for i in range(count): render_pass_encoder.draw_indexed(buffer, offset + i * 20) + + You must enable the feature "multi-draw-indirect" to use this function. """ render_pass_encoder._multi_draw_indexed_indirect(buffer, offset, count)