diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index d92ad2cd..2c7e79ab 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -75,9 +75,9 @@ jobs: - name: Install and test wheel shell: bash run: | - rm -rf ./wgpu # Install 'normally' to install deps, then force the install from dist-folder and nothing else - pip install --find-links dist wgpu + pip install . + rm -rf ./wgpu pip install --force-reinstall --no-deps --no-index --find-links dist wgpu pushd $HOME python -c 'import wgpu.backends.wgpu_native; print(wgpu.backends.wgpu_native._ffi.lib_path)' diff --git a/pyproject.toml b/pyproject.toml index 6e8a9345..447d64c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,11 @@ license = { file = "LICENSE" } authors = [{ name = "Almar Klein" }, { name = "Korijn van Golen" }] keywords = ["webgpu", "wgpu", "vulkan", "metal", "DX12", "opengl"] requires-python = ">= 3.9" -dependencies = ["cffi>=1.15.0", "rubicon-objc>=0.4.1; sys_platform == 'darwin'"] +dependencies = [ + "cffi>=1.15.0", + "rubicon-objc>=0.4.1; sys_platform == 'darwin'", + "sniffio", +] [project.optional-dependencies] # For users @@ -20,7 +24,15 @@ imgui = ["imgui-bundle>=1.2.1"] build = ["build", "hatchling", "requests", "twine"] codegen = ["pytest", "numpy", "ruff"] lint = ["ruff", "pre-commit"] -tests = ["numpy", "pytest", "psutil", "imageio"] +tests = [ + "numpy", + "pytest", + "pytest-anyio", + "psutil", + "imageio", + "anyio", + "trio", +] examples = [] docs = ["sphinx>7.2", "sphinx_rtd_theme"] dev = ["wgpu[build,codegen,lint,tests,examples,docs]"] @@ -63,6 +75,9 @@ artifacts = ["*.so", "*.dll", "*.dylib"] [tool.hatch.build.targets.wheel.hooks.custom] path = "tools/hatch_build.py" +[tool.pytest.ini_options] +asyncio_default_fixture_loop_scope = "function" + # ===== Tooling [tool.ruff] diff --git a/tests/test_async.py b/tests/test_async.py new file mode 100644 index 00000000..98bdd220 --- /dev/null +++ b/tests/test_async.py @@ -0,0 +1,130 @@ +import anyio + 
+from pytest import mark + +import wgpu.utils +from testutils import can_use_wgpu_lib, run_tests +from wgpu import GPUDevice, MapMode, TextureFormat +from wgpu.backends.wgpu_native import WgpuAwaitable + + +@mark.anyio +@mark.parametrize("use_async", [False, True]) +async def test_awaitable_async(use_async): + count = 0 + + def finalizer(i): + return i * i + + def callback(i): + awaitable.set_result(i) + + def poll_function(): + nonlocal count + count += 1 + if count >= 3: + callback(10) + + awaitable = WgpuAwaitable("test", callback, finalizer, poll_function) + + if use_async: + result = await awaitable + else: + result = awaitable.sync_wait() + assert result == 10 * 10 + + +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +@mark.anyio +async def test_enumerate_adapters_async(): + adapters = await wgpu.gpu.enumerate_adapters_async() + assert len(adapters) > 0 + for adapter in adapters: + device = await adapter.request_device_async() + assert isinstance(device, GPUDevice) + + +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +@mark.anyio +async def test_request_device_async(): + adapter = await wgpu.gpu.request_adapter_async(power_preference="high-performance") + device = await adapter.request_device_async() + assert device is not None + + +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +@mark.anyio +async def test_buffer_map_async(): + device = wgpu.utils.get_default_device() + + data = b"1" * 10000 + buffer1 = device.create_buffer(size=len(data), usage="MAP_WRITE|COPY_SRC") + buffer2 = device.create_buffer(size=len(data), usage="MAP_READ|COPY_DST") + await buffer1.map_async(MapMode.WRITE) + buffer1.write_mapped(data) + buffer1.unmap() + + command_encoder = device.create_command_encoder() + command_encoder.copy_buffer_to_buffer(buffer1, 0, buffer2, 0, len(data)) + device.queue.submit([command_encoder.finish()]) + + await buffer2.map_async(MapMode.READ) + data2 = buffer2.read_mapped() + buffer2.unmap() + + assert bytes(data2) == 
data + + +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +@mark.anyio +async def make_pipeline_async(): + device = wgpu.utils.get_default_device() + + shader_source = """ + @vertex + fn vertex_main() -> @builtin(position) vec4f { + return vec4f(0, 0, 0, 1.); + } + + @compute @workgroup_size(1) + fn compute_main() { } + """ + + shader = device.create_shader_module(code=shader_source) + + results = [None, None] + async with anyio.create_task_group() as tg: + # It's unfortunate anyio doesn't have async.gather. This code would just be + # compute_pipeline, render_pipeline = asyncio.gather(.....) + async def create_compute_pipeline(): + results[0] = await device.create_compute_pipeline_async( + layout="auto", compute={"module": shader} + ) + + async def create_render_pipeline(): + results[1] = await device.create_render_pipeline_async( + layout="auto", + vertex={ + "module": shader, + }, + depth_stencil={"format": TextureFormat.rgba8unorm}, + ) + + tg.start_soon(create_compute_pipeline) + tg.start_soon(create_render_pipeline) + + compute_pipeline, render_pipeline = results + assert compute_pipeline is not None + assert render_pipeline is not None + + command_encoder = device.create_command_encoder() + compute_pass = command_encoder.begin_compute_pass() + compute_pass.set_pipeline(compute_pipeline) + compute_pass.dispatch_workgroups(10, 10) + compute_pass.end() + device.queue.submit([command_encoder.finish()]) + await device.queue.on_submitted_work_done_async() + + +if __name__ == "__main__": + run_tests(globals()) diff --git a/tests/test_wgpu_native_basics.py b/tests/test_wgpu_native_basics.py index 21111d90..c30229d7 100644 --- a/tests/test_wgpu_native_basics.py +++ b/tests/test_wgpu_native_basics.py @@ -12,7 +12,6 @@ from testutils import run_tests, can_use_wgpu_lib, is_ci from pytest import mark, raises - is_win = sys.platform.startswith("win") @@ -468,7 +467,3 @@ def test_limits_are_not_legal(): if __name__ == "__main__": run_tests(globals()) - - -if 
__name__ == "__main__": - run_tests(globals()) diff --git a/tests/test_wgpu_native_buffer.py b/tests/test_wgpu_native_buffer.py index 994aa6af..f9281352 100644 --- a/tests/test_wgpu_native_buffer.py +++ b/tests/test_wgpu_native_buffer.py @@ -548,5 +548,26 @@ def test_create_buffer_with_data(size): assert copy[size:] == bytes(buffer._nbytes - size) +@pytest.mark.skip +def test_show_bug_wgpu_native_305_still_not_fixed(): + # When this bug is fixed, we can remove READ_NOSYNC, and just treat "READ" as if + # it were READ_NOSYNC. No need to handle the command buffer. + device = wgpu.utils.get_default_device() + data1 = b"abcdefghijkl" + + # Create buffer with data + buf = device.create_buffer( + size=len(data1), usage=wgpu.BufferUsage.MAP_READ, mapped_at_creation=True + ) + buf.write_mapped(data1) + buf.unmap() + + # Download from buffer to CPU + buf.map("READ_NOSYNC") + data2 = bytes(buf.read_mapped()) + buf.unmap() + assert data1 == data2 + + if __name__ == "__main__": run_tests(globals()) diff --git a/wgpu/_classes.py b/wgpu/_classes.py index e9b380bc..b7032135 100644 --- a/wgpu/_classes.py +++ b/wgpu/_classes.py @@ -153,7 +153,7 @@ def enumerate_adapters_sync(self): async def enumerate_adapters_async(self): """Get a list of adapter objects available on the current system. - An adapter can then be selected (e.g. using it's summary), and a device + An adapter can then be selected (e.g. using its summary), and a device then created from it. 
The order of the devices is such that Vulkan adapters go first, then diff --git a/wgpu/backends/wgpu_native/__init__.py b/wgpu/backends/wgpu_native/__init__.py index 87b38033..dc9116de 100644 --- a/wgpu/backends/wgpu_native/__init__.py +++ b/wgpu/backends/wgpu_native/__init__.py @@ -21,3 +21,4 @@ _register_backend(gpu) from .extras import request_device_sync, request_device +from ._helpers import WgpuAwaitable diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py index 556348ec..4b2177ba 100644 --- a/wgpu/backends/wgpu_native/_api.py +++ b/wgpu/backends/wgpu_native/_api.py @@ -34,8 +34,8 @@ get_memoryview_and_address, to_snake_case, ErrorHandler, - WgpuAwaitable, SafeLibCalls, + WgpuAwaitable, ) logger = logging.getLogger("wgpu") @@ -68,6 +68,28 @@ def check_can_use_sync_variants(): } +def print_struct(s, indent=""): + """Tool to pretty-print struct contents during debugging.""" + for key in dir(s): + val = getattr(s, key) + if repr(val).startswith(" here we must resolve the promise, or something + def callback(status, _user_data_p): + if status == 0: + awaitable.set_result(True) + else: + result = {1: "Error", 2: "Unknown", 3: "DeviceLost"}.get( + status, "Other" + ) + awaitable.set_error(f"Queue work done status: {result}") - # H: void f(WGPUQueue queue, WGPUQueueOnSubmittedWorkDoneCallback callback, void * userdata) - libf.wgpuQueueOnSubmittedWorkDone(self._internal, callback, ffi.NULL) + def finalizer(_value): + return None - # Wait for the queue to process all tasks (including the mapping of the buffer). - # Also see WebGPU's onSubmittedWorkDone() and C's WGPUQueueWorkDoneCallback. 
- self._device._poll() + awaitable = WgpuAwaitable( + "on_submitted_work_done", callback, finalizer, self._device._poll_wait + ) - if status != 0: - raise RuntimeError(f"Queue work done status: {status}") + # H: void f(WGPUQueue queue, WGPUQueueOnSubmittedWorkDoneCallback callback, void * userdata) + libf.wgpuQueueOnSubmittedWorkDone(self._internal, callback, ffi.NULL) - async def on_submitted_work_done_async(self): - raise NotImplementedError() + return awaitable class GPURenderBundle(classes.GPURenderBundle, GPUObjectBase): diff --git a/wgpu/backends/wgpu_native/_helpers.py b/wgpu/backends/wgpu_native/_helpers.py index d12ff299..2dfc8ba1 100644 --- a/wgpu/backends/wgpu_native/_helpers.py +++ b/wgpu/backends/wgpu_native/_helpers.py @@ -1,21 +1,22 @@ """Utilities used in the wgpu-native backend.""" +import ctypes import sys import time -import ctypes from queue import deque +import sniffio + from ._ffi import ffi, lib, lib_path from ..._diagnostics import DiagnosticsBase from ...classes import ( GPUError, + GPUInternalError, GPUOutOfMemoryError, - GPUValidationError, GPUPipelineError, - GPUInternalError, + GPUValidationError, ) - ERROR_TYPES = { "": GPUError, "OutOfMemory": GPUOutOfMemoryError, @@ -227,6 +228,13 @@ def to_camel_case(name): return name2 +async def async_sleep(delay): + """Async sleep that uses sniffio to be compatible with asyncio, trio, rendercanvas.utils.asyncadapter, and possibly more.""" + libname = sniffio.current_async_library() + sleep = sys.modules[libname].sleep + await sleep(delay) + + class WgpuAwaitable: """An object that can be waited for, either synchronously using sync_wait() or asynchronously using await. @@ -234,49 +242,64 @@ class WgpuAwaitable: truely async manner, as well as to support a synchronous version of them. 
""" - def __init__(self, title, result, callback, poll_function, finalizer, timeout=5.0): + def __init__(self, title, callback, finalizer, poll_function=None, timeout=5.0): self.title = title # for context in error messages - self.result = result # a dict that the callbacks writes to self.callback = callback # only used to prevent it from being gc'd - self.poll_function = poll_function # call this to poll wgpu self.finalizer = finalizer # function to finish the result + self.poll_function = poll_function # call this to poll wgpu self.maxtime = time.perf_counter() + float(timeout) + self.result = None + + def set_result(self, result): + self.result = (result, None) + + def set_error(self, error): + self.result = (None, error) - def is_done(self): + def _is_done(self): self.poll_function() - if self.result: - return True - if time.perf_counter() > self.maxtime: - self.result["timeout"] = True - return True - return False - - def finish(self): - if "result" in self.result: - return self.finalizer(self.result["result"]) - elif "error" in self.result: - raise RuntimeError(self.result["error"]) - elif "timeout" in self.result: - raise RuntimeError(f"Waiting for {self.title} timed out.") - else: - raise RuntimeError(f"Failed to obtain result for {self.title}.") + return self.result is not None or time.perf_counter() > self.maxtime + + def _finish(self): + try: + if not self.result: + raise RuntimeError(f"Waiting for {self.title} timed out.") + result, error = self.result + if error: + raise RuntimeError(error) + else: + return self.finalizer(result) + finally: + # Reset attrs to prevent potential memory leaks + self.callback = self.finalizer = self.poll_function = self.result = None def sync_wait(self): - # Basically a spin-lock, but we sleep(0) to give other threads more room to run. 
- while not self.is_done(): - time.sleep(0) # yield to the GIL - return self.finish() + if self.result is not None: + pass + elif not self.poll_function: + raise RuntimeError("Expected callback to have already happened") + else: + while not self._is_done(): + time.sleep(0) + return self._finish() def __await__(self): - # With this approach the CPU is also kept busy, because the task will - # continuously be scheduled to do a next step, but at least other tasks - # will have a chance to run as well (e.g. GUI's running in the same loop - # stay active). In theory we could async-sleep here, but there are two - # problems: how long should we sleep, and using asyncio.sleep() would - # not work on e.g. Trio. - while not self.is_done(): - yield None # yield to the loop - return self.finish() + # There is no documentation on what __await__() is supposed to return, but we + # can certainly copy from a function that *does* know what to return + async def wait_for_callback(): + # In all the async cases that I've tried, the result is either already set, or + # resolves after the first call to the poll function. To make sure that our + # sleep-logic actually works, we always do at least one sleep call. 
+ await async_sleep(0) + if self.result is not None: + return + if not self.poll_function: + raise RuntimeError("Expected callback to have already happened") + while not self._is_done(): + await async_sleep(0) + + yield from wait_for_callback().__await__() + return self._finish() class ErrorHandler: diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md index 3c69e6ac..279fba39 100644 --- a/wgpu/resources/codegen_report.md +++ b/wgpu/resources/codegen_report.md @@ -20,7 +20,7 @@ * Diffs for GPUQueue: add read_buffer, add read_texture, hide copy_external_image_to_texture * Validated 37 classes, 121 methods, 46 properties ### Patching API for backends/wgpu_native/_api.py -* Validated 37 classes, 119 methods, 0 properties +* Validated 37 classes, 121 methods, 0 properties ## Validating backends/wgpu_native/_api.py * Enum field FeatureName.texture-compression-bc-sliced-3d missing in wgpu.h * Enum field FeatureName.clip-distances missing in wgpu.h @@ -35,6 +35,6 @@ * Enum CanvasAlphaMode missing in wgpu.h * Enum CanvasToneMappingMode missing in wgpu.h * Wrote 236 enum mappings and 47 struct-field mappings to wgpu_native/_mappings.py -* Validated 140 C function calls -* Not using 65 C functions +* Validated 142 C function calls +* Not using 64 C functions * Validated 82 C structs