diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 3958eade2c..edfc210ef8 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,7 +7,7 @@ updates: interval: weekly # This allows dependabot to update _all_ lockfile packages. # - # These will be grouped into the existing group update PRs, so shoudn't generate additional jobs. + # These will be grouped into the existing group update PRs, so shouldn't generate additional jobs. allow: # Allow both direct and indirect updates for all packages - dependency-type: "all" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd0102cf4d..1cb6a28a19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,13 +37,13 @@ env: # # This needs to be newer to work around https://github.com/gfx-rs/wgpu/issues/4905. # - # Once 1.76 coes out, we can use that instead of nightly. + # Once this fix hits stable Rust, we can use that instead of nightly. DOCS_RUST_VERSION: "nightly-2023-12-17" # This is the MSRV used by `wgpu` itself and all surrounding infrastructure. REPO_MSRV: "1.76" # This is the MSRV used by the `wgpu-core`, `wgpu-hal`, and `wgpu-types` crates, # to ensure that they can be used with firefox. - CORE_MSRV: "1.74" + CORE_MSRV: "1.76" # # Environment variables @@ -59,6 +59,7 @@ env: RUSTDOCFLAGS: -D warnings WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes CACHE_SUFFIX: c # cache busting + WGPU_TESTING: true # We distinguish the following kinds of builds: # - native: build for the same target as we compile on @@ -150,10 +151,10 @@ jobs: cargo -V # Use special toolchain for rustdoc, see https://github.com/gfx-rs/wgpu/issues/4905 - # - name: Install Rustdoc Toolchain - # run: | - # rustup toolchain install ${{ env.DOCS_RUST_VERSION }} --no-self-update --profile=minimal --component rust-docs --target ${{ matrix.target }} - # cargo +${{ env.DOCS_RUST_VERSION }} -V + - name: Install Rustdoc Toolchain + run: | + rustup toolchain install ${{ env.DOCS_RUST_VERSION }} --no-self-update --profile=minimal --component rust-docs --target ${{ matrix.target }} + cargo +${{ env.DOCS_RUST_VERSION }} -V - name: disable debug shell: bash @@ -195,11 +196,11 @@ jobs: # build for WebGPU cargo clippy --target ${{ matrix.target }} --tests --features glsl,spirv,fragile-send-sync-non-atomic-wasm cargo clippy --target ${{ matrix.target }} --tests --features glsl,spirv - # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --features glsl,spirv + cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --features glsl,spirv # all features cargo clippy --target ${{ matrix.target }} --tests --all-features - # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --all-features + cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --all-features - name: check em if: matrix.kind == 'em' @@ -229,13 +230,15 @@ jobs: cargo clippy --target ${{ matrix.target }} --tests --benches --all-features # build docs - # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps + cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps + # wgpu-core docs are not feasible due to + # # - name: check private item docs # if: matrix.kind == 'native' # shell: bash # run: | # set -e - + # # # wgpu_core package # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} \ # --package wgpu-core \ @@ -568,6 +571,7 @@ jobs: if: steps.coverage.outcome == 
'success' with: files: lcov.info + token: ${{ secrets.CODECOV_TOKEN }}
doctest: # runtime is normally 2 minutes @@ -628,7 +632,7 @@ jobs: cargo fmt --manifest-path xtask/Cargo.toml -- --check - name: Check for typos - uses: crate-ci/typos@v1.22.9 + uses: crate-ci/typos@v1.23.6
check-cts-runner: # runtime is normally 2 minutes @@ -677,7 +681,7 @@ jobs: uses: actions/checkout@v4 - name: Run `cargo deny check` - uses: EmbarkStudios/cargo-deny-action@v1 + uses: EmbarkStudios/cargo-deny-action@v2 with: command: check advisories arguments: --all-features --workspace @@ -694,7 +698,7 @@ jobs: uses: actions/checkout@v4 - name: Run `cargo deny check` - uses: EmbarkStudios/cargo-deny-action@v1 + uses: EmbarkStudios/cargo-deny-action@v2 with: command: check bans licenses sources arguments: --all-features --workspace
diff --git a/CHANGELOG.md b/CHANGELOG.md index e580c550d2..fb1bc4a0ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ Please add your PR to the changelog! Choose from a top level and bottom level category, then write your changes like follows:
-- Describe your change in a user friendly format by @yourslug in [#99999](https://github.com/gfx-rs/wgpu/pull/99999)
+- Describe your change in a user friendly format. By @yourslug in [#99999](https://github.com/gfx-rs/wgpu/pull/99999)
You can add additional user facing information if it's a major breaking change. You can use the following to help: @@ -41,11 +41,81 @@ Bottom level categories: ### Major Changes
+#### `wgpu-core` is no longer generic over `wgpu-hal` backends
+Dynamic dispatch between different backends has been moved from the user-facing `wgpu` crate
+to a new dynamic dispatch mechanism inside the backend abstraction layer `wgpu-hal`.
+
+Whenever targeting more than a single backend (the default on Windows & Linux), this leads to faster compile times and smaller binaries!
+This also solves a long-standing issue with `cargo doc` failing to run for `wgpu-core`.
+
+Benchmarking indicated that compute pass recording is slower as a consequence,
+whereas render pass recording shows speed improvements.
+However, this effort simplifies many of the internals of the wgpu family of crates,
+which we hope to build further performance improvements upon in the future.
+
+By @wumpf in [#6069](https://github.com/gfx-rs/wgpu/pull/6069), [#6099](https://github.com/gfx-rs/wgpu/pull/6099), [#6100](https://github.com/gfx-rs/wgpu/pull/6100).
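To make the shape of this change concrete, here is a minimal illustrative Rust sketch of the move from static to dynamic backend dispatch. All names (`HalDevice`, `CoreDevice`, `NoopBackend`) are hypothetical stand-ins, not wgpu's actual types:

```rust
// Hypothetical stand-ins for illustration; not wgpu's real API.
trait HalDevice {
    fn create_buffer(&self, size: u64);
}

// Before: generic over the backend, so every enabled backend got its own
// monomorphized copy of the core code.
#[allow(dead_code)]
struct CoreDeviceGeneric<A: HalDevice> {
    raw: A,
}

// After: one concrete type that dispatches through a trait object, so the
// core code is compiled once no matter how many backends are enabled.
struct CoreDevice {
    raw: Box<dyn HalDevice>,
}

impl CoreDevice {
    fn create_buffer(&self, size: u64) {
        // A single virtual call replaces a per-backend copy of this function.
        self.raw.create_buffer(size);
    }
}

struct NoopBackend;
impl HalDevice for NoopBackend {
    fn create_buffer(&self, _size: u64) {}
}

fn main() {
    let device = CoreDevice { raw: Box::new(NoopBackend) };
    device.create_buffer(1024);
}
```

This is the classic compile-time versus run-time trade visible in the benchmark note above: trait objects add an indirect call per operation, but eliminate duplicated monomorphized code.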
+
+### New Features
+
+#### Naga
+
+* Support constant evaluation for `firstLeadingBit` and `firstTrailingBit` numeric built-ins in WGSL. Front-ends that translate to these built-ins also benefit from constant evaluation. By @ErichDonGubler in [#5101](https://github.com/gfx-rs/wgpu/pull/5101).
+
+### Bug Fixes
+
+#### General
+
+- If GL context creation fails, retry with GLES. By @Rapdorian in [#5996](https://github.com/gfx-rs/wgpu/pull/5996)
+- Fix profiling with `tracy`. By @waywardmonkeys in [#5988](https://github.com/gfx-rs/wgpu/pull/5988)
+- As a workaround for [issue #4905](https://github.com/gfx-rs/wgpu/issues/4905), `wgpu-core` is undocumented unless the `--cfg wgpu_core_doc` flag is enabled. By @kpreid in [#5987](https://github.com/gfx-rs/wgpu/pull/5987)
+- Bump MSRV for `d3d12`/`naga`/`wgpu-core`/`wgpu-hal`/`wgpu-types` to 1.76. By @wumpf in [#6003](https://github.com/gfx-rs/wgpu/pull/6003)
+- Print requested and supported usages on `UnsupportedUsage` error. By @VladasZ in [#6007](https://github.com/gfx-rs/wgpu/pull/6007)
+- Fix the bind compatibility check to return an error instead of panicking. By @sagudev in [#6012](https://github.com/gfx-rs/wgpu/pull/6012)
+- Deduplicate bind group layouts that are created from pipelines with "auto" layouts. By @teoxoy in [#6049](https://github.com/gfx-rs/wgpu/pull/6049)
+- Fix crash when dropping the surface after the device. By @wumpf in [#6052](https://github.com/gfx-rs/wgpu/pull/6052)
+- Fix the error message thrown in create_render_pass so it no longer says `compute_pass`. By @matthew-wong1 in [#6041](https://github.com/gfx-rs/wgpu/pull/6041)
+
+### Changes
+
+- Reduce the amount of debug and trace logs emitted by wgpu-core and wgpu-hal. By @nical in [#6065](https://github.com/gfx-rs/wgpu/issues/6065)
+- `Rg11b10Float` is renamed to `Rg11b10UFloat`. By @sagudev in [#6108](https://github.com/gfx-rs/wgpu/pull/6108)
+
+### Dependency Updates
+
+#### GLES
+
+- Replace `winapi` code in WGL wrapper to use the `windows` crate. By @MarijnS95 in [#6006](https://github.com/gfx-rs/wgpu/pull/6006)
+
+## 22.0.0 (2024-07-17)
+
+### Overview
+
+#### Our first major version release!
+
+For the first time ever, WGPU is being released with a major version (i.e., 22.* instead of 0.22.*)! Maintainership has decided to fully adhere to [Semantic Versioning](https://semver.org/)'s recommendations for versioning production software. According to [SemVer 2.0.0's Q&A about when to use 1.0.0 versions (and beyond)](https://semver.org/spec/v2.0.0.html#how-do-i-know-when-to-release-100):
+
> ### How do I know when to release 1.0.0?
>
> If your software is being used in production, it should probably already be 1.0.0. If you have a stable API on which users have come to depend, you should be 1.0.0. If you’re worrying a lot about backward compatibility, you should probably already be 1.0.0.
WGPU has been used in production applications and platforms for years at this point. We often worry about tracking breaking changes and how they affect these consumers' ability to ship. By releasing our first major version, we publicly acknowledge that this is the case. We encourage other projects in the Rust ecosystem to follow suit.
Note that while we start to use the major version number, WGPU is _not_ "going stable", as many Rust projects do. We anticipate many breaking changes before we fully comply with the WebGPU spec, which we expect to take a small number of years.
A major ([pun intended](#our-first-major-version-release)) theme of this release is incremental improvement. Among the typically large set of bug fixes, new features, and other adjustments to WGPU by the many contributors listed below, @wumpf and @teoxoy have merged a series of simplifications to WGPU's internals and, in one case, to the render and compute pass recording APIs. Many of these change WGPU to use atomically reference-counted resource tracking (i.e., `Arc<…>`), rather than using IDs to manage the lifetimes of platform-specific graphics resources in a registry of separate reference counts. This has led us to diagnose and fix many long-standing bugs, and net some neat performance improvements on the order of 40% or more on some workloads.
While the above is exciting, we acknowledge already finding and fixing some (easy-to-fix) regressions from the above work. If you migrate to WGPU 22 and encounter such bugs, please engage us in the issue tracker right away!
+
+### Major Changes
+
#### Lifetime bounds on `wgpu::RenderPass` & `wgpu::ComputePass`
`wgpu::RenderPass` & `wgpu::ComputePass` recording methods (e.g.
`wgpu::RenderPass::set_render_pipeline`) no longer impose a lifetime constraint on objects passed to a pass (like pipelines/buffers/bindgroups/query-sets etc.). This means the following pattern now works as expected:
+
```rust
let mut pipelines: Vec<wgpu::RenderPipeline> = ...;
// ...
@@ -79,6 +149,7 @@ By @wumpf in [#5569](https://github.com/gfx-rs/wgpu/pull/5569), [#5575](https://
Wgpu now supports querying [shader compilation info](https://www.w3.org/TR/webgpu/#dom-gpushadermodule-getcompilationinfo). This allows you to get more structured information about compilation errors, warnings, and info:
+
```rust
...
let lighting_shader = ctx.device.create_shader_module(include_wgsl!("lighting.wgsl"));
@@ -143,14 +214,11 @@ to pass a compatible surface when targeting WebGL2, having `enumerate_adapters()
By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
### New features
-#### Vulkan
-
-- Added a `PipelineCache` resource to allow using Vulkan pipeline caches. By @DJMcNab in [#5319](https://github.com/gfx-rs/wgpu/pull/5319)
#### General
- Added `as_hal` for `Buffer` to access wgpu-created buffers from wgpu-hal. By @JasondeWolff in [#5724](https://github.com/gfx-rs/wgpu/pull/5724)
-- Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
+- `include_wgsl!` is now callable in const contexts by @9SMTM6 in [#5872](https://github.com/gfx-rs/wgpu/pull/5872)
- Added memory allocation hints to `DeviceDescriptor` by @nical in [#5875](https://github.com/gfx-rs/wgpu/pull/5875)
  - `MemoryHints::Performance`, the default, favors performance over memory usage and will likely cause large amounts of VRAM to be allocated up-front. This hint is typically good for games.
  - `MemoryHints::MemoryUsage` favors memory usage over performance. This hint is typically useful for smaller applications or UI libraries.
@@ -164,23 +232,33 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
- Implement `WGSL`'s `unpack4xI8`, `unpack4xU8`, `pack4xI8` and `pack4xU8`. By @VlaDexa in [#5424](https://github.com/gfx-rs/wgpu/pull/5424)
- Began work adding support for atomics to the SPIR-V frontend. Tracking issue is [here](https://github.com/gfx-rs/wgpu/issues/4489). By @schell in [#5702](https://github.com/gfx-rs/wgpu/pull/5702).
- In hlsl-out, allow passing information about the fragment entry point to omit vertex outputs that are not in the fragment inputs. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
+- In spv-out, allow passing `acceleration_structure` as a function argument. By @kvark in [#5961](https://github.com/gfx-rs/wgpu/pull/5961)
```diff
let writer: naga::back::hlsl::Writer = /* ... */;
-writer.write(&module, &module_info);
+writer.write(&module, &module_info, None);
```
+- HLSL & MSL output can now be enabled conditionally on the target via the `msl-out-if-target-apple` and `hlsl-out-if-target-windows` features. This is used in wgpu-hal to skip compiling MSL output when `metal` is enabled but macOS isn't targeted, and HLSL output when `dx12` is enabled but Windows isn't targeted. By @wumpf in [#5919](https://github.com/gfx-rs/wgpu/pull/5919)
+
+#### Vulkan
+
+- Added a `PipelineCache` resource to allow using Vulkan pipeline caches. By @DJMcNab in [#5319](https://github.com/gfx-rs/wgpu/pull/5319)
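For readers wanting to try the cache, a hedged usage sketch follows. It is based on the wgpu 22 API surface as described in this entry; exact field names and signatures should be checked against the `wgpu` docs, and `device` is assumed to come from an adapter reporting `wgpu::Features::PIPELINE_CACHE` (only the Vulkan backend at this point):

```rust
// Load a previously saved cache blob, if any (the path is arbitrary).
let disk_data: Option<Vec<u8>> = std::fs::read("pipeline_cache.bin").ok();

// `unsafe` because wgpu cannot validate that `data` was produced by a
// compatible adapter/driver; `fallback: true` starts from an empty cache
// if the blob is rejected.
let cache = unsafe {
    device.create_pipeline_cache(&wgpu::PipelineCacheDescriptor {
        label: Some("pipeline cache"),
        data: disk_data.as_deref(),
        fallback: true,
    })
};

// Pass `cache: Some(&cache)` in e.g. a `RenderPipelineDescriptor` when
// creating pipelines, then persist the (possibly updated) blob:
if let Some(blob) = cache.get_data() {
    let _ = std::fs::write("pipeline_cache.bin", blob);
}
```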
#### WebGPU
-- `include_wgsl!` is now callable in const contexts by @9SMTM6 in [#5872](https://github.com/gfx-rs/wgpu/pull/5872)
+- Added support for pipeline-overridable constants to the WebGPU backend by @DouglasDwyer in [#5688](https://github.com/gfx-rs/wgpu/pull/5688)
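As a usage sketch for overridable constants (hedged: `shader_module` and `surface_format` are assumed to be in scope, the WGSL override name `gain` is made up, and the field names follow the wgpu 22-era API):

```rust
use std::collections::HashMap;

// WGSL side (for reference):   override gain: f32 = 1.0;
let mut constants = HashMap::new();
constants.insert(String::from("gain"), 2.0_f64); // override values are f64

let fragment = wgpu::FragmentState {
    module: &shader_module,
    entry_point: "fs_main",
    // Overrides are supplied per shader stage at pipeline creation time.
    compilation_options: wgpu::PipelineCompilationOptions {
        constants: &constants,
        ..Default::default()
    },
    targets: &[Some(surface_format.into())],
};
```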
### Changes
#### General
+- Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
- Avoid introducing spurious features for optional dependencies. By @bjorn3 in [#5691](https://github.com/gfx-rs/wgpu/pull/5691)
- `wgpu::Error` is now `Sync`, making it possible to wrap it in `anyhow::Error` or `eyre::Report`. By @nolanderc in [#5820](https://github.com/gfx-rs/wgpu/pull/5820)
+- Added a benchmark suite. By @cwfitzgerald in [#5694](https://github.com/gfx-rs/wgpu/pull/5694), compute passes by @wumpf in [#5767](https://github.com/gfx-rs/wgpu/pull/5767)
+- Improve performance of `.submit()` by 39-64% (`.submit()` + `.poll()` by 22-32%). By @teoxoy in [#5910](https://github.com/gfx-rs/wgpu/pull/5910)
+- The `trace` wgpu feature has been temporarily removed. By @teoxoy in [#5975](https://github.com/gfx-rs/wgpu/pull/5975)
#### Metal
- Removed the `link` Cargo feature.
@@ -190,6 +268,7 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
[target.'cfg(target_vendor = "apple")']
rustflags = ["-C", "link-args=-weak_framework Metal -weak_framework QuartzCore -weak_framework CoreGraphics"]
```
+
By @madsmtm in [#5752](https://github.com/gfx-rs/wgpu/pull/5752)
### Bug Fixes
@@ -198,11 +277,49 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
- Ensure render pipelines have at least 1 target. By @ErichDonGubler in [#5715](https://github.com/gfx-rs/wgpu/pull/5715)
- `wgpu::ComputePass` now internally takes ownership of the `QuerySet` used by `wgpu::ComputePassTimestampWrites` as well as by timestamp writes and statistics queries, fixing crashes when a `QuerySet` is destroyed before the pass ends. By @wumpf in [#5671](https://github.com/gfx-rs/wgpu/pull/5671)
- Validate resources passed during compute pass recording for mismatching device. By @wumpf in [#5779](https://github.com/gfx-rs/wgpu/pull/5779)
-- Fix a `CommandBuffer` leak. By @cwfitzgerald and @nical in [#5141](https://github.com/gfx-rs/wgpu/pull/5141)
+- Fix staging buffers being destroyed too early. By @teoxoy in [#5910](https://github.com/gfx-rs/wgpu/pull/5910)
+- Fix attachment byte cost validation panicking with native-only formats. By @teoxoy in [#5934](https://github.com/gfx-rs/wgpu/pull/5934)
+- [wgpu] Fix leaks from auto layout pipelines. By @teoxoy in [#5971](https://github.com/gfx-rs/wgpu/pull/5971)
+- [wgpu-core] Fix length of copy in `queue_write_texture` (causing UB). By @teoxoy in [#5973](https://github.com/gfx-rs/wgpu/pull/5973)
+- Add missing same device checks. By @teoxoy in [#5980](https://github.com/gfx-rs/wgpu/pull/5980)
+
+#### GLES / OpenGL
+
+- Fix `ClearColorF`, `ClearColorU` and `ClearColorI` commands being issued before `SetDrawColorBuffers` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
+- Replace `glClear` with `glClearBufferF` because `glDrawBuffers` requires that the ith buffer must be `COLOR_ATTACHMENTi` or `NONE` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
+- Return the unmodified version in driver_info. By @Valaphee in [#5753](https://github.com/gfx-rs/wgpu/pull/5753)
+
+#### Naga
+
+- In spv-out don't decorate a `BindingArray`'s type with `Block` if the type is a struct with a runtime array by @Vecvec in [#5776](https://github.com/gfx-rs/wgpu/pull/5776)
+- Add `packed` as a keyword for GLSL by @kjarosh in [#5855](https://github.com/gfx-rs/wgpu/pull/5855)
+
+## v0.20.2 (2024-06-12)
+
+This release force-bumps transitive dependencies of `wgpu` on `wgpu-core` and `wgpu-hal` to 0.21.1, to resolve some undefined behavior observable in the DX12 backend after upgrading to Rust 1.79 or later.
+
+### Bug Fixes
+
+#### General
+
+* Fix a `CommandBuffer` leak. By @cwfitzgerald and @nical in [#5141](https://github.com/gfx-rs/wgpu/pull/5141)
#### DX12
-- Do not feed `&""` to `D3DCompile`, by @workingjubilee in [#5812](https://github.com/gfx-rs/wgpu/issues/5812).
+* Do not feed `&""` to `D3DCompile`, by @workingjubilee in [#5812](https://github.com/gfx-rs/wgpu/issues/5812).
+
+## v0.20.1 (2024-06-12)
+
+This release included v0.21.0 of `wgpu-core` and `wgpu-hal`, due to breaking changes needed to solve Vulkan validation issues.
+
+### Bug Fixes
+
+This release fixes the validation errors whenever a surface is used with the Vulkan backend. By @cwfitzgerald in [#5681](https://github.com/gfx-rs/wgpu/pull/5681).
+
+#### General
+
+- Clean up weak references to texture views and bind groups to prevent memory leaks. By @xiaopengli89 in [#5595](https://github.com/gfx-rs/wgpu/pull/5595).
+- Fix segfault on exit if queue & device are dropped before surface. By @sagudev in [#5640](https://github.com/gfx-rs/wgpu/pull/5640).
#### Metal
@@ -212,21 +329,15 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
- Fix enablement of subgroup ops extension on Vulkan devices that don't support Vulkan 1.3. By @cwfitzgerald in [#5624](https://github.com/gfx-rs/wgpu/pull/5624).
-#### GLES / OpenGL
-- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642)
-- Fix `ClearColorF`, `ClearColorU` and `ClearColorI` commands being issued before `SetDrawColorBuffers` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
-- Replace `glClear` with `glClearBufferF` because `glDrawBuffers` requires that the ith buffer must be `COLOR_ATTACHMENTi` or `NONE` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
-- Return the unmodified version in driver_info. By @Valaphee in [#5753](https://github.com/gfx-rs/wgpu/pull/5753)
-
-#### WebGPU
+#### GLES / OpenGL
-- Added support for pipeline-overridable constants to the WebGPU backend by @DouglasDwyer in [#5688](https://github.com/gfx-rs/wgpu/pull/5688)
+- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642)
#### Naga
-- In spv-out don't decorate a `BindingArray`'s type with `Block` if the type is a struct with a runtime array by @Vecvec in [#5776](https://github.com/gfx-rs/wgpu/pull/5776)
-- Add `packed` as a keyword for GLSL by @kjarosh in [#5855](https://github.com/gfx-rs/wgpu/pull/5855)
+- Work around shader consumers that have bugs handling `switch` statements with a single body for all cases. These are now written as `do {} while(false);` loops in hlsl-out and glsl-out. By @Imberflur in [#5654](https://github.com/gfx-rs/wgpu/pull/5654)
+- In hlsl-out, defer `continue` statements in switches by setting a flag and breaking from the switch. This allows such constructs to work with FXC, which does not support `continue` within a switch.
By @Imberflur in [#5654](https://github.com/gfx-rs/wgpu/pull/5654) ## v0.20.0 (2024-04-28) @@ -379,7 +490,6 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154 - Fix deadlocks caused by recursive read-write lock acquisitions [#5426](https://github.com/gfx-rs/wgpu/pull/5426). - Remove exposed C symbols (`extern "C"` + [no_mangle]) from RenderPass & ComputePass recording. By @wumpf in [#5409](https://github.com/gfx-rs/wgpu/pull/5409). - Fix surfaces being only compatible with first backend enabled on an instance, causing failures when manually specifying an adapter. By @Wumpf in [#5535](https://github.com/gfx-rs/wgpu/pull/5535). -- Clean up weak references to texture views and bind groups. By @xiaopengli89 [#5595](https://github.com/gfx-rs/wgpu/pull/5595). #### Naga @@ -411,6 +521,17 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154 - Refactor tests to read feature flags by name instead of a hardcoded hexadecimal u64. By @atlv24 in [#5155](https://github.com/gfx-rs/wgpu/pull/5155). - Add test that verifies that we can drop the queue before using the device to create a command encoder. By @Davidster in [#5211](https://github.com/gfx-rs/wgpu/pull/5211) +## 0.19.5 (2024-07-16) + +This release only releases `wgpu-hal` 0.19.5, which contains an important fix +for DX12. + +### Bug Fixes + +#### DX12 + +- Do not feed `&""` to `D3DCompile`, by @workingjubilee in [#5812](https://github.com/gfx-rs/wgpu/issues/5812), backported by @Elabajaba in [#5833](https://github.com/gfx-rs/wgpu/pull/5833). + ## v0.19.4 (2024-04-17) ### Bug Fixes diff --git a/Cargo.lock b/Cargo.lock index 4891f246bd..2f4d41739d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "ab_glyph" -version = "0.2.27" +version = "0.2.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c3a1cbc201cc13ed06cf875efb781f2249b3677f5c74571b67d817877f9d697" +checksum = "79faae4620f45232f599d9bc7b290f88247a0834162c4495ab2f02d60004adfb" dependencies = [ "ab_glyph_rasterizer", "owned_ttf_parser", @@ -105,9 +105,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.14" +version = "0.6.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" dependencies = [ "anstyle", "anstyle-parse", @@ -120,33 +120,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "anstyle-parse" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" dependencies = [ "windows-sys 0.52.0", ] [[package]] name = 
"anstyle-wincon" -version = "3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", "windows-sys 0.52.0", @@ -186,7 +186,7 @@ dependencies = [ "argh_shared", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -200,9 +200,9 @@ dependencies = [ [[package]] name = "arrayref" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" @@ -231,18 +231,18 @@ version = "0.38.0+1.3.281" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bb44936d800fea8f016d7f2311c6a4f97aebd5dc86f09906139ec848cf3a46f" dependencies = [ - "libloading 0.8.4", + "libloading 0.8.5", ] [[package]] name = "async-trait" -version = "0.1.80" +version = "0.1.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -308,18 +308,18 @@ dependencies = [ [[package]] name = "bit-set" -version = "0.6.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0481a0e032742109b1133a095184ee93d88f3dc9e0d28a5d033dc77a073f44f" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ "bit-vec", ] [[package]] name = "bit-vec" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" @@ -370,9 +370,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.1" +version = "1.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83" dependencies = [ "bytemuck_derive", ] @@ -385,7 +385,7 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -396,9 +396,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "calloop" @@ -448,13 +448,12 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.103" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2755ff20a1d93490d26ba33a6f092a38a508398a5320df5d4b3014fcccce9410" +checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" dependencies 
= [ "jobserver", "libc", - "once_cell", ] [[package]] @@ -513,9 +512,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.8" +version = "4.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b3edb18336f4df585bc9aa31dd99c036dfa5dc5e9a2939a722a188f3a8970d" +checksum = "11d8838454fda655dafd3accb2b6e2bea645b9e4078abe84a22ceb947235c5cc" dependencies = [ "clap_builder", "clap_derive", @@ -523,9 +522,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.8" +version = "4.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1c09dd5ada6c6c78075d6fd0da3f90d8080651e2d6cc8eb2f1aaa4034ced708" +checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" dependencies = [ "anstream", "anstyle", @@ -535,21 +534,21 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.8" +version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "cmake" @@ -624,9 +623,9 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" [[package]] name = "colorchoice" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" [[package]] name = "com" @@ -728,9 +727,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core-graphics" @@ -887,7 +886,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -918,10 +917,10 @@ checksum = "96a6ac251f4a2aca6b3f91340350eab87ae57c3f127ffeb585e92bd336717991" [[package]] name = "d3d12" -version = "0.20.0" +version = "22.0.0" dependencies = [ "bitflags 2.6.0", - "libloading 0.8.4", + "libloading 0.8.5", "winapi", ] @@ -1034,16 +1033,17 @@ dependencies = [ "quote", "strum", "strum_macros", - "syn 2.0.68", + "syn 2.0.74", "thiserror", ] [[package]] name = "deno_unsync" -version = "0.3.5" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cfb230b6e1965cd2695f7c4082adb278e0b999175a0fbb0852c7e67d26654b1" +checksum = "c3c8b95582c2023dbb66fccc37421b374026f5915fa507d437cb566904db9a3a" dependencies = [ + "parking_lot", "tokio", ] @@ -1106,7 +1106,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -1119,7 +1119,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version 
0.4.0", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -1140,14 +1140,14 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412" dependencies = [ - "libloading 0.8.4", + "libloading 0.8.5", ] [[package]] name = "document-features" -version = "0.2.8" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef5282ad69563b5fc40319526ba27e0e7363d552a896f0297d54f767717f9b95" +checksum = "cb6969eaabd2421f8a2775cfd2471a2b634372b4a25d41e3bd647b79912850a0" dependencies = [ "litrs", ] @@ -1180,9 +1180,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encase" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a9299a95fa5671ddf29ecc22b00e121843a65cb9ff24911e394b4ae556baf36" +checksum = "0265fa0e7bcdb058128cdf7597cdacea42e33911713663a04d971a39cad16afa" dependencies = [ "const_panic", "encase_derive", @@ -1192,22 +1192,22 @@ dependencies = [ [[package]] name = "encase_derive" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e09decb3beb1fe2db6940f598957b2e1f7df6206a804d438ff6cb2a9cddc10" +checksum = "e3b6f7502bafc52a60b5582560a2aaee16921eef79a742ae48dd411fe7a9263b" dependencies = [ "encase_derive_impl", ] [[package]] name = "encase_derive_impl" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd31dbbd9743684d339f907a87fe212cb7b51d75b9e8e74181fe363199ee9b47" +checksum = "b36f2ddfca91251bed7f931f24b192e4eaf0a0e0fa70cf81cfb1416a1973620e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -1221,9 +1221,9 @@ dependencies = [ [[package]] name = "env_filter" -version = "0.1.0" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" dependencies = [ "log", "regex", @@ -1231,9 +1231,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.3" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" +checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" dependencies = [ "anstream", "anstyle", @@ -1300,9 +1300,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" dependencies = [ "crc32fast", "miniz_oxide", @@ -1353,7 +1353,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -1478,7 +1478,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -1513,16 +1513,15 @@ dependencies = [ [[package]] name = "generator" -version = "0.8.1" +version = "0.8.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "186014d53bc231d0090ef8d6f03e0920c54d85a5ed22f4f2f74315ec56cf83fb" +checksum = "979f00864edc7516466d6b3157706e06c032f22715700ddd878228a91d02bc56" dependencies = [ - "cc", "cfg-if", "libc", "log", "rustversion", - "windows 0.54.0", + "windows", ] [[package]] @@ -1567,15 +1566,15 @@ dependencies = [ [[package]] name = "glam" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e05e7e6723e3455f4818c7b26e855439f7546cf617ef669d1adedb8669e5cb9" +checksum = "779ae4bf7e8421cf91c0b3b64e7e8b40b862fba4d393f59150042de7c4965a94" [[package]] name = "glow" -version = "0.13.1" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd348e04c43b32574f2de31c8bb397d96c9fcfa1371bd4ca6d8bdc464ab121b1" +checksum = "f865cbd94bd355b89611211e49508da98a1fce0ad755c1e8448fb96711b24528" dependencies = [ "js-sys", "slotmap", @@ -1678,15 +1677,15 @@ dependencies = [ [[package]] name = "gpu-allocator" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd4240fc91d3433d5e5b0fc5b67672d771850dc19bbee03c1381e19322803d7" +checksum = "c151a2a5ef800297b4e79efa4f4bec035c5f51d5ae587287c9b952bdf734cacd" dependencies = [ "log", "presser", "thiserror", "winapi", - "windows 0.52.0", + "windows", ] [[package]] @@ -1747,7 +1746,7 @@ dependencies = [ "bitflags 2.6.0", "com", "libc", - "libloading 0.8.4", + "libloading 0.8.5", "thiserror", "widestring", "winapi", @@ -1854,9 +1853,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" dependencies = [ "arbitrary", "equivalent", @@ -1889,9 +1888,9 @@ dependencies = [ [[package]] name = "is_terminal_polyfill" -version = "1.70.0" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itertools" @@ -1932,9 +1931,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -1955,7 +1954,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76" dependencies = [ "libc", - "libloading 0.8.4", + "libloading 0.8.5", "pkg-config", ] @@ -2009,9 +2008,9 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", "windows-targets 0.48.5", @@ -2142,9 +2141,9 @@ dependencies = [ [[package]] name = "metal" -version = "0.28.0" +version = "0.29.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5637e166ea14be6063a3f8ba5ccb9a4159df7d8f6d61c02fc3d480b1f90dcfcb" +checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21" dependencies = [ "bitflags 2.6.0", "block", @@ -2177,9 +2176,21 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "mio" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" +dependencies = [ + "hermit-abi 0.3.9", + "libc", + "wasi", + "windows-sys 0.52.0", +] + [[package]] name = "naga" -version = "0.20.0" +version = "22.0.0" dependencies = [ "arbitrary", "arrayvec 0.7.4", @@ -2207,7 +2218,7 @@ dependencies = [ [[package]] name = "naga-cli" -version = "0.20.0" +version = "22.0.0" dependencies = [ "anyhow", "argh", @@ -2455,7 +2466,7 @@ dependencies = [ "proc-macro-crate 3.1.0", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -2515,9 +2526,9 @@ checksum = "d079845b37af429bfe5dfa76e6d087d788031045b25cfc6fd898486fd9847666" [[package]] name = "object" -version = "0.36.1" +version = "0.36.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" +checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" dependencies = [ "memchr", ] @@ -2530,9 +2541,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" -version = "11.1.3" +version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "orbclient" @@ -2572,9 +2583,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "owned_ttf_parser" -version = "0.21.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b41438d2fc63c46c74a2203bf5ccd82c41ba04347b2fcf5754f230b167067d5" +checksum = "490d3a563d3122bf7c911a59b0add9389e5ec0f5f0c3ac6b91ff235a0e6a7f90" dependencies = [ "ttf-parser", ] @@ -2608,7 +2619,7 @@ dependencies = [ "redox_syscall 0.5.1", "smallvec", "thread-id", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -2656,7 +2667,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -2679,7 +2690,7 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "player" -version = "0.20.0" +version = "22.0.0" dependencies = [ "env_logger", "log", @@ -2734,9 +2745,9 @@ dependencies = [ [[package]] name = "polling" -version = "3.7.2" +version = "3.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3ed00ed3fbf728b5816498ecd316d1716eecaced9c0c8d2c5a6740ca214985b" +checksum = "cc2790cd301dec6cd3b7a025e4815cf825724a51c98dccfe6a3e55f05ffb6511" dependencies = [ "cfg-if", "concurrent-queue", @@ -2744,7 +2755,7 @@ dependencies = [ "pin-project-lite", "rustix", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2795,7 +2806,7 @@ checksum = "07c277e4e643ef00c1233393c673f655e3672cf7eb3ba08a00bdd0ea59139b5f" dependencies = [ "proc-macro-rules-macros", "proc-macro2", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ 
-2807,7 +2818,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -2943,9 +2954,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.5" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -3145,32 +3156,33 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.203" +version = "1.0.206" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "5b3e4cd94123dd520a128bcd11e34d9e9e423e7e3e50425cb1b4b1e3549d0284" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.203" +version = "1.0.206" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "fabfb6138d2383ea8208cf98ccf69cdfb1aff4088460681d84189aa259762f97" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] name = "serde_json" -version = "1.0.119" +version = "1.0.124" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8eddb61f0697cc3989c5d64b452f5488e2b8a60fd7d5076a3045076ffef8cb0" +checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" dependencies = [ "indexmap", "itoa", + "memchr", "ryu", "serde", ] @@ -3427,7 +3439,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] @@ -3443,9 +3455,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.68" +version = "2.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" dependencies = [ "proc-macro2", "quote", @@ -3463,29 +3475,29 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] name = "thread-id" -version = "4.2.1" +version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0ec81c46e9eb50deaa257be2f148adf052d1fb7701cfd55ccfab2525280b70b" +checksum = "cfe8f25bbdd100db7e1d34acf7fd2dc59c4bf8f7483f505eaa7d4f12f76cc0ea" dependencies = [ "libc", "winapi", @@ -3572,9 +3584,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.6.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55115c6fbe2d2bef26eb09ad74bde02d8255476fc0c7b515ef09fbb35742d82" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" 
dependencies = [ "tinyvec_macros", ] @@ -3587,39 +3599,38 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.0" +version = "1.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" dependencies = [ "backtrace", "bytes", "libc", - "mio", - "num_cpus", + "mio 1.0.1", "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" @@ -3694,9 +3705,9 @@ dependencies = [ [[package]] name = "tracy-client" -version = "0.17.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59fb931a64ff88984f86d3e9bcd1ae8843aa7fe44dd0f8097527bc172351741d" +checksum = "63de1e1d4115534008d8fd5788b39324d6f58fc707849090533828619351d855" dependencies = [ "loom", "once_cell", @@ -3705,18 +3716,18 @@ dependencies = [ [[package]] name = "tracy-client-sys" -version = "0.22.2" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d104d610dfa9dd154535102cc9c6164ae1fa37842bc2d9e83f9ac82b0ae0882" +checksum = "98b98232a2447ce0a58f9a0bfb5f5e39647b5c597c994b63945fcccd1306fafb" dependencies = [ "cc", ] [[package]] name = "ttf-parser" -version = "0.21.1" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c591d83f69777866b9126b24c6dd9a18351f177e49d625920d19f989fd31cf8" +checksum = "5be21190ff5d38e8b4a2d3b6a3ae57f612cc39c96e83cedeaf7abc338a8bac4a" [[package]] name = "unic-char-property" @@ -3767,9 +3778,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-id-start" -version = "1.1.2" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8f73150333cb58412db36f2aca8f2875b013049705cc77b94ded70a1ab1f5da" +checksum = "bc3882f69607a2ac8cc4de3ee7993d8f68bb06f2974271195065b3bd07f2edea" [[package]] name = "unicode-ident" @@ -3837,9 +3848,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de17fd2f7da591098415cff336e12965a28061ddace43b59cb3c430179c9439" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", "serde", @@ -3874,9 +3885,9 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "vsimd" @@ -3921,7 +3932,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", "wasm-bindgen-shared", ] @@ -3955,7 +3966,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3988,21 +3999,21 @@ checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] name = "wayland-backend" -version = "0.3.4" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34e9e6b6d4a2bb4e7e69433e0b35c7923b95d4dc8503a84d25ec917a4bbfdf07" +checksum = "f90e11ce2ca99c97b940ee83edbae9da2d56a08f9ea8158550fd77fa31722993" dependencies = [ "cc", "downcast-rs", "rustix", "scoped-tls", "smallvec", - "wayland-sys 0.31.2", + "wayland-sys 0.31.4", ] [[package]] @@ -4173,9 +4184,9 @@ dependencies = [ [[package]] name = "wayland-sys" -version = "0.31.2" +version = "0.31.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "105b1842da6554f91526c14a2a2172897b7f745a805d62af4ce698706be79c12" +checksum = "43676fe2daf68754ecf1d72026e4e6c15483198b5d24e888b74d3f22f887a148" dependencies = [ "dlib", "log", @@ -4205,7 +4216,7 @@ dependencies = [ [[package]] name = "wgpu" -version = "0.20.0" +version = "22.0.0" dependencies = [ "arrayvec 0.7.4", "cfg_aliases", @@ -4229,7 +4240,7 @@ dependencies = [ [[package]] name = "wgpu-benchmark" -version = "0.20.0" +version = "22.0.0" dependencies = [ "bincode", "bytemuck", @@ -4246,7 +4257,7 @@ dependencies = [ [[package]] name = "wgpu-core" -version = "0.20.0" +version = "22.0.0" dependencies = [ "arrayvec 0.7.4", "bit-vec", @@ -4272,7 +4283,7 @@ dependencies = [ [[package]] name = "wgpu-examples" -version = "0.20.0" +version = "22.0.0" dependencies = [ "bytemuck", "cfg-if", @@ -4303,7 +4314,7 @@ dependencies = [ [[package]] name = "wgpu-hal" -version = "0.20.0" +version = "22.0.0" dependencies = [ "android_system_properties", "arrayvec 0.7.4", @@ -4327,7 +4338,7 @@ dependencies = [ "js-sys", "khronos-egl", "libc", - "libloading 0.8.4", + "libloading 0.8.5", "log", "metal", "naga", @@ -4346,12 +4357,13 @@ dependencies = [ "web-sys", "wgpu-types", "winapi", + "windows", "winit 0.29.15", ] [[package]] name = "wgpu-info" -version = "0.20.0" +version = "22.0.0" dependencies = [ "anyhow", "bitflags 2.6.0", @@ -4364,16 +4376,16 @@ dependencies = [ [[package]] name = "wgpu-macros" -version = "0.20.0" +version = "22.0.0" dependencies = [ "heck 0.5.0", "quote", - "syn 2.0.68", + "syn 2.0.74", ] [[package]] name = "wgpu-test" -version = "0.20.0" +version = "22.0.0" dependencies = [ "anyhow", "arrayvec 0.7.4", @@ -4409,7 +4421,7 @@ dependencies = [ [[package]] name = "wgpu-types" -version = "0.20.0" +version = "22.0.0" dependencies = [ "bitflags 2.6.0", "js-sys", @@ -4455,11 +4467,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 
0.52.0", + "windows-sys 0.48.0", ] [[package]] @@ -4470,50 +4482,66 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.52.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" dependencies = [ - "windows-core 0.52.0", - "windows-targets 0.52.5", + "windows-core", + "windows-targets 0.52.6", ] [[package]] -name = "windows" -version = "0.54.0" +name = "windows-core" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" dependencies = [ - "windows-core 0.54.0", - "windows-targets 0.52.5", + "windows-implement", + "windows-interface", + "windows-result", + "windows-strings", + "windows-targets 0.52.6", ] [[package]] -name = "windows-core" -version = "0.52.0" +name = "windows-implement" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" dependencies = [ - "windows-targets 0.52.5", + "proc-macro2", + "quote", + "syn 2.0.74", ] [[package]] -name = "windows-core" -version = "0.54.0" +name = "windows-interface" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" dependencies = [ - "windows-result", - "windows-targets 0.52.5", + "proc-macro2", + "quote", + "syn 2.0.74", ] [[package]] name = "windows-result" -version = "0.1.2" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" dependencies = [ - "windows-targets 0.52.5", + "windows-result", + "windows-targets 0.52.6", ] [[package]] @@ -4553,7 +4581,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -4588,18 +4625,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + 
"windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -4616,9 +4653,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -4640,9 +4677,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -4664,15 +4701,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -4694,9 +4731,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -4718,9 +4755,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -4736,9 +4773,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -4760,9 +4797,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winit" @@ -4778,7 +4815,7 @@ dependencies = [ "instant", "libc", "log", - "mio", + "mio 0.8.11", "ndk 0.7.0", "ndk-glue", "objc", @@ -4883,7 +4920,7 @@ dependencies = [ "as-raw-xcb-connection", "gethostname", "libc", - "libloading 0.8.4", + "libloading 0.8.5", "once_cell", "rustix", "x11rb-protocol", @@ -4897,9 +4934,9 @@ checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d" [[package]] name = "xcursor" -version = "0.3.5" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a0ccd7b4a5345edfcd0c3535718a4e9ff7798ffc536bb5b5a0e26ff84732911" +checksum = "f513f231f0810b04d988f0df4fb16ef0b6b25d23248f2c4b56b074e6b1b0ffe4" [[package]] name = "xkbcommon-dl" @@ -4922,26 +4959,26 @@ checksum = "b9cc00251562a284751c9973bace760d86c0276c471b4be569fe6b068ee97a56" [[package]] name = "xml-rs" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" +checksum = "539a77ee7c0de333dcc6da69b177380a0b81e0dacfa4f7344c465a36871ee601" [[package]] name = "zerocopy" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.74", ] diff --git a/Cargo.toml b/Cargo.toml index ce5ebcce1d..04b26b8044 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,55 +42,57 @@ default-members = [ [workspace.package] edition = "2021" -rust-version = "1.74" +rust-version = "1.76" keywords = ["graphics"] license = "MIT OR Apache-2.0" homepage = "https://wgpu.rs/" repository = "https://github.com/gfx-rs/wgpu" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] [workspace.dependencies.wgc] package = "wgpu-core" path = "./wgpu-core" -version = "0.20.0" +version = "22.0.0" [workspace.dependencies.wgt] package = "wgpu-types" path = "./wgpu-types" -version = "0.20.0" +version = "22.0.0" [workspace.dependencies.hal] package = "wgpu-hal" path = "./wgpu-hal" -version = "0.20.0" +version = "22.0.0" [workspace.dependencies.naga] path = "./naga" -version = "0.20.0" +version = "22.0.0" [workspace.dependencies] anyhow = "1.0.86" +argh = "0.1.5" arrayvec = "0.7" bincode = "1" -bit-vec = "0.7" -bitflags = "2" +bit-vec = "0.8" +bitflags = "2.6" bytemuck = { version = "1.16", features = ["derive"] } cfg_aliases = "0.1" cfg-if = "1" criterion = "0.5" codespan-reporting = "0.11" ctor = "0.2" -document-features = "0.2.8" -encase = "0.8" +document-features = "0.2.10" +encase = "0.9" env_logger = "0.11" fern = "0.6" flume = "0.11" futures-lite = "2" getrandom = "0.2" -glam = "0.27" +glam = "0.28" heck = "0.5.0" image = { version = "0.24", default-features = false, features = ["png"] } +indexmap = "2" itertools = { version 
= "0.10.5" } ktx2 = "0.3" libc = "0.2" @@ -103,7 +105,7 @@ nanorand = { version = "0.7", default-features = false, features = ["wyrand"] } noise = { version = "0.8", git = "https://github.com/Razaekel/noise-rs.git", rev = "c6942d4fb70af26db4441edcf41f90fa115333f2" } nv-flip = "0.1" obj = "0.10" -once_cell = "1" +once_cell = "1.19.0" parking_lot = ">=0.11, <0.13" # parking_lot 0.12 switches from `winapi` to `windows`; permit either pico-args = { version = "0.5.0", features = [ "eq-separator", @@ -119,24 +121,23 @@ renderdoc-sys = "1.1.0" ron = "0.8" rustc-hash = "1.1.0" serde = "1" -serde_json = "1.0.119" +serde_json = "1.0.124" smallvec = "1" static_assertions = "1.1.0" strum = { version = "0.25.0", features = ["derive"] } tracy-client = "0.17" -thiserror = "1" -wgpu = { version = "0.20.0", path = "./wgpu", default-features = false } -wgpu-core = { version = "0.20.0", path = "./wgpu-core" } -wgpu-example = { version = "0.20.0", path = "./examples/common" } -wgpu-macros = { version = "0.20.0", path = "./wgpu-macros" } -wgpu-test = { version = "0.20.0", path = "./tests" } -wgpu-types = { version = "0.20.0", path = "./wgpu-types" } +thiserror = "1.0.63" +wgpu = { version = "22.0.0", path = "./wgpu", default-features = false } +wgpu-core = { version = "22.0.0", path = "./wgpu-core" } +wgpu-macros = { version = "22.0.0", path = "./wgpu-macros" } +wgpu-test = { version = "22.0.0", path = "./tests" } +wgpu-types = { version = "22.0.0", path = "./wgpu-types" } winit = { version = "0.29", features = ["android-native-activity"] } # Metal dependencies block = "0.1" core-graphics-types = "0.1" -metal = { version = "0.28.0" } +metal = { version = "0.29.0" } objc = "0.2.5" # Vulkan dependencies @@ -146,20 +147,24 @@ gpu-alloc = "0.6" gpu-descriptor = "0.3" # DX dependencies -bit-set = "0.6" -gpu-allocator = { version = "0.26", default-features = false, features = [ +bit-set = "0.8" +gpu-allocator = { version = "0.27", default-features = false, features = [ "d3d12", "public-winapi", ] } -d3d12 = { version = "0.20.0", path = "./d3d12/" } +d3d12 = { version = "22.0.0", path = "./d3d12/" } range-alloc = "0.1" winapi = "0.3" hassle-rs = "0.11.0" # Gles dependencies khronos-egl = "6" -glow = "0.13.1" +glow = "0.14.0" glutin = "0.29.1" +glutin_wgl_sys = "0.6" + +# DX and GLES dependencies +windows = { version = "0.58", default-features = false } # wasm32 dependencies console_error_panic_hook = "0.1.7" @@ -178,9 +183,12 @@ deno_url = "0.143.0" deno_web = "0.174.0" deno_webidl = "0.143.0" deno_webgpu = { version = "0.118.0", path = "./deno_webgpu" } -tokio = "1.38.0" +tokio = "1.39.2" termcolor = "1.4.1" +# android dependencies +ndk-sys = "0.5.0" + [patch."https://github.com/gfx-rs/naga"] [patch."https://github.com/zakarumych/gpu-descriptor"] diff --git a/README.md b/README.md index fcff011539..bdd587b573 100644 --- a/README.md +++ b/README.md @@ -120,8 +120,8 @@ On Linux, you can point to them using `LD_LIBRARY_PATH` environment. Due to complex dependants, we have two MSRV policies: -- `d3d12`, `naga`, `wgpu-core`, `wgpu-hal`, and `wgpu-types`'s MSRV is **1.74**. -- The rest of the workspace has an MSRV of **1.76**. +- `d3d12`, `naga`, `wgpu-core`, `wgpu-hal`, and `wgpu-types`'s MSRV is **1.76**, but may be lower than the rest of the workspace in the future. +- The rest of the workspace has an MSRV of **1.76** as well right now, but may be higher than above listed crates. It is enforced on CI (in "/.github/workflows/ci.yml") with the `CORE_MSRV` and `REPO_MSRV` variables. 
diff --git a/benches/Cargo.toml b/benches/Cargo.toml index 65ac0eefdb..82207d5105 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -43,4 +43,4 @@ pollster.workspace = true profiling.workspace = true rayon.workspace = true tracy-client = { workspace = true, optional = true } -wgpu.workspace = true +wgpu = { workspace = true, features = ["wgsl", "metal", "dx12"] } diff --git a/benches/README.md b/benches/README.md index 3f20cbba7d..55af5fe18e 100644 --- a/benches/README.md +++ b/benches/README.md @@ -24,6 +24,21 @@ By default it measures 10k draw calls, with 90k total resources. Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting the render pass into multiple passes over multiple command buffers. +If available, it also tests a bindless approach, binding all textures at once instead of switching +the bind group for every draw call. + +#### `Computepass` + +This benchmark measures the performance of recording and submitting a compute pass with a large +number of dispatches and resources. +By default it measures 10k dispatch calls, with 60k total resources, emulating an unusually complex and sequential compute workload. + +Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting +the compute pass into multiple passes over multiple command buffers. +If available, it also tests a bindless approach, binding all resources at once instead of switching +the bind group for every dispatch. +TODO(https://github.com/gfx-rs/wgpu/issues/5766): The bindless version uses only 1k dispatches with 6k resources since it would be too slow for a reasonable benchmarking time otherwise. + #### `Resource Creation` diff --git a/benches/benches/computepass-bindless.wgsl b/benches/benches/computepass-bindless.wgsl new file mode 100644 index 0000000000..402ff94489 --- /dev/null +++ b/benches/benches/computepass-bindless.wgsl @@ -0,0 +1,26 @@ +@group(0) @binding(0) +var tex: binding_array<texture_2d<f32>>; + +@group(0) @binding(1) +// TODO(https://github.com/gfx-rs/wgpu/issues/5765): The extra whitespace between the angle brackets is needed to work around a parsing bug. +var images: binding_array<texture_storage_2d<r32float, read_write> >; +struct BufferElement { + element: vec4f, +} + +@group(0) @binding(2) +var<storage, read_write> buffers: binding_array<BufferElement>; + +@compute +@workgroup_size(16) +fn cs_main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) { + let offset = global_invocation_id.x; // Would be nice to offset this dynamically (it's just 0 always in the current setup) + + let idx0 = offset * 2 + 0; + let idx1 = offset * 2 + 1; + + let tex = textureLoad(tex[idx0], vec2u(0), 0) + textureLoad(tex[idx1], vec2u(0), 0); + let image = textureLoad(images[idx0], vec2u(0)) + textureLoad(images[idx1], vec2u(0)); + buffers[idx0].element = tex.rrrr; + buffers[idx1].element = image.rrrr; +} \ No newline at end of file diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs new file mode 100644 index 0000000000..2af1413605 --- /dev/null +++ b/benches/benches/computepass.rs @@ -0,0 +1,593 @@ +use std::{ + num::{NonZeroU32, NonZeroU64}, + time::{Duration, Instant}, +}; + +use criterion::{criterion_group, Criterion, Throughput}; +use nanorand::{Rng, WyRand}; +use once_cell::sync::Lazy; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +use crate::DeviceState; + +fn dispatch_count() -> usize { + // On CI we only want to run a very lightweight version of the benchmark + // to ensure that it does not break. + if std::env::var("WGPU_TESTING").is_ok() { + 8 + } else { + 10_000 + } +} + +// Currently bindless is _much_ slower than with regular resources, +// since wgpu needs to issue barriers for all resources between each dispatch for all read/write textures & buffers. +// This is in fact so slow that it makes the benchmark unusable when we use the same amount of +// resources as the regular benchmark. +// For details see https://github.com/gfx-rs/wgpu/issues/5766 +fn dispatch_count_bindless() -> usize { + // On CI we only want to run a very lightweight version of the benchmark + // to ensure that it does not break. + if std::env::var("WGPU_TESTING").is_ok() { + 8 + } else { + 1_000 + } +} + +// Must match the number of textures in the computepass.wgsl shader +const TEXTURES_PER_DISPATCH: usize = 2; +const STORAGE_TEXTURES_PER_DISPATCH: usize = 2; +const STORAGE_BUFFERS_PER_DISPATCH: usize = 2; + +const BUFFER_SIZE: u64 = 16; + +struct ComputepassState { + device_state: DeviceState, + pipeline: wgpu::ComputePipeline, + bind_groups: Vec<wgpu::BindGroup>, + + // Bindless resources + bindless_bind_group: Option<wgpu::BindGroup>, + bindless_pipeline: Option<wgpu::ComputePipeline>, +} + +impl ComputepassState { + /// Create and prepare all the resources needed for the computepass benchmark. + fn new() -> Self { + let device_state = DeviceState::new(); + + let dispatch_count = dispatch_count(); + let dispatch_count_bindless = dispatch_count_bindless(); + let texture_count = dispatch_count * TEXTURES_PER_DISPATCH; + let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH; + let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH; + + let supports_bindless = device_state.device.features().contains( + wgpu::Features::BUFFER_BINDING_ARRAY + | wgpu::Features::TEXTURE_BINDING_ARRAY + | wgpu::Features::STORAGE_RESOURCE_BINDING_ARRAY + | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ) + // TODO: as of writing llvmpipe segfaults the bindless benchmark on CI + && device_state.adapter_info.driver != "llvmpipe"; + + // Performance gets considerably worse if the resources are shuffled.
+ // + // This more closely matches the real-world use case where resources have no + // well defined usage order. + let mut random = WyRand::new_seed(0x8BADF00D); + + let mut bind_group_layout_entries = Vec::with_capacity(TEXTURES_PER_DISPATCH); + for i in 0..TEXTURES_PER_DISPATCH { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }); + } + for i in 0..STORAGE_TEXTURES_PER_DISPATCH { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: (TEXTURES_PER_DISPATCH + i) as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::ReadWrite, + format: wgpu::TextureFormat::R32Float, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: None, + }); + } + for i in 0..STORAGE_BUFFERS_PER_DISPATCH { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: (TEXTURES_PER_DISPATCH + STORAGE_BUFFERS_PER_DISPATCH + i) as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: NonZeroU64::new(BUFFER_SIZE), + }, + count: None, + }); + } + + let bind_group_layout = + device_state + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &bind_group_layout_entries, + }); + + let mut texture_views = Vec::with_capacity(texture_count); + for i in 0..texture_count { + let texture = device_state + .device + .create_texture(&wgpu::TextureDescriptor { + label: Some(&format!("Texture {i}")), + size: wgpu::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8UnormSrgb, + usage: wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor { + label: Some(&format!("Texture View {i}")), + ..Default::default() + })); + } + random.shuffle(&mut texture_views); + let texture_view_refs: Vec<_> = texture_views.iter().collect(); + + let mut storage_texture_views = Vec::with_capacity(storage_texture_count); + for i in 0..storage_texture_count { + let texture = device_state + .device + .create_texture(&wgpu::TextureDescriptor { + label: Some(&format!("StorageTexture {i}")), + size: wgpu::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::R32Float, + usage: wgpu::TextureUsages::STORAGE_BINDING, + view_formats: &[], + }); + storage_texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor { + label: Some(&format!("StorageTexture View {i}")), + ..Default::default() + })); + } + random.shuffle(&mut storage_texture_views); + let storage_texture_view_refs: Vec<_> = storage_texture_views.iter().collect(); + + let mut storage_buffers = Vec::with_capacity(storage_buffer_count); + for i in 0..storage_buffer_count { + storage_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor { + label: Some(&format!("Buffer {i}")), + size: BUFFER_SIZE, + usage: wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + })); + } + 
random.shuffle(&mut storage_buffers); + let storage_buffer_bindings: Vec<_> = storage_buffers + .iter() + .map(|b| b.as_entire_buffer_binding()) + .collect(); + + let mut bind_groups = Vec::with_capacity(dispatch_count); + for dispatch_idx in 0..dispatch_count { + let mut entries = Vec::with_capacity(TEXTURES_PER_DISPATCH); + for tex_idx in 0..TEXTURES_PER_DISPATCH { + entries.push(wgpu::BindGroupEntry { + binding: tex_idx as u32, + resource: wgpu::BindingResource::TextureView( + &texture_views[dispatch_idx * TEXTURES_PER_DISPATCH + tex_idx], + ), + }); + } + for tex_idx in 0..STORAGE_TEXTURES_PER_DISPATCH { + entries.push(wgpu::BindGroupEntry { + binding: (TEXTURES_PER_DISPATCH + tex_idx) as u32, + resource: wgpu::BindingResource::TextureView( + &storage_texture_views + [dispatch_idx * STORAGE_TEXTURES_PER_DISPATCH + tex_idx], + ), + }); + } + for buffer_idx in 0..STORAGE_BUFFERS_PER_DISPATCH { + entries.push(wgpu::BindGroupEntry { + binding: (TEXTURES_PER_DISPATCH + STORAGE_BUFFERS_PER_DISPATCH + buffer_idx) + as u32, + resource: wgpu::BindingResource::Buffer( + storage_buffers[dispatch_idx * STORAGE_BUFFERS_PER_DISPATCH + buffer_idx] + .as_entire_buffer_binding(), + ), + }); + } + + bind_groups.push( + device_state + .device + .create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &entries, + }), + ); + } + random.shuffle(&mut bind_groups); + + let sm = device_state + .device + .create_shader_module(wgpu::include_wgsl!("computepass.wgsl")); + + let pipeline_layout = + device_state + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let pipeline = + device_state + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("Compute Pipeline"), + layout: Some(&pipeline_layout), + module: &sm, + entry_point: Some("cs_main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + + let (bindless_bind_group, bindless_pipeline) = if supports_bindless { + let bindless_bind_group_layout = + device_state + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { + filterable: true, + }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: Some(NonZeroU32::new(texture_count as u32).unwrap()), + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::ReadWrite, + format: wgpu::TextureFormat::R32Float, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: Some(NonZeroU32::new(storage_texture_count as u32).unwrap()), + }, + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: std::num::NonZeroU64::new(BUFFER_SIZE), + }, + count: Some(NonZeroU32::new(storage_buffer_count as u32).unwrap()), + }, + ], + }); + + let bindless_bind_group = + device_state + .device + .create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bindless_bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: 
wgpu::BindingResource::TextureViewArray( + &texture_view_refs[..dispatch_count_bindless], + ), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::TextureViewArray( + &storage_texture_view_refs[..dispatch_count_bindless], + ), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::BufferArray( + &storage_buffer_bindings[..dispatch_count_bindless], + ), + }, + ], + }); + + let bindless_sm = device_state + .device + .create_shader_module(wgpu::include_wgsl!("computepass-bindless.wgsl")); + + let bindless_pipeline_layout = + device_state + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bindless_bind_group_layout], + push_constant_ranges: &[], + }); + + let bindless_pipeline = + device_state + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("Compute Pipeline bindless"), + layout: Some(&bindless_pipeline_layout), + module: &bindless_sm, + entry_point: Some("cs_main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + + (Some(bindless_bind_group), Some(bindless_pipeline)) + } else { + (None, None) + }; + + Self { + device_state, + pipeline, + bind_groups, + + bindless_bind_group, + bindless_pipeline, + } + } + + fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer { + profiling::scope!("Computepass", &format!("Pass {pass_number}/{total_passes}")); + + let dispatch_count = dispatch_count(); + let dispatch_per_pass = dispatch_count / total_passes; + + let mut encoder = self + .device_state + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + + let start_idx = pass_number * dispatch_per_pass; + let end_idx = start_idx + dispatch_per_pass; + for dispatch_idx in start_idx..end_idx { + compute_pass.set_pipeline(&self.pipeline); + compute_pass.set_bind_group(0, &self.bind_groups[dispatch_idx], &[]); + compute_pass.dispatch_workgroups(1, 1, 1); + } + + drop(compute_pass); + + encoder.finish() + } + + fn run_bindless_pass(&self, dispatch_count_bindless: usize) -> wgpu::CommandBuffer { + profiling::scope!("Bindless Computepass"); + + let mut encoder = self + .device_state + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + + compute_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap()); + compute_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]); + for _ in 0..dispatch_count_bindless { + compute_pass.dispatch_workgroups(1, 1, 1); + } + + drop(compute_pass); + + encoder.finish() + } +} + +fn run_bench(ctx: &mut Criterion) { + let state = Lazy::new(ComputepassState::new); + + let dispatch_count = dispatch_count(); + let dispatch_count_bindless = dispatch_count_bindless(); + let texture_count = dispatch_count * TEXTURES_PER_DISPATCH; + let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH; + let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH; + + // Test 10k dispatch calls split up into 1, 2, 4, and 8 computepasses + let mut group = ctx.benchmark_group("Computepass: Single Threaded"); + group.throughput(Throughput::Elements(dispatch_count as _)); + + for time_submit in [false, true] { + for 
cpasses in [1, 2, 4, 8] { + let dispatch_per_pass = dispatch_count / cpasses; + + let label = if time_submit { + "Submit Time" + } else { + "Computepass Time" + }; + + group.bench_function( + &format!("{cpasses} computepasses x {dispatch_per_pass} dispatches ({label})"), + |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let mut start = Instant::now(); + + let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(cpasses); + for i in 0..cpasses { + buffers.push(state.run_subpass(i, cpasses)); + } + + if time_submit { + start = Instant::now(); + } else { + duration += start.elapsed(); + } + + state.device_state.queue.submit(buffers); + + if time_submit { + duration += start.elapsed(); + } + + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }, + ); + } + } + group.finish(); + + // Test 10k dispatch calls split up over 2, 4, and 8 threads. + let mut group = ctx.benchmark_group("Computepass: Multi Threaded"); + group.throughput(Throughput::Elements(dispatch_count as _)); + + for threads in [2, 4, 8] { + let dispatch_per_pass = dispatch_count / threads; + group.bench_function( + &format!("{threads} threads x {dispatch_per_pass} dispatch"), + |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + // This benchmark hangs on Apple Paravirtualized GPUs. No idea why. + if state.device_state.adapter_info.name.contains("Paravirtual") { + return Duration::from_secs_f32(1.0); + } + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let start = Instant::now(); + + let buffers = (0..threads) + .into_par_iter() + .map(|i| state.run_subpass(i, threads)) + .collect::<Vec<_>>(); + + duration += start.elapsed(); + + state.device_state.queue.submit(buffers); + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }, + ); + } + group.finish(); + + // Test all dispatch calls recorded into a single bindless computepass. + let mut group = ctx.benchmark_group("Computepass: Bindless"); + group.throughput(Throughput::Elements(dispatch_count_bindless as _)); + + group.bench_function(&format!("{dispatch_count_bindless} dispatch"), |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + // This benchmark hangs on Apple Paravirtualized GPUs. No idea why. + if state.device_state.adapter_info.name.contains("Paravirtual") { + return Duration::from_secs_f32(1.0); + } + + // Need bindless to run this benchmark + if state.bindless_bind_group.is_none() { + return Duration::from_secs_f32(1.0); + } + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let start = Instant::now(); + + let buffer = state.run_bindless_pass(dispatch_count_bindless); + + duration += start.elapsed(); + + state.device_state.queue.submit([buffer]); + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }); + group.finish(); + + ctx.bench_function( + &format!( + "Computepass: Empty Submit with {} Resources", + texture_count + storage_texture_count + storage_buffer_count + ), + |b| { + Lazy::force(&state); + + b.iter(|| state.device_state.queue.submit([])); + }, + ); +} + +criterion_group! { + name = computepass; + config = Criterion::default().measurement_time(Duration::from_secs(10)); + targets = run_bench, +}
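The `time_submit` flag above makes one benchmark body yield two measurements: with it set, only `queue.submit` is on the clock; without it, only command recording is. A minimal, self-contained sketch of that `iter_custom` pattern, with hypothetical stand-in functions in place of `run_subpass`/`queue.submit`:

use std::time::{Duration, Instant};

// Stand-in workload; in the real benchmark this is run_subpass() / queue.submit().
fn record_commands() -> Vec<u64> {
    (0..4096).collect()
}
fn submit(buffers: Vec<u64>) {
    std::hint::black_box(buffers);
}

fn bench_record_vs_submit(ctx: &mut criterion::Criterion, time_submit: bool) {
    ctx.bench_function("record vs submit (sketch)", |b| {
        // iter_custom hands us an iteration count and trusts us to return
        // the measured time, so we can start/stop the clock selectively.
        b.iter_custom(|iters| {
            let mut duration = Duration::ZERO;
            for _ in 0..iters {
                let mut start = Instant::now();
                let buffers = record_commands();
                if time_submit {
                    // Only the submission below should be on the clock.
                    start = Instant::now();
                } else {
                    // Only the recording above should be on the clock.
                    duration += start.elapsed();
                }
                submit(buffers);
                if time_submit {
                    duration += start.elapsed();
                }
            }
            duration
        })
    });
}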
diff --git a/benches/benches/computepass.wgsl b/benches/benches/computepass.wgsl new file mode 100644 index 0000000000..83d7d49785 --- /dev/null +++ b/benches/benches/computepass.wgsl @@ -0,0 +1,26 @@ +@group(0) @binding(0) +var tex_0: texture_2d<f32>; + +@group(0) @binding(1) +var tex_1: texture_2d<f32>; + +@group(0) @binding(2) +var image_0: texture_storage_2d<r32float, read_write>; + +@group(0) @binding(3) +var image_1: texture_storage_2d<r32float, read_write>; + +@group(0) @binding(4) +var<storage, read_write> buffer0 : array<vec4f>; + +@group(0) @binding(5) +var<storage, read_write> buffer1 : array<vec4f>; + +@compute +@workgroup_size(16) +fn cs_main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) { + let tex = textureLoad(tex_0, vec2u(0), 0) + textureLoad(tex_1, vec2u(0), 0); + let image = textureLoad(image_0, vec2u(0)) + textureLoad(image_1, vec2u(0)); + buffer0[0] = tex.rrrr; + buffer1[0] = image.rrrr; +} diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs index fcb35c3864..7f2e14116e 100644 --- a/benches/benches/renderpass.rs +++ b/benches/benches/renderpass.rs @@ -10,13 +10,19 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use crate::DeviceState; -const DRAW_COUNT: usize = 10_000; +fn draw_count() -> usize { + // On CI we only want to run a very lightweight version of the benchmark + // to ensure that it does not break. + if std::env::var("WGPU_TESTING").is_ok() { + 8 + } else { + 10_000 + } +} + // Must match the number of textures in the renderpass.wgsl shader const TEXTURES_PER_DRAW: usize = 7; const VERTEX_BUFFERS_PER_DRAW: usize = 2; -const VERTEX_BUFFER_COUNT: usize = DRAW_COUNT * VERTEX_BUFFERS_PER_DRAW; - -const TEXTURE_COUNT: usize = DRAW_COUNT * TEXTURES_PER_DRAW; struct RenderpassState { device_state: DeviceState, @@ -36,6 +42,10 @@ impl RenderpassState { fn new() -> Self { let device_state = DeviceState::new(); + let draw_count = draw_count(); + let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW; + let texture_count = draw_count * TEXTURES_PER_DRAW; + let supports_bindless = device_state.device.features().contains( wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, @@ -43,7 +53,7 @@ .device .limits() .max_sampled_textures_per_shader_stage - >= TEXTURE_COUNT as _; + >= texture_count as _; // Performance gets considerably worse if the resources are shuffled.
// @@ -73,8 +83,8 @@ impl RenderpassState { entries: &bind_group_layout_entries, }); - let mut texture_views = Vec::with_capacity(TEXTURE_COUNT); - for i in 0..TEXTURE_COUNT { + let mut texture_views = Vec::with_capacity(texture_count); + for i in 0..texture_count { let texture = device_state .device .create_texture(&wgpu::TextureDescriptor { @@ -100,8 +110,8 @@ impl RenderpassState { let texture_view_refs: Vec<_> = texture_views.iter().collect(); - let mut bind_groups = Vec::with_capacity(DRAW_COUNT); - for draw_idx in 0..DRAW_COUNT { + let mut bind_groups = Vec::with_capacity(draw_count); + for draw_idx in 0..draw_count { let mut entries = Vec::with_capacity(TEXTURES_PER_DRAW); for tex_idx in 0..TEXTURES_PER_DRAW { entries.push(wgpu::BindGroupEntry { @@ -137,8 +147,8 @@ impl RenderpassState { push_constant_ranges: &[], }); - let mut vertex_buffers = Vec::with_capacity(VERTEX_BUFFER_COUNT); - for _ in 0..VERTEX_BUFFER_COUNT { + let mut vertex_buffers = Vec::with_capacity(vertex_buffer_count); + for _ in 0..vertex_buffer_count { vertex_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: 3 * 16, @@ -148,8 +158,8 @@ impl RenderpassState { } random.shuffle(&mut vertex_buffers); - let mut index_buffers = Vec::with_capacity(DRAW_COUNT); - for _ in 0..DRAW_COUNT { + let mut index_buffers = Vec::with_capacity(draw_count); + for _ in 0..draw_count { index_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: 3 * 4, @@ -181,7 +191,7 @@ impl RenderpassState { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &sm, - entry_point: "vs_main", + entry_point: Some("vs_main"), buffers: &vertex_buffer_layouts, compilation_options: wgpu::PipelineCompilationOptions::default(), }, @@ -198,7 +208,7 @@ impl RenderpassState { multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &sm, - entry_point: "fs_main", + entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8UnormSrgb, blend: None, @@ -245,7 +255,7 @@ impl RenderpassState { view_dimension: wgpu::TextureViewDimension::D2, multisampled: false, }, - count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()), + count: Some(NonZeroU32::new(texture_count as u32).unwrap()), }], }); @@ -279,7 +289,7 @@ impl RenderpassState { layout: Some(&bindless_pipeline_layout), vertex: wgpu::VertexState { module: &bindless_shader_module, - entry_point: "vs_main", + entry_point: Some("vs_main"), buffers: &vertex_buffer_layouts, compilation_options: wgpu::PipelineCompilationOptions::default(), }, @@ -296,7 +306,7 @@ impl RenderpassState { multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &bindless_shader_module, - entry_point: "fs_main", + entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8UnormSrgb, blend: None, @@ -323,10 +333,15 @@ impl RenderpassState { } } - fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer { + fn run_subpass( + &self, + pass_number: usize, + total_passes: usize, + draw_count: usize, + ) -> wgpu::CommandBuffer { profiling::scope!("Renderpass", &format!("Pass {pass_number}/{total_passes}")); - let draws_per_pass = DRAW_COUNT / total_passes; + let draws_per_pass = draw_count / total_passes; let mut encoder = self .device_state @@ -371,7 +386,7 @@ impl RenderpassState { encoder.finish() } - fn run_bindless_pass(&self) -> wgpu::CommandBuffer { + fn 
run_bindless_pass(&self, draw_count: usize) -> wgpu::CommandBuffer { profiling::scope!("Bindless Renderpass"); let mut encoder = self @@ -401,7 +416,7 @@ } render_pass.set_index_buffer(self.index_buffers[0].slice(..), wgpu::IndexFormat::Uint32); - for draw_idx in 0..DRAW_COUNT { + for draw_idx in 0..draw_count { render_pass.draw_indexed(0..3, 0, draw_idx as u32..draw_idx as u32 + 1); } @@ -414,13 +429,17 @@ fn run_bench(ctx: &mut Criterion) { let state = Lazy::new(RenderpassState::new); + let draw_count = draw_count(); + let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW; + let texture_count = draw_count * TEXTURES_PER_DRAW; + // Test 10k draw calls split up into 1, 2, 4, and 8 renderpasses let mut group = ctx.benchmark_group("Renderpass: Single Threaded"); - group.throughput(Throughput::Elements(DRAW_COUNT as _)); + group.throughput(Throughput::Elements(draw_count as _)); for time_submit in [false, true] { for rpasses in [1, 2, 4, 8] { - let draws_per_pass = DRAW_COUNT / rpasses; + let draws_per_pass = draw_count / rpasses; let label = if time_submit { "Submit Time" @@ -450,7 +469,7 @@ let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(rpasses); for i in 0..rpasses { - buffers.push(state.run_subpass(i, rpasses)); + buffers.push(state.run_subpass(i, rpasses, draw_count)); } if time_submit { @@ -478,10 +497,10 @@ // Test 10k draw calls split up over 2, 4, and 8 threads. let mut group = ctx.benchmark_group("Renderpass: Multi Threaded"); - group.throughput(Throughput::Elements(DRAW_COUNT as _)); + group.throughput(Throughput::Elements(draw_count as _)); for threads in [2, 4, 8] { - let draws_per_pass = DRAW_COUNT / threads; + let draws_per_pass = draw_count / threads; group.bench_function( &format!("{threads} threads x {draws_per_pass} draws"), |b| { @@ -504,7 +523,7 @@ let buffers = (0..threads) .into_par_iter() - .map(|i| state.run_subpass(i, threads)) + .map(|i| state.run_subpass(i, threads, draw_count)) .collect::<Vec<_>>(); duration += start.elapsed(); @@ -522,9 +541,9 @@ // Test 10k draw calls split up over 1, 2, 4, and 8 threads.
let mut group = ctx.benchmark_group("Renderpass: Bindless"); - group.throughput(Throughput::Elements(DRAW_COUNT as _)); + group.throughput(Throughput::Elements(draw_count as _)); - group.bench_function(&format!("{DRAW_COUNT} draws"), |b| { + group.bench_function(&format!("{draw_count} draws"), |b| { Lazy::force(&state); b.iter_custom(|iters| { @@ -542,7 +561,7 @@ fn run_bench(ctx: &mut Criterion) { let start = Instant::now(); - let buffer = state.run_bindless_pass(); + let buffer = state.run_bindless_pass(draw_count); duration += start.elapsed(); @@ -558,7 +577,7 @@ fn run_bench(ctx: &mut Criterion) { ctx.bench_function( &format!( "Renderpass: Empty Submit with {} Resources", - TEXTURE_COUNT + VERTEX_BUFFER_COUNT + texture_count + vertex_buffer_count ), |b| { Lazy::force(&state); diff --git a/benches/benches/root.rs b/benches/benches/root.rs index 6ef2efabc2..064617783d 100644 --- a/benches/benches/root.rs +++ b/benches/benches/root.rs @@ -1,6 +1,7 @@ use criterion::criterion_main; use pollster::block_on; +mod computepass; mod renderpass; mod resource_creation; mod shader; @@ -45,7 +46,7 @@ impl DeviceState { required_features: adapter.features(), required_limits: adapter.limits(), memory_hints: wgpu::MemoryHints::Performance, - label: Some("RenderPass Device"), + label: Some("Compute/RenderPass Device"), }, None, )) @@ -61,6 +62,7 @@ impl DeviceState { criterion_main!( renderpass::renderpass, + computepass::computepass, resource_creation::resource_creation, shader::shader ); diff --git a/d3d12/Cargo.toml b/d3d12/Cargo.toml index 2c3f721525..a792aeab69 100644 --- a/d3d12/Cargo.toml +++ b/d3d12/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "d3d12" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] description = "Low level D3D12 API wrapper" repository = "https://github.com/gfx-rs/wgpu/tree/trunk/d3d12" diff --git a/d3d12/src/query.rs b/d3d12/src/query.rs index a9dca262bc..68901de942 100644 --- a/d3d12/src/query.rs +++ b/d3d12/src/query.rs @@ -8,7 +8,7 @@ pub enum QueryHeapType { Timestamp = d3d12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP, PipelineStatistics = d3d12::D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS, SOStatistics = d3d12::D3D12_QUERY_HEAP_TYPE_SO_STATISTICS, - // VideoDecodeStatistcs = d3d12::D3D12_QUERY_HEAP_TYPE_VIDEO_DECODE_STATISTICS, + // VideoDecodeStatistics = d3d12::D3D12_QUERY_HEAP_TYPE_VIDEO_DECODE_STATISTICS, // CopyQueueTimestamp = d3d12::D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP, } diff --git a/deno_webgpu/01_webgpu.js b/deno_webgpu/01_webgpu.js index 719a0f4860..b5bf0afc7a 100644 --- a/deno_webgpu/01_webgpu.js +++ b/deno_webgpu/01_webgpu.js @@ -180,27 +180,6 @@ function assertDevice(self, prefix, context) { return device; } -/** - * @param {InnerGPUDevice} self - * @param {any} resource - * @param {{prefix: string, resourceContext: string, selfContext: string}} opts - * @returns {InnerGPUDevice & {rid: number}} - */ -function assertDeviceMatch( - self, - resource, - { prefix, resourceContext, selfContext }, -) { - const resourceDevice = assertDevice(resource, prefix, resourceContext); - if (resourceDevice.rid !== self.rid) { - throw new DOMException( - `${prefix}: ${resourceContext} belongs to a different device than ${selfContext}.`, - "OperationError", - ); - } - return { ...resourceDevice, rid: resourceDevice.rid }; -} - /** * @param {any} self * @param {string} prefix @@ -1262,11 +1241,6 @@ class GPUDevice extends EventTarget { (layout, i) => { const context = `bind group layout ${i + 1}`; const rid = assertResource(layout, prefix, context); - 
assertDeviceMatch(device, layout, { - prefix, - selfContext: "this", - resourceContext: context, - }); return rid; }, ); @@ -1301,11 +1275,6 @@ class GPUDevice extends EventTarget { ); const device = assertDevice(this, prefix, "this"); const layout = assertResource(descriptor.layout, prefix, "layout"); - assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: "layout", - selfContext: "this", - }); const entries = ArrayPrototypeMap(descriptor.entries, (entry, i) => { const context = `entry ${i + 1}`; const resource = entry.resource; @@ -1403,22 +1372,12 @@ class GPUDevice extends EventTarget { if (typeof descriptor.layout !== "string") { const context = "layout"; layout = assertResource(descriptor.layout, prefix, context); - assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: context, - selfContext: "this", - }); } const module = assertResource( descriptor.compute.module, prefix, "compute shader module", ); - assertDeviceMatch(device, descriptor.compute.module, { - prefix, - resourceContext: "compute shader module", - selfContext: "this", - }); const { rid, err } = op_webgpu_create_compute_pipeline( device.rid, @@ -1459,22 +1418,12 @@ class GPUDevice extends EventTarget { if (typeof descriptor.layout !== "string") { const context = "layout"; layout = assertResource(descriptor.layout, prefix, context); - assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: context, - selfContext: "this", - }); } const module = assertResource( descriptor.vertex.module, prefix, "vertex shader module", ); - assertDeviceMatch(device, descriptor.vertex.module, { - prefix, - resourceContext: "vertex shader module", - selfContext: "this", - }); let fragment = undefined; if (descriptor.fragment) { const module = assertResource( @@ -1482,11 +1431,6 @@ class GPUDevice extends EventTarget { prefix, "fragment shader module", ); - assertDeviceMatch(device, descriptor.fragment.module, { - prefix, - resourceContext: "fragment shader module", - selfContext: "this", - }); fragment = { module, entryPoint: descriptor.fragment.entryPoint, @@ -1536,22 +1480,12 @@ class GPUDevice extends EventTarget { if (typeof descriptor.layout !== "string") { const context = "layout"; layout = assertResource(descriptor.layout, prefix, context); - assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: context, - selfContext: "this", - }); } const module = assertResource( descriptor.compute.module, prefix, "compute shader module", ); - assertDeviceMatch(device, descriptor.compute.module, { - prefix, - resourceContext: "compute shader module", - selfContext: "this", - }); const { rid, err } = op_webgpu_create_compute_pipeline( device.rid, @@ -1607,22 +1541,12 @@ class GPUDevice extends EventTarget { if (typeof descriptor.layout !== "string") { const context = "layout"; layout = assertResource(descriptor.layout, prefix, context); - assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: context, - selfContext: "this", - }); } const module = assertResource( descriptor.vertex.module, prefix, "vertex shader module", ); - assertDeviceMatch(device, descriptor.vertex.module, { - prefix, - resourceContext: "vertex shader module", - selfContext: "this", - }); let fragment = undefined; if (descriptor.fragment) { const module = assertResource( @@ -1630,11 +1554,6 @@ class GPUDevice extends EventTarget { prefix, "fragment shader module", ); - assertDeviceMatch(device, descriptor.fragment.module, { - prefix, - resourceContext: "fragment shader module", 
- selfContext: "this", - }); fragment = { module, entryPoint: descriptor.fragment.entryPoint, @@ -1916,11 +1835,6 @@ class GPUQueue { (buffer, i) => { const context = `command buffer ${i + 1}`; const rid = assertResource(buffer, prefix, context); - assertDeviceMatch(device, buffer, { - prefix, - selfContext: "this", - resourceContext: context, - }); return rid; }, ); @@ -1964,11 +1878,6 @@ class GPUQueue { : webidl.converters.GPUSize64(size, prefix, "Argument 5"); const device = assertDevice(this, prefix, "this"); const bufferRid = assertResource(buffer, prefix, "Argument 1"); - assertDeviceMatch(device, buffer, { - prefix, - selfContext: "this", - resourceContext: "Argument 1", - }); /** @type {ArrayBufferLike} */ let abLike = data; if (isTypedArray(data)) { @@ -2014,11 +1923,6 @@ class GPUQueue { size = webidl.converters.GPUExtent3D(size, prefix, "Argument 4"); const device = assertDevice(this, prefix, "this"); const textureRid = assertResource(destination.texture, prefix, "texture"); - assertDeviceMatch(device, destination.texture, { - prefix, - selfContext: "this", - resourceContext: "texture", - }); /** @type {ArrayBufferLike} */ let abLike = data; @@ -3189,15 +3093,6 @@ class GPUCommandEncoder { prefix, "texture view for depth stencil attachment", ); - assertDeviceMatch( - device, - descriptor.depthStencilAttachment.view[_texture], - { - prefix, - resourceContext: "texture view for depth stencil attachment", - selfContext: "this", - }, - ); depthStencilAttachment = { ...descriptor.depthStencilAttachment, @@ -3218,15 +3113,6 @@ class GPUCommandEncoder { prefix, `texture backing texture view for ${context}`, ); - assertDeviceMatch( - device, - colorAttachment.view[_texture], - { - prefix, - resourceContext: `texture view for ${context}`, - selfContext: "this", - }, - ); let resolveTarget; if (colorAttachment.resolveTarget) { resolveTarget = assertResource( @@ -3239,15 +3125,6 @@ class GPUCommandEncoder { prefix, `texture backing resolve target texture view for ${context}`, ); - assertDeviceMatch( - device, - colorAttachment.resolveTarget[_texture], - { - prefix, - resourceContext: `resolve target texture view for ${context}`, - selfContext: "this", - }, - ); } return { view: view, @@ -3388,17 +3265,7 @@ class GPUCommandEncoder { const device = assertDevice(this, prefix, "this"); const commandEncoderRid = assertResource(this, prefix, "this"); const sourceRid = assertResource(source, prefix, "Argument 1"); - assertDeviceMatch(device, source, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); const destinationRid = assertResource(destination, prefix, "Argument 3"); - assertDeviceMatch(device, destination, { - prefix, - resourceContext: "Argument 3", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_copy_buffer_to_buffer( commandEncoderRid, @@ -3436,22 +3303,11 @@ class GPUCommandEncoder { prefix, "source in Argument 1", ); - // deno-lint-ignore prefer-primordials - assertDeviceMatch(device, source.buffer, { - prefix, - resourceContext: "source in Argument 1", - selfContext: "this", - }); const destinationTextureRid = assertResource( destination.texture, prefix, "texture in Argument 2", ); - assertDeviceMatch(device, destination.texture, { - prefix, - resourceContext: "texture in Argument 2", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_copy_buffer_to_texture( commandEncoderRid, @@ -3500,23 +3356,12 @@ class GPUCommandEncoder { prefix, "texture in Argument 1", ); - assertDeviceMatch(device, source.texture, { - prefix, - 
resourceContext: "texture in Argument 1", - selfContext: "this", - }); const destinationBufferRid = assertResource( // deno-lint-ignore prefer-primordials destination.buffer, prefix, "buffer in Argument 2", ); - // deno-lint-ignore prefer-primordials - assertDeviceMatch(device, destination.buffer, { - prefix, - resourceContext: "buffer in Argument 2", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_copy_texture_to_buffer( commandEncoderRid, { @@ -3562,21 +3407,11 @@ class GPUCommandEncoder { prefix, "texture in Argument 1", ); - assertDeviceMatch(device, source.texture, { - prefix, - resourceContext: "texture in Argument 1", - selfContext: "this", - }); const destinationTextureRid = assertResource( destination.texture, prefix, "texture in Argument 2", ); - assertDeviceMatch(device, destination.texture, { - prefix, - resourceContext: "texture in Argument 2", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_copy_texture_to_texture( commandEncoderRid, { @@ -3685,11 +3520,6 @@ class GPUCommandEncoder { const device = assertDevice(this, prefix, "this"); const commandEncoderRid = assertResource(this, prefix, "this"); const querySetRid = assertResource(querySet, prefix, "Argument 1"); - assertDeviceMatch(device, querySet, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_write_timestamp( commandEncoderRid, querySetRid, @@ -3731,17 +3561,7 @@ class GPUCommandEncoder { const device = assertDevice(this, prefix, "this"); const commandEncoderRid = assertResource(this, prefix, "this"); const querySetRid = assertResource(querySet, prefix, "Argument 1"); - assertDeviceMatch(device, querySet, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); const destinationRid = assertResource(destination, prefix, "Argument 3"); - assertDeviceMatch(device, destination, { - prefix, - resourceContext: "Argument 3", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_resolve_query_set( commandEncoderRid, querySetRid, @@ -3991,11 +3811,6 @@ class GPURenderPassEncoder { const bundleRids = ArrayPrototypeMap(bundles, (bundle, i) => { const context = `bundle ${i + 1}`; const rid = assertResource(bundle, prefix, context); - assertDeviceMatch(device, bundle, { - prefix, - resourceContext: context, - selfContext: "this", - }); return rid; }); op_webgpu_render_pass_execute_bundles(renderPassRid, bundleRids); @@ -4041,11 +3856,6 @@ class GPURenderPassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const renderPassRid = assertResource(this, prefix, "this"); const bindGroupRid = assertResource(bindGroup, prefix, "Argument 2"); - assertDeviceMatch(device, bindGroup, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); if ( TypedArrayPrototypeGetSymbolToStringTag(dynamicOffsetsData) !== "Uint32Array" @@ -4128,11 +3938,6 @@ class GPURenderPassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const renderPassRid = assertResource(this, prefix, "this"); const pipelineRid = assertResource(pipeline, prefix, "Argument 1"); - assertDeviceMatch(device, pipeline, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_pass_set_pipeline(renderPassRid, pipelineRid); } @@ -4165,11 +3970,6 @@ class GPURenderPassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const renderPassRid = assertResource(this, prefix, "this"); const bufferRid = 
assertResource(buffer, prefix, "Argument 1"); - assertDeviceMatch(device, buffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_pass_set_index_buffer( renderPassRid, bufferRid, @@ -4204,11 +4004,6 @@ class GPURenderPassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const renderPassRid = assertResource(this, prefix, "this"); const bufferRid = assertResource(buffer, prefix, "Argument 2"); - assertDeviceMatch(device, buffer, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); op_webgpu_render_pass_set_vertex_buffer( renderPassRid, slot, @@ -4337,11 +4132,6 @@ class GPURenderPassEncoder { prefix, "Argument 1", ); - assertDeviceMatch(device, indirectBuffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_pass_draw_indirect( renderPassRid, indirectBufferRid, @@ -4380,11 +4170,6 @@ class GPURenderPassEncoder { prefix, "Argument 1", ); - assertDeviceMatch(device, indirectBuffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_pass_draw_indexed_indirect( renderPassRid, indirectBufferRid, @@ -4466,11 +4251,6 @@ class GPUComputePassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const computePassRid = assertResource(this, prefix, "this"); const pipelineRid = assertResource(pipeline, prefix, "Argument 1"); - assertDeviceMatch(device, pipeline, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_compute_pass_set_pipeline(computePassRid, pipelineRid); } @@ -4545,11 +4325,6 @@ class GPUComputePassEncoder { prefix, "Argument 1", ); - assertDeviceMatch(device, indirectBuffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_compute_pass_dispatch_workgroups_indirect( computePassRid, indirectBufferRid, @@ -4598,11 +4373,6 @@ class GPUComputePassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const computePassRid = assertResource(this, prefix, "this"); const bindGroupRid = assertResource(bindGroup, prefix, "Argument 2"); - assertDeviceMatch(device, bindGroup, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); if ( TypedArrayPrototypeGetSymbolToStringTag(dynamicOffsetsData) !== "Uint32Array" @@ -4814,11 +4584,6 @@ class GPURenderBundleEncoder { const device = assertDevice(this, prefix, "this"); const renderBundleEncoderRid = assertResource(this, prefix, "this"); const bindGroupRid = assertResource(bindGroup, prefix, "Argument 2"); - assertDeviceMatch(device, bindGroup, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); if ( TypedArrayPrototypeGetSymbolToStringTag(dynamicOffsetsData) !== "Uint32Array" @@ -4902,11 +4667,6 @@ class GPURenderBundleEncoder { const device = assertDevice(this, prefix, "this"); const renderBundleEncoderRid = assertResource(this, prefix, "this"); const pipelineRid = assertResource(pipeline, prefix, "Argument 1"); - assertDeviceMatch(device, pipeline, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_bundle_encoder_set_pipeline( renderBundleEncoderRid, pipelineRid, @@ -4935,11 +4695,6 @@ class GPURenderBundleEncoder { const device = assertDevice(this, prefix, "this"); const renderBundleEncoderRid = assertResource(this, prefix, "this"); const bufferRid = assertResource(buffer, prefix, "Argument 1"); - assertDeviceMatch(device, buffer, { - prefix, - resourceContext: "Argument 1", - 
selfContext: "this", - }); op_webgpu_render_bundle_encoder_set_index_buffer( renderBundleEncoderRid, bufferRid, @@ -4969,11 +4724,6 @@ class GPURenderBundleEncoder { const device = assertDevice(this, prefix, "this"); const renderBundleEncoderRid = assertResource(this, prefix, "this"); const bufferRid = assertResource(buffer, prefix, "Argument 2"); - assertDeviceMatch(device, buffer, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); op_webgpu_render_bundle_encoder_set_vertex_buffer( renderBundleEncoderRid, slot, @@ -5097,11 +4847,6 @@ class GPURenderBundleEncoder { prefix, "Argument 1", ); - assertDeviceMatch(device, indirectBuffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_bundle_encoder_draw_indirect( renderBundleEncoderRid, indirectBufferRid, @@ -5326,6 +5071,7 @@ webidl.converters["GPUFeatureName"] = webidl.createEnumConverter( // texture formats "depth32float-stencil8", "texture-compression-bc", + "texture-compression-bc-sliced-3d", "texture-compression-etc2", "texture-compression-astc", "rg11b10ufloat-renderable", diff --git a/deno_webgpu/binding.rs b/deno_webgpu/binding.rs index 0efeb6716a..f1f3a80d35 100644 --- a/deno_webgpu/binding.rs +++ b/deno_webgpu/binding.rs @@ -21,7 +21,7 @@ impl Resource for WebGpuBindGroupLayout { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.bind_group_layout_drop(self.1)); + self.0.bind_group_layout_drop(self.1); } } @@ -35,7 +35,7 @@ impl Resource for WebGpuBindGroup { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.bind_group_drop(self.1)); + self.0.bind_group_drop(self.1); } } @@ -191,7 +191,7 @@ pub fn op_webgpu_create_bind_group_layout( entries: Cow::from(entries), }; - gfx_put!(device => instance.device_create_bind_group_layout( + gfx_put!(instance.device_create_bind_group_layout( device, &descriptor, None @@ -226,7 +226,7 @@ pub fn op_webgpu_create_pipeline_layout( push_constant_ranges: Default::default(), }; - gfx_put!(device => instance.device_create_pipeline_layout( + gfx_put!(instance.device_create_pipeline_layout( device, &descriptor, None @@ -305,7 +305,7 @@ pub fn op_webgpu_create_bind_group( entries: Cow::from(entries), }; - gfx_put!(device => instance.device_create_bind_group( + gfx_put!(instance.device_create_bind_group( device, &descriptor, None diff --git a/deno_webgpu/buffer.rs b/deno_webgpu/buffer.rs index e0b0e50d31..08afcd133d 100644 --- a/deno_webgpu/buffer.rs +++ b/deno_webgpu/buffer.rs @@ -27,7 +27,7 @@ impl Resource for WebGpuBuffer { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.buffer_drop(self.1, true)); + self.0.buffer_drop(self.1); } } @@ -62,7 +62,7 @@ pub fn op_webgpu_create_buffer( mapped_at_creation, }; - gfx_put!(device => instance.device_create_buffer( + gfx_put!(instance.device_create_buffer( device, &descriptor, None @@ -97,20 +97,21 @@ pub async fn op_webgpu_buffer_get_map_async( }); // TODO(lucacasonato): error handling - let maybe_err = gfx_select!(buffer => instance.buffer_map_async( - buffer, - offset, - Some(size), - wgpu_core::resource::BufferMapOperation { - host: match mode { - 1 => wgpu_core::device::HostMap::Read, - 2 => wgpu_core::device::HostMap::Write, - _ => unreachable!(), + let maybe_err = instance + .buffer_map_async( + buffer, + offset, + Some(size), + wgpu_core::resource::BufferMapOperation { + host: match mode { + 1 => wgpu_core::device::HostMap::Read, + 2 => wgpu_core::device::HostMap::Write, + _ => unreachable!(), + }, + callback: Some(wgpu_core::resource::BufferMapCallback::from_rust(callback)), }, 
diff --git a/deno_webgpu/bundle.rs b/deno_webgpu/bundle.rs index dfe5ccf494..0d1421d202 100644 --- a/deno_webgpu/bundle.rs +++ b/deno_webgpu/bundle.rs @@ -30,7 +30,7 @@ impl Resource for WebGpuRenderBundle { } fn close(self: Rc<Self>) { - gfx_select!(self.1 => self.0.render_bundle_drop(self.1)); + self.0.render_bundle_drop(self.1); } } @@ -108,7 +108,7 @@ pub fn op_webgpu_render_bundle_encoder_finish( .into_inner(); let instance = state.borrow::<super::Instance>(); - gfx_put!(render_bundle_encoder.parent() => instance.render_bundle_encoder_finish( + gfx_put!(instance.render_bundle_encoder_finish( render_bundle_encoder, &wgpu_core::command::RenderBundleDescriptor { label: Some(label), diff --git a/deno_webgpu/command_encoder.rs b/deno_webgpu/command_encoder.rs index ba21bb05b5..d7306a37a7 100644 --- a/deno_webgpu/command_encoder.rs +++ b/deno_webgpu/command_encoder.rs @@ -23,7 +23,7 @@ impl Resource for WebGpuCommandEncoder { } fn close(self: Rc<Self>) { - gfx_select!(self.1 => self.0.command_encoder_drop(self.1)); + self.0.command_encoder_drop(self.1); } } @@ -38,7 +38,7 @@ impl Resource for WebGpuCommandBuffer { fn close(self: Rc<Self>) { if let Some(id) = *self.1.borrow() { - gfx_select!(id => self.0.command_buffer_drop(id)); + self.0.command_buffer_drop(id); } } } @@ -58,7 +58,7 @@ pub fn op_webgpu_create_command_encoder( let descriptor = wgpu_types::CommandEncoderDescriptor { label: Some(label) }; - gfx_put!(device => instance.device_create_command_encoder( + gfx_put!(instance.device_create_command_encoder( device, &descriptor, None @@ -210,7 +210,8 @@ pub fn op_webgpu_command_encoder_begin_render_pass( occlusion_query_set: occlusion_query_set_resource, }; - let (render_pass, error) = gfx_select!(command_encoder => instance.command_encoder_create_render_pass_dyn(*command_encoder, &descriptor)); + let (render_pass, error) = + instance.command_encoder_create_render_pass(*command_encoder, &descriptor); let rid = state .resource_table .add(super::render_pass::WebGpuRenderPass(RefCell::new( @@ -262,7 +263,8 @@ pub fn op_webgpu_command_encoder_begin_compute_pass( timestamp_writes: timestamp_writes.as_ref(), }; - let (compute_pass, error) = gfx_select!(command_encoder => instance.command_encoder_create_compute_pass_dyn(*command_encoder, &descriptor)); + let (compute_pass, error) =
instance.command_encoder_create_compute_pass(*command_encoder, &descriptor); let rid = state .resource_table .add(super::compute_pass::WebGpuComputePass(RefCell::new( @@ -297,13 +299,13 @@ pub fn op_webgpu_command_encoder_copy_buffer_to_buffer( .get::(destination)?; let destination_buffer = destination_buffer_resource.1; - gfx_ok!(command_encoder => instance.command_encoder_copy_buffer_to_buffer( - command_encoder, - source_buffer, - source_offset, - destination_buffer, - destination_offset, - size + gfx_ok!(instance.command_encoder_copy_buffer_to_buffer( + command_encoder, + source_buffer, + source_offset, + destination_buffer, + destination_offset, + size )) } @@ -360,11 +362,11 @@ pub fn op_webgpu_command_encoder_copy_buffer_to_texture( origin: destination.origin, aspect: destination.aspect, }; - gfx_ok!(command_encoder => instance.command_encoder_copy_buffer_to_texture( - command_encoder, - &source, - &destination, - ©_size + gfx_ok!(instance.command_encoder_copy_buffer_to_texture( + command_encoder, + &source, + &destination, + ©_size )) } @@ -403,11 +405,11 @@ pub fn op_webgpu_command_encoder_copy_texture_to_buffer( rows_per_image: destination.rows_per_image, }, }; - gfx_ok!(command_encoder => instance.command_encoder_copy_texture_to_buffer( - command_encoder, - &source, - &destination, - ©_size + gfx_ok!(instance.command_encoder_copy_texture_to_buffer( + command_encoder, + &source, + &destination, + ©_size )) } @@ -444,11 +446,11 @@ pub fn op_webgpu_command_encoder_copy_texture_to_texture( origin: destination.origin, aspect: destination.aspect, }; - gfx_ok!(command_encoder => instance.command_encoder_copy_texture_to_texture( - command_encoder, - &source, - &destination, - ©_size + gfx_ok!(instance.command_encoder_copy_texture_to_texture( + command_encoder, + &source, + &destination, + ©_size )) } @@ -470,11 +472,11 @@ pub fn op_webgpu_command_encoder_clear_buffer( .resource_table .get::(buffer_rid)?; - gfx_ok!(command_encoder => instance.command_encoder_clear_buffer( - command_encoder, - destination_resource.1, - offset, - Some(size) + gfx_ok!(instance.command_encoder_clear_buffer( + command_encoder, + destination_resource.1, + offset, + Some(size) )) } @@ -491,7 +493,7 @@ pub fn op_webgpu_command_encoder_push_debug_group( .get::(command_encoder_rid)?; let command_encoder = command_encoder_resource.1; - gfx_ok!(command_encoder => instance.command_encoder_push_debug_group(command_encoder, group_label)) + gfx_ok!(instance.command_encoder_push_debug_group(command_encoder, group_label)) } #[op2] @@ -506,7 +508,7 @@ pub fn op_webgpu_command_encoder_pop_debug_group( .get::(command_encoder_rid)?; let command_encoder = command_encoder_resource.1; - gfx_ok!(command_encoder => instance.command_encoder_pop_debug_group(command_encoder)) + gfx_ok!(instance.command_encoder_pop_debug_group(command_encoder)) } #[op2] @@ -522,10 +524,7 @@ pub fn op_webgpu_command_encoder_insert_debug_marker( .get::(command_encoder_rid)?; let command_encoder = command_encoder_resource.1; - gfx_ok!(command_encoder => instance.command_encoder_insert_debug_marker( - command_encoder, - marker_label - )) + gfx_ok!(instance.command_encoder_insert_debug_marker(command_encoder, marker_label)) } #[op2] @@ -545,10 +544,10 @@ pub fn op_webgpu_command_encoder_write_timestamp( .resource_table .get::(query_set)?; - gfx_ok!(command_encoder => instance.command_encoder_write_timestamp( - command_encoder, - query_set_resource.1, - query_index + gfx_ok!(instance.command_encoder_write_timestamp( + command_encoder, + 
query_set_resource.1, + query_index )) } @@ -575,13 +574,13 @@ pub fn op_webgpu_command_encoder_resolve_query_set( .resource_table .get::(destination)?; - gfx_ok!(command_encoder => instance.command_encoder_resolve_query_set( - command_encoder, - query_set_resource.1, - first_query, - query_count, - destination_resource.1, - destination_offset + gfx_ok!(instance.command_encoder_resolve_query_set( + command_encoder, + query_set_resource.1, + first_query, + query_count, + destination_resource.1, + destination_offset )) } @@ -600,10 +599,7 @@ pub fn op_webgpu_command_encoder_finish( let descriptor = wgpu_types::CommandBufferDescriptor { label: Some(label) }; - let (val, maybe_err) = gfx_select!(command_encoder => instance.command_encoder_finish( - command_encoder, - &descriptor - )); + let (val, maybe_err) = instance.command_encoder_finish(command_encoder, &descriptor); let rid = state.resource_table.add(WebGpuCommandBuffer( instance.clone(), diff --git a/deno_webgpu/compute_pass.rs b/deno_webgpu/compute_pass.rs index 3b653ef349..e3e69860ab 100644 --- a/deno_webgpu/compute_pass.rs +++ b/deno_webgpu/compute_pass.rs @@ -10,9 +10,7 @@ use std::cell::RefCell; use super::error::WebGpuResult; -pub(crate) struct WebGpuComputePass( - pub(crate) RefCell>, -); +pub(crate) struct WebGpuComputePass(pub(crate) RefCell); impl Resource for WebGpuComputePass { fn name(&self) -> Cow { "webGPUComputePass".into() @@ -33,10 +31,12 @@ pub fn op_webgpu_compute_pass_set_pipeline( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource - .0 - .borrow_mut() - .set_pipeline(state.borrow(), compute_pipeline_resource.1)?; + state + .borrow::() + .compute_pass_set_pipeline( + &mut compute_pass_resource.0.borrow_mut(), + compute_pipeline_resource.1, + )?; Ok(WebGpuResult::empty()) } @@ -54,10 +54,9 @@ pub fn op_webgpu_compute_pass_dispatch_workgroups( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource - .0 - .borrow_mut() - .dispatch_workgroups(state.borrow(), x, y, z)?; + state + .borrow::() + .compute_pass_dispatch_workgroups(&mut compute_pass_resource.0.borrow_mut(), x, y, z)?; Ok(WebGpuResult::empty()) } @@ -77,10 +76,13 @@ pub fn op_webgpu_compute_pass_dispatch_workgroups_indirect( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource - .0 - .borrow_mut() - .dispatch_workgroups_indirect(state.borrow(), buffer_resource.1, indirect_offset)?; + state + .borrow::() + .compute_pass_dispatch_workgroups_indirect( + &mut compute_pass_resource.0.borrow_mut(), + buffer_resource.1, + indirect_offset, + )?; Ok(WebGpuResult::empty()) } @@ -95,7 +97,9 @@ pub fn op_webgpu_compute_pass_end( .resource_table .take::(compute_pass_rid)?; - compute_pass_resource.0.borrow_mut().end(state.borrow())?; + state + .borrow::() + .compute_pass_end(&mut compute_pass_resource.0.borrow_mut())?; Ok(WebGpuResult::empty()) } @@ -127,12 +131,14 @@ pub fn op_webgpu_compute_pass_set_bind_group( let dynamic_offsets_data: &[u32] = &dynamic_offsets_data[start..start + len]; - compute_pass_resource.0.borrow_mut().set_bind_group( - state.borrow(), - index, - bind_group_resource.1, - dynamic_offsets_data, - )?; + state + .borrow::() + .compute_pass_set_bind_group( + &mut compute_pass_resource.0.borrow_mut(), + index, + bind_group_resource.1, + dynamic_offsets_data, + )?; Ok(WebGpuResult::empty()) } @@ -148,11 +154,13 @@ pub fn op_webgpu_compute_pass_push_debug_group( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource.0.borrow_mut().push_debug_group( - state.borrow(), - group_label, - 0, // wgpu#975 - )?; 
+ state + .borrow::() + .compute_pass_push_debug_group( + &mut compute_pass_resource.0.borrow_mut(), + group_label, + 0, // wgpu#975 + )?; Ok(WebGpuResult::empty()) } @@ -167,10 +175,9 @@ pub fn op_webgpu_compute_pass_pop_debug_group( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource - .0 - .borrow_mut() - .pop_debug_group(state.borrow())?; + state + .borrow::() + .compute_pass_pop_debug_group(&mut compute_pass_resource.0.borrow_mut())?; Ok(WebGpuResult::empty()) } @@ -186,11 +193,13 @@ pub fn op_webgpu_compute_pass_insert_debug_marker( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource.0.borrow_mut().insert_debug_marker( - state.borrow(), - marker_label, - 0, // wgpu#975 - )?; + state + .borrow::() + .compute_pass_insert_debug_marker( + &mut compute_pass_resource.0.borrow_mut(), + marker_label, + 0, // wgpu#975 + )?; Ok(WebGpuResult::empty()) } diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs index aafb225fb9..c2dfb240fa 100644 --- a/deno_webgpu/lib.rs +++ b/deno_webgpu/lib.rs @@ -23,48 +23,17 @@ pub const UNSTABLE_FEATURE_NAME: &str = "webgpu"; #[macro_use] mod macros { - macro_rules! gfx_select { - ($id:expr => $p0:ident.$p1:tt.$method:ident $params:tt) => { - gfx_select!($id => {$p0.$p1}, $method $params) - }; - - ($id:expr => $p0:ident.$method:ident $params:tt) => { - gfx_select!($id => {$p0}, $method $params) - }; - - ($id:expr => {$($c:tt)*}, $method:ident $params:tt) => { - match $id.backend() { - #[cfg(any( - all(not(target_arch = "wasm32"), not(target_os = "ios"), not(target_os = "macos")), - feature = "vulkan-portability" - ))] - wgpu_types::Backend::Vulkan => $($c)*.$method:: $params, - #[cfg(all(not(target_arch = "wasm32"), any(target_os = "ios", target_os = "macos")))] - wgpu_types::Backend::Metal => $($c)*.$method:: $params, - #[cfg(all(not(target_arch = "wasm32"), windows))] - wgpu_types::Backend::Dx12 => $($c)*.$method:: $params, - #[cfg(any( - all(unix, not(target_os = "macos"), not(target_os = "ios")), - feature = "angle", - target_arch = "wasm32" - ))] - wgpu_types::Backend::Gl => $($c)*.$method:: $params, - other => panic!("Unexpected backend {:?}", other), - } - }; - } - macro_rules! gfx_put { - ($id:expr => $global:ident.$method:ident( $($param:expr),* ) => $state:expr, $rc:expr) => {{ - let (val, maybe_err) = gfx_select!($id => $global.$method($($param),*)); + ($global:ident.$method:ident( $($param:expr),* ) => $state:expr, $rc:expr) => {{ + let (val, maybe_err) = $global.$method($($param),*); let rid = $state.resource_table.add($rc($global.clone(), val)); Ok(WebGpuResult::rid_err(rid, maybe_err)) }}; } macro_rules! 
gfx_ok { - ($id:expr => $global:ident.$method:ident( $($param:expr),* )) => {{ - let maybe_err = gfx_select!($id => $global.$method($($param),*)).err(); + ($global:ident.$method:ident( $($param:expr),* )) => {{ + let maybe_err = $global.$method($($param),*).err(); Ok(WebGpuResult::maybe_err(maybe_err)) }}; } @@ -94,7 +63,7 @@ impl Resource for WebGpuAdapter { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.adapter_drop(self.1)); + self.0.adapter_drop(self.1); } } @@ -105,7 +74,7 @@ impl Resource for WebGpuDevice { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.device_drop(self.1)); + self.0.device_drop(self.1); } } @@ -116,7 +85,7 @@ impl Resource for WebGpuQuerySet { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.query_set_drop(self.1)); + self.0.query_set_drop(self.1); } } @@ -248,6 +217,9 @@ fn deserialize_features(features: &wgpu_types::Features) -> Vec<&'static str> { if features.contains(wgpu_types::Features::TEXTURE_COMPRESSION_BC) { return_features.push("texture-compression-bc"); } + if features.contains(wgpu_types::Features::TEXTURE_COMPRESSION_BC_SLICED_3D) { + return_features.push("texture-compression-bc-sliced-3d"); + } if features.contains(wgpu_types::Features::TEXTURE_COMPRESSION_ETC2) { return_features.push("texture-compression-etc2"); } @@ -442,9 +414,9 @@ pub fn op_webgpu_request_adapter( }) } }; - let adapter_features = gfx_select!(adapter => instance.adapter_features(adapter))?; + let adapter_features = instance.adapter_features(adapter)?; let features = deserialize_features(&adapter_features); - let adapter_limits = gfx_select!(adapter => instance.adapter_limits(adapter))?; + let adapter_limits = instance.adapter_limits(adapter)?; let instance = instance.clone(); @@ -491,6 +463,12 @@ impl From for wgpu_types::Features { wgpu_types::Features::TEXTURE_COMPRESSION_BC, required_features.0.contains("texture-compression-bc"), ); + features.set( + wgpu_types::Features::TEXTURE_COMPRESSION_BC_SLICED_3D, + required_features + .0 + .contains("texture-compression-bc-sliced-3d"), + ); features.set( wgpu_types::Features::TEXTURE_COMPRESSION_ETC2, required_features.0.contains("texture-compression-etc2"), @@ -671,21 +649,24 @@ pub fn op_webgpu_request_device( memory_hints: wgpu_types::MemoryHints::default(), }; - let (device, queue, maybe_err) = gfx_select!(adapter => instance.adapter_request_device( - adapter, - &descriptor, - std::env::var("DENO_WEBGPU_TRACE").ok().as_ref().map(std::path::Path::new), - None, - None - )); + let (device, queue, maybe_err) = instance.adapter_request_device( + adapter, + &descriptor, + std::env::var("DENO_WEBGPU_TRACE") + .ok() + .as_ref() + .map(std::path::Path::new), + None, + None, + ); adapter_resource.close(); if let Some(err) = maybe_err { return Err(DomExceptionOperationError::new(&err.to_string()).into()); } - let device_features = gfx_select!(device => instance.device_features(device))?; + let device_features = instance.device_features(device)?; let features = deserialize_features(&device_features); - let limits = gfx_select!(device => instance.device_limits(device))?; + let limits = instance.device_limits(device)?; let instance = instance.clone(); let instance2 = instance.clone(); @@ -724,7 +705,7 @@ pub fn op_webgpu_request_adapter_info( let adapter = adapter_resource.1; let instance = state.borrow::(); - let info = gfx_select!(adapter => instance.adapter_get_info(adapter))?; + let info = instance.adapter_get_info(adapter)?; adapter_resource.close(); Ok(GPUAdapterInfo { @@ -777,7 +758,7 @@ pub fn 
op_webgpu_create_query_set( count: args.count, }; - gfx_put!(device => instance.device_create_query_set( + gfx_put!(instance.device_create_query_set( device, &descriptor, None diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index 75bd9b3ef2..0ab3c40262 100644 --- a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -14,8 +14,6 @@ use std::rc::Rc; use super::error::WebGpuError; use super::error::WebGpuResult; -const MAX_BIND_GROUPS: usize = 8; - pub(crate) struct WebGpuPipelineLayout( pub(crate) crate::Instance, pub(crate) wgpu_core::id::PipelineLayoutId, @@ -26,7 +24,7 @@ impl Resource for WebGpuPipelineLayout { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.pipeline_layout_drop(self.1)); + self.0.pipeline_layout_drop(self.1); } } @@ -40,7 +38,7 @@ impl Resource for WebGpuComputePipeline { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.compute_pipeline_drop(self.1)); + self.0.compute_pipeline_drop(self.1); } } @@ -54,7 +52,7 @@ impl Resource for WebGpuRenderPipeline { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.render_pipeline_drop(self.1)); + self.0.render_pipeline_drop(self.1); } } @@ -114,26 +112,12 @@ pub fn op_webgpu_create_compute_pipeline( entry_point: compute.entry_point.map(Cow::from), constants: Cow::Owned(compute.constants.unwrap_or_default()), zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, cache: None, }; - let implicit_pipelines = match layout { - GPUPipelineLayoutOrGPUAutoLayoutMode::Layout(_) => None, - GPUPipelineLayoutOrGPUAutoLayoutMode::Auto(GPUAutoLayoutMode::Auto) => { - Some(wgpu_core::device::ImplicitPipelineIds { - root_id: None, - group_ids: &[None; MAX_BIND_GROUPS], - }) - } - }; - let (compute_pipeline, maybe_err) = gfx_select!(device => instance.device_create_compute_pipeline( - device, - &descriptor, - None, - implicit_pipelines - )); + let (compute_pipeline, maybe_err) = + instance.device_create_compute_pipeline(device, &descriptor, None, None); let rid = state .resource_table @@ -162,7 +146,8 @@ pub fn op_webgpu_compute_pipeline_get_bind_group_layout( .get::(compute_pipeline_rid)?; let compute_pipeline = compute_pipeline_resource.1; - let (bind_group_layout, maybe_err) = gfx_select!(compute_pipeline => instance.compute_pipeline_get_bind_group_layout(compute_pipeline, index, None)); + let (bind_group_layout, maybe_err) = + instance.compute_pipeline_get_bind_group_layout(compute_pipeline, index, None); let rid = state .resource_table @@ -359,7 +344,6 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(fragment.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, targets: Cow::Owned(fragment.targets), }) @@ -385,7 +369,6 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(args.vertex.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, buffers: Cow::Owned(vertex_buffers), }, @@ -397,22 +380,8 @@ pub fn op_webgpu_create_render_pipeline( cache: None, }; - let implicit_pipelines = match args.layout { - GPUPipelineLayoutOrGPUAutoLayoutMode::Layout(_) => None, - GPUPipelineLayoutOrGPUAutoLayoutMode::Auto(GPUAutoLayoutMode::Auto) => { - Some(wgpu_core::device::ImplicitPipelineIds { - root_id: None, - group_ids: &[None; MAX_BIND_GROUPS], - }) - } - }; - - let (render_pipeline, maybe_err) = gfx_select!(device => instance.device_create_render_pipeline( - device, - &descriptor, - None, - 
implicit_pipelines - )); + let (render_pipeline, maybe_err) = + instance.device_create_render_pipeline(device, &descriptor, None, None); let rid = state .resource_table @@ -434,7 +403,8 @@ pub fn op_webgpu_render_pipeline_get_bind_group_layout( .get::(render_pipeline_rid)?; let render_pipeline = render_pipeline_resource.1; - let (bind_group_layout, maybe_err) = gfx_select!(render_pipeline => instance.render_pipeline_get_bind_group_layout(render_pipeline, index, None)); + let (bind_group_layout, maybe_err) = + instance.render_pipeline_get_bind_group_layout(render_pipeline, index, None); let rid = state .resource_table diff --git a/deno_webgpu/queue.rs b/deno_webgpu/queue.rs index 2640134455..fdbf993f8c 100644 --- a/deno_webgpu/queue.rs +++ b/deno_webgpu/queue.rs @@ -20,7 +20,7 @@ impl Resource for WebGpuQueue { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.queue_drop(self.1)); + self.0.queue_drop(self.1); } } @@ -44,7 +44,7 @@ pub fn op_webgpu_queue_submit( }) .collect::, AnyError>>()?; - let maybe_err = gfx_select!(queue => instance.queue_submit(queue, &ids)).err(); + let maybe_err = instance.queue_submit(queue, &ids).err(); for rid in command_buffers { let resource = state.resource_table.take::(rid)?; @@ -95,13 +95,9 @@ pub fn op_webgpu_write_buffer( Some(size) => &buf[data_offset..(data_offset + size)], None => &buf[data_offset..], }; - let maybe_err = gfx_select!(queue => instance.queue_write_buffer( - queue, - buffer, - buffer_offset, - data - )) - .err(); + let maybe_err = instance + .queue_write_buffer(queue, buffer, buffer_offset, data) + .err(); Ok(WebGpuResult::maybe_err(maybe_err)) } @@ -131,11 +127,5 @@ pub fn op_webgpu_write_texture( }; let data_layout = data_layout.into(); - gfx_ok!(queue => instance.queue_write_texture( - queue, - &destination, - buf, - &data_layout, - &size - )) + gfx_ok!(instance.queue_write_texture(queue, &destination, buf, &data_layout, &size)) } diff --git a/deno_webgpu/render_pass.rs b/deno_webgpu/render_pass.rs index 941245971c..2d4557cf03 100644 --- a/deno_webgpu/render_pass.rs +++ b/deno_webgpu/render_pass.rs @@ -12,7 +12,7 @@ use std::cell::RefCell; use super::error::WebGpuResult; -pub(crate) struct WebGpuRenderPass(pub(crate) RefCell>); +pub(crate) struct WebGpuRenderPass(pub(crate) RefCell); impl Resource for WebGpuRenderPass { fn name(&self) -> Cow { "webGPURenderPass".into() @@ -41,15 +41,17 @@ pub fn op_webgpu_render_pass_set_viewport( .resource_table .get::(args.render_pass_rid)?; - render_pass_resource.0.borrow_mut().set_viewport( - state.borrow(), - args.x, - args.y, - args.width, - args.height, - args.min_depth, - args.max_depth, - )?; + state + .borrow::() + .render_pass_set_viewport( + &mut render_pass_resource.0.borrow_mut(), + args.x, + args.y, + args.width, + args.height, + args.min_depth, + args.max_depth, + )?; Ok(WebGpuResult::empty()) } @@ -68,10 +70,15 @@ pub fn op_webgpu_render_pass_set_scissor_rect( .resource_table .get::(render_pass_rid)?; - render_pass_resource - .0 - .borrow_mut() - .set_scissor_rect(state.borrow(), x, y, width, height)?; + state + .borrow::() + .render_pass_set_scissor_rect( + &mut render_pass_resource.0.borrow_mut(), + x, + y, + width, + height, + )?; Ok(WebGpuResult::empty()) } @@ -87,10 +94,9 @@ pub fn op_webgpu_render_pass_set_blend_constant( .resource_table .get::(render_pass_rid)?; - render_pass_resource - .0 - .borrow_mut() - .set_blend_constant(state.borrow(), color)?; + state + .borrow::() + .render_pass_set_blend_constant(&mut render_pass_resource.0.borrow_mut(), color)?; 
Ok(WebGpuResult::empty()) } @@ -106,10 +112,9 @@ pub fn op_webgpu_render_pass_set_stencil_reference( .resource_table .get::(render_pass_rid)?; - render_pass_resource - .0 - .borrow_mut() - .set_stencil_reference(state.borrow(), reference)?; + state + .borrow::() + .render_pass_set_stencil_reference(&mut render_pass_resource.0.borrow_mut(), reference)?; Ok(WebGpuResult::empty()) } @@ -125,10 +130,9 @@ pub fn op_webgpu_render_pass_begin_occlusion_query( .resource_table .get::(render_pass_rid)?; - render_pass_resource - .0 - .borrow_mut() - .begin_occlusion_query(state.borrow(), query_index)?; + state + .borrow::() + .render_pass_begin_occlusion_query(&mut render_pass_resource.0.borrow_mut(), query_index)?; Ok(WebGpuResult::empty()) } @@ -143,10 +147,9 @@ pub fn op_webgpu_render_pass_end_occlusion_query( .resource_table .get::(render_pass_rid)?; - render_pass_resource - .0 - .borrow_mut() - .end_occlusion_query(state.borrow())?; + state + .borrow::() + .render_pass_end_occlusion_query(&mut render_pass_resource.0.borrow_mut())?; Ok(WebGpuResult::empty()) } @@ -172,10 +175,9 @@ pub fn op_webgpu_render_pass_execute_bundles( .resource_table .get::(render_pass_rid)?; - render_pass_resource - .0 - .borrow_mut() - .execute_bundles(state.borrow(), &bundles)?; + state + .borrow::() + .render_pass_execute_bundles(&mut render_pass_resource.0.borrow_mut(), &bundles)?; Ok(WebGpuResult::empty()) } @@ -190,7 +192,9 @@ pub fn op_webgpu_render_pass_end( .resource_table .take::(render_pass_rid)?; - render_pass_resource.0.borrow_mut().end(state.borrow())?; + state + .borrow::() + .render_pass_end(&mut render_pass_resource.0.borrow_mut())?; Ok(WebGpuResult::empty()) } @@ -222,12 +226,14 @@ pub fn op_webgpu_render_pass_set_bind_group( let dynamic_offsets_data: &[u32] = &dynamic_offsets_data[start..start + len]; - render_pass_resource.0.borrow_mut().set_bind_group( - state.borrow(), - index, - bind_group_resource.1, - dynamic_offsets_data, - )?; + state + .borrow::() + .render_pass_set_bind_group( + &mut render_pass_resource.0.borrow_mut(), + index, + bind_group_resource.1, + dynamic_offsets_data, + )?; Ok(WebGpuResult::empty()) } @@ -243,11 +249,13 @@ pub fn op_webgpu_render_pass_push_debug_group( .resource_table .get::(render_pass_rid)?; - render_pass_resource.0.borrow_mut().push_debug_group( - state.borrow(), - group_label, - 0, // wgpu#975 - )?; + state + .borrow::() + .render_pass_push_debug_group( + &mut render_pass_resource.0.borrow_mut(), + group_label, + 0, // wgpu#975 + )?; Ok(WebGpuResult::empty()) } @@ -262,10 +270,9 @@ pub fn op_webgpu_render_pass_pop_debug_group( .resource_table .get::(render_pass_rid)?; - render_pass_resource - .0 - .borrow_mut() - .pop_debug_group(state.borrow())?; + state + .borrow::() + .render_pass_pop_debug_group(&mut render_pass_resource.0.borrow_mut())?; Ok(WebGpuResult::empty()) } @@ -281,11 +288,13 @@ pub fn op_webgpu_render_pass_insert_debug_marker( .resource_table .get::(render_pass_rid)?; - render_pass_resource.0.borrow_mut().insert_debug_marker( - state.borrow(), - marker_label, - 0, // wgpu#975 - )?; + state + .borrow::() + .render_pass_insert_debug_marker( + &mut render_pass_resource.0.borrow_mut(), + marker_label, + 0, // wgpu#975 + )?; Ok(WebGpuResult::empty()) } @@ -304,10 +313,12 @@ pub fn op_webgpu_render_pass_set_pipeline( .resource_table .get::(render_pass_rid)?; - render_pass_resource - .0 - .borrow_mut() - .set_pipeline(state.borrow(), render_pipeline_resource.1)?; + state + .borrow::() + .render_pass_set_pipeline( + &mut render_pass_resource.0.borrow_mut(), 
+ render_pipeline_resource.1, + )?; Ok(WebGpuResult::empty()) } @@ -338,13 +349,15 @@ pub fn op_webgpu_render_pass_set_index_buffer( None }; - render_pass_resource.0.borrow_mut().set_index_buffer( - state.borrow(), - buffer_resource.1, - index_format, - offset, - size, - )?; + state + .borrow::() + .render_pass_set_index_buffer( + &mut render_pass_resource.0.borrow_mut(), + buffer_resource.1, + index_format, + offset, + size, + )?; Ok(WebGpuResult::empty()) } @@ -375,13 +388,15 @@ pub fn op_webgpu_render_pass_set_vertex_buffer( None }; - render_pass_resource.0.borrow_mut().set_vertex_buffer( - state.borrow(), - slot, - buffer_resource.1, - offset, - size, - )?; + state + .borrow::() + .render_pass_set_vertex_buffer( + &mut render_pass_resource.0.borrow_mut(), + slot, + buffer_resource.1, + offset, + size, + )?; Ok(WebGpuResult::empty()) } @@ -400,13 +415,15 @@ pub fn op_webgpu_render_pass_draw( .resource_table .get::(render_pass_rid)?; - render_pass_resource.0.borrow_mut().draw( - state.borrow(), - vertex_count, - instance_count, - first_vertex, - first_instance, - )?; + state + .borrow::() + .render_pass_draw( + &mut render_pass_resource.0.borrow_mut(), + vertex_count, + instance_count, + first_vertex, + first_instance, + )?; Ok(WebGpuResult::empty()) } @@ -426,14 +443,16 @@ pub fn op_webgpu_render_pass_draw_indexed( .resource_table .get::(render_pass_rid)?; - render_pass_resource.0.borrow_mut().draw_indexed( - state.borrow(), - index_count, - instance_count, - first_index, - base_vertex, - first_instance, - )?; + state + .borrow::() + .render_pass_draw_indexed( + &mut render_pass_resource.0.borrow_mut(), + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + )?; Ok(WebGpuResult::empty()) } @@ -453,11 +472,13 @@ pub fn op_webgpu_render_pass_draw_indirect( .resource_table .get::(render_pass_rid)?; - render_pass_resource.0.borrow_mut().draw_indirect( - state.borrow(), - buffer_resource.1, - indirect_offset, - )?; + state + .borrow::() + .render_pass_draw_indirect( + &mut render_pass_resource.0.borrow_mut(), + buffer_resource.1, + indirect_offset, + )?; Ok(WebGpuResult::empty()) } @@ -477,11 +498,13 @@ pub fn op_webgpu_render_pass_draw_indexed_indirect( .resource_table .get::(render_pass_rid)?; - render_pass_resource.0.borrow_mut().draw_indexed_indirect( - state.borrow(), - buffer_resource.1, - indirect_offset, - )?; + state + .borrow::() + .render_pass_draw_indexed_indirect( + &mut render_pass_resource.0.borrow_mut(), + buffer_resource.1, + indirect_offset, + )?; Ok(WebGpuResult::empty()) } diff --git a/deno_webgpu/sampler.rs b/deno_webgpu/sampler.rs index 822c4bda14..59b6f4e302 100644 --- a/deno_webgpu/sampler.rs +++ b/deno_webgpu/sampler.rs @@ -21,7 +21,7 @@ impl Resource for WebGpuSampler { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.sampler_drop(self.1)); + self.0.sampler_drop(self.1); } } @@ -71,7 +71,7 @@ pub fn op_webgpu_create_sampler( border_color: None, // native-only }; - gfx_put!(device => instance.device_create_sampler( + gfx_put!(instance.device_create_sampler( device, &descriptor, None diff --git a/deno_webgpu/shader.rs b/deno_webgpu/shader.rs index 17cde43936..4c7a30b2bd 100644 --- a/deno_webgpu/shader.rs +++ b/deno_webgpu/shader.rs @@ -20,7 +20,7 @@ impl Resource for WebGpuShaderModule { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.shader_module_drop(self.1)); + self.0.shader_module_drop(self.1); } } @@ -45,7 +45,7 @@ pub fn op_webgpu_create_shader_module( shader_bound_checks: wgpu_types::ShaderBoundChecks::default(), }; - 
gfx_put!(device => instance.device_create_shader_module( + gfx_put!(instance.device_create_shader_module( device, &descriptor, source, diff --git a/deno_webgpu/surface.rs b/deno_webgpu/surface.rs index a8b984eefe..b48dbd2c8b 100644 --- a/deno_webgpu/surface.rs +++ b/deno_webgpu/surface.rs @@ -63,7 +63,7 @@ pub fn op_webgpu_surface_configure( desired_maximum_frame_latency: 2, }; - let err = gfx_select!(device => instance.surface_configure(surface, device, &conf)); + let err = instance.surface_configure(surface, device, &conf); Ok(WebGpuResult::maybe_err(err)) } @@ -72,18 +72,14 @@ pub fn op_webgpu_surface_configure( #[serde] pub fn op_webgpu_surface_get_current_texture( state: &mut OpState, - #[smi] device_rid: ResourceId, + #[smi] _device_rid: ResourceId, #[smi] surface_rid: ResourceId, ) -> Result { let instance = state.borrow::(); - let device_resource = state - .resource_table - .get::(device_rid)?; - let device = device_resource.1; let surface_resource = state.resource_table.get::(surface_rid)?; let surface = surface_resource.1; - let output = gfx_select!(device => instance.surface_get_current_texture(surface, None))?; + let output = instance.surface_get_current_texture(surface, None)?; match output.status { SurfaceStatus::Good | SurfaceStatus::Suboptimal => { @@ -102,18 +98,14 @@ pub fn op_webgpu_surface_get_current_texture( #[op2(fast)] pub fn op_webgpu_surface_present( state: &mut OpState, - #[smi] device_rid: ResourceId, + #[smi] _device_rid: ResourceId, #[smi] surface_rid: ResourceId, ) -> Result<(), AnyError> { let instance = state.borrow::(); - let device_resource = state - .resource_table - .get::(device_rid)?; - let device = device_resource.1; let surface_resource = state.resource_table.get::(surface_rid)?; let surface = surface_resource.1; - let _ = gfx_select!(device => instance.surface_present(surface))?; + instance.surface_present(surface)?; Ok(()) } diff --git a/deno_webgpu/texture.rs b/deno_webgpu/texture.rs index 2dc1a740a5..a432c7b627 100644 --- a/deno_webgpu/texture.rs +++ b/deno_webgpu/texture.rs @@ -24,7 +24,7 @@ impl Resource for WebGpuTexture { fn close(self: Rc) { if self.owned { let instance = &self.instance; - gfx_select!(self.id => instance.texture_drop(self.id, true)); + instance.texture_drop(self.id); } } } @@ -39,7 +39,7 @@ impl Resource for WebGpuTextureView { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.texture_view_drop(self.1, true)).unwrap(); + self.0.texture_view_drop(self.1).unwrap(); } } @@ -80,11 +80,7 @@ pub fn op_webgpu_create_texture( view_formats: args.view_formats, }; - let (val, maybe_err) = gfx_select!(device => instance.device_create_texture( - device, - &descriptor, - None - )); + let (val, maybe_err) = instance.device_create_texture(device, &descriptor, None); let rid = state.resource_table.add(WebGpuTexture { instance: instance.clone(), @@ -125,9 +121,9 @@ pub fn op_webgpu_create_texture_view( range: args.range, }; - gfx_put!(texture => instance.texture_create_view( - texture, - &descriptor, - None - ) => state, WebGpuTextureView) + gfx_put!(instance.texture_create_view( + texture, + &descriptor, + None + ) => state, WebGpuTextureView) } diff --git a/deno_webgpu/webgpu.idl b/deno_webgpu/webgpu.idl index 07d9d60ec7..41949feb1f 100644 --- a/deno_webgpu/webgpu.idl +++ b/deno_webgpu/webgpu.idl @@ -97,6 +97,7 @@ enum GPUFeatureName { // texture formats "depth32float-stencil8", "texture-compression-bc", + "texture-compression-bc-sliced-3d", "texture-compression-etc2", "texture-compression-astc", // api diff --git 
a/examples/src/boids/mod.rs b/examples/src/boids/mod.rs index 7b1b8f0bc3..8c3581824b 100644 --- a/examples/src/boids/mod.rs +++ b/examples/src/boids/mod.rs @@ -131,7 +131,7 @@ impl crate::framework::Example for Example { layout: Some(&render_pipeline_layout), vertex: wgpu::VertexState { module: &draw_shader, - entry_point: "main_vs", + entry_point: Some("main_vs"), compilation_options: Default::default(), buffers: &[ wgpu::VertexBufferLayout { @@ -148,7 +148,7 @@ impl crate::framework::Example for Example { }, fragment: Some(wgpu::FragmentState { module: &draw_shader, - entry_point: "main_fs", + entry_point: Some("main_fs"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), @@ -165,7 +165,7 @@ impl crate::framework::Example for Example { label: Some("Compute pipeline"), layout: Some(&compute_pipeline_layout), module: &compute_shader, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/bunnymark/mod.rs b/examples/src/bunnymark/mod.rs index b5b33b54d5..54bdc2a941 100644 --- a/examples/src/bunnymark/mod.rs +++ b/examples/src/bunnymark/mod.rs @@ -202,13 +202,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], diff --git a/examples/src/conservative_raster/mod.rs b/examples/src/conservative_raster/mod.rs index 116ed8623b..d029134756 100644 --- a/examples/src/conservative_raster/mod.rs +++ b/examples/src/conservative_raster/mod.rs @@ -96,13 +96,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout_empty), vertex: wgpu::VertexState { module: &shader_triangle_and_lines, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader_triangle_and_lines, - entry_point: "fs_main_red", + entry_point: Some("fs_main_red"), compilation_options: Default::default(), targets: &[Some(RENDER_TARGET_FORMAT.into())], }), @@ -122,13 +122,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout_empty), vertex: wgpu::VertexState { module: &shader_triangle_and_lines, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader_triangle_and_lines, - entry_point: "fs_main_blue", + entry_point: Some("fs_main_blue"), compilation_options: Default::default(), targets: &[Some(RENDER_TARGET_FORMAT.into())], }), @@ -149,13 +149,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout_empty), vertex: wgpu::VertexState { module: &shader_triangle_and_lines, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader_triangle_and_lines, - entry_point: "fs_main_white", + entry_point: Some("fs_main_white"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), @@ -213,13 +213,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: 
wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/cube/mod.rs b/examples/src/cube/mod.rs index 9828157e57..608fae0088 100644 --- a/examples/src/cube/mod.rs +++ b/examples/src/cube/mod.rs @@ -243,13 +243,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), @@ -272,13 +272,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_wire", + entry_point: Some("fs_wire"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], diff --git a/examples/src/hello_compute/mod.rs b/examples/src/hello_compute/mod.rs index fb23e13955..7f3c3f05bf 100644 --- a/examples/src/hello_compute/mod.rs +++ b/examples/src/hello_compute/mod.rs @@ -109,7 +109,7 @@ async fn execute_gpu_inner( label: None, layout: None, module: &cs_module, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/hello_synchronization/README.md b/examples/src/hello_synchronization/README.md index 5750801f14..5367213eec 100644 --- a/examples/src/hello_synchronization/README.md +++ b/examples/src/hello_synchronization/README.md @@ -2,7 +2,7 @@ This example is 1. A small demonstration of the importance of synchronization. -2. How basic synchronization you can understand from the CPU is preformed on the GPU. +2. How basic synchronization you can understand from the CPU is performed on the GPU. 
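The CPU-side intuition this README leans on can be made concrete. The sketch below is illustrative only and is not part of the example's code: a `std::sync::Barrier` does for CPU threads roughly what a WGSL barrier such as `workgroupBarrier()` does for invocations in a workgroup, in that no participant proceeds past the synchronization point until all of them have reached it.

```rust
use std::sync::{Arc, Barrier};
use std::thread;

fn main() {
    // Four "invocations"; none may pass the barrier until all four arrive.
    // This is the CPU-side analogue of a workgroup-scoped GPU barrier.
    let barrier = Arc::new(Barrier::new(4));
    let handles: Vec<_> = (0..4)
        .map(|i| {
            let barrier = Arc::clone(&barrier);
            thread::spawn(move || {
                println!("thread {i}: before the barrier");
                barrier.wait(); // the synchronization point
                println!("thread {i}: after the barrier");
            })
        })
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
}
```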
## To Run diff --git a/examples/src/hello_synchronization/mod.rs b/examples/src/hello_synchronization/mod.rs index d98f1bb8d4..397af48c98 100644 --- a/examples/src/hello_synchronization/mod.rs +++ b/examples/src/hello_synchronization/mod.rs @@ -103,7 +103,7 @@ async fn execute( label: None, layout: Some(&pipeline_layout), module: &shaders_module, - entry_point: "patient_main", + entry_point: Some("patient_main"), compilation_options: Default::default(), cache: None, }); @@ -111,7 +111,7 @@ async fn execute( label: None, layout: Some(&pipeline_layout), module: &shaders_module, - entry_point: "hasty_main", + entry_point: Some("hasty_main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index 41c0583506..7c82d49cf0 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -59,13 +59,13 @@ async fn run(event_loop: EventLoop<()>, window: Window) { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), buffers: &[], compilation_options: Default::default(), }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(swapchain_format.into())], }), diff --git a/examples/src/hello_workgroups/mod.rs b/examples/src/hello_workgroups/mod.rs index 0184981c05..3260aa8628 100644 --- a/examples/src/hello_workgroups/mod.rs +++ b/examples/src/hello_workgroups/mod.rs @@ -110,7 +110,7 @@ async fn run() { label: None, layout: Some(&pipeline_layout), module: &shader, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/mipmap/mod.rs b/examples/src/mipmap/mod.rs index 3e9250c702..33e23a474a 100644 --- a/examples/src/mipmap/mod.rs +++ b/examples/src/mipmap/mod.rs @@ -92,13 +92,13 @@ impl Example { layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(TEXTURE_FORMAT.into())], }), @@ -292,13 +292,13 @@ impl crate::framework::Example for Example { layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/msaa_line/mod.rs b/examples/src/msaa_line/mod.rs index 46bb743e99..e57a4461ab 100644 --- a/examples/src/msaa_line/mod.rs +++ b/examples/src/msaa_line/mod.rs @@ -53,7 +53,7 @@ impl Example { layout: Some(pipeline_layout), vertex: wgpu::VertexState { module: shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: std::mem::size_of::() as wgpu::BufferAddress, @@ -63,7 +63,7 @@ impl Example { }, fragment: Some(wgpu::FragmentState { module: shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), 
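One change recurs throughout these example diffs: the `entry_point` field on `wgpu::VertexState`, `wgpu::FragmentState`, and `wgpu::ComputePipelineDescriptor` is now an `Option<&str>` rather than a bare `&str`. A minimal sketch of the new shape, assuming a device, pipeline layout, and shader module are already in hand (the helper name below is a placeholder, not code from this PR):

```rust
fn build_pipeline(
    device: &wgpu::Device,
    layout: &wgpu::PipelineLayout,
    module: &wgpu::ShaderModule,
) -> wgpu::ComputePipeline {
    device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
        label: None,
        layout: Some(layout),
        module,
        // The entry point name is now wrapped in Some(..).
        entry_point: Some("main"),
        compilation_options: Default::default(),
        cache: None,
    })
}
```

Passing `None` instead asks wgpu to infer the entry point, which only succeeds when the module contains exactly one entry point for the stage in question.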
diff --git a/examples/src/ray_cube_compute/mod.rs b/examples/src/ray_cube_compute/mod.rs index b814bb8286..4d2c238844 100644 --- a/examples/src/ray_cube_compute/mod.rs +++ b/examples/src/ray_cube_compute/mod.rs @@ -395,7 +395,7 @@ impl crate::framework::Example for Example { label: Some("rt"), layout: None, module: &shader, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); @@ -426,13 +426,13 @@ impl crate::framework::Example for Example { layout: None, vertex: wgpu::VertexState { module: &blit_shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &blit_shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.format.into())], }), @@ -623,6 +623,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest base_test_parameters: wgpu_test::TestParameters { required_features: ::required_features(), required_limits: ::required_limits(), + force_fxc: false, skips: vec![], failures: Vec::new(), required_downlevel_caps: diff --git a/examples/src/ray_cube_fragment/mod.rs b/examples/src/ray_cube_fragment/mod.rs index b42e6b94e1..cfada8fd21 100644 --- a/examples/src/ray_cube_fragment/mod.rs +++ b/examples/src/ray_cube_fragment/mod.rs @@ -201,13 +201,13 @@ impl crate::framework::Example for Example { layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.format.into())], }), @@ -388,6 +388,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest base_test_parameters: wgpu_test::TestParameters { required_features: ::required_features(), required_limits: ::required_limits(), + force_fxc: false, skips: vec![], failures: Vec::new(), required_downlevel_caps: diff --git a/examples/src/ray_scene/mod.rs b/examples/src/ray_scene/mod.rs index d2c34d2da4..25f663d4bf 100644 --- a/examples/src/ray_scene/mod.rs +++ b/examples/src/ray_scene/mod.rs @@ -378,13 +378,13 @@ impl crate::framework::Example for Example { layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.format.into())], }), @@ -564,6 +564,7 @@ static TEST: crate::framework::ExampleTestParams = crate::framework::ExampleTest base_test_parameters: wgpu_test::TestParameters { required_features: ::required_features(), required_limits: ::required_limits(), + force_fxc: false, skips: vec![], failures: Vec::new(), required_downlevel_caps: diff --git a/examples/src/render_to_texture/mod.rs b/examples/src/render_to_texture/mod.rs index c0922bc2ec..1d6f488d52 100644 --- a/examples/src/render_to_texture/mod.rs +++ b/examples/src/render_to_texture/mod.rs @@ -59,13 +59,13 @@ async fn run(_path: Option) { layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: 
Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::TextureFormat::Rgba8UnormSrgb.into())], }), diff --git a/examples/src/repeated_compute/mod.rs b/examples/src/repeated_compute/mod.rs index 330b930f6f..5dac9ce7c2 100644 --- a/examples/src/repeated_compute/mod.rs +++ b/examples/src/repeated_compute/mod.rs @@ -245,7 +245,7 @@ impl WgpuContext { label: None, layout: Some(&pipeline_layout), module: &shader, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/shadow/mod.rs b/examples/src/shadow/mod.rs index b2c27f5892..7047ab598c 100644 --- a/examples/src/shadow/mod.rs +++ b/examples/src/shadow/mod.rs @@ -499,7 +499,7 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_bake", + entry_point: Some("vs_bake"), compilation_options: Default::default(), buffers: &[vb_desc.clone()], }, @@ -633,17 +633,17 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[vb_desc], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: if supports_storage_resources { + entry_point: Some(if supports_storage_resources { "fs_main" } else { "fs_main_without_storage" - }, + }), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/skybox/mod.rs b/examples/src/skybox/mod.rs index e526feedae..fd5532e6d1 100644 --- a/examples/src/skybox/mod.rs +++ b/examples/src/skybox/mod.rs @@ -198,13 +198,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_sky", + entry_point: Some("vs_sky"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_sky", + entry_point: Some("fs_sky"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), @@ -228,7 +228,7 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_entity", + entry_point: Some("vs_entity"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: std::mem::size_of::() as wgpu::BufferAddress, @@ -238,7 +238,7 @@ impl crate::framework::Example for Example { }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_entity", + entry_point: Some("fs_entity"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/srgb_blend/mod.rs b/examples/src/srgb_blend/mod.rs index 314fc92df2..63e5e79cb5 100644 --- a/examples/src/srgb_blend/mod.rs +++ b/examples/src/srgb_blend/mod.rs @@ -130,13 +130,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { 
format: config.view_formats[0], diff --git a/examples/src/stencil_triangles/mod.rs b/examples/src/stencil_triangles/mod.rs index 8d638d20d1..d497eccc32 100644 --- a/examples/src/stencil_triangles/mod.rs +++ b/examples/src/stencil_triangles/mod.rs @@ -73,13 +73,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], @@ -114,13 +114,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/storage_texture/mod.rs b/examples/src/storage_texture/mod.rs index d6a06d6e2f..76b95d09dd 100644 --- a/examples/src/storage_texture/mod.rs +++ b/examples/src/storage_texture/mod.rs @@ -100,7 +100,7 @@ async fn run(_path: Option) { label: None, layout: Some(&pipeline_layout), module: &shader, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/texture_arrays/mod.rs b/examples/src/texture_arrays/mod.rs index b0f474b957..785b461802 100644 --- a/examples/src/texture_arrays/mod.rs +++ b/examples/src/texture_arrays/mod.rs @@ -320,7 +320,7 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &base_shader_module, - entry_point: "vert_main", + entry_point: Some("vert_main"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: vertex_size as wgpu::BufferAddress, @@ -330,7 +330,7 @@ impl crate::framework::Example for Example { }, fragment: Some(wgpu::FragmentState { module: fragment_shader_module, - entry_point: fragment_entry_point, + entry_point: Some(fragment_entry_point), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/timestamp_queries/mod.rs b/examples/src/timestamp_queries/mod.rs index d712762cfd..3edcd7b83c 100644 --- a/examples/src/timestamp_queries/mod.rs +++ b/examples/src/timestamp_queries/mod.rs @@ -298,7 +298,7 @@ fn compute_pass( label: None, layout: None, module, - entry_point: "main_cs", + entry_point: Some("main_cs"), compilation_options: Default::default(), cache: None, }); @@ -354,13 +354,13 @@ fn render_pass( layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(format.into())], }), diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index 0adbf4e466..f275853ba2 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -6,10 +6,10 @@ //! 4. 
the bind group layout is attached to the pipeline layout.
 //! 5. the uniform buffer and the bind group are stored alongside the pipeline.
 //! 6. an instance of `AppState` is created. This variable will be modified
-//! to change parameters in the shader and modified by app events to preform and save
-//! those changes.
+//!    to change parameters in the shader and modified by app events to perform and save
+//!    those changes.
 //! 7. (7a and 7b) the `state` variable created at (6) is modified by commands such
-//! as pressing the arrow keys or zooming in or out.
+//!    as pressing the arrow keys or zooming in or out.
 //! 8. the contents of the `AppState` are loaded into the uniform buffer in preparation.
 //! 9. the bind group with the uniform buffer is attached to the render pass.
 //!
@@ -179,13 +179,13 @@ impl WgpuContext {
             layout: Some(&pipeline_layout),
             vertex: wgpu::VertexState {
                 module: &shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: &shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(swapchain_format.into())],
             }),
diff --git a/examples/src/water/mod.rs b/examples/src/water/mod.rs
index b21ec70c4d..6b4943d45e 100644
--- a/examples/src/water/mod.rs
+++ b/examples/src/water/mod.rs
@@ -511,7 +511,7 @@ impl crate::framework::Example for Example {
             // Vertex shader and input buffers
             vertex: wgpu::VertexState {
                 module: &water_module,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 // Layout of our vertices. This should match the structs
                 // which are uploaded to the GPU. This should also be
@@ -527,7 +527,7 @@ impl crate::framework::Example for Example {
             // Fragment shader and output targets
             fragment: Some(wgpu::FragmentState {
                 module: &water_module,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 // Describes how the colour will be interpolated
                 // and assigned to the output attachment.
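Step 8 of the `uniform_values` walkthrough above, loading the `AppState` contents into the uniform buffer, typically boils down to a single `Queue::write_buffer` call before the render pass that binds the buffer is encoded. A hedged sketch follows; `AppUniforms` and its fields are invented stand-ins for whatever the example actually packs, and `bytemuck` with its `derive` feature is assumed:

```rust
use bytemuck::{Pod, Zeroable};

// Hypothetical CPU-side mirror of the shader's uniform struct. repr(C) plus
// Pod/Zeroable guarantee a defined byte layout that can be copied verbatim.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct AppUniforms {
    offset: [f32; 2],
    zoom: f32,
    _padding: f32, // keep the struct a multiple of 16 bytes for uniform use
}

fn upload_state(queue: &wgpu::Queue, uniform_buffer: &wgpu::Buffer, uniforms: &AppUniforms) {
    // Step 8: copy the current state into the uniform buffer. Step 9 then
    // attaches the bind group holding this buffer to the render pass.
    queue.write_buffer(uniform_buffer, 0, bytemuck::bytes_of(uniforms));
}
```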
@@ -584,7 +584,7 @@ impl crate::framework::Example for Example { layout: Some(&terrain_pipeline_layout), vertex: wgpu::VertexState { module: &terrain_module, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: terrain_vertex_size as wgpu::BufferAddress, @@ -594,7 +594,7 @@ impl crate::framework::Example for Example { }, fragment: Some(wgpu::FragmentState { module: &terrain_module, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/naga-cli/Cargo.toml b/naga-cli/Cargo.toml index 9ffe6e937b..e9abb82d26 100644 --- a/naga-cli/Cargo.toml +++ b/naga-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "naga-cli" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] edition = "2021" description = "Shader translation command line tool" @@ -18,14 +18,14 @@ doc = false test = false [dependencies] -bincode = "1" -codespan-reporting = "0.11" -env_logger = "0.11" -argh = "0.1.5" +bincode.workspace = true +codespan-reporting.workspace = true +env_logger.workspace = true +argh.workspace = true anyhow.workspace = true [dependencies.naga] -version = "0.20.0" +version = "22.0.0" path = "../naga" features = [ "compact", diff --git a/naga-cli/src/bin/naga.rs b/naga-cli/src/bin/naga.rs index 97d947973e..002c6dd664 100644 --- a/naga-cli/src/bin/naga.rs +++ b/naga-cli/src/bin/naga.rs @@ -38,13 +38,6 @@ struct Args { #[argh(option)] image_load_bounds_check_policy: Option, - /// what policy to use for texture stores bounds checking. - /// - /// Possible values are the same as for `index-bounds-check-policy`. If - /// omitted, defaults to the index bounds check policy. - #[argh(option)] - image_store_bounds_check_policy: Option, - /// directory to dump the SPIR-V block context dump to #[argh(option)] block_ctx_dir: Option, @@ -409,10 +402,6 @@ fn run() -> anyhow::Result<()> { Some(arg) => arg.0, None => params.bounds_check_policies.index, }; - params.bounds_check_policies.image_store = match args.image_store_bounds_check_policy { - Some(arg) => arg.0, - None => params.bounds_check_policies.index, - }; params.overrides = args .overrides .iter() diff --git a/naga/CHANGELOG.md b/naga/CHANGELOG.md index 2a00f01f86..49cde4e212 100644 --- a/naga/CHANGELOG.md +++ b/naga/CHANGELOG.md @@ -81,6 +81,7 @@ For changelogs after v0.14, see [the wgpu changelog](../CHANGELOG.md). - Make varyings' struct members unique. ([#2521](https://github.com/gfx-rs/naga/pull/2521)) **@evahop** - Add experimental vertex pulling transform flag. ([#5254](https://github.com/gfx-rs/wgpu/pull/5254)) **@bradwerth** - Fixup some generated MSL for vertex buffer unpack functions. ([#5829](https://github.com/gfx-rs/wgpu/pull/5829)) **@bradwerth** +- Make vertex pulling transform on by default. ([#5773](https://github.com/gfx-rs/wgpu/pull/5773)) **@bradwerth** #### GLSL-OUT diff --git a/naga/Cargo.toml b/naga/Cargo.toml index 8478cc6f7b..e8415a3bc3 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "naga" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] edition = "2021" description = "Shader translation infrastructure" @@ -9,9 +9,14 @@ keywords = ["shader", "SPIR-V", "GLSL", "MSL"] license = "MIT OR Apache-2.0" exclude = ["bin/**/*", "tests/**/*", "Cargo.lock", "target/**/*"] resolver = "2" -rust-version = "1.74" autotests = false +# Override the workspace's `rust-version` key. 
Firefox uses `cargo vendor` to +# copy the crates it actually uses out of the workspace, so it's meaningful for +# them to have less restrictive MSRVs individually than the workspace as a +# whole, if their code permits. See `../README.md` for details. +rust-version = "1.76" + [[test]] name = "naga-test" path = "tests/root.rs" @@ -59,31 +64,31 @@ compact = [] [dependencies] arbitrary = { version = "1.3", features = ["derive"], optional = true } -bitflags = "2.6" -bit-set = "0.6" +arrayvec.workspace = true +bitflags.workspace = true +bit-set.workspace = true termcolor = { version = "1.4.1" } # remove termcolor dep when updating to the next version of codespan-reporting # termcolor minimum version was wrong and was fixed in # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e codespan-reporting = { version = "0.11.0" } -rustc-hash = "1.1.0" -indexmap = { version = "2", features = ["std"] } +rustc-hash.workspace = true +indexmap.workspace = true log = "0.4" spirv = { version = "0.3", optional = true } -thiserror = "1.0.61" -serde = { version = "1.0.203", features = ["derive"], optional = true } +thiserror.workspace = true +serde = { version = "1.0.206", features = ["derive"], optional = true } petgraph = { version = "0.6", optional = true } pp-rs = { version = "0.2.1", optional = true } hexf-parse = { version = "0.2.1", optional = true } unicode-xid = { version = "0.2.3", optional = true } -arrayvec.workspace = true [build-dependencies] cfg_aliases.workspace = true [dev-dependencies] diff = "0.1" -env_logger = "0.11" +env_logger.workspace = true # This _cannot_ have a version specified. If it does, crates.io will look # for a version of the package on crates when we publish naga. Path dependencies # are allowed through though. @@ -93,5 +98,5 @@ hlsl-snapshots = { path = "./hlsl-snapshots" } # incompatible with our tests because we do a syntactic diff and not a semantic one. ron = "0.8.0" rspirv = { version = "0.11", git = "https://github.com/gfx-rs/rspirv", rev = "b969f175d5663258b4891e44b76c1544da9661ab" } -serde = { version = "1.0", features = ["derive"] } +serde = { workspace = true, features = ["derive"] } spirv = { version = "0.3", features = ["deserialize"] } diff --git a/naga/README.md b/naga/README.md index 0e07d40496..b5e98bb727 100644 --- a/naga/README.md +++ b/naga/README.md @@ -4,7 +4,7 @@ [![Crates.io](https://img.shields.io/crates/v/naga.svg?label=naga)](https://crates.io/crates/naga) [![Docs.rs](https://docs.rs/naga/badge.svg)](https://docs.rs/naga) [![Build Status](https://github.com/gfx-rs/naga/workflows/pipeline/badge.svg)](https://github.com/gfx-rs/naga/actions) -![MSRV](https://img.shields.io/badge/rustc-1.74+-blue.svg) +![MSRV](https://img.shields.io/badge/rustc-1.76+-blue.svg) [![codecov.io](https://codecov.io/gh/gfx-rs/naga/branch/master/graph/badge.svg?token=9VOKYO8BM2)](https://codecov.io/gh/gfx-rs/naga) The shader translation library for the needs of [wgpu](https://github.com/gfx-rs/wgpu). diff --git a/naga/fuzz/Cargo.toml b/naga/fuzz/Cargo.toml index 196919e441..c4dd8cd1c1 100644 --- a/naga/fuzz/Cargo.toml +++ b/naga/fuzz/Cargo.toml @@ -15,7 +15,7 @@ libfuzzer-sys = "0.4" [target.'cfg(not(any(target_arch = "wasm32", target_os = "ios")))'.dependencies.naga] path = ".." 
-version = "0.20.0" +version = "22.0.0" features = ["arbitrary", "spv-in", "wgsl-in", "glsl-in"] [[bin]] diff --git a/naga/src/back/continue_forward.rs b/naga/src/back/continue_forward.rs new file mode 100644 index 0000000000..cecb93a837 --- /dev/null +++ b/naga/src/back/continue_forward.rs @@ -0,0 +1,311 @@ +//! Workarounds for platform bugs and limitations in switches and loops. +//! +//! In these docs, we use CamelCase links for Naga IR concepts, and ordinary +//! `code` formatting for HLSL or GLSL concepts. +//! +//! ## Avoiding `continue` within `switch` +//! +//! As described in , the FXC HLSL +//! compiler doesn't allow `continue` statements within `switch` statements, but +//! Naga IR does. We work around this by introducing synthetic boolean local +//! variables and branches. +//! +//! Specifically: +//! +//! - We generate code for [`Continue`] statements within [`SwitchCase`]s that +//! sets an introduced `bool` local to `true` and does a `break`, jumping to +//! immediately after the generated `switch`. +//! +//! - When generating code for a [`Switch`] statement, we conservatively assume +//! it might contain such a [`Continue`] statement, so: +//! +//! - If it's the outermost such [`Switch`] within a [`Loop`], we declare the +//! `bool` local ahead of the switch, initialized to `false`. Immediately +//! after the `switch`, we check the local and do a `continue` if it's set. +//! +//! - If the [`Switch`] is nested within other [`Switch`]es, then after the +//! generated `switch`, we check the local (which we know was declared +//! before the surrounding `switch`) and do a `break` if it's set. +//! +//! - As an optimization, we only generate the check of the local if a +//! [`Continue`] statement is encountered within the [`Switch`]. This may +//! help drivers more easily identify that the `bool` is unused. +//! +//! So while we "weaken" the [`Continue`] statement by rendering it as a `break` +//! statement, we also place checks immediately at the locations to which those +//! `break` statements will jump, until we can be sure we've reached the +//! intended target of the original [`Continue`]. +//! +//! In the case of nested [`Loop`] and [`Switch`] statements, there may be +//! multiple introduced `bool` locals in scope, but there's no problem knowing +//! which one to operate on. At any point, there is at most one [`Loop`] +//! statement that could be targeted by a [`Continue`] statement, so the correct +//! `bool` local to set and test is always the one introduced for the innermost +//! enclosing [`Loop`]'s outermost [`Switch`]. +//! +//! # Avoiding single body `switch` statements +//! +//! As described in , some language +//! front ends miscompile `switch` statements where all cases branch to the same +//! body. Our HLSL and GLSL backends render [`Switch`] statements with a single +//! [`SwitchCase`] as `do {} while(false);` loops. +//! +//! However, this rewriting introduces a new loop that could "capture" +//! `continue` statements in its body. To avoid doing so, we apply the +//! [`Continue`]-to-`break` transformation described above. +//! +//! [`Continue`]: crate::Statement::Continue +//! [`Loop`]: crate::Statement::Loop +//! [`Switch`]: crate::Statement::Switch +//! [`SwitchCase`]: crate::SwitchCase + +use crate::proc::Namer; +use std::rc::Rc; + +/// A summary of the code surrounding a statement. +enum Nesting { + /// Currently nested in at least one [`Loop`] statement. + /// + /// [`Continue`] should apply to the innermost loop. 
+ /// + /// When this entry is on the top of the stack: + /// + /// * When entering an inner [`Loop`] statement, push a [`Loop`][nl] state + /// onto the stack. + /// + /// * When entering a nested [`Switch`] statement, push a [`Switch`][ns] + /// state onto the stack with a new variable name. Before the generated + /// `switch`, introduce a `bool` local with that name, initialized to + /// `false`. + /// + /// When exiting the [`Loop`] for which this entry was pushed, pop it from + /// the stack. + /// + /// [`Continue`]: crate::Statement::Continue + /// [`Loop`]: crate::Statement::Loop + /// [`Switch`]: crate::Statement::Switch + /// [ns]: Nesting::Switch + /// [nl]: Nesting::Loop + Loop, + + /// Currently nested in at least one [`Switch`] that may need to forward + /// [`Continue`]s. + /// + /// This includes [`Switch`]es rendered as `do {} while(false)` loops, but + /// doesn't need to include regular [`Switch`]es in backends that can + /// support `continue` within switches. + /// + /// [`Continue`] should be forwarded to the innermost surrounding [`Loop`]. + /// + /// When this entry is on the top of the stack: + /// + /// * When entering a nested [`Loop`], push a [`Loop`][nl] state onto the + /// stack. + /// + /// * When entering a nested [`Switch`], push a [`Switch`][ns] state onto + /// the stack with a clone of the introduced `bool` variable's name. + /// + /// * When encountering a [`Continue`] statement, render it as code to set + /// the introduced `bool` local (whose name is held in [`variable`]) to + /// `true`, and then `break`. Set [`continue_encountered`] to `true` to + /// record that the [`Switch`] contains a [`Continue`]. + /// + /// * When exiting this [`Switch`], pop its entry from the stack. If + /// [`continue_encountered`] is set, then we have rendered [`Continue`] + /// statements as `break` statements that jump to this point. Generate + /// code to check `variable`, and if it is `true`: + /// + /// * If there is another [`Switch`][ns] left on top of the stack, set + /// its `continue_encountered` flag, and generate a `break`. (Both + /// [`Switch`][ns]es are within the same [`Loop`] and share the same + /// introduced variable, so there's no need to set another flag to + /// continue to exit the `switch`es.) + /// + /// * Otherwise, `continue`. + /// + /// When we exit the [`Switch`] for which this entry was pushed, pop it. + /// + /// [`Continue`]: crate::Statement::Continue + /// [`Loop`]: crate::Statement::Loop + /// [`Switch`]: crate::Statement::Switch + /// [`variable`]: Nesting::Switch::variable + /// [`continue_encountered`]: Nesting::Switch::continue_encountered + /// [ns]: Nesting::Switch + /// [nl]: Nesting::Loop + Switch { + variable: Rc, + + /// Set if we've generated code for a [`Continue`] statement with this + /// entry on the top of the stack. + /// + /// If this is still clear when we finish rendering the [`Switch`], then + /// we know we don't need to generate branch forwarding code. Omitting + /// that may make it easier for drivers to tell that the `bool` we + /// introduced ahead of the [`Switch`] is actually unused. + /// + /// [`Continue`]: crate::Statement::Continue + /// [`Switch`]: crate::Statement::Switch + continue_encountered: bool, + }, +} + +/// A micro-IR for code a backend should generate after a [`Switch`]. 
+///
+/// [`Switch`]: crate::Statement::Switch
+pub(super) enum ExitControlFlow {
+    None,
+    /// Emit `if (continue_variable) { continue; }`
+    Continue {
+        variable: Rc<String>,
+    },
+    /// Emit `if (continue_variable) { break; }`
+    ///
+    /// Used after a [`Switch`] to exit from an enclosing [`Switch`].
+    ///
+    /// After the enclosing switch, its associated check will consult this same
+    /// variable, see that it is set, and exit early.
+    ///
+    /// [`Switch`]: crate::Statement::Switch
+    Break {
+        variable: Rc<String>,
+    },
+}
+
+/// Utility for tracking nesting of loops and switches to orchestrate forwarding
+/// of continue statements inside of a switch to the enclosing loop.
+///
+/// See [module docs](self) for why we need this.
+#[derive(Default)]
+pub(super) struct ContinueCtx {
+    stack: Vec<Nesting>,
+}
+
+impl ContinueCtx {
+    /// Resets internal state.
+    ///
+    /// Use this to reuse memory between writing sessions.
+    pub fn clear(&mut self) {
+        self.stack.clear();
+    }
+
+    /// Updates internal state to record entering a [`Loop`] statement.
+    ///
+    /// [`Loop`]: crate::Statement::Loop
+    pub fn enter_loop(&mut self) {
+        self.stack.push(Nesting::Loop);
+    }
+
+    /// Updates internal state to record exiting a [`Loop`] statement.
+    ///
+    /// [`Loop`]: crate::Statement::Loop
+    pub fn exit_loop(&mut self) {
+        if !matches!(self.stack.pop(), Some(Nesting::Loop)) {
+            unreachable!("ContinueCtx stack out of sync");
+        }
+    }
+
+    /// Updates internal state to record entering a [`Switch`] statement.
+    ///
+    /// Return `Some(variable)` if this [`Switch`] is nested within a [`Loop`],
+    /// and the caller should introduce a new `bool` local variable named
+    /// `variable` above the `switch`, for forwarding [`Continue`] statements.
+    ///
+    /// `variable` is guaranteed not to conflict with any names used by the
+    /// program itself.
+    ///
+    /// [`Continue`]: crate::Statement::Continue
+    /// [`Loop`]: crate::Statement::Loop
+    /// [`Switch`]: crate::Statement::Switch
+    pub fn enter_switch(&mut self, namer: &mut Namer) -> Option<Rc<String>> {
+        match self.stack.last() {
+            // If the stack is empty, we are not in a loop, so we don't need to
+            // forward continue statements within this `Switch`. We can leave
+            // the stack empty.
+            None => None,
+            Some(&Nesting::Loop { .. }) => {
+                let variable = Rc::new(namer.call("should_continue"));
+                self.stack.push(Nesting::Switch {
+                    variable: Rc::clone(&variable),
+                    continue_encountered: false,
+                });
+                Some(variable)
+            }
+            Some(&Nesting::Switch { ref variable, .. }) => {
+                self.stack.push(Nesting::Switch {
+                    variable: Rc::clone(variable),
+                    continue_encountered: false,
+                });
+                // We have already declared the variable before some enclosing
+                // `Switch`.
+                None
+            }
+        }
+    }
+
+    /// Update internal state to record leaving a [`Switch`] statement.
+    ///
+    /// Return an [`ExitControlFlow`] value indicating what code should be
+    /// introduced after the generated `switch` to forward continues.
+    ///
+    /// [`Switch`]: crate::Statement::Switch
+    pub fn exit_switch(&mut self) -> ExitControlFlow {
+        match self.stack.pop() {
+            // This doesn't indicate a problem: we don't start pushing entries
+            // for `Switch` statements unless we have an enclosing `Loop`.
+            None => ExitControlFlow::None,
+            Some(Nesting::Loop { .. }) => {
+                unreachable!("Unexpected loop state when exiting switch");
+            }
+            Some(Nesting::Switch {
+                variable,
+                continue_encountered: inner_continue,
+            }) => {
+                if !inner_continue {
+                    // No `Continue` statement was encountered, so we didn't
+                    // introduce any `break`s jumping to this point.
+ ExitControlFlow::None + } else if let Some(&mut Nesting::Switch { + continue_encountered: ref mut outer_continue, + .. + }) = self.stack.last_mut() + { + // This is nested in another `Switch`. Propagate upwards + // that there is a continue statement present. + *outer_continue = true; + ExitControlFlow::Break { variable } + } else { + ExitControlFlow::Continue { variable } + } + } + } + } + + /// Determine what to generate for a [`Continue`] statement. + /// + /// If we can generate an ordinary `continue` statement, return `None`. + /// + /// Otherwise, we're enclosed by a [`Switch`] that is itself enclosed by a + /// [`Loop`]. Return `Some(variable)` to indicate that the [`Continue`] + /// should be rendered as setting `variable` to `true`, and then doing a + /// `break`. + /// + /// This also notes that we've encountered a [`Continue`] statement, so that + /// we can generate the right code to forward the branch following the + /// enclosing `switch`. + /// + /// [`Continue`]: crate::Statement::Continue + /// [`Loop`]: crate::Statement::Loop + /// [`Switch`]: crate::Statement::Switch + pub fn continue_encountered(&mut self) -> Option<&str> { + if let Some(&mut Nesting::Switch { + ref variable, + ref mut continue_encountered, + }) = self.stack.last_mut() + { + *continue_encountered = true; + Some(variable) + } else { + None + } + } +} diff --git a/naga/src/back/glsl/features.rs b/naga/src/back/glsl/features.rs index 0478e01351..b22bcbe651 100644 --- a/naga/src/back/glsl/features.rs +++ b/naga/src/back/glsl/features.rs @@ -399,7 +399,7 @@ impl<'a, W> Writer<'a, W> { | StorageFormat::Rg16Float | StorageFormat::Rgb10a2Uint | StorageFormat::Rgb10a2Unorm - | StorageFormat::Rg11b10Float + | StorageFormat::Rg11b10UFloat | StorageFormat::Rg32Uint | StorageFormat::Rg32Sint | StorageFormat::Rg32Float => { @@ -447,7 +447,7 @@ impl<'a, W> Writer<'a, W> { .. } = self; - // Loop trough all expressions in both functions and the entry point + // Loop through all expressions in both functions and the entry point // to check for needed features for (expressions, info) in module .functions diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index bc2d2a90d8..4c7f8b3251 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -545,6 +545,11 @@ pub struct Writer<'a, W> { named_expressions: crate::NamedExpressions, /// Set of expressions that need to be baked to avoid unnecessary repetition in output need_bake_expressions: back::NeedBakeExpressions, + /// Information about nesting of loops and switches. + /// + /// Used for forwarding continue statements in switches that have been + /// transformed to `do {} while(false);` loops. + continue_ctx: back::continue_forward::ContinueCtx, /// How many views to render to, if doing multiview rendering. multiview: Option, /// Mapping of varying variables to their location. Needed for reflections. @@ -619,6 +624,7 @@ impl<'a, W: Write> Writer<'a, W> { block_id: IdGenerator::default(), named_expressions: Default::default(), need_bake_expressions: Default::default(), + continue_ctx: back::continue_forward::ContinueCtx::default(), varying: Default::default(), }; @@ -1307,14 +1313,13 @@ impl<'a, W: Write> Writer<'a, W> { crate::MathFunction::Dot => { // if the expression is a Dot product with integer arguments, // then the args needs baking as well - if let TypeInner::Scalar(crate::Scalar { kind, .. 
}) = *inner { - match kind { - crate::ScalarKind::Sint | crate::ScalarKind::Uint => { - self.need_bake_expressions.insert(arg); - self.need_bake_expressions.insert(arg1.unwrap()); - } - _ => {} - } + if let TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, + .. + }) = *inner + { + self.need_bake_expressions.insert(arg); + self.need_bake_expressions.insert(arg1.unwrap()); } } crate::MathFunction::Pack4xI8 @@ -1869,7 +1874,7 @@ impl<'a, W: Write> Writer<'a, W> { // with different precedences from applying earlier. write!(self.out, "(")?; - // Cycle trough all the components of the vector + // Cycle through all the components of the vector for index in 0..size { let component = back::COMPONENTS[index]; // Write the addition to the previous product @@ -2082,42 +2087,94 @@ impl<'a, W: Write> Writer<'a, W> { selector, ref cases, } => { - // Start the switch - write!(self.out, "{level}")?; - write!(self.out, "switch(")?; - self.write_expr(selector, ctx)?; - writeln!(self.out, ") {{")?; - - // Write all cases let l2 = level.next(); - for case in cases { - match case.value { - crate::SwitchValue::I32(value) => write!(self.out, "{l2}case {value}:")?, - crate::SwitchValue::U32(value) => write!(self.out, "{l2}case {value}u:")?, - crate::SwitchValue::Default => write!(self.out, "{l2}default:")?, - } + // Some GLSL consumers may not handle switches with a single + // body correctly: See wgpu#4514. Write such switch statements + // as a `do {} while(false);` loop instead. + // + // Since doing so may inadvertently capture `continue` + // statements in the switch body, we must apply continue + // forwarding. See the `naga::back::continue_forward` module + // docs for details. + let one_body = cases + .iter() + .rev() + .skip(1) + .all(|case| case.fall_through && case.body.is_empty()); + if one_body { + // Unlike HLSL, in GLSL `continue_ctx` only needs to know + // about [`Switch`] statements that are being rendered as + // `do-while` loops. + if let Some(variable) = self.continue_ctx.enter_switch(&mut self.namer) { + writeln!(self.out, "{level}bool {variable} = false;",)?; + }; + writeln!(self.out, "{level}do {{")?; + // Note: Expressions have no side-effects so we don't need to emit selector expression. - let write_block_braces = !(case.fall_through && case.body.is_empty()); - if write_block_braces { - writeln!(self.out, " {{")?; - } else { - writeln!(self.out)?; + // Body + if let Some(case) = cases.last() { + for sta in case.body.iter() { + self.write_stmt(sta, ctx, l2)?; + } } - - for sta in case.body.iter() { - self.write_stmt(sta, ctx, l2.next())?; + // End do-while + writeln!(self.out, "{level}}} while(false);")?; + + // Handle any forwarded continue statements. + use back::continue_forward::ExitControlFlow; + let op = match self.continue_ctx.exit_switch() { + ExitControlFlow::None => None, + ExitControlFlow::Continue { variable } => Some(("continue", variable)), + ExitControlFlow::Break { variable } => Some(("break", variable)), + }; + if let Some((control_flow, variable)) = op { + writeln!(self.out, "{level}if ({variable}) {{")?; + writeln!(self.out, "{l2}{control_flow};")?; + writeln!(self.out, "{level}}}")?; } + } else { + // Start the switch + write!(self.out, "{level}")?; + write!(self.out, "switch(")?; + self.write_expr(selector, ctx)?; + writeln!(self.out, ") {{")?; + + // Write all cases + for case in cases { + match case.value { + crate::SwitchValue::I32(value) => { + write!(self.out, "{l2}case {value}:")? 
+ } + crate::SwitchValue::U32(value) => { + write!(self.out, "{l2}case {value}u:")? + } + crate::SwitchValue::Default => write!(self.out, "{l2}default:")?, + } - if !case.fall_through && case.body.last().map_or(true, |s| !s.is_terminator()) { - writeln!(self.out, "{}break;", l2.next())?; - } + let write_block_braces = !(case.fall_through && case.body.is_empty()); + if write_block_braces { + writeln!(self.out, " {{")?; + } else { + writeln!(self.out)?; + } + + for sta in case.body.iter() { + self.write_stmt(sta, ctx, l2.next())?; + } + + if !case.fall_through + && case.body.last().map_or(true, |s| !s.is_terminator()) + { + writeln!(self.out, "{}break;", l2.next())?; + } - if write_block_braces { - writeln!(self.out, "{l2}}}")?; + if write_block_braces { + writeln!(self.out, "{l2}}}")?; + } } - } - writeln!(self.out, "{level}}}")? + writeln!(self.out, "{level}}}")? + } } // Loops in naga IR are based on wgsl loops, glsl can emulate the behaviour by using a // while true loop and appending the continuing block to the body resulting on: @@ -2134,6 +2191,7 @@ impl<'a, W: Write> Writer<'a, W> { ref continuing, break_if, } => { + self.continue_ctx.enter_loop(); if !continuing.is_empty() || break_if.is_some() { let gate_name = self.namer.call("loop_init"); writeln!(self.out, "{level}bool {gate_name} = true;")?; @@ -2159,7 +2217,8 @@ impl<'a, W: Write> Writer<'a, W> { for sta in body { self.write_stmt(sta, ctx, level.next())?; } - writeln!(self.out, "{level}}}")? + writeln!(self.out, "{level}}}")?; + self.continue_ctx.exit_loop(); } // Break, continue and return as written as in C // `break;` @@ -2169,8 +2228,14 @@ impl<'a, W: Write> Writer<'a, W> { } // `continue;` Statement::Continue => { - write!(self.out, "{level}")?; - writeln!(self.out, "continue;")? + // Sometimes we must render a `Continue` statement as a `break`. + // See the docs for the `back::continue_forward` module. + if let Some(variable) = self.continue_ctx.continue_encountered() { + writeln!(self.out, "{level}{variable} = true;",)?; + writeln!(self.out, "{level}break;")? + } else { + writeln!(self.out, "{level}continue;")? + } } // `return expr;`, `expr` is optional Statement::Return { value } => { @@ -3581,8 +3646,8 @@ impl<'a, W: Write> Writer<'a, W> { return Ok(()); } - Mf::FindLsb => "findLSB", - Mf::FindMsb => "findMSB", + Mf::FirstTrailingBit => "findLSB", + Mf::FirstLeadingBit => "findMSB", // data packing Mf::Pack4x8snorm => "packSnorm4x8", Mf::Pack4x8unorm => "packUnorm4x8", @@ -3656,8 +3721,10 @@ impl<'a, W: Write> Writer<'a, W> { // Some GLSL functions always return signed integers (like findMSB), // so they need to be cast to uint if the argument is also an uint. - let ret_might_need_int_to_uint = - matches!(fun, Mf::FindLsb | Mf::FindMsb | Mf::CountOneBits | Mf::Abs); + let ret_might_need_int_to_uint = matches!( + fun, + Mf::FirstTrailingBit | Mf::FirstLeadingBit | Mf::CountOneBits | Mf::Abs + ); // Some GLSL functions only accept signed integers (like abs), // so they need their argument cast from uint to int. 
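Taken together, the `write_stmt` changes above render a single-body `switch` as a `do {} while(false);` loop and forward any `continue` that loop would otherwise capture. A minimal before/after sketch of the emitted GLSL shapes, held in Rust string constants for illustration (hand-written to mirror the writer, not captured output; `should_continue` is the base name passed to `Namer`, which may suffix it for uniqueness):

```rust
/// Without forwarding, the `do {} while(false);` introduced for the
/// single-body-switch workaround would capture `continue`, turning it into
/// a jump past `while(false)` instead of to the enclosing loop.
const CAPTURED_CONTINUE: &str = "
while(true) {
    do {
        continue; // wrong target: exits the do-while, not the loop
    } while(false);
}";

/// With forwarding, `Statement::Continue` sets the introduced flag and
/// breaks; the check emitted after `exit_switch` re-issues the `continue`
/// in the scope of the real loop.
const FORWARDED_CONTINUE: &str = "
while(true) {
    bool should_continue = false;
    do {
        should_continue = true;
        break;
    } while(false);
    if (should_continue) {
        continue;
    }
}";
```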
@@ -4753,7 +4820,7 @@ fn glsl_storage_format(format: crate::StorageFormat) -> Result<&'static str, Err Sf::Rgba8Sint => "rgba8i", Sf::Rgb10a2Uint => "rgb10_a2ui", Sf::Rgb10a2Unorm => "rgb10_a2", - Sf::Rg11b10Float => "r11f_g11f_b10f", + Sf::Rg11b10UFloat => "r11f_g11f_b10f", Sf::Rg32Uint => "rg32ui", Sf::Rg32Sint => "rg32i", Sf::Rg32Float => "rg32f", diff --git a/naga/src/back/hlsl/conv.rs b/naga/src/back/hlsl/conv.rs index 7d15f43f6c..6c0daf4762 100644 --- a/naga/src/back/hlsl/conv.rs +++ b/naga/src/back/hlsl/conv.rs @@ -132,7 +132,7 @@ impl crate::StorageFormat { Self::Rg8Sint | Self::Rg16Sint => "int2", Self::Rg8Uint | Self::Rg16Uint => "uint2", - Self::Rg11b10Float => "float3", + Self::Rg11b10UFloat => "float3", Self::Rgba16Float | Self::R32Float | Self::Rg32Float | Self::Rgba32Float => "float4", Self::Rgba8Unorm | Self::Bgra8Unorm | Self::Rgba16Unorm | Self::Rgb10a2Unorm => { diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index 49ff07ebf2..d28b387bf7 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -327,6 +327,7 @@ pub struct Writer<'a, W> { /// Set of expressions that have associated temporary variables named_expressions: crate::NamedExpressions, wrapped: Wrapped, + continue_ctx: back::continue_forward::ContinueCtx, /// A reference to some part of a global variable, lowered to a series of /// byte offset calculations. diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index d40b9b24c2..85d943e850 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -9,7 +9,7 @@ use super::{ use crate::{ back::{self, Baked}, proc::{self, NameKey}, - valid, Handle, Module, ScalarKind, ShaderStage, TypeInner, + valid, Handle, Module, Scalar, ScalarKind, ShaderStage, TypeInner, }; use std::{fmt, mem}; @@ -104,6 +104,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { entry_point_io: Vec::new(), named_expressions: crate::NamedExpressions::default(), wrapped: super::Wrapped::default(), + continue_ctx: back::continue_forward::ContinueCtx::default(), temp_access_chain: Vec::new(), need_bake_expressions: Default::default(), } @@ -122,6 +123,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.entry_point_io.clear(); self.named_expressions.clear(); self.wrapped.clear(); + self.continue_ctx.clear(); self.need_bake_expressions.clear(); } @@ -1439,6 +1441,151 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_barrier(crate::Barrier::WORK_GROUP, level) } + /// Helper method used to write switches + fn write_switch( + &mut self, + module: &Module, + func_ctx: &back::FunctionCtx<'_>, + level: back::Level, + selector: Handle, + cases: &[crate::SwitchCase], + ) -> BackendResult { + // Write all cases + let indent_level_1 = level.next(); + let indent_level_2 = indent_level_1.next(); + + // See docs of `back::continue_forward` module. + if let Some(variable) = self.continue_ctx.enter_switch(&mut self.namer) { + writeln!(self.out, "{level}bool {variable} = false;",)?; + }; + + // Check if there is only one body, by seeing if all except the last case are fall through + // with empty bodies. FXC doesn't handle these switches correctly, so + // we generate a `do {} while(false);` loop instead. There must be a default case, so there + // is no need to check if one of the cases would have matched. 
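+        // For example, the WGSL statement `switch s { case 1, 2, default: { body } }`
+        // reaches this backend as three cases in which all but the last are empty
+        // fall-throughs, so the scan below classifies the whole switch as one body.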
+ let one_body = cases + .iter() + .rev() + .skip(1) + .all(|case| case.fall_through && case.body.is_empty()); + if one_body { + // Start the do-while + writeln!(self.out, "{level}do {{")?; + // Note: Expressions have no side-effects so we don't need to emit selector expression. + + // Body + if let Some(case) = cases.last() { + for sta in case.body.iter() { + self.write_stmt(module, sta, func_ctx, indent_level_1)?; + } + } + // End do-while + writeln!(self.out, "{level}}} while(false);")?; + } else { + // Start the switch + write!(self.out, "{level}")?; + write!(self.out, "switch(")?; + self.write_expr(module, selector, func_ctx)?; + writeln!(self.out, ") {{")?; + + for (i, case) in cases.iter().enumerate() { + match case.value { + crate::SwitchValue::I32(value) => { + write!(self.out, "{indent_level_1}case {value}:")? + } + crate::SwitchValue::U32(value) => { + write!(self.out, "{indent_level_1}case {value}u:")? + } + crate::SwitchValue::Default => write!(self.out, "{indent_level_1}default:")?, + } + + // The new block is not only stylistic, it plays a role here: + // We might end up having to write the same case body + // multiple times due to FXC not supporting fallthrough. + // Therefore, some `Expression`s written by `Statement::Emit` + // will end up having the same name (`_expr`). + // So we need to put each case in its own scope. + let write_block_braces = !(case.fall_through && case.body.is_empty()); + if write_block_braces { + writeln!(self.out, " {{")?; + } else { + writeln!(self.out)?; + } + + // Although FXC does support a series of case clauses before + // a block[^yes], it does not support fallthrough from a + // non-empty case block to the next[^no]. If this case has a + // non-empty body with a fallthrough, emulate that by + // duplicating the bodies of all the cases it would fall + // into as extensions of this case's own body. This makes + // the HLSL output potentially quadratic in the size of the + // Naga IR. + // + // [^yes]: ```hlsl + // case 1: + // case 2: do_stuff() + // ``` + // [^no]: ```hlsl + // case 1: do_this(); + // case 2: do_that(); + // ``` + if case.fall_through && !case.body.is_empty() { + let curr_len = i + 1; + let end_case_idx = curr_len + + cases + .iter() + .skip(curr_len) + .position(|case| !case.fall_through) + .unwrap(); + let indent_level_3 = indent_level_2.next(); + for case in &cases[i..=end_case_idx] { + writeln!(self.out, "{indent_level_2}{{")?; + let prev_len = self.named_expressions.len(); + for sta in case.body.iter() { + self.write_stmt(module, sta, func_ctx, indent_level_3)?; + } + // Clear all named expressions that were previously inserted by the statements in the block + self.named_expressions.truncate(prev_len); + writeln!(self.out, "{indent_level_2}}}")?; + } + + let last_case = &cases[end_case_idx]; + if last_case.body.last().map_or(true, |s| !s.is_terminator()) { + writeln!(self.out, "{indent_level_2}break;")?; + } + } else { + for sta in case.body.iter() { + self.write_stmt(module, sta, func_ctx, indent_level_2)?; + } + if !case.fall_through && case.body.last().map_or(true, |s| !s.is_terminator()) { + writeln!(self.out, "{indent_level_2}break;")?; + } + } + + if write_block_braces { + writeln!(self.out, "{indent_level_1}}}")?; + } + } + + writeln!(self.out, "{level}}}")?; + } + + // Handle any forwarded continue statements. 
+ use back::continue_forward::ExitControlFlow; + let op = match self.continue_ctx.exit_switch() { + ExitControlFlow::None => None, + ExitControlFlow::Continue { variable } => Some(("continue", variable)), + ExitControlFlow::Break { variable } => Some(("break", variable)), + }; + if let Some((control_flow, variable)) = op { + writeln!(self.out, "{level}if ({variable}) {{")?; + writeln!(self.out, "{indent_level_1}{control_flow};")?; + writeln!(self.out, "{level}}}")?; + } + + Ok(()) + } + /// Helper method used to write statements /// /// # Notes @@ -1882,6 +2029,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ref continuing, break_if, } => { + self.continue_ctx.enter_loop(); let l2 = level.next(); if !continuing.is_empty() || break_if.is_some() { let gate_name = self.namer.call("loop_init"); @@ -1908,10 +2056,18 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { for sta in body.iter() { self.write_stmt(module, sta, func_ctx, l2)?; } - writeln!(self.out, "{level}}}")? + writeln!(self.out, "{level}}}")?; + self.continue_ctx.exit_loop(); } Statement::Break => writeln!(self.out, "{level}break;")?, - Statement::Continue => writeln!(self.out, "{level}continue;")?, + Statement::Continue => { + if let Some(variable) = self.continue_ctx.continue_encountered() { + writeln!(self.out, "{level}{variable} = true;")?; + writeln!(self.out, "{level}break;")? + } else { + writeln!(self.out, "{level}continue;")? + } + } Statement::Barrier(barrier) => { self.write_barrier(barrier, level)?; } @@ -2013,7 +2169,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // ownership of our reusable access chain buffer. let chain = mem::take(&mut self.temp_access_chain); let var_name = &self.names[&NameKey::GlobalVariable(var_handle)]; - write!(self.out, "{var_name}.Interlocked{fun_str}(")?; + let width = match func_ctx.resolve_type(value, &module.types) { + &TypeInner::Scalar(Scalar { width: 8, .. }) => "64", + _ => "", + }; + write!(self.out, "{var_name}.Interlocked{fun_str}{width}(")?; self.write_storage_address(module, &chain, func_ctx)?; self.temp_access_chain = chain; } @@ -2059,100 +2219,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { selector, ref cases, } => { - // Start the switch - write!(self.out, "{level}")?; - write!(self.out, "switch(")?; - self.write_expr(module, selector, func_ctx)?; - writeln!(self.out, ") {{")?; - - // Write all cases - let indent_level_1 = level.next(); - let indent_level_2 = indent_level_1.next(); - - for (i, case) in cases.iter().enumerate() { - match case.value { - crate::SwitchValue::I32(value) => { - write!(self.out, "{indent_level_1}case {value}:")? - } - crate::SwitchValue::U32(value) => { - write!(self.out, "{indent_level_1}case {value}u:")? - } - crate::SwitchValue::Default => { - write!(self.out, "{indent_level_1}default:")? - } - } - - // The new block is not only stylistic, it plays a role here: - // We might end up having to write the same case body - // multiple times due to FXC not supporting fallthrough. - // Therefore, some `Expression`s written by `Statement::Emit` - // will end up having the same name (`_expr`). - // So we need to put each case in its own scope. - let write_block_braces = !(case.fall_through && case.body.is_empty()); - if write_block_braces { - writeln!(self.out, " {{")?; - } else { - writeln!(self.out)?; - } - - // Although FXC does support a series of case clauses before - // a block[^yes], it does not support fallthrough from a - // non-empty case block to the next[^no]. 
If this case has a - // non-empty body with a fallthrough, emulate that by - // duplicating the bodies of all the cases it would fall - // into as extensions of this case's own body. This makes - // the HLSL output potentially quadratic in the size of the - // Naga IR. - // - // [^yes]: ```hlsl - // case 1: - // case 2: do_stuff() - // ``` - // [^no]: ```hlsl - // case 1: do_this(); - // case 2: do_that(); - // ``` - if case.fall_through && !case.body.is_empty() { - let curr_len = i + 1; - let end_case_idx = curr_len - + cases - .iter() - .skip(curr_len) - .position(|case| !case.fall_through) - .unwrap(); - let indent_level_3 = indent_level_2.next(); - for case in &cases[i..=end_case_idx] { - writeln!(self.out, "{indent_level_2}{{")?; - let prev_len = self.named_expressions.len(); - for sta in case.body.iter() { - self.write_stmt(module, sta, func_ctx, indent_level_3)?; - } - // Clear all named expressions that were previously inserted by the statements in the block - self.named_expressions.truncate(prev_len); - writeln!(self.out, "{indent_level_2}}}")?; - } - - let last_case = &cases[end_case_idx]; - if last_case.body.last().map_or(true, |s| !s.is_terminator()) { - writeln!(self.out, "{indent_level_2}break;")?; - } - } else { - for sta in case.body.iter() { - self.write_stmt(module, sta, func_ctx, indent_level_2)?; - } - if !case.fall_through - && case.body.last().map_or(true, |s| !s.is_terminator()) - { - writeln!(self.out, "{indent_level_2}break;")?; - } - } - - if write_block_braces { - writeln!(self.out, "{indent_level_1}}}")?; - } - } - - writeln!(self.out, "{level}}}")? + self.write_switch(module, func_ctx, level, selector, cases)?; } Statement::RayQuery { .. } => unreachable!(), Statement::SubgroupBallot { result, predicate } => { @@ -2852,7 +2919,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { let inner = func_ctx.resolve_type(expr, &module.types); let close_paren = match convert { Some(dst_width) => { - let scalar = crate::Scalar { + let scalar = Scalar { kind, width: dst_width, }; @@ -2996,8 +3063,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { Mf::CountLeadingZeros => Function::CountLeadingZeros, Mf::CountOneBits => Function::MissingIntOverload("countbits"), Mf::ReverseBits => Function::MissingIntOverload("reversebits"), - Mf::FindLsb => Function::MissingIntReturnType("firstbitlow"), - Mf::FindMsb => Function::MissingIntReturnType("firstbithigh"), + Mf::FirstTrailingBit => Function::MissingIntReturnType("firstbitlow"), + Mf::FirstLeadingBit => Function::MissingIntReturnType("firstbithigh"), Mf::ExtractBits => Function::Regular(EXTRACT_BITS_FUNCTION), Mf::InsertBits => Function::Regular(INSERT_BITS_FUNCTION), // Data Packing @@ -3213,7 +3280,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // as non-32bit types are DXC only. Function::MissingIntOverload(fun_name) => { let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar(); - if let Some(crate::Scalar { + if let Some(Scalar { kind: ScalarKind::Sint, width: 4, }) = scalar_kind @@ -3231,7 +3298,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // as non-32bit types are DXC only. 
Function::MissingIntReturnType(fun_name) => { let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar(); - if let Some(crate::Scalar { + if let Some(Scalar { kind: ScalarKind::Sint, width: 4, }) = scalar_kind diff --git a/naga/src/back/mod.rs b/naga/src/back/mod.rs index 364d0f2506..352adc37ec 100644 --- a/naga/src/back/mod.rs +++ b/naga/src/back/mod.rs @@ -19,6 +19,9 @@ pub mod wgsl; #[cfg(any(hlsl_out, msl_out, spv_out, glsl_out))] pub mod pipeline_constants; +#[cfg(any(hlsl_out, glsl_out))] +mod continue_forward; + /// Names of vector components. pub const COMPONENTS: &[char] = &['x', 'y', 'z', 'w']; /// Indent for backends. @@ -254,7 +257,9 @@ impl crate::TypeInner { /// Returns true if this is a handle to a type rather than the type directly. pub const fn is_handle(&self) -> bool { match *self { - crate::TypeInner::Image { .. } | crate::TypeInner::Sampler { .. } => true, + crate::TypeInner::Image { .. } + | crate::TypeInner::Sampler { .. } + | crate::TypeInner::AccelerationStructure { .. } => true, _ => false, } } diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index 37e0b98d77..626475debc 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -295,7 +295,10 @@ pub enum VertexFormat { /// Four signed ints (i32). `vec4` in shaders. Sint32x4 = 29, /// Three unsigned 10-bit integers and one 2-bit integer, packed into a 32-bit integer (u32). [0, 1024] converted to float [0, 1] `vec4` in shaders. - #[cfg_attr(feature = "serde", serde(rename = "unorm10-10-10-2"))] + #[cfg_attr( + any(feature = "serialize", feature = "deserialize"), + serde(rename = "unorm10-10-10-2") + )] Unorm10_10_10_2 = 34, } @@ -351,7 +354,9 @@ pub struct PipelineOptions { /// to receive the vertex buffers, lengths, and vertex id as args, /// and bounds-check the vertex id and use the index into the /// vertex buffers to access attributes, rather than using Metal's - /// [[stage-in]] assembled attribute data. + /// [[stage-in]] assembled attribute data. This is true by default, + /// but remains configurable for use by tests via deserialization + /// of this struct. There is no user-facing way to set this value. pub vertex_pulling_transform: bool, /// vertex_buffer_mappings are used during shader translation to diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 8b86897007..48f862f8ba 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -1063,43 +1063,6 @@ impl Writer { address: &TexelAddress, value: Handle, context: &StatementContext, - ) -> BackendResult { - match context.expression.policies.image_store { - proc::BoundsCheckPolicy::Restrict => { - // We don't have a restricted level value, because we don't - // support writes to mipmapped textures. 
- debug_assert!(address.level.is_none()); - - write!(self.out, "{level}")?; - self.put_expression(image, &context.expression, false)?; - write!(self.out, ".write(")?; - self.put_expression(value, &context.expression, true)?; - write!(self.out, ", ")?; - self.put_restricted_texel_address(image, address, &context.expression)?; - writeln!(self.out, ");")?; - } - proc::BoundsCheckPolicy::ReadZeroSkipWrite => { - write!(self.out, "{level}if (")?; - self.put_image_access_bounds_check(image, address, &context.expression)?; - writeln!(self.out, ") {{")?; - self.put_unchecked_image_store(level.next(), image, address, value, context)?; - writeln!(self.out, "{level}}}")?; - } - proc::BoundsCheckPolicy::Unchecked => { - self.put_unchecked_image_store(level, image, address, value, context)?; - } - } - - Ok(()) - } - - fn put_unchecked_image_store( - &mut self, - level: back::Level, - image: Handle, - address: &TexelAddress, - value: Handle, - context: &StatementContext, ) -> BackendResult { write!(self.out, "{level}")?; self.put_expression(image, &context.expression, false)?; @@ -1235,7 +1198,7 @@ impl Writer { // with different precedences from applying earlier. write!(self.out, "(")?; - // Cycle trough all the components of the vector + // Cycle through all the components of the vector for index in 0..size { let component = back::COMPONENTS[index]; // Write the addition to the previous product @@ -1875,8 +1838,8 @@ impl Writer { Mf::ReverseBits => "reverse_bits", Mf::ExtractBits => "", Mf::InsertBits => "", - Mf::FindLsb => "", - Mf::FindMsb => "", + Mf::FirstTrailingBit => "", + Mf::FirstLeadingBit => "", // data packing Mf::Pack4x8snorm => "pack_float_to_snorm4x8", Mf::Pack4x8unorm => "pack_float_to_unorm4x8", @@ -1920,7 +1883,7 @@ impl Writer { self.put_expression(arg1.unwrap(), context, false)?; write!(self.out, ")")?; } - Mf::FindLsb => { + Mf::FirstTrailingBit => { let scalar = context.resolve_type(arg).scalar().unwrap(); let constant = scalar.width * 8 + 1; @@ -1928,7 +1891,7 @@ impl Writer { self.put_expression(arg, context, true)?; write!(self.out, ") + 1) % {constant}) - 1)")?; } - Mf::FindMsb => { + Mf::FirstLeadingBit => { let inner = context.resolve_type(arg); let scalar = inner.scalar().unwrap(); let constant = scalar.width * 8 - 1; @@ -2702,7 +2665,7 @@ impl Writer { } } } - crate::MathFunction::FindMsb + crate::MathFunction::FirstLeadingBit | crate::MathFunction::Pack4xI8 | crate::MathFunction::Pack4xU8 | crate::MathFunction::Unpack4xI8 @@ -3953,8 +3916,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float2((float(b0) - 128.0f) / 255.0f, \ - (float(b1) - 128.0f) / 255.0f);", + "{}return metal::float2(metal::max(-1.0f, as_type(b0) / 127.0f), \ + metal::max(-1.0f, as_type(b1) / 127.0f));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -3971,10 +3934,10 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float4((float(b0) - 128.0f) / 255.0f, \ - (float(b1) - 128.0f) / 255.0f, \ - (float(b2) - 128.0f) / 255.0f, \ - (float(b3) - 128.0f) / 255.0f);", + "{}return metal::float4(metal::max(-1.0f, as_type(b0) / 127.0f), \ + metal::max(-1.0f, as_type(b1) / 127.0f), \ + metal::max(-1.0f, as_type(b2) / 127.0f), \ + metal::max(-1.0f, as_type(b3) / 127.0f));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4033,8 +3996,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::int2(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2));", + "{}return metal::int2(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)));", back::INDENT )?; 
writeln!(self.out, "}}")?; @@ -4055,10 +4018,10 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::int4(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2), \ - as_type(b5 << 8 | b4), \ - as_type(b7 << 8 | b6));", + "{}return metal::int4(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)), \ + as_type(metal::ushort(b5 << 8 | b4)), \ + as_type(metal::ushort(b7 << 8 | b6)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4117,8 +4080,7 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float2((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \ - (float(b3 << 8 | b2) - 32767.0f) / 65535.0f);", + "{}return metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2);", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4139,10 +4101,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float4((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \ - (float(b3 << 8 | b2) - 32767.0f) / 65535.0f, \ - (float(b5 << 8 | b4) - 32767.0f) / 65535.0f, \ - (float(b7 << 8 | b6) - 32767.0f) / 65535.0f);", + "{}return metal::float4(metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2), \ + metal::unpack_snorm2x16_to_float(b5 << 24 | b4 << 16 | b7 << 8 | b6));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4159,8 +4119,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float2(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2));", + "{}return metal::float2(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4170,7 +4130,7 @@ impl Writer { let name = self.namer.call("unpackFloat16x4"); writeln!( self.out, - "metal::int4 {name}(metal::ushort b0, \ + "metal::float4 {name}(metal::ushort b0, \ metal::ushort b1, \ metal::ushort b2, \ metal::ushort b3, \ @@ -4181,10 +4141,10 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::int4(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2), \ - as_type(b5 << 8 | b4), \ - as_type(b7 << 8 | b6));", + "{}return metal::float4(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)), \ + as_type(metal::ushort(b5 << 8 | b4)), \ + as_type(metal::ushort(b7 << 8 | b6)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4390,10 +4350,10 @@ impl Writer { let name = self.namer.call("unpackSint32"); writeln!( self.out, - "metal::int {name}(uint b0, \ - uint b1, \ - uint b2, \ - uint b3) {{" + "int {name}(uint b0, \ + uint b1, \ + uint b2, \ + uint b3) {{" )?; writeln!( self.out, @@ -4495,7 +4455,18 @@ impl Writer { )?; writeln!( self.out, - "{}return unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);", + // The following is correct for RGBA packing, but our format seems to + // match ABGR, which can be fed into the Metal builtin function + // unpack_unorm10a2_to_float. 
+ /* + "{}uint v = (b3 << 24 | b2 << 16 | b1 << 8 | b0); \ + uint r = (v & 0xFFC00000) >> 22; \ + uint g = (v & 0x003FF000) >> 12; \ + uint b = (v & 0x00000FFC) >> 2; \ + uint a = (v & 0x00000003); \ + return metal::float4(float(r) / 1023.0f, float(g) / 1023.0f, float(b) / 1023.0f, float(a) / 3.0f);", + */ + "{}return metal::unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);", back::INDENT )?; writeln!(self.out, "}}")?; diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 33f892aa45..9fb9485860 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -1183,13 +1183,13 @@ impl<'w> BlockContext<'w> { count_id, )) } - Mf::FindLsb => MathOp::Ext(spirv::GLOp::FindILsb), - Mf::FindMsb => { + Mf::FirstTrailingBit => MathOp::Ext(spirv::GLOp::FindILsb), + Mf::FirstLeadingBit => { if arg_ty.scalar_width() == Some(4) { let thing = match arg_scalar_kind { Some(crate::ScalarKind::Uint) => spirv::GLOp::FindUMsb, Some(crate::ScalarKind::Sint) => spirv::GLOp::FindSMsb, - other => unimplemented!("Unexpected findMSB({:?})", other), + other => unimplemented!("Unexpected firstLeadingBit({:?})", other), }; MathOp::Ext(thing) } else { diff --git a/naga/src/back/spv/image.rs b/naga/src/back/spv/image.rs index 3011ee4d13..769971d136 100644 --- a/naga/src/back/spv/image.rs +++ b/naga/src/back/spv/image.rs @@ -1178,32 +1178,13 @@ impl<'w> BlockContext<'w> { _ => {} } - match self.writer.bounds_check_policies.image_store { - crate::proc::BoundsCheckPolicy::Restrict => { - let (coords, _, _) = - self.write_restricted_coordinates(image_id, coordinates, None, None, block)?; - write.generate(&mut self.writer.id_gen, coords, None, None, block); - } - crate::proc::BoundsCheckPolicy::ReadZeroSkipWrite => { - self.write_conditional_image_access( - image_id, - coordinates, - None, - None, - block, - &write, - )?; - } - crate::proc::BoundsCheckPolicy::Unchecked => { - write.generate( - &mut self.writer.id_gen, - coordinates.value_id, - None, - None, - block, - ); - } - } + write.generate( + &mut self.writer.id_gen, + coordinates.value_id, + None, + None, + block, + ); Ok(()) } diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs index df2774ab9c..9029c973de 100644 --- a/naga/src/back/spv/instructions.rs +++ b/naga/src/back/spv/instructions.rs @@ -1170,7 +1170,7 @@ impl From for spirv::ImageFormat { Sf::Bgra8Unorm => Self::Unknown, Sf::Rgb10a2Uint => Self::Rgb10a2ui, Sf::Rgb10a2Unorm => Self::Rgb10A2, - Sf::Rg11b10Float => Self::R11fG11fB10f, + Sf::Rg11b10UFloat => Self::R11fG11fB10f, Sf::Rg32Uint => Self::Rg32ui, Sf::Rg32Sint => Self::Rg32i, Sf::Rg32Float => Self::Rg32f, diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index 8cd37830ec..e5a5e5f647 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -1710,8 +1710,8 @@ impl Writer { Mf::ReverseBits => Function::Regular("reverseBits"), Mf::ExtractBits => Function::Regular("extractBits"), Mf::InsertBits => Function::Regular("insertBits"), - Mf::FindLsb => Function::Regular("firstTrailingBit"), - Mf::FindMsb => Function::Regular("firstLeadingBit"), + Mf::FirstTrailingBit => Function::Regular("firstTrailingBit"), + Mf::FirstLeadingBit => Function::Regular("firstLeadingBit"), // data packing Mf::Pack4x8snorm => Function::Regular("pack4x8snorm"), Mf::Pack4x8unorm => Function::Regular("pack4x8unorm"), @@ -2015,7 +2015,7 @@ const fn storage_format_str(format: crate::StorageFormat) -> &'static str { Sf::Bgra8Unorm => "bgra8unorm", Sf::Rgb10a2Uint => 
"rgb10a2uint", Sf::Rgb10a2Unorm => "rgb10a2unorm", - Sf::Rg11b10Float => "rg11b10float", + Sf::Rg11b10UFloat => "rg11b10float", Sf::Rg32Uint => "rg32uint", Sf::Rg32Sint => "rg32sint", Sf::Rg32Float => "rg32float", diff --git a/naga/src/compact/expressions.rs b/naga/src/compact/expressions.rs index 8072d46d33..0677ab694a 100644 --- a/naga/src/compact/expressions.rs +++ b/naga/src/compact/expressions.rs @@ -3,7 +3,6 @@ use crate::arena::{Arena, Handle}; pub struct ExpressionTracer<'tracer> { pub constants: &'tracer Arena, - pub overrides: &'tracer Arena, /// The arena in which we are currently tracing expressions. pub expressions: &'tracer Arena, diff --git a/naga/src/compact/functions.rs b/naga/src/compact/functions.rs index 372d472da3..bc13e4b229 100644 --- a/naga/src/compact/functions.rs +++ b/naga/src/compact/functions.rs @@ -4,7 +4,6 @@ use super::{FunctionMap, ModuleMap}; pub struct FunctionTracer<'a> { pub function: &'a crate::Function, pub constants: &'a crate::Arena, - pub overrides: &'a crate::Arena, pub types_used: &'a mut HandleSet, pub constants_used: &'a mut HandleSet, @@ -48,7 +47,6 @@ impl<'a> FunctionTracer<'a> { fn as_expression(&mut self) -> super::expressions::ExpressionTracer { super::expressions::ExpressionTracer { constants: self.constants, - overrides: self.overrides, expressions: &self.function.expressions, types_used: self.types_used, diff --git a/naga/src/compact/mod.rs b/naga/src/compact/mod.rs index c40a1880e1..a9fc7bc945 100644 --- a/naga/src/compact/mod.rs +++ b/naga/src/compact/mod.rs @@ -253,7 +253,6 @@ impl<'module> ModuleTracer<'module> { expressions::ExpressionTracer { expressions: &self.module.global_expressions, constants: &self.module.constants, - overrides: &self.module.overrides, types_used: &mut self.types_used, constants_used: &mut self.constants_used, expressions_used: &mut self.global_expressions_used, @@ -268,7 +267,6 @@ impl<'module> ModuleTracer<'module> { FunctionTracer { function, constants: &self.module.constants, - overrides: &self.module.overrides, types_used: &mut self.types_used, constants_used: &mut self.constants_used, global_expressions_used: &mut self.global_expressions_used, diff --git a/naga/src/front/glsl/builtins.rs b/naga/src/front/glsl/builtins.rs index cbb9b99387..f76ce7754a 100644 --- a/naga/src/front/glsl/builtins.rs +++ b/naga/src/front/glsl/builtins.rs @@ -646,8 +646,8 @@ fn inject_standard_builtins( "bitfieldReverse" => MathFunction::ReverseBits, "bitfieldExtract" => MathFunction::ExtractBits, "bitfieldInsert" => MathFunction::InsertBits, - "findLSB" => MathFunction::FindLsb, - "findMSB" => MathFunction::FindMsb, + "findLSB" => MathFunction::FirstTrailingBit, + "findMSB" => MathFunction::FirstLeadingBit, _ => unreachable!(), }; @@ -695,8 +695,12 @@ fn inject_standard_builtins( // we need to cast the return type of findLsb / findMsb let mc = if scalar.kind == Sk::Uint { match mc { - MacroCall::MathFunction(MathFunction::FindLsb) => MacroCall::FindLsbUint, - MacroCall::MathFunction(MathFunction::FindMsb) => MacroCall::FindMsbUint, + MacroCall::MathFunction(MathFunction::FirstTrailingBit) => { + MacroCall::FindLsbUint + } + MacroCall::MathFunction(MathFunction::FirstLeadingBit) => { + MacroCall::FindMsbUint + } mc => mc, } } else { @@ -1787,8 +1791,8 @@ impl MacroCall { )?, mc @ (MacroCall::FindLsbUint | MacroCall::FindMsbUint) => { let fun = match mc { - MacroCall::FindLsbUint => MathFunction::FindLsb, - MacroCall::FindMsbUint => MathFunction::FindMsb, + MacroCall::FindLsbUint => MathFunction::FirstTrailingBit, + 
MacroCall::FindMsbUint => MathFunction::FirstLeadingBit, _ => unreachable!(), }; let res = ctx.add_expression( diff --git a/naga/src/front/glsl/context.rs b/naga/src/front/glsl/context.rs index 6ba7df593a..ee1fcc04ba 100644 --- a/naga/src/front/glsl/context.rs +++ b/naga/src/front/glsl/context.rs @@ -393,7 +393,7 @@ impl<'a> Context<'a> { /// # Panics /// /// - If more than one [`StmtContext`] are active at the same time or if the - /// previous call didn't use it in lowering. + /// previous call didn't use it in lowering. #[must_use] pub fn stmt_ctx(&mut self) -> StmtContext { self.stmt_ctx.take().unwrap() diff --git a/naga/src/front/glsl/parser/types.rs b/naga/src/front/glsl/parser/types.rs index 1b612b298d..d22387f375 100644 --- a/naga/src/front/glsl/parser/types.rs +++ b/naga/src/front/glsl/parser/types.rs @@ -397,7 +397,7 @@ fn map_image_format(word: &str) -> Option { "rgba16f" => Sf::Rgba16Float, "rg32f" => Sf::Rg32Float, "rg16f" => Sf::Rg16Float, - "r11f_g11f_b10f" => Sf::Rg11b10Float, + "r11f_g11f_b10f" => Sf::Rg11b10UFloat, "r32f" => Sf::R32Float, "r16f" => Sf::R16Float, "rgba16" => Sf::Rgba16Unorm, diff --git a/naga/src/front/mod.rs b/naga/src/front/mod.rs index 3f602f3dd0..11c8aa047e 100644 --- a/naga/src/front/mod.rs +++ b/naga/src/front/mod.rs @@ -275,7 +275,7 @@ where Name: std::borrow::Borrow, Q: std::hash::Hash + Eq + ?Sized, { - // Iterate backwards trough the scopes and try to find the variable + // Iterate backwards through the scopes and try to find the variable for scope in self.scopes[..self.cursor].iter().rev() { if let Some(var) = scope.get(name) { return Some(var); diff --git a/naga/src/front/spv/convert.rs b/naga/src/front/spv/convert.rs index a6bf0e0451..88d171b5b7 100644 --- a/naga/src/front/spv/convert.rs +++ b/naga/src/front/spv/convert.rs @@ -104,7 +104,7 @@ pub(super) fn map_image_format(word: spirv::Word) -> Result Ok(crate::StorageFormat::Rgba8Sint), Some(spirv::ImageFormat::Rgb10a2ui) => Ok(crate::StorageFormat::Rgb10a2Uint), Some(spirv::ImageFormat::Rgb10A2) => Ok(crate::StorageFormat::Rgb10a2Unorm), - Some(spirv::ImageFormat::R11fG11fB10f) => Ok(crate::StorageFormat::Rg11b10Float), + Some(spirv::ImageFormat::R11fG11fB10f) => Ok(crate::StorageFormat::Rg11b10UFloat), Some(spirv::ImageFormat::Rg32ui) => Ok(crate::StorageFormat::Rg32Uint), Some(spirv::ImageFormat::Rg32i) => Ok(crate::StorageFormat::Rg32Sint), Some(spirv::ImageFormat::Rg32f) => Ok(crate::StorageFormat::Rg32Float), diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index d154712b20..7dfb4ae293 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -3026,8 +3026,8 @@ impl> Frontend { Glo::UnpackHalf2x16 => Mf::Unpack2x16float, Glo::UnpackUnorm2x16 => Mf::Unpack2x16unorm, Glo::UnpackSnorm2x16 => Mf::Unpack2x16snorm, - Glo::FindILsb => Mf::FindLsb, - Glo::FindUMsb | Glo::FindSMsb => Mf::FindMsb, + Glo::FindILsb => Mf::FirstTrailingBit, + Glo::FindUMsb | Glo::FindSMsb => Mf::FirstLeadingBit, // TODO: https://github.com/gfx-rs/naga/issues/2526 Glo::Modf | Glo::Frexp => return Err(Error::UnsupportedExtInst(inst_id)), Glo::IMix @@ -3460,7 +3460,7 @@ impl> Frontend { .insert(target, (case_body_idx, vec![literal as i32])); } - // Loop trough the collected target blocks creating a new case for each + // Loop through the collected target blocks creating a new case for each // literal pointing to it, only one case will have the true body and all the // others will be empty fallthrough so that they all execute the same body // without duplicating code. 
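The comment above describes collapsing `OpSwitch` targets that share a block into one real case plus empty fall-through cases. A standalone sketch of that grouping step (simplified, hypothetical types standing in for the front end's block handles and case indices):

```rust
use std::collections::HashMap;

/// Group `(literal, target_block)` pairs by target so each block's body is
/// emitted once; the remaining literals become empty fall-through cases.
fn group_switch_targets(targets: &[(i32, u32)]) -> Vec<(u32, Vec<i32>)> {
    let mut order: Vec<u32> = Vec::new(); // preserve first-seen block order
    let mut literals: HashMap<u32, Vec<i32>> = HashMap::new();
    for &(literal, block) in targets {
        literals
            .entry(block)
            .or_insert_with(|| {
                order.push(block);
                Vec::new()
            })
            .push(literal);
    }
    order
        .into_iter()
        .map(|block| (block, literals.remove(&block).unwrap()))
        .collect()
}

// group_switch_targets(&[(1, 7), (2, 7), (3, 9)])
//     == vec![(7, vec![1, 2]), (9, vec![3])]
```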
diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index 7c5954d065..34f8daf506 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -2482,6 +2482,10 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { crate::TypeInner::Scalar(crate::Scalar { width: 8, .. }) ); let result = if is_64_bit_min_max && is_statement { + let rctx = ctx.runtime_expression_ctx(span)?; + rctx.block + .extend(rctx.emitter.finish(&rctx.function.expressions)); + rctx.emitter.start(&rctx.function.expressions); None } else { let ty = ctx.register_type(value)?; diff --git a/naga/src/front/wgsl/parse/ast.rs b/naga/src/front/wgsl/parse/ast.rs index ea8013ee7c..7df5c8a1c9 100644 --- a/naga/src/front/wgsl/parse/ast.rs +++ b/naga/src/front/wgsl/parse/ast.rs @@ -117,33 +117,6 @@ pub struct Function<'a> { pub name: Ident<'a>, pub arguments: Vec>, pub result: Option>, - - /// Local variable and function argument arena. - /// - /// Note that the `Local` here is actually a zero-sized type. The AST keeps - /// all the detailed information about locals - names, types, etc. - in - /// [`LocalDecl`] statements. For arguments, that information is kept in - /// [`arguments`]. This `Arena`'s only role is to assign a unique `Handle` - /// to each of them, and track their definitions' spans for use in - /// diagnostics. - /// - /// In the AST, when an [`Ident`] expression refers to a local variable or - /// argument, its [`IdentExpr`] holds the referent's `Handle` in this - /// arena. - /// - /// During lowering, [`LocalDecl`] statements add entries to a per-function - /// table that maps `Handle` values to their Naga representations, - /// accessed via [`StatementContext::local_table`] and - /// [`RuntimeExpressionContext::local_table`]. This table is then consulted when - /// lowering subsequent [`Ident`] expressions. 
- /// - /// [`LocalDecl`]: StatementKind::LocalDecl - /// [`arguments`]: Function::arguments - /// [`Ident`]: Expression::Ident - /// [`StatementContext::local_table`]: StatementContext::local_table - /// [`RuntimeExpressionContext::local_table`]: RuntimeExpressionContext::local_table - pub locals: Arena, - pub body: Block<'a>, } diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs index 49b15dfa83..4718b85e5e 100644 --- a/naga/src/front/wgsl/parse/conv.rs +++ b/naga/src/front/wgsl/parse/conv.rs @@ -92,7 +92,7 @@ pub fn map_storage_format(word: &str, span: Span) -> Result Sf::Rgba8Sint, "rgb10a2uint" => Sf::Rgb10a2Uint, "rgb10a2unorm" => Sf::Rgb10a2Unorm, - "rg11b10float" => Sf::Rg11b10Float, + "rg11b10float" => Sf::Rg11b10UFloat, "rg32uint" => Sf::Rg32Uint, "rg32sint" => Sf::Rg32Sint, "rg32float" => Sf::Rg32Float, @@ -235,8 +235,8 @@ pub fn map_standard_fun(word: &str) -> Option { "reverseBits" => Mf::ReverseBits, "extractBits" => Mf::ExtractBits, "insertBits" => Mf::InsertBits, - "firstTrailingBit" => Mf::FindLsb, - "firstLeadingBit" => Mf::FindMsb, + "firstTrailingBit" => Mf::FirstTrailingBit, + "firstLeadingBit" => Mf::FirstLeadingBit, // data packing "pack4x8snorm" => Mf::Pack4x8snorm, "pack4x8unorm" => Mf::Pack4x8unorm, diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs index ee3a1846b9..c9114d685d 100644 --- a/naga/src/front/wgsl/parse/mod.rs +++ b/naga/src/front/wgsl/parse/mod.rs @@ -37,9 +37,30 @@ struct ExpressionContext<'input, 'temp, 'out> { /// [`Function::locals`]: ast::Function::locals local_table: &'temp mut SymbolTable<&'input str, Handle>, - /// The [`Function::locals`] arena for the function we're building. + /// Local variable and function argument arena for the function we're building. /// - /// [`Function::locals`]: ast::Function::locals + /// Note that the `Local` here is actually a zero-sized type. The AST keeps + /// all the detailed information about locals - names, types, etc. - in + /// [`LocalDecl`] statements. For arguments, that information is kept in + /// [`arguments`]. This `Arena`'s only role is to assign a unique `Handle` + /// to each of them, and track their definitions' spans for use in + /// diagnostics. + /// + /// In the AST, when an [`Ident`] expression refers to a local variable or + /// argument, its [`IdentExpr`] holds the referent's `Handle` in this + /// arena. + /// + /// During lowering, [`LocalDecl`] statements add entries to a per-function + /// table that maps `Handle` values to their Naga representations, + /// accessed via [`StatementContext::local_table`] and + /// [`RuntimeExpressionContext::local_table`]. This table is then consulted when + /// lowering subsequent [`Ident`] expressions. + /// + /// [`LocalDecl`]: StatementKind::LocalDecl + /// [`arguments`]: Function::arguments + /// [`Ident`]: Expression::Ident + /// [`StatementContext::local_table`]: StatementContext::local_table + /// [`RuntimeExpressionContext::local_table`]: RuntimeExpressionContext::local_table locals: &'out mut Arena, /// Identifiers used by the current global declaration that have no local definition. 
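The doc comment relocated above leans on the idea that an arena of a zero-sized type still does useful work. A toy illustration of that design (standalone; naga's real `Arena` and `Span` types differ):

```rust
/// Zero-sized stand-in: names, types, etc. live in `LocalDecl` statements.
struct Local;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct LocalHandle(u32);

/// All this arena stores per `Local` is the span of its definition, yet it
/// still hands out unique, comparable handles for `Ident` expressions to hold.
#[derive(Default)]
struct LocalArena {
    spans: Vec<(u32, u32)>, // (start, end) byte offsets for diagnostics
}

impl LocalArena {
    fn append(&mut self, _value: Local, span: (u32, u32)) -> LocalHandle {
        self.spans.push(span);
        LocalHandle(self.spans.len() as u32 - 1)
    }

    fn get_span(&self, handle: LocalHandle) -> (u32, u32) {
        self.spans[handle.0 as usize]
    }
}
```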
@@ -2158,7 +2179,6 @@ impl Parser { arguments, result, body, - locals, }; // done diff --git a/naga/src/front/wgsl/to_wgsl.rs b/naga/src/front/wgsl/to_wgsl.rs index 63bc9f7317..ec3af8edd4 100644 --- a/naga/src/front/wgsl/to_wgsl.rs +++ b/naga/src/front/wgsl/to_wgsl.rs @@ -175,7 +175,7 @@ impl crate::StorageFormat { Sf::Bgra8Unorm => "bgra8unorm", Sf::Rgb10a2Uint => "rgb10a2uint", Sf::Rgb10a2Unorm => "rgb10a2unorm", - Sf::Rg11b10Float => "rg11b10float", + Sf::Rg11b10UFloat => "rg11b10float", Sf::Rg32Uint => "rg32uint", Sf::Rg32Sint => "rg32sint", Sf::Rg32Float => "rg32float", diff --git a/naga/src/lib.rs b/naga/src/lib.rs index 8ed7527922..60e5a1f47b 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -615,7 +615,7 @@ pub enum StorageFormat { // Packed 32-bit formats Rgb10a2Uint, Rgb10a2Unorm, - Rg11b10Float, + Rg11b10UFloat, // 64-bit formats Rg32Uint, @@ -873,7 +873,7 @@ pub enum Literal { } /// Pipeline-overridable constant. -#[derive(Debug, Clone)] +#[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] @@ -891,8 +891,7 @@ pub struct Override { } /// Constant value. -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] +#[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] @@ -954,7 +953,7 @@ pub struct ResourceBinding { } /// Variable defined at module level. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] @@ -1198,8 +1197,8 @@ pub enum MathFunction { ReverseBits, ExtractBits, InsertBits, - FindLsb, - FindMsb, + FirstTrailingBit, + FirstLeadingBit, // data packing Pack4x8snorm, Pack4x8unorm, @@ -1337,7 +1336,7 @@ bitflags::bitflags! { const STORAGE = 1 << 0; /// Barrier affects all [`AddressSpace::WorkGroup`] accesses. const WORK_GROUP = 1 << 1; - /// Barrier synchronizes execution across all invocations within a subgroup that exectue this instruction. + /// Barrier synchronizes execution across all invocations within a subgroup that execute this instruction. const SUB_GROUP = 1 << 2; } } @@ -1354,8 +1353,7 @@ bitflags::bitflags! { /// /// [`Constant`]: Expression::Constant /// [`Override`]: Expression::Override -#[derive(Clone, Debug)] -#[cfg_attr(test, derive(PartialEq))] +#[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs index b5c821f412..deaa9c93c7 100644 --- a/naga/src/proc/constant_evaluator.rs +++ b/naga/src/proc/constant_evaluator.rs @@ -27,6 +27,8 @@ macro_rules! gen_component_wise_extractor { scalar_kinds: [$( $scalar_kind:ident ),* $(,)?], ) => { /// A subset of [`Literal`]s intended to be used for implementing numeric built-ins. 
+ #[derive(Debug)] + #[cfg_attr(test, derive(PartialEq))] enum $target { $( #[doc = concat!( @@ -1231,6 +1233,12 @@ impl<'a> ConstantEvaluator<'a> { crate::MathFunction::ReverseBits => { component_wise_concrete_int!(self, span, [arg], |e| { Ok([e.reverse_bits()]) }) } + crate::MathFunction::FirstTrailingBit => { + component_wise_concrete_int(self, span, [arg], |ci| Ok(first_trailing_bit(ci))) + } + crate::MathFunction::FirstLeadingBit => { + component_wise_concrete_int(self, span, [arg], |ci| Ok(first_leading_bit(ci))) + } fun => Err(ConstantEvaluatorError::NotImplemented(format!( "{fun:?} built-in function" @@ -2096,6 +2104,174 @@ impl<'a> ConstantEvaluator<'a> { } } +fn first_trailing_bit(concrete_int: ConcreteInt<1>) -> ConcreteInt<1> { + // NOTE: Bit indices for this built-in start at 0 at the "right" (or LSB). For example, a value + // of 1 means the least significant bit is set. Therefore, an input of `0x[80 00…]` would + // return a right-to-left bit index of 31. + let trailing_zeros_to_bit_idx = |e: u32| -> u32 { + match e { + idx @ 0..=31 => idx, + 32 => u32::MAX, + _ => unreachable!(), + } + }; + match concrete_int { + ConcreteInt::U32([e]) => ConcreteInt::U32([trailing_zeros_to_bit_idx(e.trailing_zeros())]), + ConcreteInt::I32([e]) => { + ConcreteInt::I32([trailing_zeros_to_bit_idx(e.trailing_zeros()) as i32]) + } + } +} + +#[test] +fn first_trailing_bit_smoke() { + assert_eq!( + first_trailing_bit(ConcreteInt::I32([0])), + ConcreteInt::I32([-1]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([1])), + ConcreteInt::I32([0]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([2])), + ConcreteInt::I32([1]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([-1])), + ConcreteInt::I32([0]), + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([i32::MIN])), + ConcreteInt::I32([31]), + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([i32::MAX])), + ConcreteInt::I32([0]), + ); + for idx in 0..32 { + assert_eq!( + first_trailing_bit(ConcreteInt::I32([1 << idx])), + ConcreteInt::I32([idx]) + ) + } + + assert_eq!( + first_trailing_bit(ConcreteInt::U32([0])), + ConcreteInt::U32([u32::MAX]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::U32([1])), + ConcreteInt::U32([0]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::U32([2])), + ConcreteInt::U32([1]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::U32([1 << 31])), + ConcreteInt::U32([31]), + ); + assert_eq!( + first_trailing_bit(ConcreteInt::U32([u32::MAX])), + ConcreteInt::U32([0]), + ); + for idx in 0..32 { + assert_eq!( + first_trailing_bit(ConcreteInt::U32([1 << idx])), + ConcreteInt::U32([idx]) + ) + } +} + +fn first_leading_bit(concrete_int: ConcreteInt<1>) -> ConcreteInt<1> { + // NOTE: Bit indices for this built-in start at 0 at the "right" (or LSB). For example, 1 means + // the least significant bit is set. Therefore, an input of 1 would return a right-to-left bit + // index of 0.
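+ // Worked cases, matching the smoke tests below: an input of 1u has 31 leading + // zeros, so the mapping below yields 31 - 31 == 0; an input of 1u << 31 has no + // leading zeros and yields 31; an input of 0 has 32 and maps to u32::MAX, the + // all-ones "not found" value (which reads back as -1 when the operand is an i32).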
+ let rtl_to_ltr_bit_idx = |e: u32| -> u32 { + match e { + idx @ 0..=31 => 31 - idx, + 32 => u32::MAX, + _ => unreachable!(), + } + }; + match concrete_int { + ConcreteInt::I32([e]) => ConcreteInt::I32([{ + let rtl_bit_index = if e.is_negative() { + e.leading_ones() + } else { + e.leading_zeros() + }; + rtl_to_ltr_bit_idx(rtl_bit_index) as i32 + }]), + ConcreteInt::U32([e]) => ConcreteInt::U32([rtl_to_ltr_bit_idx(e.leading_zeros())]), + } +} + +#[test] +fn first_leading_bit_smoke() { + assert_eq!( + first_leading_bit(ConcreteInt::I32([-1])), + ConcreteInt::I32([-1]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([0])), + ConcreteInt::I32([-1]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([1])), + ConcreteInt::I32([0]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([-2])), + ConcreteInt::I32([0]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([1234 + 4567])), + ConcreteInt::I32([12]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([i32::MAX])), + ConcreteInt::I32([30]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([i32::MIN])), + ConcreteInt::I32([30]) + ); + // NOTE: Ignore the sign bit, which is a separate (above) case. + for idx in 0..(32 - 1) { + assert_eq!( + first_leading_bit(ConcreteInt::I32([1 << idx])), + ConcreteInt::I32([idx]) + ); + } + for idx in 1..(32 - 1) { + assert_eq!( + first_leading_bit(ConcreteInt::I32([-(1 << idx)])), + ConcreteInt::I32([idx - 1]) + ); + } + + assert_eq!( + first_leading_bit(ConcreteInt::U32([0])), + ConcreteInt::U32([u32::MAX]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::U32([1])), + ConcreteInt::U32([0]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::U32([u32::MAX])), + ConcreteInt::U32([31]) + ); + for idx in 0..32 { + assert_eq!( + first_leading_bit(ConcreteInt::U32([1 << idx])), + ConcreteInt::U32([idx]) + ) + } +} + /// Trait for conversions of abstract values to concrete types. trait TryFromAbstract: Sized { /// Convert an abstract literal `value` to `Self`. diff --git a/naga/src/proc/index.rs b/naga/src/proc/index.rs index 48b987ce85..555b08d2c3 100644 --- a/naga/src/proc/index.rs +++ b/naga/src/proc/index.rs @@ -112,21 +112,15 @@ pub struct BoundsCheckPolicies { /// This controls the behavior of [`ImageLoad`] expressions when a coordinate, /// texture array index, level of detail, or multisampled sample number is out of range. /// - /// [`ImageLoad`]: crate::Expression::ImageLoad - #[cfg_attr(feature = "deserialize", serde(default))] - pub image_load: BoundsCheckPolicy, - - /// How should the generated code handle image texel stores that are out - /// of range? - /// - /// This controls the behavior of [`ImageStore`] statements when a coordinate, - /// texture array index, level of detail, or multisampled sample number is out of range. - /// - /// This policy should't be needed since all backends should ignore OOB writes. + /// There is no corresponding policy for [`ImageStore`] statements. All the + /// platforms we support already discard out-of-bounds image stores, + /// effectively implementing the "skip write" part of [`ReadZeroSkipWrite`]. /// + /// [`ImageLoad`]: crate::Expression::ImageLoad /// [`ImageStore`]: crate::Statement::ImageStore + /// [`ReadZeroSkipWrite`]: BoundsCheckPolicy::ReadZeroSkipWrite #[cfg_attr(feature = "deserialize", serde(default))] - pub image_store: BoundsCheckPolicy, + pub image_load: BoundsCheckPolicy, /// How should the generated code handle binding array indexes that are out of bounds. 
#[cfg_attr(feature = "deserialize", serde(default))] @@ -173,10 +167,7 @@ impl BoundsCheckPolicies { /// Return `true` if any of `self`'s policies are `policy`. pub fn contains(&self, policy: BoundsCheckPolicy) -> bool { - self.index == policy - || self.buffer == policy - || self.image_load == policy - || self.image_store == policy + self.index == policy || self.buffer == policy || self.image_load == policy } } diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index 86d2b11f25..642c016615 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -48,7 +48,7 @@ impl From<super::StorageFormat> for super::ScalarKind { Sf::Bgra8Unorm => Sk::Float, Sf::Rgb10a2Uint => Sk::Uint, Sf::Rgb10a2Unorm => Sk::Float, - Sf::Rg11b10Float => Sk::Float, + Sf::Rg11b10UFloat => Sk::Float, Sf::Rg32Uint => Sk::Uint, Sf::Rg32Sint => Sk::Sint, Sf::Rg32Float => Sk::Float, @@ -484,8 +484,8 @@ impl super::MathFunction { Self::ReverseBits => 1, Self::ExtractBits => 3, Self::InsertBits => 4, - Self::FindLsb => 1, - Self::FindMsb => 1, + Self::FirstTrailingBit => 1, + Self::FirstLeadingBit => 1, // data packing Self::Pack4x8snorm => 1, Self::Pack4x8unorm => 1, diff --git a/naga/src/proc/typifier.rs b/naga/src/proc/typifier.rs index 0a02900c4a..d8af0cd236 100644 --- a/naga/src/proc/typifier.rs +++ b/naga/src/proc/typifier.rs @@ -788,8 +788,8 @@ impl<'a> ResolveContext<'a> { Mf::ReverseBits | Mf::ExtractBits | Mf::InsertBits | - Mf::FindLsb | - Mf::FindMsb => match *res_arg.inner_with(types) { + Mf::FirstTrailingBit | + Mf::FirstLeadingBit => match *res_arg.inner_with(types) { Ti::Scalar(scalar @ crate::Scalar { kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, .. diff --git a/naga/src/valid/analyzer.rs b/naga/src/valid/analyzer.rs index 058d91c63b..89b3da6a4c 100644 --- a/naga/src/valid/analyzer.rs +++ b/naga/src/valid/analyzer.rs @@ -1,10 +1,9 @@ -/*! Module analyzer. - -Figures out the following properties: - - control flow uniformity - - texture/sampler pairs - - expression reference counts -!*/ +//! Module analyzer. +//! +//! Figures out the following properties: +//! - control flow uniformity +//! - texture/sampler pairs +//!
- expression reference counts use super::{ExpressionError, FunctionError, ModuleInfo, ShaderStages, ValidationFlags}; use crate::span::{AddSpan as _, WithSpan}; @@ -594,15 +593,14 @@ impl FunctionInfo { E::FunctionArgument(index) => { let arg = &resolve_context.arguments[index as usize]; let uniform = match arg.binding { - Some(crate::Binding::BuiltIn(built_in)) => match built_in { + Some(crate::Binding::BuiltIn( // per-polygon built-ins are uniform crate::BuiltIn::FrontFacing // per-work-group built-ins are uniform | crate::BuiltIn::WorkGroupId | crate::BuiltIn::WorkGroupSize - | crate::BuiltIn::NumWorkGroups => true, - _ => false, - }, + | crate::BuiltIn::NumWorkGroups) + ) => true, // only flat inputs are uniform Some(crate::Binding::Location { interpolation: Some(crate::Interpolation::Flat), diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs index 89bceae061..1d1420aef6 100644 --- a/naga/src/valid/expression.rs +++ b/naga/src/valid/expression.rs @@ -1350,8 +1350,8 @@ impl super::Validator { | Mf::CountTrailingZeros | Mf::CountOneBits | Mf::ReverseBits - | Mf::FindMsb - | Mf::FindLsb => { + | Mf::FirstLeadingBit + | Mf::FirstTrailingBit => { if arg1_ty.is_some() || arg2_ty.is_some() || arg3_ty.is_some() { return Err(ExpressionError::WrongArgumentCount(fun)); } @@ -1696,7 +1696,7 @@ pub fn check_literal_value(literal: crate::Literal) -> Result<(), LiteralError> Ok(()) } -#[cfg(all(test, feature = "validate"))] +#[cfg(test)] /// Validate a module containing the given expression, expecting an error. fn validate_with_expression( expr: crate::Expression, @@ -1719,7 +1719,7 @@ fn validate_with_expression( validator.validate(&module) } -#[cfg(all(test, feature = "validate"))] +#[cfg(test)] /// Validate a module containing the given constant expression, expecting an error. fn validate_with_const_expression( expr: crate::Expression, @@ -1736,7 +1736,6 @@ fn validate_with_const_expression( } /// Using F64 in a function's expression arena is forbidden. -#[cfg(feature = "validate")] #[test] fn f64_runtime_literals() { let result = validate_with_expression( @@ -1748,7 +1747,7 @@ fn f64_runtime_literals() { error, crate::valid::ValidationError::Function { source: super::FunctionError::Expression { - source: super::ExpressionError::Literal(super::LiteralError::Width( + source: ExpressionError::Literal(LiteralError::Width( super::r#type::WidthError::MissingCapability { name: "f64", flag: "FLOAT64", @@ -1768,7 +1767,6 @@ fn f64_runtime_literals() { } /// Using F64 in a module's constant expression arena is forbidden. -#[cfg(feature = "validate")] #[test] fn f64_const_literals() { let result = validate_with_const_expression( @@ -1779,7 +1777,7 @@ fn f64_const_literals() { assert!(matches!( error, crate::valid::ValidationError::ConstExpression { - source: super::ConstExpressionError::Literal(super::LiteralError::Width( + source: ConstExpressionError::Literal(LiteralError::Width( super::r#type::WidthError::MissingCapability { name: "f64", flag: "FLOAT64", @@ -1795,48 +1793,3 @@ fn f64_const_literals() { ); assert!(result.is_ok()); } - -/// Using I64 in a function's expression arena is forbidden. -#[cfg(feature = "validate")] -#[test] -fn i64_runtime_literals() { - let result = validate_with_expression( - crate::Expression::Literal(crate::Literal::I64(1729)), - // There is no capability that enables this. 
- super::Capabilities::all(), - ); - let error = result.unwrap_err().into_inner(); - assert!(matches!( - error, - crate::valid::ValidationError::Function { - source: super::FunctionError::Expression { - source: super::ExpressionError::Literal(super::LiteralError::Width( - super::r#type::WidthError::Unsupported64Bit - ),), - .. - }, - .. - } - )); -} - -/// Using I64 in a module's constant expression arena is forbidden. -#[cfg(feature = "validate")] -#[test] -fn i64_const_literals() { - let result = validate_with_const_expression( - crate::Expression::Literal(crate::Literal::I64(1729)), - // There is no capability that enables this. - super::Capabilities::all(), - ); - let error = result.unwrap_err().into_inner(); - assert!(matches!( - error, - crate::valid::ValidationError::ConstExpression { - source: super::ConstExpressionError::Literal(super::LiteralError::Width( - super::r#type::WidthError::Unsupported64Bit, - ),), - .. - } - )); -} diff --git a/naga/src/valid/handles.rs b/naga/src/valid/handles.rs index 4d46776a71..f8be76d026 100644 --- a/naga/src/valid/handles.rs +++ b/naga/src/valid/handles.rs @@ -16,10 +16,10 @@ impl super::Validator { /// Validates that all handles within `module` are: /// /// * Valid, in the sense that they contain indices within each arena structure inside the - /// [`crate::Module`] type. + /// [`crate::Module`] type. /// * No arena contents contain any items that have forward dependencies; that is, the value - /// associated with a handle only may contain references to handles in the same arena that - /// were constructed before it. + /// associated with a handle only may contain references to handles in the same arena that + /// were constructed before it. /// /// By validating the above conditions, we free up subsequent logic to assume that handle /// accesses are infallible. 
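The ordering invariant the handles.rs comment above relies on, that an item may only reference handles minted before it in the same arena, can be checked in a single forward pass. Below is a minimal sketch of such a check, with `Handle` flattened to a bare index and the error type invented for illustration; naga's real validator is richer and span-aware:

    /// Illustrative stand-in; naga's `Handle` is a typed wrapper, not a bare index.
    type Handle = usize;

    enum HandleError {
        OutOfBounds { handle: Handle, len: usize },
        ForwardDependency { item: usize, refers_to: Handle },
    }

    /// Every item may only refer to strictly earlier items in the same arena.
    fn validate_no_forward_deps<T>(
        arena: &[T],
        deps_of: impl Fn(&T) -> Vec<Handle>,
    ) -> Result<(), HandleError> {
        for (index, item) in arena.iter().enumerate() {
            for dep in deps_of(item) {
                if dep >= arena.len() {
                    return Err(HandleError::OutOfBounds { handle: dep, len: arena.len() });
                }
                if dep >= index {
                    // A handle at or past our own position is a forward dependency.
                    return Err(HandleError::ForwardDependency { item: index, refers_to: dep });
                }
            }
        }
        Ok(())
    }

Once this pass succeeds, later stages can follow handles without re-checking them, which is exactly the infallibility the comment grants to subsequent logic.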
diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index d9a986df7e..c314ec2ac8 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -533,14 +533,13 @@ impl Validator { let decl_ty = &gctx.types[o.ty].inner; match decl_ty { - &crate::TypeInner::Scalar(scalar) => match scalar { + &crate::TypeInner::Scalar( crate::Scalar::BOOL | crate::Scalar::I32 | crate::Scalar::U32 | crate::Scalar::F32 - | crate::Scalar::F64 => {} - _ => return Err(OverrideError::TypeNotScalar), - }, + | crate::Scalar::F64, + ) => {} _ => return Err(OverrideError::TypeNotScalar), } diff --git a/naga/tests/in/atomicOps-int64-min-max.wgsl b/naga/tests/in/atomicOps-int64-min-max.wgsl index 94e6aa6862..fdedd8b4da 100644 --- a/naga/tests/in/atomicOps-int64-min-max.wgsl +++ b/naga/tests/in/atomicOps-int64-min-max.wgsl @@ -9,19 +9,21 @@ var<storage, read_write> storage_atomic_scalar: atomic<u64>; var<storage, read_write> storage_atomic_arr: array<atomic<u64>, 2>; @group(0) @binding(2) var<storage, read_write> storage_struct: Struct; +@group(0) @binding(3) +var<uniform> input: u64; @compute @workgroup_size(2) fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) { - atomicMax(&storage_atomic_scalar, 1lu); - atomicMax(&storage_atomic_arr[1], 1lu); + atomicMax(&storage_atomic_scalar, input); + atomicMax(&storage_atomic_arr[1], 1 + input); atomicMax(&storage_struct.atomic_scalar, 1lu); - atomicMax(&storage_struct.atomic_arr[1], 1lu); + atomicMax(&storage_struct.atomic_arr[1], u64(id.x)); workgroupBarrier(); - atomicMin(&storage_atomic_scalar, 1lu); - atomicMin(&storage_atomic_arr[1], 1lu); + atomicMin(&storage_atomic_scalar, input); + atomicMin(&storage_atomic_arr[1], 1 + input); atomicMin(&storage_struct.atomic_scalar, 1lu); - atomicMin(&storage_struct.atomic_arr[1], 1lu); + atomicMin(&storage_struct.atomic_arr[1], u64(id.x)); } diff --git a/naga/tests/in/binding-arrays.param.ron b/naga/tests/in/binding-arrays.param.ron index 39d6c03664..56a4983709 100644 --- a/naga/tests/in/binding-arrays.param.ron +++ b/naga/tests/in/binding-arrays.param.ron @@ -42,6 +42,5 @@ index: ReadZeroSkipWrite, buffer: ReadZeroSkipWrite, image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ) ) diff --git a/naga/tests/in/bounds-check-image-restrict.param.ron b/naga/tests/in/bounds-check-image-restrict.param.ron index d7ff0f006b..19f7399068 100644 --- a/naga/tests/in/bounds-check-image-restrict.param.ron +++ b/naga/tests/in/bounds-check-image-restrict.param.ron @@ -1,7 +1,6 @@ ( bounds_check_policies: ( image_load: Restrict, - image_store: Restrict, ), spv: ( version: (1, 1), diff --git a/naga/tests/in/bounds-check-image-rzsw.param.ron b/naga/tests/in/bounds-check-image-rzsw.param.ron index b256790e15..e818d7a3ba 100644 --- a/naga/tests/in/bounds-check-image-rzsw.param.ron +++ b/naga/tests/in/bounds-check-image-rzsw.param.ron @@ -1,7 +1,6 @@ ( bounds_check_policies: ( image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ), spv: ( version: (1, 1), diff --git a/naga/tests/in/control-flow.wgsl b/naga/tests/in/control-flow.wgsl index 5a0ef1cbbf..a25c899a44 100644 --- a/naga/tests/in/control-flow.wgsl +++ b/naga/tests/in/control-flow.wgsl @@ -88,3 +88,96 @@ fn loop_switch_continue(x: i32) { } } } + +fn loop_switch_continue_nesting(x: i32, y: i32, z: i32) { + loop { + switch x { + case 1: { + continue; + } + case 2: { + switch y { + case 1: { + continue; + } + default: { + loop { + switch z { + case 1: { + continue; + } + default: {} + } + } + } + } + } + default: {} + } + + + // Degenerate switch with continue + switch y { + default: { + continue; + } + } + } + + // In separate loop to avoid spv validation
error: + // See https://github.com/gfx-rs/wgpu/issues/5658 + loop { + // Nested degenerate switch with continue + switch y { + case 1, default: { + switch z { + default: { + continue; + } + } + } + } + } +} + +// Cases with some of the loop nested switches not containing continues. +// See `continue_forward` module in `naga`. +fn loop_switch_omit_continue_variable_checks(x: i32, y: i32, z: i32, w: i32) { + // switch in loop with no continues, we expect checks after the switch + // statement to not be generated + var pos: i32 = 0; + loop { + switch x { + case 1: { + pos = 1; + } + default: {} + } + // check here can be omitted + } + + loop { + switch x { + case 1: {} + case 2: { + switch y { + case 1: { + continue; + } + default: { + switch z { + case 1: { + pos = 2; + } + default: {} + } + // check here can be omitted + } + } + // check needs to be generated here + } + default: {} + } + // check needs to be generated here + } +} diff --git a/naga/tests/in/pointers.param.ron b/naga/tests/in/pointers.param.ron index fc40272838..c3b4d8880b 100644 --- a/naga/tests/in/pointers.param.ron +++ b/naga/tests/in/pointers.param.ron @@ -1,7 +1,6 @@ ( bounds_check_policies: ( image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ), spv: ( version: (1, 2), diff --git a/naga/tests/in/policy-mix.param.ron b/naga/tests/in/policy-mix.param.ron index e5469157ed..31e80e4c52 100644 --- a/naga/tests/in/policy-mix.param.ron +++ b/naga/tests/in/policy-mix.param.ron @@ -3,7 +3,6 @@ index: Restrict, buffer: Unchecked, image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ), spv: ( version: (1, 1), diff --git a/naga/tests/in/ray-query.wgsl b/naga/tests/in/ray-query.wgsl index 4826547ded..0af8c7c95f 100644 --- a/naga/tests/in/ray-query.wgsl +++ b/naga/tests/in/ray-query.wgsl @@ -1,6 +1,3 @@ -@group(0) @binding(0) -var acc_struct: acceleration_structure; - /* let RAY_FLAG_NONE = 0x00u; let RAY_FLAG_OPAQUE = 0x01u; @@ -43,6 +40,18 @@ struct RayIntersection { } */ +fn query_loop(pos: vec3, dir: vec3, acs: acceleration_structure) -> RayIntersection { + var rq: ray_query; + rayQueryInitialize(&rq, acs, RayDesc(RAY_FLAG_TERMINATE_ON_FIRST_HIT, 0xFFu, 0.1, 100.0, pos, dir)); + + while (rayQueryProceed(&rq)) {} + + return rayQueryGetCommittedIntersection(&rq); +} + +@group(0) @binding(0) +var acc_struct: acceleration_structure; + struct Output { visible: u32, normal: vec3, @@ -58,16 +67,14 @@ fn get_torus_normal(world_point: vec3, intersection: RayIntersection) -> ve return normalize(world_point - world_point_on_guiding_line); } + + @compute @workgroup_size(1) fn main() { - var rq: ray_query; - + let pos = vec3(0.0); let dir = vec3(0.0, 1.0, 0.0); - rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_TERMINATE_ON_FIRST_HIT, 0xFFu, 0.1, 100.0, vec3(0.0), dir)); - - while (rayQueryProceed(&rq)) {} + let intersection = query_loop(pos, dir, acc_struct); - let intersection = rayQueryGetCommittedIntersection(&rq); output.visible = u32(intersection.kind == RAY_QUERY_INTERSECTION_NONE); output.normal = get_torus_normal(dir * intersection.t, intersection); } diff --git a/naga/tests/in/resource-binding-map.param.ron b/naga/tests/in/resource-binding-map.param.ron index 25e7b054b0..a700a33f2a 100644 --- a/naga/tests/in/resource-binding-map.param.ron +++ b/naga/tests/in/resource-binding-map.param.ron @@ -49,6 +49,5 @@ index: ReadZeroSkipWrite, buffer: ReadZeroSkipWrite, image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ) ) diff --git a/naga/tests/out/glsl/control-flow.main.Compute.glsl 
b/naga/tests/out/glsl/control-flow.main.Compute.glsl index b877f9cb69..391fca84f4 100644 --- a/naga/tests/out/glsl/control-flow.main.Compute.glsl +++ b/naga/tests/out/glsl/control-flow.main.Compute.glsl @@ -7,11 +7,9 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; void switch_default_break(int i) { - switch(i) { - default: { - break; - } - } + do { + break; + } while(false); } void switch_case_break() { @@ -40,6 +38,110 @@ void loop_switch_continue(int x) { return; } +void loop_switch_continue_nesting(int x_1, int y, int z) { + while(true) { + switch(x_1) { + case 1: { + continue; + } + case 2: { + switch(y) { + case 1: { + continue; + } + default: { + while(true) { + switch(z) { + case 1: { + continue; + } + default: { + break; + } + } + } + break; + } + } + break; + } + default: { + break; + } + } + bool should_continue = false; + do { + should_continue = true; + break; + } while(false); + if (should_continue) { + continue; + } + } + while(true) { + bool should_continue_1 = false; + do { + do { + should_continue_1 = true; + break; + } while(false); + if (should_continue_1) { + break; + } + } while(false); + if (should_continue_1) { + continue; + } + } + return; +} + +void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) { + int pos_1 = 0; + while(true) { + switch(x_2) { + case 1: { + pos_1 = 1; + break; + } + default: { + break; + } + } + } + while(true) { + switch(x_2) { + case 1: { + break; + } + case 2: { + switch(y_1) { + case 1: { + continue; + } + default: { + switch(z_1) { + case 1: { + pos_1 = 2; + break; + } + default: { + break; + } + } + break; + } + } + break; + } + default: { + break; + } + } + } + return; +} + void main() { uvec3 global_id = gl_GlobalInvocationID; int pos = 0; @@ -47,12 +149,9 @@ void main() { barrier(); memoryBarrierShared(); barrier(); - switch(1) { - default: { - pos = 1; - break; - } - } + do { + pos = 1; + } while(false); int _e4 = pos; switch(_e4) { case 1: { diff --git a/naga/tests/out/glsl/math-functions.main.Fragment.glsl b/naga/tests/out/glsl/math-functions.main.Fragment.glsl index 7f91571dcc..4ab85269e1 100644 --- a/naga/tests/out/glsl/math-functions.main.Fragment.glsl +++ b/naga/tests/out/glsl/math-functions.main.Fragment.glsl @@ -65,14 +65,10 @@ void main() { ivec4 sign_b = ivec4(-1, -1, -1, -1); vec4 sign_d = vec4(-1.0, -1.0, -1.0, -1.0); int const_dot = ( + ivec2(0).x * ivec2(0).x + ivec2(0).y * ivec2(0).y); - uint first_leading_bit_abs = uint(findMSB(0u)); - int flb_a = findMSB(-1); - ivec2 flb_b = findMSB(ivec2(-1)); - uvec2 flb_c = uvec2(findMSB(uvec2(1u))); - int ftb_a = findLSB(-1); - uint ftb_b = uint(findLSB(1u)); - ivec2 ftb_c = findLSB(ivec2(-1)); - uvec2 ftb_d = uvec2(findLSB(uvec2(1u))); + ivec2 flb_b = ivec2(-1, -1); + uvec2 flb_c = uvec2(0u, 0u); + ivec2 ftb_c = ivec2(0, 0); + uvec2 ftb_d = uvec2(0u, 0u); uvec2 ctz_e = uvec2(32u, 32u); ivec2 ctz_f = ivec2(32, 32); uvec2 ctz_g = uvec2(0u, 0u); diff --git a/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl index 8c52e5b3b3..989a52b78b 100644 --- a/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl +++ b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl @@ -13,18 +13,23 @@ struct Struct { RWByteAddressBuffer storage_atomic_scalar : register(u0); RWByteAddressBuffer storage_atomic_arr : register(u1); RWByteAddressBuffer storage_struct : register(u2); +cbuffer input : register(b3) { uint64_t input; } [numthreads(2, 1, 1)] void cs_main(uint3 id : SV_GroupThreadID) { - 
storage_atomic_scalar.InterlockedMax(0, 1uL); - storage_atomic_arr.InterlockedMax(8, 1uL); - storage_struct.InterlockedMax(0, 1uL); - storage_struct.InterlockedMax(8+8, 1uL); + uint64_t _e3 = input; + storage_atomic_scalar.InterlockedMax64(0, _e3); + uint64_t _e7 = input; + storage_atomic_arr.InterlockedMax64(8, (1uL + _e7)); + storage_struct.InterlockedMax64(0, 1uL); + storage_struct.InterlockedMax64(8+8, uint64_t(id.x)); GroupMemoryBarrierWithGroupSync(); - storage_atomic_scalar.InterlockedMin(0, 1uL); - storage_atomic_arr.InterlockedMin(8, 1uL); - storage_struct.InterlockedMin(0, 1uL); - storage_struct.InterlockedMin(8+8, 1uL); + uint64_t _e20 = input; + storage_atomic_scalar.InterlockedMin64(0, _e20); + uint64_t _e24 = input; + storage_atomic_arr.InterlockedMin64(8, (1uL + _e24)); + storage_struct.InterlockedMin64(0, 1uL); + storage_struct.InterlockedMin64(8+8, uint64_t(id.x)); return; } diff --git a/naga/tests/out/hlsl/atomicOps-int64.hlsl b/naga/tests/out/hlsl/atomicOps-int64.hlsl index 973cf07309..ea88f81753 100644 --- a/naga/tests/out/hlsl/atomicOps-int64.hlsl +++ b/naga/tests/out/hlsl/atomicOps-int64.hlsl @@ -44,72 +44,72 @@ void cs_main(uint3 id : SV_GroupThreadID, uint3 __local_invocation_id : SV_Group uint64_t l6_ = workgroup_struct.atomic_scalar; int64_t l7_ = workgroup_struct.atomic_arr[1]; GroupMemoryBarrierWithGroupSync(); - uint64_t _e51; storage_atomic_scalar.InterlockedAdd(0, 1uL, _e51); - int64_t _e55; storage_atomic_arr.InterlockedAdd(8, 1L, _e55); - uint64_t _e59; storage_struct.InterlockedAdd(0, 1uL, _e59); - int64_t _e64; storage_struct.InterlockedAdd(8+8, 1L, _e64); + uint64_t _e51; storage_atomic_scalar.InterlockedAdd64(0, 1uL, _e51); + int64_t _e55; storage_atomic_arr.InterlockedAdd64(8, 1L, _e55); + uint64_t _e59; storage_struct.InterlockedAdd64(0, 1uL, _e59); + int64_t _e64; storage_struct.InterlockedAdd64(8+8, 1L, _e64); uint64_t _e67; InterlockedAdd(workgroup_atomic_scalar, 1uL, _e67); int64_t _e71; InterlockedAdd(workgroup_atomic_arr[1], 1L, _e71); uint64_t _e75; InterlockedAdd(workgroup_struct.atomic_scalar, 1uL, _e75); int64_t _e80; InterlockedAdd(workgroup_struct.atomic_arr[1], 1L, _e80); GroupMemoryBarrierWithGroupSync(); - uint64_t _e83; storage_atomic_scalar.InterlockedAdd(0, -1uL, _e83); - int64_t _e87; storage_atomic_arr.InterlockedAdd(8, -1L, _e87); - uint64_t _e91; storage_struct.InterlockedAdd(0, -1uL, _e91); - int64_t _e96; storage_struct.InterlockedAdd(8+8, -1L, _e96); + uint64_t _e83; storage_atomic_scalar.InterlockedAdd64(0, -1uL, _e83); + int64_t _e87; storage_atomic_arr.InterlockedAdd64(8, -1L, _e87); + uint64_t _e91; storage_struct.InterlockedAdd64(0, -1uL, _e91); + int64_t _e96; storage_struct.InterlockedAdd64(8+8, -1L, _e96); uint64_t _e99; InterlockedAdd(workgroup_atomic_scalar, -1uL, _e99); int64_t _e103; InterlockedAdd(workgroup_atomic_arr[1], -1L, _e103); uint64_t _e107; InterlockedAdd(workgroup_struct.atomic_scalar, -1uL, _e107); int64_t _e112; InterlockedAdd(workgroup_struct.atomic_arr[1], -1L, _e112); GroupMemoryBarrierWithGroupSync(); - storage_atomic_scalar.InterlockedMax(0, 1uL); - storage_atomic_arr.InterlockedMax(8, 1L); - storage_struct.InterlockedMax(0, 1uL); - storage_struct.InterlockedMax(8+8, 1L); + storage_atomic_scalar.InterlockedMax64(0, 1uL); + storage_atomic_arr.InterlockedMax64(8, 1L); + storage_struct.InterlockedMax64(0, 1uL); + storage_struct.InterlockedMax64(8+8, 1L); InterlockedMax(workgroup_atomic_scalar, 1uL); InterlockedMax(workgroup_atomic_arr[1], 1L); InterlockedMax(workgroup_struct.atomic_scalar, 1uL); 
InterlockedMax(workgroup_struct.atomic_arr[1], 1L); GroupMemoryBarrierWithGroupSync(); - storage_atomic_scalar.InterlockedMin(0, 1uL); - storage_atomic_arr.InterlockedMin(8, 1L); - storage_struct.InterlockedMin(0, 1uL); - storage_struct.InterlockedMin(8+8, 1L); + storage_atomic_scalar.InterlockedMin64(0, 1uL); + storage_atomic_arr.InterlockedMin64(8, 1L); + storage_struct.InterlockedMin64(0, 1uL); + storage_struct.InterlockedMin64(8+8, 1L); InterlockedMin(workgroup_atomic_scalar, 1uL); InterlockedMin(workgroup_atomic_arr[1], 1L); InterlockedMin(workgroup_struct.atomic_scalar, 1uL); InterlockedMin(workgroup_struct.atomic_arr[1], 1L); GroupMemoryBarrierWithGroupSync(); - uint64_t _e163; storage_atomic_scalar.InterlockedAnd(0, 1uL, _e163); - int64_t _e167; storage_atomic_arr.InterlockedAnd(8, 1L, _e167); - uint64_t _e171; storage_struct.InterlockedAnd(0, 1uL, _e171); - int64_t _e176; storage_struct.InterlockedAnd(8+8, 1L, _e176); + uint64_t _e163; storage_atomic_scalar.InterlockedAnd64(0, 1uL, _e163); + int64_t _e167; storage_atomic_arr.InterlockedAnd64(8, 1L, _e167); + uint64_t _e171; storage_struct.InterlockedAnd64(0, 1uL, _e171); + int64_t _e176; storage_struct.InterlockedAnd64(8+8, 1L, _e176); uint64_t _e179; InterlockedAnd(workgroup_atomic_scalar, 1uL, _e179); int64_t _e183; InterlockedAnd(workgroup_atomic_arr[1], 1L, _e183); uint64_t _e187; InterlockedAnd(workgroup_struct.atomic_scalar, 1uL, _e187); int64_t _e192; InterlockedAnd(workgroup_struct.atomic_arr[1], 1L, _e192); GroupMemoryBarrierWithGroupSync(); - uint64_t _e195; storage_atomic_scalar.InterlockedOr(0, 1uL, _e195); - int64_t _e199; storage_atomic_arr.InterlockedOr(8, 1L, _e199); - uint64_t _e203; storage_struct.InterlockedOr(0, 1uL, _e203); - int64_t _e208; storage_struct.InterlockedOr(8+8, 1L, _e208); + uint64_t _e195; storage_atomic_scalar.InterlockedOr64(0, 1uL, _e195); + int64_t _e199; storage_atomic_arr.InterlockedOr64(8, 1L, _e199); + uint64_t _e203; storage_struct.InterlockedOr64(0, 1uL, _e203); + int64_t _e208; storage_struct.InterlockedOr64(8+8, 1L, _e208); uint64_t _e211; InterlockedOr(workgroup_atomic_scalar, 1uL, _e211); int64_t _e215; InterlockedOr(workgroup_atomic_arr[1], 1L, _e215); uint64_t _e219; InterlockedOr(workgroup_struct.atomic_scalar, 1uL, _e219); int64_t _e224; InterlockedOr(workgroup_struct.atomic_arr[1], 1L, _e224); GroupMemoryBarrierWithGroupSync(); - uint64_t _e227; storage_atomic_scalar.InterlockedXor(0, 1uL, _e227); - int64_t _e231; storage_atomic_arr.InterlockedXor(8, 1L, _e231); - uint64_t _e235; storage_struct.InterlockedXor(0, 1uL, _e235); - int64_t _e240; storage_struct.InterlockedXor(8+8, 1L, _e240); + uint64_t _e227; storage_atomic_scalar.InterlockedXor64(0, 1uL, _e227); + int64_t _e231; storage_atomic_arr.InterlockedXor64(8, 1L, _e231); + uint64_t _e235; storage_struct.InterlockedXor64(0, 1uL, _e235); + int64_t _e240; storage_struct.InterlockedXor64(8+8, 1L, _e240); uint64_t _e243; InterlockedXor(workgroup_atomic_scalar, 1uL, _e243); int64_t _e247; InterlockedXor(workgroup_atomic_arr[1], 1L, _e247); uint64_t _e251; InterlockedXor(workgroup_struct.atomic_scalar, 1uL, _e251); int64_t _e256; InterlockedXor(workgroup_struct.atomic_arr[1], 1L, _e256); - uint64_t _e259; storage_atomic_scalar.InterlockedExchange(0, 1uL, _e259); - int64_t _e263; storage_atomic_arr.InterlockedExchange(8, 1L, _e263); - uint64_t _e267; storage_struct.InterlockedExchange(0, 1uL, _e267); - int64_t _e272; storage_struct.InterlockedExchange(8+8, 1L, _e272); + uint64_t _e259; 
storage_atomic_scalar.InterlockedExchange64(0, 1uL, _e259); + int64_t _e263; storage_atomic_arr.InterlockedExchange64(8, 1L, _e263); + uint64_t _e267; storage_struct.InterlockedExchange64(0, 1uL, _e267); + int64_t _e272; storage_struct.InterlockedExchange64(8+8, 1L, _e272); uint64_t _e275; InterlockedExchange(workgroup_atomic_scalar, 1uL, _e275); int64_t _e279; InterlockedExchange(workgroup_atomic_arr[1], 1L, _e279); uint64_t _e283; InterlockedExchange(workgroup_struct.atomic_scalar, 1uL, _e283); diff --git a/naga/tests/out/hlsl/control-flow.hlsl b/naga/tests/out/hlsl/control-flow.hlsl index 1e253add21..2438858a8a 100644 --- a/naga/tests/out/hlsl/control-flow.hlsl +++ b/naga/tests/out/hlsl/control-flow.hlsl @@ -1,10 +1,8 @@ void switch_default_break(int i) { - switch(i) { - default: { - break; - } - } + do { + break; + } while(false); } void switch_case_break() @@ -23,14 +21,149 @@ void switch_case_break() void loop_switch_continue(int x) { while(true) { + bool should_continue = false; switch(x) { case 1: { - continue; + should_continue = true; + break; } default: { break; } } + if (should_continue) { + continue; + } + } + return; +} + +void loop_switch_continue_nesting(int x_1, int y, int z) +{ + while(true) { + bool should_continue_1 = false; + switch(x_1) { + case 1: { + should_continue_1 = true; + break; + } + case 2: { + switch(y) { + case 1: { + should_continue_1 = true; + break; + } + default: { + while(true) { + bool should_continue_2 = false; + switch(z) { + case 1: { + should_continue_2 = true; + break; + } + default: { + break; + } + } + if (should_continue_2) { + continue; + } + } + break; + } + } + if (should_continue_1) { + break; + } + break; + } + default: { + break; + } + } + if (should_continue_1) { + continue; + } + bool should_continue_3 = false; + do { + should_continue_3 = true; + break; + } while(false); + if (should_continue_3) { + continue; + } + } + while(true) { + bool should_continue_4 = false; + do { + do { + should_continue_4 = true; + break; + } while(false); + if (should_continue_4) { + break; + } + } while(false); + if (should_continue_4) { + continue; + } + } + return; +} + +void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) +{ + int pos_1 = 0; + + while(true) { + bool should_continue_5 = false; + switch(x_2) { + case 1: { + pos_1 = 1; + break; + } + default: { + break; + } + } + } + while(true) { + bool should_continue_6 = false; + switch(x_2) { + case 1: { + break; + } + case 2: { + switch(y_1) { + case 1: { + should_continue_6 = true; + break; + } + default: { + switch(z_1) { + case 1: { + pos_1 = 2; + break; + } + default: { + break; + } + } + break; + } + } + if (should_continue_6) { + break; + } + break; + } + default: { + break; + } + } + if (should_continue_6) { + continue; + } } return; } @@ -42,12 +175,9 @@ void main(uint3 global_id : SV_DispatchThreadID) DeviceMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync(); - switch(1) { - default: { - pos = 1; - break; - } - } + do { + pos = 1; + } while(false); int _e4 = pos; switch(_e4) { case 1: { diff --git a/naga/tests/out/hlsl/math-functions.hlsl b/naga/tests/out/hlsl/math-functions.hlsl index c1a771c25d..a02b2b1280 100644 --- a/naga/tests/out/hlsl/math-functions.hlsl +++ b/naga/tests/out/hlsl/math-functions.hlsl @@ -79,14 +79,10 @@ void main() int4 sign_b = int4(-1, -1, -1, -1); float4 sign_d = float4(-1.0, -1.0, -1.0, -1.0); int const_dot = dot(ZeroValueint2(), ZeroValueint2()); - uint first_leading_bit_abs = firstbithigh(0u); - int flb_a = 
asint(firstbithigh(-1)); - int2 flb_b = asint(firstbithigh((-1).xx)); - uint2 flb_c = firstbithigh((1u).xx); - int ftb_a = asint(firstbitlow(-1)); - uint ftb_b = firstbitlow(1u); - int2 ftb_c = asint(firstbitlow((-1).xx)); - uint2 ftb_d = firstbitlow((1u).xx); + int2 flb_b = int2(-1, -1); + uint2 flb_c = uint2(0u, 0u); + int2 ftb_c = int2(0, 0); + uint2 ftb_d = uint2(0u, 0u); uint2 ctz_e = uint2(32u, 32u); int2 ctz_f = int2(32, 32); uint2 ctz_g = uint2(0u, 0u); diff --git a/naga/tests/out/msl/atomicOps-int64-min-max.msl b/naga/tests/out/msl/atomicOps-int64-min-max.msl index a5dd1c97f0..f69a2a49bd 100644 --- a/naga/tests/out/msl/atomicOps-int64-min-max.msl +++ b/naga/tests/out/msl/atomicOps-int64-min-max.msl @@ -19,15 +19,20 @@ kernel void cs_main( , device metal::atomic_ulong& storage_atomic_scalar [[user(fake0)]] , device type_1& storage_atomic_arr [[user(fake0)]] , device Struct& storage_struct [[user(fake0)]] +, constant ulong& input [[user(fake0)]] ) { - metal::atomic_max_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed); - metal::atomic_max_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + ulong _e3 = input; + metal::atomic_max_explicit(&storage_atomic_scalar, _e3, metal::memory_order_relaxed); + ulong _e7 = input; + metal::atomic_max_explicit(&storage_atomic_arr.inner[1], 1uL + _e7, metal::memory_order_relaxed); metal::atomic_max_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed); - metal::atomic_max_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + metal::atomic_max_explicit(&storage_struct.atomic_arr.inner[1], static_cast(id.x), metal::memory_order_relaxed); metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); - metal::atomic_min_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed); - metal::atomic_min_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + ulong _e20 = input; + metal::atomic_min_explicit(&storage_atomic_scalar, _e20, metal::memory_order_relaxed); + ulong _e24 = input; + metal::atomic_min_explicit(&storage_atomic_arr.inner[1], 1uL + _e24, metal::memory_order_relaxed); metal::atomic_min_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed); - metal::atomic_min_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + metal::atomic_min_explicit(&storage_struct.atomic_arr.inner[1], static_cast(id.x), metal::memory_order_relaxed); return; } diff --git a/naga/tests/out/msl/binding-arrays.msl b/naga/tests/out/msl/binding-arrays.msl index f3548c9e79..75f787a9f2 100644 --- a/naga/tests/out/msl/binding-arrays.msl +++ b/naga/tests/out/msl/binding-arrays.msl @@ -150,17 +150,11 @@ fragment main_Output main_( metal::float4 _e278 = v4_; v4_ = _e278 + _e277; metal::float4 _e282 = v4_; - if (metal::all(metal::uint2(pix) < metal::uint2(texture_array_storage[0].get_width(), texture_array_storage[0].get_height()))) { - texture_array_storage[0].write(_e282, metal::uint2(pix)); - } + texture_array_storage[0].write(_e282, metal::uint2(pix)); metal::float4 _e285 = v4_; - if (metal::all(metal::uint2(pix) < metal::uint2(texture_array_storage[uniform_index].get_width(), texture_array_storage[uniform_index].get_height()))) { - texture_array_storage[uniform_index].write(_e285, metal::uint2(pix)); - } + texture_array_storage[uniform_index].write(_e285, metal::uint2(pix)); metal::float4 _e288 = v4_; - if (metal::all(metal::uint2(pix) < metal::uint2(texture_array_storage[non_uniform_index].get_width(), 
texture_array_storage[non_uniform_index].get_height()))) { - texture_array_storage[non_uniform_index].write(_e288, metal::uint2(pix)); - } + texture_array_storage[non_uniform_index].write(_e288, metal::uint2(pix)); metal::uint2 _e289 = u2_; uint _e290 = u1_; metal::float2 v2_ = static_cast(_e289 + metal::uint2(_e290)); diff --git a/naga/tests/out/msl/bounds-check-image-restrict.msl b/naga/tests/out/msl/bounds-check-image-restrict.msl index 6a3c43f0ce..138c0f6455 100644 --- a/naga/tests/out/msl/bounds-check-image-restrict.msl +++ b/naga/tests/out/msl/bounds-check-image-restrict.msl @@ -111,7 +111,7 @@ void test_textureStore_1d( metal::float4 value, metal::texture1d image_storage_1d ) { - image_storage_1d.write(value, metal::min(uint(coords_10), image_storage_1d.get_width() - 1)); + image_storage_1d.write(value, uint(coords_10)); return; } @@ -120,7 +120,7 @@ void test_textureStore_2d( metal::float4 value_1, metal::texture2d image_storage_2d ) { - image_storage_2d.write(value_1, metal::min(metal::uint2(coords_11), metal::uint2(image_storage_2d.get_width(), image_storage_2d.get_height()) - 1)); + image_storage_2d.write(value_1, metal::uint2(coords_11)); return; } @@ -130,7 +130,7 @@ void test_textureStore_2d_array_u( metal::float4 value_2, metal::texture2d_array image_storage_2d_array ) { - image_storage_2d_array.write(value_2, metal::min(metal::uint2(coords_12), metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()) - 1), metal::min(uint(array_index), image_storage_2d_array.get_array_size() - 1)); + image_storage_2d_array.write(value_2, metal::uint2(coords_12), array_index); return; } @@ -140,7 +140,7 @@ void test_textureStore_2d_array_s( metal::float4 value_3, metal::texture2d_array image_storage_2d_array ) { - image_storage_2d_array.write(value_3, metal::min(metal::uint2(coords_13), metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()) - 1), metal::min(uint(array_index_1), image_storage_2d_array.get_array_size() - 1)); + image_storage_2d_array.write(value_3, metal::uint2(coords_13), array_index_1); return; } @@ -149,7 +149,7 @@ void test_textureStore_3d( metal::float4 value_4, metal::texture3d image_storage_3d ) { - image_storage_3d.write(value_4, metal::min(metal::uint3(coords_14), metal::uint3(image_storage_3d.get_width(), image_storage_3d.get_height(), image_storage_3d.get_depth()) - 1)); + image_storage_3d.write(value_4, metal::uint3(coords_14)); return; } diff --git a/naga/tests/out/msl/bounds-check-image-rzsw.msl b/naga/tests/out/msl/bounds-check-image-rzsw.msl index 5db0c9df94..f73b8e3e32 100644 --- a/naga/tests/out/msl/bounds-check-image-rzsw.msl +++ b/naga/tests/out/msl/bounds-check-image-rzsw.msl @@ -110,9 +110,7 @@ void test_textureStore_1d( metal::float4 value, metal::texture1d image_storage_1d ) { - if (uint(coords_10) < image_storage_1d.get_width()) { - image_storage_1d.write(value, uint(coords_10)); - } + image_storage_1d.write(value, uint(coords_10)); return; } @@ -121,9 +119,7 @@ void test_textureStore_2d( metal::float4 value_1, metal::texture2d image_storage_2d ) { - if (metal::all(metal::uint2(coords_11) < metal::uint2(image_storage_2d.get_width(), image_storage_2d.get_height()))) { - image_storage_2d.write(value_1, metal::uint2(coords_11)); - } + image_storage_2d.write(value_1, metal::uint2(coords_11)); return; } @@ -133,9 +129,7 @@ void test_textureStore_2d_array_u( metal::float4 value_2, metal::texture2d_array image_storage_2d_array ) { - if (uint(array_index) < image_storage_2d_array.get_array_size() && 
metal::all(metal::uint2(coords_12) < metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()))) { - image_storage_2d_array.write(value_2, metal::uint2(coords_12), array_index); - } + image_storage_2d_array.write(value_2, metal::uint2(coords_12), array_index); return; } @@ -145,9 +139,7 @@ void test_textureStore_2d_array_s( metal::float4 value_3, metal::texture2d_array image_storage_2d_array ) { - if (uint(array_index_1) < image_storage_2d_array.get_array_size() && metal::all(metal::uint2(coords_13) < metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()))) { - image_storage_2d_array.write(value_3, metal::uint2(coords_13), array_index_1); - } + image_storage_2d_array.write(value_3, metal::uint2(coords_13), array_index_1); return; } @@ -156,9 +148,7 @@ void test_textureStore_3d( metal::float4 value_4, metal::texture3d image_storage_3d ) { - if (metal::all(metal::uint3(coords_14) < metal::uint3(image_storage_3d.get_width(), image_storage_3d.get_height(), image_storage_3d.get_depth()))) { - image_storage_3d.write(value_4, metal::uint3(coords_14)); - } + image_storage_3d.write(value_4, metal::uint3(coords_14)); return; } diff --git a/naga/tests/out/msl/control-flow.msl b/naga/tests/out/msl/control-flow.msl index 0d0e082e41..11771693aa 100644 --- a/naga/tests/out/msl/control-flow.msl +++ b/naga/tests/out/msl/control-flow.msl @@ -44,6 +44,114 @@ void loop_switch_continue( return; } +void loop_switch_continue_nesting( + int x_1, + int y, + int z +) { + while(true) { + switch(x_1) { + case 1: { + continue; + } + case 2: { + switch(y) { + case 1: { + continue; + } + default: { + while(true) { + switch(z) { + case 1: { + continue; + } + default: { + break; + } + } + } + break; + } + } + break; + } + default: { + break; + } + } + switch(y) { + default: { + continue; + } + } + } + while(true) { + switch(y) { + case 1: + default: { + switch(z) { + default: { + continue; + } + } + break; + } + } + } + return; +} + +void loop_switch_omit_continue_variable_checks( + int x_2, + int y_1, + int z_1, + int w +) { + int pos_1 = 0; + while(true) { + switch(x_2) { + case 1: { + pos_1 = 1; + break; + } + default: { + break; + } + } + } + while(true) { + switch(x_2) { + case 1: { + break; + } + case 2: { + switch(y_1) { + case 1: { + continue; + } + default: { + switch(z_1) { + case 1: { + pos_1 = 2; + break; + } + default: { + break; + } + } + break; + } + } + break; + } + default: { + break; + } + } + } + return; +} + struct main_Input { }; kernel void main_( diff --git a/naga/tests/out/msl/math-functions.msl b/naga/tests/out/msl/math-functions.msl index 0e6a5b24dc..559002c39b 100644 --- a/naga/tests/out/msl/math-functions.msl +++ b/naga/tests/out/msl/math-functions.msl @@ -67,16 +67,10 @@ fragment void main_( metal::int4 sign_b = metal::int4(-1, -1, -1, -1); metal::float4 sign_d = metal::float4(-1.0, -1.0, -1.0, -1.0); int const_dot = ( + metal::int2 {}.x * metal::int2 {}.x + metal::int2 {}.y * metal::int2 {}.y); - uint first_leading_bit_abs = metal::select(31 - metal::clz(0u), uint(-1), 0u == 0 || 0u == -1); - int flb_a = metal::select(31 - metal::clz(metal::select(-1, ~-1, -1 < 0)), int(-1), -1 == 0 || -1 == -1); - metal::int2 _e29 = metal::int2(-1); - metal::int2 flb_b = metal::select(31 - metal::clz(metal::select(_e29, ~_e29, _e29 < 0)), int2(-1), _e29 == 0 || _e29 == -1); - metal::uint2 _e32 = metal::uint2(1u); - metal::uint2 flb_c = metal::select(31 - metal::clz(_e32), uint2(-1), _e32 == 0 || _e32 == -1); - int ftb_a = (((metal::ctz(-1) + 1) % 33) 
- 1); - uint ftb_b = (((metal::ctz(1u) + 1) % 33) - 1); - metal::int2 ftb_c = (((metal::ctz(metal::int2(-1)) + 1) % 33) - 1); - metal::uint2 ftb_d = (((metal::ctz(metal::uint2(1u)) + 1) % 33) - 1); + metal::int2 flb_b = metal::int2(-1, -1); + metal::uint2 flb_c = metal::uint2(0u, 0u); + metal::int2 ftb_c = metal::int2(0, 0); + metal::uint2 ftb_d = metal::uint2(0u, 0u); metal::uint2 ctz_e = metal::uint2(32u, 32u); metal::int2 ctz_f = metal::int2(32, 32); metal::uint2 ctz_g = metal::uint2(0u, 0u); diff --git a/naga/tests/out/msl/ray-query.msl b/naga/tests/out/msl/ray-query.msl index 17b856427f..fbdaef5484 100644 --- a/naga/tests/out/msl/ray-query.msl +++ b/naga/tests/out/msl/ray-query.msl @@ -13,11 +13,6 @@ constexpr metal::uint _map_intersection_type(const metal::raytracing::intersecti ty==metal::raytracing::intersection_type::bounding_box ? 4 : 0; } -struct Output { - uint visible; - char _pad1[12]; - metal::float3 normal; -}; struct RayIntersection { uint kind; float t; @@ -40,6 +35,34 @@ struct RayDesc { metal::float3 origin; metal::float3 dir; }; +struct Output { + uint visible; + char _pad1[12]; + metal::float3 normal; +}; + +RayIntersection query_loop( + metal::float3 pos, + metal::float3 dir, + metal::raytracing::instance_acceleration_structure acs +) { + _RayQuery rq = {}; + RayDesc _e8 = RayDesc {4u, 255u, 0.1, 100.0, pos, dir}; + rq.intersector.assume_geometry_type(metal::raytracing::geometry_type::triangle); + rq.intersector.set_opacity_cull_mode((_e8.flags & 64) != 0 ? metal::raytracing::opacity_cull_mode::opaque : (_e8.flags & 128) != 0 ? metal::raytracing::opacity_cull_mode::non_opaque : metal::raytracing::opacity_cull_mode::none); + rq.intersector.force_opacity((_e8.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e8.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); + rq.intersector.accept_any_intersection((_e8.flags & 4) != 0); + rq.intersection = rq.intersector.intersect(metal::raytracing::ray(_e8.origin, _e8.dir, _e8.tmin, _e8.tmax), acs, _e8.cull_mask); rq.ready = true; + while(true) { + bool _e9 = rq.ready; + rq.ready = false; + if (_e9) { + } else { + break; + } + } + return RayIntersection {_map_intersection_type(rq.intersection.type), rq.intersection.distance, rq.intersection.user_instance_id, rq.intersection.instance_id, {}, rq.intersection.geometry_id, rq.intersection.primitive_id, rq.intersection.triangle_barycentric_coord, rq.intersection.triangle_front_facing, {}, rq.intersection.object_to_world_transform, rq.intersection.world_to_object_transform}; +} metal::float3 get_torus_normal( metal::float3 world_point, @@ -55,25 +78,11 @@ kernel void main_( metal::raytracing::instance_acceleration_structure acc_struct [[user(fake0)]] , device Output& output [[user(fake0)]] ) { - _RayQuery rq = {}; - metal::float3 dir = metal::float3(0.0, 1.0, 0.0); - RayDesc _e12 = RayDesc {4u, 255u, 0.1, 100.0, metal::float3(0.0), dir}; - rq.intersector.assume_geometry_type(metal::raytracing::geometry_type::triangle); - rq.intersector.set_opacity_cull_mode((_e12.flags & 64) != 0 ? metal::raytracing::opacity_cull_mode::opaque : (_e12.flags & 128) != 0 ? metal::raytracing::opacity_cull_mode::non_opaque : metal::raytracing::opacity_cull_mode::none); - rq.intersector.force_opacity((_e12.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e12.flags & 2) != 0 ? 
metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); - rq.intersector.accept_any_intersection((_e12.flags & 4) != 0); - rq.intersection = rq.intersector.intersect(metal::raytracing::ray(_e12.origin, _e12.dir, _e12.tmin, _e12.tmax), acc_struct, _e12.cull_mask); rq.ready = true; - while(true) { - bool _e13 = rq.ready; - rq.ready = false; - if (_e13) { - } else { - break; - } - } - RayIntersection intersection_1 = RayIntersection {_map_intersection_type(rq.intersection.type), rq.intersection.distance, rq.intersection.user_instance_id, rq.intersection.instance_id, {}, rq.intersection.geometry_id, rq.intersection.primitive_id, rq.intersection.triangle_barycentric_coord, rq.intersection.triangle_front_facing, {}, rq.intersection.object_to_world_transform, rq.intersection.world_to_object_transform}; - output.visible = static_cast(intersection_1.kind == 0u); - metal::float3 _e25 = get_torus_normal(dir * intersection_1.t, intersection_1); - output.normal = _e25; + metal::float3 pos_1 = metal::float3(0.0); + metal::float3 dir_1 = metal::float3(0.0, 1.0, 0.0); + RayIntersection _e7 = query_loop(pos_1, dir_1, acc_struct); + output.visible = static_cast(_e7.kind == 0u); + metal::float3 _e18 = get_torus_normal(dir_1 * _e7.t, _e7); + output.normal = _e18; return; } diff --git a/naga/tests/out/spv/atomicOps-int64-min-max.spvasm b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm index aa798f546f..2d31197b3b 100644 --- a/naga/tests/out/spv/atomicOps-int64-min-max.spvasm +++ b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 52 +; Bound: 67 OpCapability Shader OpCapability Int64Atomics OpCapability Int64 OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %22 "cs_main" %19 -OpExecutionMode %22 LocalSize 2 1 1 +OpEntryPoint GLCompute %25 "cs_main" %22 +OpExecutionMode %25 LocalSize 2 1 1 OpDecorate %4 ArrayStride 8 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 @@ -25,7 +25,11 @@ OpDecorate %15 DescriptorSet 0 OpDecorate %15 Binding 2 OpDecorate %16 Block OpMemberDecorate %16 0 Offset 0 -OpDecorate %19 BuiltIn LocalInvocationId +OpDecorate %18 DescriptorSet 0 +OpDecorate %18 Binding 3 +OpDecorate %19 Block +OpMemberDecorate %19 0 Offset 0 +OpDecorate %22 BuiltIn LocalInvocationId %2 = OpTypeVoid %3 = OpTypeInt 64 0 %6 = OpTypeInt 32 0 @@ -42,41 +46,56 @@ OpDecorate %19 BuiltIn LocalInvocationId %16 = OpTypeStruct %7 %17 = OpTypePointer StorageBuffer %16 %15 = OpVariable %17 StorageBuffer -%20 = OpTypePointer Input %8 -%19 = OpVariable %20 Input -%23 = OpTypeFunction %2 -%24 = OpTypePointer StorageBuffer %3 -%25 = OpConstant %6 0 -%27 = OpTypePointer StorageBuffer %4 -%29 = OpTypePointer StorageBuffer %7 -%31 = OpConstant %3 1 -%35 = OpTypeInt 32 1 -%34 = OpConstant %35 1 -%36 = OpConstant %6 64 -%38 = OpConstant %6 1 -%44 = OpConstant %6 264 -%22 = OpFunction %2 None %23 -%18 = OpLabel -%21 = OpLoad %8 %19 -%26 = OpAccessChain %24 %9 %25 -%28 = OpAccessChain %27 %12 %25 -%30 = OpAccessChain %29 %15 %25 -OpBranch %32 -%32 = OpLabel -%33 = OpAtomicUMax %3 %26 %34 %36 %31 -%39 = OpAccessChain %24 %28 %38 -%37 = OpAtomicUMax %3 %39 %34 %36 %31 -%41 = OpAccessChain %24 %30 %25 -%40 = OpAtomicUMax %3 %41 %34 %36 %31 -%43 = OpAccessChain %24 %30 %38 %38 -%42 = OpAtomicUMax %3 %43 %34 %36 %31 -OpControlBarrier %5 %5 %44 -%45 = OpAtomicUMin %3 %26 %34 %36 %31 -%47 = OpAccessChain %24 %28 %38 -%46 = 
OpAtomicUMin %3 %47 %34 %36 %31 -%49 = OpAccessChain %24 %30 %25 -%48 = OpAtomicUMin %3 %49 %34 %36 %31 -%51 = OpAccessChain %24 %30 %38 %38 -%50 = OpAtomicUMin %3 %51 %34 %36 %31 +%19 = OpTypeStruct %3 +%20 = OpTypePointer Uniform %19 +%18 = OpVariable %20 Uniform +%23 = OpTypePointer Input %8 +%22 = OpVariable %23 Input +%26 = OpTypeFunction %2 +%27 = OpTypePointer StorageBuffer %3 +%28 = OpConstant %6 0 +%30 = OpTypePointer StorageBuffer %4 +%32 = OpTypePointer StorageBuffer %7 +%34 = OpTypePointer Uniform %3 +%36 = OpConstant %3 1 +%41 = OpTypeInt 32 1 +%40 = OpConstant %41 1 +%42 = OpConstant %6 64 +%46 = OpConstant %6 1 +%54 = OpConstant %6 264 +%25 = OpFunction %2 None %26 +%21 = OpLabel +%24 = OpLoad %8 %22 +%29 = OpAccessChain %27 %9 %28 +%31 = OpAccessChain %30 %12 %28 +%33 = OpAccessChain %32 %15 %28 +%35 = OpAccessChain %34 %18 %28 +OpBranch %37 +%37 = OpLabel +%38 = OpLoad %3 %35 +%39 = OpAtomicUMax %3 %29 %40 %42 %38 +%43 = OpLoad %3 %35 +%44 = OpIAdd %3 %36 %43 +%47 = OpAccessChain %27 %31 %46 +%45 = OpAtomicUMax %3 %47 %40 %42 %44 +%49 = OpAccessChain %27 %33 %28 +%48 = OpAtomicUMax %3 %49 %40 %42 %36 +%50 = OpCompositeExtract %6 %24 0 +%51 = OpUConvert %3 %50 +%53 = OpAccessChain %27 %33 %46 %46 +%52 = OpAtomicUMax %3 %53 %40 %42 %51 +OpControlBarrier %5 %5 %54 +%55 = OpLoad %3 %35 +%56 = OpAtomicUMin %3 %29 %40 %42 %55 +%57 = OpLoad %3 %35 +%58 = OpIAdd %3 %36 %57 +%60 = OpAccessChain %27 %31 %46 +%59 = OpAtomicUMin %3 %60 %40 %42 %58 +%62 = OpAccessChain %27 %33 %28 +%61 = OpAtomicUMin %3 %62 %40 %42 %36 +%63 = OpCompositeExtract %6 %24 0 +%64 = OpUConvert %3 %63 +%66 = OpAccessChain %27 %33 %46 %46 +%65 = OpAtomicUMin %3 %66 %40 %42 %64 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/binding-arrays.spvasm b/naga/tests/out/spv/binding-arrays.spvasm index 143ee269af..af75dca492 100644 --- a/naga/tests/out/spv/binding-arrays.spvasm +++ b/naga/tests/out/spv/binding-arrays.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 428 +; Bound: 413 OpCapability Shader OpCapability ImageQuery OpCapability ShaderNonUniform @@ -77,8 +77,8 @@ OpDecorate %380 NonUniform OpDecorate %381 NonUniform OpDecorate %382 NonUniform OpDecorate %383 NonUniform -OpDecorate %405 NonUniform -OpDecorate %406 NonUniform +OpDecorate %395 NonUniform +OpDecorate %396 NonUniform %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -521,54 +521,30 @@ OpStore %72 %387 %389 = OpAccessChain %388 %36 %55 %390 = OpLoad %16 %389 %391 = OpLoad %22 %72 -%392 = OpImageQuerySize %64 %390 -%393 = OpULessThan %157 %65 %392 -%394 = OpAll %150 %393 -OpSelectionMerge %395 None -OpBranchConditional %394 %396 %395 -%396 = OpLabel OpImageWrite %390 %65 %391 -OpBranch %395 -%395 = OpLabel -%397 = OpAccessChain %388 %36 %77 -%398 = OpLoad %16 %397 -%399 = OpLoad %22 %72 -%400 = OpImageQuerySize %64 %398 -%401 = OpULessThan %157 %65 %400 -%402 = OpAll %150 %401 -OpSelectionMerge %403 None -OpBranchConditional %402 %404 %403 -%404 = OpLabel -OpImageWrite %398 %65 %399 -OpBranch %403 -%403 = OpLabel -%405 = OpAccessChain %388 %36 %78 -%406 = OpLoad %16 %405 -%407 = OpLoad %22 %72 -%408 = OpImageQuerySize %64 %406 -%409 = OpULessThan %157 %65 %408 -%410 = OpAll %150 %409 -OpSelectionMerge %411 None -OpBranchConditional %410 %412 %411 -%412 = OpLabel -OpImageWrite %406 %65 %407 -OpBranch %411 -%411 = OpLabel -%413 = OpLoad %23 %68 -%414 = OpLoad %3 %66 -%415 = OpCompositeConstruct %23 %414 %414 -%416 = OpIAdd %23 %413 %415 -%417 = OpConvertUToF %60 %416 -%418 = 
OpLoad %22 %72 -%419 = OpCompositeExtract %6 %417 0 -%420 = OpCompositeExtract %6 %417 1 -%421 = OpCompositeExtract %6 %417 0 -%422 = OpCompositeExtract %6 %417 1 -%423 = OpCompositeConstruct %22 %419 %420 %421 %422 -%424 = OpFAdd %22 %418 %423 -%425 = OpLoad %6 %70 -%426 = OpCompositeConstruct %22 %425 %425 %425 %425 -%427 = OpFAdd %22 %424 %426 -OpStore %50 %427 +%392 = OpAccessChain %388 %36 %77 +%393 = OpLoad %16 %392 +%394 = OpLoad %22 %72 +OpImageWrite %393 %65 %394 +%395 = OpAccessChain %388 %36 %78 +%396 = OpLoad %16 %395 +%397 = OpLoad %22 %72 +OpImageWrite %396 %65 %397 +%398 = OpLoad %23 %68 +%399 = OpLoad %3 %66 +%400 = OpCompositeConstruct %23 %399 %399 +%401 = OpIAdd %23 %398 %400 +%402 = OpConvertUToF %60 %401 +%403 = OpLoad %22 %72 +%404 = OpCompositeExtract %6 %402 0 +%405 = OpCompositeExtract %6 %402 1 +%406 = OpCompositeExtract %6 %402 0 +%407 = OpCompositeExtract %6 %402 1 +%408 = OpCompositeConstruct %22 %404 %405 %406 %407 +%409 = OpFAdd %22 %403 %408 +%410 = OpLoad %6 %70 +%411 = OpCompositeConstruct %22 %410 %410 %410 %410 +%412 = OpFAdd %22 %409 %411 +OpStore %50 %412 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/bounds-check-image-restrict.spvasm b/naga/tests/out/spv/bounds-check-image-restrict.spvasm index 038685a559..7837602e08 100644 --- a/naga/tests/out/spv/bounds-check-image-restrict.spvasm +++ b/naga/tests/out/spv/bounds-check-image-restrict.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 299 +; Bound: 280 OpCapability Shader OpCapability Sampled1D OpCapability Image1D OpCapability ImageQuery %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %269 "fragment_shader" %267 -OpExecutionMode %269 OriginUpperLeft +OpEntryPoint Fragment %250 "fragment_shader" %248 +OpExecutionMode %250 OriginUpperLeft OpName %21 "image_1d" OpName %23 "image_2d" OpName %25 "image_2d_array" @@ -59,21 +59,21 @@ OpName %195 "test_textureLoad_depth_multisampled_2d" OpName %208 "coords" OpName %209 "value" OpName %210 "test_textureStore_1d" -OpName %218 "coords" -OpName %219 "value" -OpName %220 "test_textureStore_2d" -OpName %229 "coords" -OpName %230 "array_index" -OpName %231 "value" -OpName %232 "test_textureStore_2d_array_u" -OpName %243 "coords" -OpName %244 "array_index" -OpName %245 "value" -OpName %246 "test_textureStore_2d_array_s" -OpName %256 "coords" -OpName %257 "value" -OpName %258 "test_textureStore_3d" -OpName %269 "fragment_shader" +OpName %215 "coords" +OpName %216 "value" +OpName %217 "test_textureStore_2d" +OpName %222 "coords" +OpName %223 "array_index" +OpName %224 "value" +OpName %225 "test_textureStore_2d_array_u" +OpName %232 "coords" +OpName %233 "array_index" +OpName %234 "value" +OpName %235 "test_textureStore_2d_array_s" +OpName %241 "coords" +OpName %242 "value" +OpName %243 "test_textureStore_3d" +OpName %250 "fragment_shader" OpDecorate %21 DescriptorSet 0 OpDecorate %21 Binding 0 OpDecorate %23 DescriptorSet 0 @@ -102,7 +102,7 @@ OpDecorate %41 Binding 10 OpDecorate %43 NonReadable OpDecorate %43 DescriptorSet 0 OpDecorate %43 Binding 11 -OpDecorate %267 Location 0 +OpDecorate %248 Location 0 %2 = OpTypeVoid %4 = OpTypeFloat 32 %3 = OpTypeImage %4 1D 0 0 0 1 Unknown @@ -165,24 +165,20 @@ OpDecorate %267 Location 0 %187 = OpConstantComposite %12 %53 %53 %53 %202 = OpConstantComposite %8 %53 %53 %211 = OpTypeFunction %2 %5 %6 -%221 = OpTypeFunction %2 %8 %6 -%225 = OpConstantComposite %8 %53 %53 -%233 = OpTypeFunction %2 %8 %10 %6 -%239 = 
OpConstantComposite %12 %53 %53 %53 -%247 = OpTypeFunction %2 %8 %5 %6 -%252 = OpConstantComposite %12 %53 %53 %53 -%259 = OpTypeFunction %2 %12 %6 -%263 = OpConstantComposite %12 %53 %53 %53 -%268 = OpTypePointer Output %6 -%267 = OpVariable %268 Output -%270 = OpTypeFunction %2 -%280 = OpConstant %5 0 -%281 = OpConstantNull %8 -%282 = OpConstant %10 0 -%283 = OpConstantNull %12 -%284 = OpConstantNull %6 -%285 = OpConstant %4 0.0 -%286 = OpConstantComposite %6 %285 %285 %285 %285 +%218 = OpTypeFunction %2 %8 %6 +%226 = OpTypeFunction %2 %8 %10 %6 +%236 = OpTypeFunction %2 %8 %5 %6 +%244 = OpTypeFunction %2 %12 %6 +%249 = OpTypePointer Output %6 +%248 = OpVariable %249 Output +%251 = OpTypeFunction %2 +%261 = OpConstant %5 0 +%262 = OpConstantNull %8 +%263 = OpConstant %10 0 +%264 = OpConstantNull %12 +%265 = OpConstantNull %6 +%266 = OpConstant %4 0.0 +%267 = OpConstantComposite %6 %266 %266 %266 %266 %48 = OpFunction %6 None %49 %46 = OpFunctionParameter %5 %47 = OpFunctionParameter %5 @@ -364,93 +360,78 @@ OpFunctionEnd %212 = OpLoad %17 %37 OpBranch %213 %213 = OpLabel -%214 = OpImageQuerySize %5 %212 -%215 = OpISub %5 %214 %53 -%216 = OpExtInst %5 %1 UMin %208 %215 -OpImageWrite %212 %216 %209 +OpImageWrite %212 %208 %209 OpReturn OpFunctionEnd -%220 = OpFunction %2 None %221 -%218 = OpFunctionParameter %8 -%219 = OpFunctionParameter %6 -%217 = OpLabel -%222 = OpLoad %18 %39 -OpBranch %223 -%223 = OpLabel -%224 = OpImageQuerySize %8 %222 -%226 = OpISub %8 %224 %225 -%227 = OpExtInst %8 %1 UMin %218 %226 -OpImageWrite %222 %227 %219 +%217 = OpFunction %2 None %218 +%215 = OpFunctionParameter %8 +%216 = OpFunctionParameter %6 +%214 = OpLabel +%219 = OpLoad %18 %39 +OpBranch %220 +%220 = OpLabel +OpImageWrite %219 %215 %216 OpReturn OpFunctionEnd -%232 = OpFunction %2 None %233 -%229 = OpFunctionParameter %8 -%230 = OpFunctionParameter %10 -%231 = OpFunctionParameter %6 +%225 = OpFunction %2 None %226 +%222 = OpFunctionParameter %8 +%223 = OpFunctionParameter %10 +%224 = OpFunctionParameter %6 +%221 = OpLabel +%227 = OpLoad %19 %41 +OpBranch %228 %228 = OpLabel -%234 = OpLoad %19 %41 -OpBranch %235 -%235 = OpLabel -%236 = OpBitcast %5 %230 -%237 = OpCompositeConstruct %12 %229 %236 -%238 = OpImageQuerySize %12 %234 -%240 = OpISub %12 %238 %239 -%241 = OpExtInst %12 %1 UMin %237 %240 -OpImageWrite %234 %241 %231 +%229 = OpBitcast %5 %223 +%230 = OpCompositeConstruct %12 %222 %229 +OpImageWrite %227 %230 %224 OpReturn OpFunctionEnd -%246 = OpFunction %2 None %247 -%243 = OpFunctionParameter %8 -%244 = OpFunctionParameter %5 -%245 = OpFunctionParameter %6 -%242 = OpLabel -%248 = OpLoad %19 %41 -OpBranch %249 -%249 = OpLabel -%250 = OpCompositeConstruct %12 %243 %244 -%251 = OpImageQuerySize %12 %248 -%253 = OpISub %12 %251 %252 -%254 = OpExtInst %12 %1 UMin %250 %253 -OpImageWrite %248 %254 %245 +%235 = OpFunction %2 None %236 +%232 = OpFunctionParameter %8 +%233 = OpFunctionParameter %5 +%234 = OpFunctionParameter %6 +%231 = OpLabel +%237 = OpLoad %19 %41 +OpBranch %238 +%238 = OpLabel +%239 = OpCompositeConstruct %12 %232 %233 +OpImageWrite %237 %239 %234 OpReturn OpFunctionEnd -%258 = OpFunction %2 None %259 -%256 = OpFunctionParameter %12 -%257 = OpFunctionParameter %6 -%255 = OpLabel -%260 = OpLoad %20 %43 -OpBranch %261 -%261 = OpLabel -%262 = OpImageQuerySize %12 %260 -%264 = OpISub %12 %262 %263 -%265 = OpExtInst %12 %1 UMin %256 %264 -OpImageWrite %260 %265 %257 +%243 = OpFunction %2 None %244 +%241 = OpFunctionParameter %12 +%242 = OpFunctionParameter %6 +%240 = OpLabel +%245 = 
OpLoad %20 %43 +OpBranch %246 +%246 = OpLabel +OpImageWrite %245 %241 %242 OpReturn OpFunctionEnd -%269 = OpFunction %2 None %270 -%266 = OpLabel -%271 = OpLoad %3 %21 -%272 = OpLoad %7 %23 -%273 = OpLoad %9 %25 -%274 = OpLoad %11 %27 -%275 = OpLoad %13 %29 -%276 = OpLoad %17 %37 -%277 = OpLoad %18 %39 -%278 = OpLoad %19 %41 -%279 = OpLoad %20 %43 -OpBranch %287 -%287 = OpLabel -%288 = OpFunctionCall %6 %48 %280 %280 -%289 = OpFunctionCall %6 %63 %281 %280 -%290 = OpFunctionCall %6 %79 %281 %282 %280 -%291 = OpFunctionCall %6 %97 %281 %280 %280 -%292 = OpFunctionCall %6 %113 %283 %280 -%293 = OpFunctionCall %6 %128 %281 %280 -%294 = OpFunctionCall %2 %210 %280 %284 -%295 = OpFunctionCall %2 %220 %281 %284 -%296 = OpFunctionCall %2 %232 %281 %282 %284 -%297 = OpFunctionCall %2 %246 %281 %280 %284 -%298 = OpFunctionCall %2 %258 %283 %284 -OpStore %267 %286 +%250 = OpFunction %2 None %251 +%247 = OpLabel +%252 = OpLoad %3 %21 +%253 = OpLoad %7 %23 +%254 = OpLoad %9 %25 +%255 = OpLoad %11 %27 +%256 = OpLoad %13 %29 +%257 = OpLoad %17 %37 +%258 = OpLoad %18 %39 +%259 = OpLoad %19 %41 +%260 = OpLoad %20 %43 +OpBranch %268 +%268 = OpLabel +%269 = OpFunctionCall %6 %48 %261 %261 +%270 = OpFunctionCall %6 %63 %262 %261 +%271 = OpFunctionCall %6 %79 %262 %263 %261 +%272 = OpFunctionCall %6 %97 %262 %261 %261 +%273 = OpFunctionCall %6 %113 %264 %261 +%274 = OpFunctionCall %6 %128 %262 %261 +%275 = OpFunctionCall %2 %210 %261 %265 +%276 = OpFunctionCall %2 %217 %262 %265 +%277 = OpFunctionCall %2 %225 %262 %263 %265 +%278 = OpFunctionCall %2 %235 %262 %261 %265 +%279 = OpFunctionCall %2 %243 %264 %265 +OpStore %248 %267 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/bounds-check-image-rzsw.spvasm b/naga/tests/out/spv/bounds-check-image-rzsw.spvasm index a9eeb42047..9b8c091bba 100644 --- a/naga/tests/out/spv/bounds-check-image-rzsw.spvasm +++ b/naga/tests/out/spv/bounds-check-image-rzsw.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 326 +; Bound: 302 OpCapability Shader OpCapability Sampled1D OpCapability Image1D OpCapability ImageQuery %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %297 "fragment_shader" %295 -OpExecutionMode %297 OriginUpperLeft +OpEntryPoint Fragment %273 "fragment_shader" %271 +OpExecutionMode %273 OriginUpperLeft OpName %21 "image_1d" OpName %23 "image_2d" OpName %25 "image_2d_array" @@ -59,21 +59,21 @@ OpName %216 "test_textureLoad_depth_multisampled_2d" OpName %231 "coords" OpName %232 "value" OpName %233 "test_textureStore_1d" -OpName %242 "coords" -OpName %243 "value" -OpName %244 "test_textureStore_2d" -OpName %254 "coords" -OpName %255 "array_index" -OpName %256 "value" -OpName %257 "test_textureStore_2d_array_u" -OpName %269 "coords" -OpName %270 "array_index" -OpName %271 "value" -OpName %272 "test_textureStore_2d_array_s" -OpName %283 "coords" -OpName %284 "value" -OpName %285 "test_textureStore_3d" -OpName %297 "fragment_shader" +OpName %238 "coords" +OpName %239 "value" +OpName %240 "test_textureStore_2d" +OpName %245 "coords" +OpName %246 "array_index" +OpName %247 "value" +OpName %248 "test_textureStore_2d_array_u" +OpName %255 "coords" +OpName %256 "array_index" +OpName %257 "value" +OpName %258 "test_textureStore_2d_array_s" +OpName %264 "coords" +OpName %265 "value" +OpName %266 "test_textureStore_3d" +OpName %273 "fragment_shader" OpDecorate %21 DescriptorSet 0 OpDecorate %21 Binding 0 OpDecorate %23 DescriptorSet 0 @@ -102,7 +102,7 @@ OpDecorate %41 
Binding 10 OpDecorate %43 NonReadable OpDecorate %43 DescriptorSet 0 OpDecorate %43 Binding 11 -OpDecorate %295 Location 0 +OpDecorate %271 Location 0 %2 = OpTypeVoid %4 = OpTypeFloat 32 %3 = OpTypeImage %4 1D 0 0 0 1 Unknown @@ -159,19 +159,19 @@ OpDecorate %295 Location 0 %177 = OpTypeFunction %4 %8 %10 %5 %198 = OpTypeFunction %4 %8 %5 %5 %234 = OpTypeFunction %2 %5 %6 -%245 = OpTypeFunction %2 %8 %6 -%258 = OpTypeFunction %2 %8 %10 %6 -%273 = OpTypeFunction %2 %8 %5 %6 -%286 = OpTypeFunction %2 %12 %6 -%296 = OpTypePointer Output %6 -%295 = OpVariable %296 Output -%298 = OpTypeFunction %2 -%308 = OpConstant %5 0 -%309 = OpConstantNull %8 -%310 = OpConstant %10 0 -%311 = OpConstantNull %12 -%312 = OpConstant %4 0.0 -%313 = OpConstantComposite %6 %312 %312 %312 %312 +%241 = OpTypeFunction %2 %8 %6 +%249 = OpTypeFunction %2 %8 %10 %6 +%259 = OpTypeFunction %2 %8 %5 %6 +%267 = OpTypeFunction %2 %12 %6 +%272 = OpTypePointer Output %6 +%271 = OpVariable %272 Output +%274 = OpTypeFunction %2 +%284 = OpConstant %5 0 +%285 = OpConstantNull %8 +%286 = OpConstant %10 0 +%287 = OpConstantNull %12 +%288 = OpConstant %4 0.0 +%289 = OpConstantComposite %6 %288 %288 %288 %288 %48 = OpFunction %6 None %49 %46 = OpFunctionParameter %5 %47 = OpFunctionParameter %5 @@ -422,117 +422,78 @@ OpFunctionEnd %235 = OpLoad %17 %37 OpBranch %236 %236 = OpLabel -%237 = OpImageQuerySize %5 %235 -%238 = OpULessThan %52 %231 %237 -OpSelectionMerge %239 None -OpBranchConditional %238 %240 %239 -%240 = OpLabel OpImageWrite %235 %231 %232 -OpBranch %239 -%239 = OpLabel OpReturn OpFunctionEnd -%244 = OpFunction %2 None %245 -%242 = OpFunctionParameter %8 -%243 = OpFunctionParameter %6 -%241 = OpLabel -%246 = OpLoad %18 %39 -OpBranch %247 -%247 = OpLabel -%248 = OpImageQuerySize %8 %246 -%249 = OpULessThan %75 %242 %248 -%250 = OpAll %52 %249 -OpSelectionMerge %251 None -OpBranchConditional %250 %252 %251 -%252 = OpLabel -OpImageWrite %246 %242 %243 -OpBranch %251 -%251 = OpLabel +%240 = OpFunction %2 None %241 +%238 = OpFunctionParameter %8 +%239 = OpFunctionParameter %6 +%237 = OpLabel +%242 = OpLoad %18 %39 +OpBranch %243 +%243 = OpLabel +OpImageWrite %242 %238 %239 OpReturn OpFunctionEnd -%257 = OpFunction %2 None %258 -%254 = OpFunctionParameter %8 -%255 = OpFunctionParameter %10 -%256 = OpFunctionParameter %6 -%253 = OpLabel -%259 = OpLoad %19 %41 -OpBranch %260 -%260 = OpLabel -%261 = OpBitcast %5 %255 -%262 = OpCompositeConstruct %12 %254 %261 -%263 = OpImageQuerySize %12 %259 -%264 = OpULessThan %96 %262 %263 -%265 = OpAll %52 %264 -OpSelectionMerge %266 None -OpBranchConditional %265 %267 %266 -%267 = OpLabel -OpImageWrite %259 %262 %256 -OpBranch %266 -%266 = OpLabel +%248 = OpFunction %2 None %249 +%245 = OpFunctionParameter %8 +%246 = OpFunctionParameter %10 +%247 = OpFunctionParameter %6 +%244 = OpLabel +%250 = OpLoad %19 %41 +OpBranch %251 +%251 = OpLabel +%252 = OpBitcast %5 %246 +%253 = OpCompositeConstruct %12 %245 %252 +OpImageWrite %250 %253 %247 OpReturn OpFunctionEnd -%272 = OpFunction %2 None %273 -%269 = OpFunctionParameter %8 -%270 = OpFunctionParameter %5 -%271 = OpFunctionParameter %6 -%268 = OpLabel -%274 = OpLoad %19 %41 -OpBranch %275 -%275 = OpLabel -%276 = OpCompositeConstruct %12 %269 %270 -%277 = OpImageQuerySize %12 %274 -%278 = OpULessThan %96 %276 %277 -%279 = OpAll %52 %278 -OpSelectionMerge %280 None -OpBranchConditional %279 %281 %280 -%281 = OpLabel -OpImageWrite %274 %276 %271 -OpBranch %280 -%280 = OpLabel +%258 = OpFunction %2 None %259 +%255 = OpFunctionParameter %8 +%256 = 
OpFunctionParameter %5 +%257 = OpFunctionParameter %6 +%254 = OpLabel +%260 = OpLoad %19 %41 +OpBranch %261 +%261 = OpLabel +%262 = OpCompositeConstruct %12 %255 %256 +OpImageWrite %260 %262 %257 OpReturn OpFunctionEnd -%285 = OpFunction %2 None %286 -%283 = OpFunctionParameter %12 -%284 = OpFunctionParameter %6 -%282 = OpLabel -%287 = OpLoad %20 %43 -OpBranch %288 -%288 = OpLabel -%289 = OpImageQuerySize %12 %287 -%290 = OpULessThan %96 %283 %289 -%291 = OpAll %52 %290 -OpSelectionMerge %292 None -OpBranchConditional %291 %293 %292 -%293 = OpLabel -OpImageWrite %287 %283 %284 -OpBranch %292 -%292 = OpLabel +%266 = OpFunction %2 None %267 +%264 = OpFunctionParameter %12 +%265 = OpFunctionParameter %6 +%263 = OpLabel +%268 = OpLoad %20 %43 +OpBranch %269 +%269 = OpLabel +OpImageWrite %268 %264 %265 OpReturn OpFunctionEnd -%297 = OpFunction %2 None %298 -%294 = OpLabel -%299 = OpLoad %3 %21 -%300 = OpLoad %7 %23 -%301 = OpLoad %9 %25 -%302 = OpLoad %11 %27 -%303 = OpLoad %13 %29 -%304 = OpLoad %17 %37 -%305 = OpLoad %18 %39 -%306 = OpLoad %19 %41 -%307 = OpLoad %20 %43 -OpBranch %314 -%314 = OpLabel -%315 = OpFunctionCall %6 %48 %308 %308 -%316 = OpFunctionCall %6 %66 %309 %308 -%317 = OpFunctionCall %6 %85 %309 %310 %308 -%318 = OpFunctionCall %6 %106 %309 %308 %308 -%319 = OpFunctionCall %6 %124 %311 %308 -%320 = OpFunctionCall %6 %141 %309 %308 -%321 = OpFunctionCall %2 %233 %308 %53 -%322 = OpFunctionCall %2 %244 %309 %53 -%323 = OpFunctionCall %2 %257 %309 %310 %53 -%324 = OpFunctionCall %2 %272 %309 %308 %53 -%325 = OpFunctionCall %2 %285 %311 %53 -OpStore %295 %313 +%273 = OpFunction %2 None %274 +%270 = OpLabel +%275 = OpLoad %3 %21 +%276 = OpLoad %7 %23 +%277 = OpLoad %9 %25 +%278 = OpLoad %11 %27 +%279 = OpLoad %13 %29 +%280 = OpLoad %17 %37 +%281 = OpLoad %18 %39 +%282 = OpLoad %19 %41 +%283 = OpLoad %20 %43 +OpBranch %290 +%290 = OpLabel +%291 = OpFunctionCall %6 %48 %284 %284 +%292 = OpFunctionCall %6 %66 %285 %284 +%293 = OpFunctionCall %6 %85 %285 %286 %284 +%294 = OpFunctionCall %6 %106 %285 %284 %284 +%295 = OpFunctionCall %6 %124 %287 %284 +%296 = OpFunctionCall %6 %141 %285 %284 +%297 = OpFunctionCall %2 %233 %284 %53 +%298 = OpFunctionCall %2 %240 %285 %53 +%299 = OpFunctionCall %2 %248 %285 %286 %53 +%300 = OpFunctionCall %2 %258 %285 %284 %53 +%301 = OpFunctionCall %2 %266 %287 %53 +OpStore %271 %289 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/control-flow.spvasm b/naga/tests/out/spv/control-flow.spvasm index 2fc9337cfe..f3c3644b4f 100644 --- a/naga/tests/out/spv/control-flow.spvasm +++ b/naga/tests/out/spv/control-flow.spvasm @@ -1,13 +1,13 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 69 +; Bound: 134 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %36 "main" %33 -OpExecutionMode %36 LocalSize 1 1 1 -OpDecorate %33 BuiltIn GlobalInvocationId +OpEntryPoint GLCompute %104 "main" %101 +OpExecutionMode %104 LocalSize 1 1 1 +OpDecorate %101 BuiltIn GlobalInvocationId %2 = OpTypeVoid %4 = OpTypeInt 32 0 %3 = OpTypeVector %4 3 @@ -15,19 +15,21 @@ OpDecorate %33 BuiltIn GlobalInvocationId %9 = OpTypeFunction %2 %5 %15 = OpTypeFunction %2 %16 = OpConstant %5 0 -%34 = OpTypePointer Input %3 -%33 = OpVariable %34 Input -%37 = OpConstant %5 1 -%38 = OpConstant %5 2 -%39 = OpConstant %5 3 -%40 = OpConstant %5 4 -%41 = OpConstant %4 0 -%43 = OpTypePointer Function %5 -%44 = OpConstantNull %5 -%46 = OpConstant %4 2 -%47 = OpConstant %4 1 -%48 = OpConstant %4 72 -%49 = 
OpConstant %4 264 +%37 = OpTypeFunction %2 %5 %5 %5 +%73 = OpTypeFunction %2 %5 %5 %5 %5 +%74 = OpConstant %5 1 +%75 = OpConstant %5 2 +%77 = OpTypePointer Function %5 +%102 = OpTypePointer Input %3 +%101 = OpVariable %102 Input +%105 = OpConstant %5 3 +%106 = OpConstant %5 4 +%107 = OpConstant %4 0 +%109 = OpConstantNull %5 +%111 = OpConstant %4 2 +%112 = OpConstant %4 1 +%113 = OpConstant %4 72 +%114 = OpConstant %4 264 %8 = OpFunction %2 None %9 %7 = OpFunctionParameter %5 %6 = OpLabel @@ -76,63 +78,198 @@ OpBranch %25 %26 = OpLabel OpReturn OpFunctionEnd -%36 = OpFunction %2 None %15 +%36 = OpFunction %2 None %37 +%33 = OpFunctionParameter %5 +%34 = OpFunctionParameter %5 +%35 = OpFunctionParameter %5 %32 = OpLabel -%42 = OpVariable %43 Function %44 -%35 = OpLoad %3 %33 -OpBranch %45 +OpBranch %38 +%38 = OpLabel +OpBranch %39 +%39 = OpLabel +OpLoopMerge %40 %42 None +OpBranch %41 +%41 = OpLabel +OpSelectionMerge %43 None +OpSwitch %33 %46 1 %44 2 %45 +%44 = OpLabel +OpBranch %42 %45 = OpLabel -OpControlBarrier %46 %47 %48 -OpControlBarrier %46 %46 %49 -OpSelectionMerge %50 None -OpSwitch %37 %51 -%51 = OpLabel -OpStore %42 %37 +OpSelectionMerge %47 None +OpSwitch %34 %49 1 %48 +%48 = OpLabel +OpBranch %42 +%49 = OpLabel OpBranch %50 %50 = OpLabel -%52 = OpLoad %5 %42 -OpSelectionMerge %53 None -OpSwitch %52 %58 1 %54 2 %55 3 %56 4 %56 5 %57 6 %58 -%54 = OpLabel -OpStore %42 %16 -OpBranch %53 +OpLoopMerge %51 %53 None +OpBranch %52 +%52 = OpLabel +OpSelectionMerge %54 None +OpSwitch %35 %56 1 %55 %55 = OpLabel -OpStore %42 %37 OpBranch %53 %56 = OpLabel -OpStore %42 %38 -OpBranch %53 -%57 = OpLabel -OpStore %42 %39 -OpBranch %53 -%58 = OpLabel -OpStore %42 %40 +OpBranch %54 +%54 = OpLabel OpBranch %53 %53 = OpLabel -OpSelectionMerge %59 None -OpSwitch %41 %61 0 %60 -%60 = OpLabel -OpBranch %59 -%61 = OpLabel +OpBranch %50 +%51 = OpLabel +OpBranch %47 +%47 = OpLabel +OpBranch %43 +%46 = OpLabel +OpBranch %43 +%43 = OpLabel +OpSelectionMerge %57 None +OpSwitch %34 %58 +%58 = OpLabel +OpBranch %42 +%57 = OpLabel +OpBranch %42 +%42 = OpLabel +OpBranch %39 +%40 = OpLabel OpBranch %59 %59 = OpLabel -%62 = OpLoad %5 %42 +OpLoopMerge %60 %62 None +OpBranch %61 +%61 = OpLabel OpSelectionMerge %63 None -OpSwitch %62 %68 1 %64 2 %65 3 %66 4 %67 +OpSwitch %34 %64 1 %64 %64 = OpLabel -OpStore %42 %16 -OpBranch %63 -%65 = OpLabel -OpStore %42 %37 -OpReturn +OpSelectionMerge %65 None +OpSwitch %35 %66 %66 = OpLabel -OpStore %42 %38 +OpBranch %62 +%65 = OpLabel +OpBranch %63 +%63 = OpLabel +OpBranch %62 +%62 = OpLabel +OpBranch %59 +%60 = OpLabel OpReturn +OpFunctionEnd +%72 = OpFunction %2 None %73 +%68 = OpFunctionParameter %5 +%69 = OpFunctionParameter %5 +%70 = OpFunctionParameter %5 +%71 = OpFunctionParameter %5 %67 = OpLabel +%76 = OpVariable %77 Function %16 +OpBranch %78 +%78 = OpLabel +OpBranch %79 +%79 = OpLabel +OpLoopMerge %80 %82 None +OpBranch %81 +%81 = OpLabel +OpSelectionMerge %83 None +OpSwitch %68 %85 1 %84 +%84 = OpLabel +OpStore %76 %74 +OpBranch %83 +%85 = OpLabel +OpBranch %83 +%83 = OpLabel +OpBranch %82 +%82 = OpLabel +OpBranch %79 +%80 = OpLabel +OpBranch %86 +%86 = OpLabel +OpLoopMerge %87 %89 None +OpBranch %88 +%88 = OpLabel +OpSelectionMerge %90 None +OpSwitch %68 %93 1 %91 2 %92 +%91 = OpLabel +OpBranch %90 +%92 = OpLabel +OpSelectionMerge %94 None +OpSwitch %69 %96 1 %95 +%95 = OpLabel +OpBranch %89 +%96 = OpLabel +OpSelectionMerge %97 None +OpSwitch %70 %99 1 %98 +%98 = OpLabel +OpStore %76 %75 +OpBranch %97 +%99 = OpLabel +OpBranch %97 +%97 = OpLabel +OpBranch %94 +%94 
= OpLabel +OpBranch %90 +%93 = OpLabel +OpBranch %90 +%90 = OpLabel +OpBranch %89 +%89 = OpLabel +OpBranch %86 +%87 = OpLabel OpReturn -%68 = OpLabel -OpStore %42 %39 +OpFunctionEnd +%104 = OpFunction %2 None %15 +%100 = OpLabel +%108 = OpVariable %77 Function %109 +%103 = OpLoad %3 %101 +OpBranch %110 +%110 = OpLabel +OpControlBarrier %111 %112 %113 +OpControlBarrier %111 %111 %114 +OpSelectionMerge %115 None +OpSwitch %74 %116 +%116 = OpLabel +OpStore %108 %74 +OpBranch %115 +%115 = OpLabel +%117 = OpLoad %5 %108 +OpSelectionMerge %118 None +OpSwitch %117 %123 1 %119 2 %120 3 %121 4 %121 5 %122 6 %123 +%119 = OpLabel +OpStore %108 %16 +OpBranch %118 +%120 = OpLabel +OpStore %108 %74 +OpBranch %118 +%121 = OpLabel +OpStore %108 %75 +OpBranch %118 +%122 = OpLabel +OpStore %108 %105 +OpBranch %118 +%123 = OpLabel +OpStore %108 %106 +OpBranch %118 +%118 = OpLabel +OpSelectionMerge %124 None +OpSwitch %107 %126 0 %125 +%125 = OpLabel +OpBranch %124 +%126 = OpLabel +OpBranch %124 +%124 = OpLabel +%127 = OpLoad %5 %108 +OpSelectionMerge %128 None +OpSwitch %127 %133 1 %129 2 %130 3 %131 4 %132 +%129 = OpLabel +OpStore %108 %16 +OpBranch %128 +%130 = OpLabel +OpStore %108 %74 OpReturn -%63 = OpLabel +%131 = OpLabel +OpStore %108 %75 +OpReturn +%132 = OpLabel +OpReturn +%133 = OpLabel +OpStore %108 %105 +OpReturn +%128 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/math-functions.spvasm b/naga/tests/out/spv/math-functions.spvasm index 6e07c6d7a6..366857f91f 100644 --- a/naga/tests/out/spv/math-functions.spvasm +++ b/naga/tests/out/spv/math-functions.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 96 +; Bound: 87 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 @@ -40,77 +40,68 @@ OpMemberDecorate %15 1 Offset 16 %24 = OpConstant %4 -1.0 %25 = OpConstantComposite %3 %24 %24 %24 %24 %26 = OpConstantNull %7 -%27 = OpConstant %9 0 +%27 = OpConstant %9 4294967295 %28 = OpConstantComposite %7 %22 %22 -%29 = OpConstant %9 1 +%29 = OpConstant %9 0 %30 = OpConstantComposite %8 %29 %29 -%31 = OpConstant %9 32 -%32 = OpConstant %6 32 -%33 = OpConstant %6 0 -%34 = OpConstantComposite %8 %31 %31 -%35 = OpConstantComposite %7 %32 %32 -%36 = OpConstantComposite %8 %27 %27 -%37 = OpConstantComposite %7 %33 %33 -%38 = OpConstant %9 31 -%39 = OpConstantComposite %8 %38 %38 -%40 = OpConstant %6 2 -%41 = OpConstant %4 2.0 -%42 = OpConstantComposite %10 %19 %41 -%43 = OpConstant %6 3 -%44 = OpConstant %6 4 -%45 = OpConstantComposite %7 %43 %44 -%46 = OpConstant %4 1.5 -%47 = OpConstantComposite %10 %46 %46 -%48 = OpConstantComposite %3 %46 %46 %46 %46 -%55 = OpConstantComposite %3 %19 %19 %19 %19 -%58 = OpConstantNull %6 +%31 = OpConstant %6 0 +%32 = OpConstantComposite %7 %31 %31 +%33 = OpConstant %9 32 +%34 = OpConstant %6 32 +%35 = OpConstantComposite %8 %33 %33 +%36 = OpConstantComposite %7 %34 %34 +%37 = OpConstant %9 31 +%38 = OpConstantComposite %8 %37 %37 +%39 = OpConstant %6 2 +%40 = OpConstant %4 2.0 +%41 = OpConstantComposite %10 %19 %40 +%42 = OpConstant %6 3 +%43 = OpConstant %6 4 +%44 = OpConstantComposite %7 %42 %43 +%45 = OpConstant %4 1.5 +%46 = OpConstantComposite %10 %45 %45 +%47 = OpConstantComposite %3 %45 %45 %45 %45 +%54 = OpConstantComposite %3 %19 %19 %19 %19 +%57 = OpConstantNull %6 %17 = OpFunction %2 None %18 %16 = OpLabel -OpBranch %49 -%49 = OpLabel -%50 = OpExtInst %4 %1 Degrees %19 -%51 = OpExtInst %4 %1 Radians %19 -%52 = OpExtInst %3 %1 Degrees %21 -%53 = OpExtInst %3 %1 
Radians %21 -%54 = OpExtInst %3 %1 FClamp %21 %21 %55 -%56 = OpExtInst %3 %1 Refract %21 %21 %19 +OpBranch %48 +%48 = OpLabel +%49 = OpExtInst %4 %1 Degrees %19 +%50 = OpExtInst %4 %1 Radians %19 +%51 = OpExtInst %3 %1 Degrees %21 +%52 = OpExtInst %3 %1 Radians %21 +%53 = OpExtInst %3 %1 FClamp %21 %21 %54 +%55 = OpExtInst %3 %1 Refract %21 %21 %19 +%58 = OpCompositeExtract %6 %26 0 %59 = OpCompositeExtract %6 %26 0 -%60 = OpCompositeExtract %6 %26 0 -%61 = OpIMul %6 %59 %60 -%62 = OpIAdd %6 %58 %61 +%60 = OpIMul %6 %58 %59 +%61 = OpIAdd %6 %57 %60 +%62 = OpCompositeExtract %6 %26 1 %63 = OpCompositeExtract %6 %26 1 -%64 = OpCompositeExtract %6 %26 1 -%65 = OpIMul %6 %63 %64 -%57 = OpIAdd %6 %62 %65 -%66 = OpExtInst %9 %1 FindUMsb %27 -%67 = OpExtInst %6 %1 FindSMsb %22 -%68 = OpExtInst %7 %1 FindSMsb %28 -%69 = OpExtInst %8 %1 FindUMsb %30 -%70 = OpExtInst %6 %1 FindILsb %22 -%71 = OpExtInst %9 %1 FindILsb %29 -%72 = OpExtInst %7 %1 FindILsb %28 -%73 = OpExtInst %8 %1 FindILsb %30 -%74 = OpExtInst %4 %1 Ldexp %19 %40 -%75 = OpExtInst %10 %1 Ldexp %42 %45 -%76 = OpExtInst %11 %1 ModfStruct %46 -%77 = OpExtInst %11 %1 ModfStruct %46 -%78 = OpCompositeExtract %4 %77 0 -%79 = OpExtInst %11 %1 ModfStruct %46 -%80 = OpCompositeExtract %4 %79 1 -%81 = OpExtInst %12 %1 ModfStruct %47 -%82 = OpExtInst %13 %1 ModfStruct %48 -%83 = OpCompositeExtract %3 %82 1 -%84 = OpCompositeExtract %4 %83 0 -%85 = OpExtInst %12 %1 ModfStruct %47 -%86 = OpCompositeExtract %10 %85 0 -%87 = OpCompositeExtract %4 %86 1 -%88 = OpExtInst %14 %1 FrexpStruct %46 -%89 = OpExtInst %14 %1 FrexpStruct %46 -%90 = OpCompositeExtract %4 %89 0 -%91 = OpExtInst %14 %1 FrexpStruct %46 -%92 = OpCompositeExtract %6 %91 1 -%93 = OpExtInst %15 %1 FrexpStruct %48 -%94 = OpCompositeExtract %5 %93 1 -%95 = OpCompositeExtract %6 %94 0 +%64 = OpIMul %6 %62 %63 +%56 = OpIAdd %6 %61 %64 +%65 = OpExtInst %4 %1 Ldexp %19 %39 +%66 = OpExtInst %10 %1 Ldexp %41 %44 +%67 = OpExtInst %11 %1 ModfStruct %45 +%68 = OpExtInst %11 %1 ModfStruct %45 +%69 = OpCompositeExtract %4 %68 0 +%70 = OpExtInst %11 %1 ModfStruct %45 +%71 = OpCompositeExtract %4 %70 1 +%72 = OpExtInst %12 %1 ModfStruct %46 +%73 = OpExtInst %13 %1 ModfStruct %47 +%74 = OpCompositeExtract %3 %73 1 +%75 = OpCompositeExtract %4 %74 0 +%76 = OpExtInst %12 %1 ModfStruct %46 +%77 = OpCompositeExtract %10 %76 0 +%78 = OpCompositeExtract %4 %77 1 +%79 = OpExtInst %14 %1 FrexpStruct %45 +%80 = OpExtInst %14 %1 FrexpStruct %45 +%81 = OpCompositeExtract %4 %80 0 +%82 = OpExtInst %14 %1 FrexpStruct %45 +%83 = OpCompositeExtract %6 %82 1 +%84 = OpExtInst %15 %1 FrexpStruct %47 +%85 = OpCompositeExtract %5 %84 1 +%86 = OpCompositeExtract %6 %85 0 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/ray-query.spvasm b/naga/tests/out/spv/ray-query.spvasm index 23d5dd1baa..328c820fea 100644 --- a/naga/tests/out/spv/ray-query.spvasm +++ b/naga/tests/out/spv/ray-query.spvasm @@ -1,37 +1,37 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 95 +; Bound: 104 OpCapability Shader OpCapability RayQueryKHR OpExtension "SPV_KHR_ray_query" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %41 "main" %15 %17 -OpExecutionMode %41 LocalSize 1 1 1 -OpMemberDecorate %7 0 Offset 0 -OpMemberDecorate %7 1 Offset 16 -OpMemberDecorate %11 0 Offset 0 -OpMemberDecorate %11 1 Offset 4 -OpMemberDecorate %11 2 Offset 8 -OpMemberDecorate %11 3 Offset 12 -OpMemberDecorate %11 4 Offset 16 -OpMemberDecorate %11 5 Offset 20 -OpMemberDecorate %11 6 Offset 
24 -OpMemberDecorate %11 7 Offset 28 -OpMemberDecorate %11 8 Offset 36 -OpMemberDecorate %11 9 Offset 48 -OpMemberDecorate %11 9 ColMajor -OpMemberDecorate %11 9 MatrixStride 16 -OpMemberDecorate %11 10 Offset 112 -OpMemberDecorate %11 10 ColMajor -OpMemberDecorate %11 10 MatrixStride 16 -OpMemberDecorate %14 0 Offset 0 -OpMemberDecorate %14 1 Offset 4 -OpMemberDecorate %14 2 Offset 8 -OpMemberDecorate %14 3 Offset 12 -OpMemberDecorate %14 4 Offset 16 -OpMemberDecorate %14 5 Offset 32 +OpEntryPoint GLCompute %84 "main" %15 %17 +OpExecutionMode %84 LocalSize 1 1 1 +OpMemberDecorate %10 0 Offset 0 +OpMemberDecorate %10 1 Offset 4 +OpMemberDecorate %10 2 Offset 8 +OpMemberDecorate %10 3 Offset 12 +OpMemberDecorate %10 4 Offset 16 +OpMemberDecorate %10 5 Offset 20 +OpMemberDecorate %10 6 Offset 24 +OpMemberDecorate %10 7 Offset 28 +OpMemberDecorate %10 8 Offset 36 +OpMemberDecorate %10 9 Offset 48 +OpMemberDecorate %10 9 ColMajor +OpMemberDecorate %10 9 MatrixStride 16 +OpMemberDecorate %10 10 Offset 112 +OpMemberDecorate %10 10 ColMajor +OpMemberDecorate %10 10 MatrixStride 16 +OpMemberDecorate %12 0 Offset 0 +OpMemberDecorate %12 1 Offset 4 +OpMemberDecorate %12 2 Offset 8 +OpMemberDecorate %12 3 Offset 12 +OpMemberDecorate %12 4 Offset 16 +OpMemberDecorate %12 5 Offset 32 +OpMemberDecorate %13 0 Offset 0 +OpMemberDecorate %13 1 Offset 16 OpDecorate %15 DescriptorSet 0 OpDecorate %15 Binding 0 OpDecorate %17 DescriptorSet 0 @@ -39,114 +39,126 @@ OpDecorate %17 Binding 1 OpDecorate %18 Block OpMemberDecorate %18 0 Offset 0 %2 = OpTypeVoid -%3 = OpTypeAccelerationStructureNV -%4 = OpTypeInt 32 0 -%6 = OpTypeFloat 32 -%5 = OpTypeVector %6 3 -%7 = OpTypeStruct %4 %5 -%8 = OpTypeVector %6 2 -%9 = OpTypeBool -%10 = OpTypeMatrix %5 4 -%11 = OpTypeStruct %4 %6 %4 %4 %4 %4 %4 %8 %9 %10 %10 -%12 = OpTypeVector %6 4 -%13 = OpTypeRayQueryKHR -%14 = OpTypeStruct %4 %4 %6 %6 %5 %5 -%16 = OpTypePointer UniformConstant %3 +%4 = OpTypeFloat 32 +%3 = OpTypeVector %4 3 +%5 = OpTypeAccelerationStructureNV +%6 = OpTypeInt 32 0 +%7 = OpTypeVector %4 2 +%8 = OpTypeBool +%9 = OpTypeMatrix %3 4 +%10 = OpTypeStruct %6 %4 %6 %6 %6 %6 %6 %7 %8 %9 %9 +%11 = OpTypeRayQueryKHR +%12 = OpTypeStruct %6 %6 %4 %4 %3 %3 +%13 = OpTypeStruct %6 %3 +%14 = OpTypeVector %4 4 +%16 = OpTypePointer UniformConstant %5 %15 = OpVariable %16 UniformConstant -%18 = OpTypeStruct %7 +%18 = OpTypeStruct %13 %19 = OpTypePointer StorageBuffer %18 %17 = OpVariable %19 StorageBuffer -%24 = OpTypeFunction %5 %5 %11 -%25 = OpConstant %6 1.0 -%26 = OpConstant %6 2.4 -%27 = OpConstant %6 0.0 -%42 = OpTypeFunction %2 -%44 = OpTypePointer StorageBuffer %7 -%45 = OpConstant %4 0 -%47 = OpConstantComposite %5 %27 %25 %27 -%48 = OpConstant %4 4 -%49 = OpConstant %4 255 -%50 = OpConstantComposite %5 %27 %27 %27 -%51 = OpConstant %6 0.1 -%52 = OpConstant %6 100.0 -%53 = OpConstantComposite %14 %48 %49 %51 %52 %50 %47 -%55 = OpTypePointer Function %13 -%72 = OpConstant %4 1 -%85 = OpTypePointer StorageBuffer %4 -%90 = OpTypePointer StorageBuffer %5 -%23 = OpFunction %5 None %24 -%21 = OpFunctionParameter %5 -%22 = OpFunctionParameter %11 +%26 = OpTypeFunction %10 %3 %3 %16 +%27 = OpConstant %6 4 +%28 = OpConstant %6 255 +%29 = OpConstant %4 0.1 +%30 = OpConstant %4 100.0 +%32 = OpTypePointer Function %11 +%50 = OpConstant %6 1 +%67 = OpTypeFunction %3 %3 %10 +%68 = OpConstant %4 1.0 +%69 = OpConstant %4 2.4 +%70 = OpConstant %4 0.0 +%85 = OpTypeFunction %2 +%87 = OpTypePointer StorageBuffer %13 +%88 = OpConstant %6 0 +%90 = OpConstantComposite %3 %70 %70 %70 
+%91 = OpConstantComposite %3 %70 %68 %70 +%94 = OpTypePointer StorageBuffer %6 +%99 = OpTypePointer StorageBuffer %3 +%25 = OpFunction %10 None %26 +%21 = OpFunctionParameter %3 +%22 = OpFunctionParameter %3 +%23 = OpFunctionParameter %16 %20 = OpLabel -OpBranch %28 -%28 = OpLabel -%29 = OpCompositeExtract %10 %22 10 -%30 = OpCompositeConstruct %12 %21 %25 -%31 = OpMatrixTimesVector %5 %29 %30 -%32 = OpVectorShuffle %8 %31 %31 0 1 -%33 = OpExtInst %8 %1 Normalize %32 -%34 = OpVectorTimesScalar %8 %33 %26 -%35 = OpCompositeExtract %10 %22 9 -%36 = OpCompositeConstruct %12 %34 %27 %25 -%37 = OpMatrixTimesVector %5 %35 %36 -%38 = OpFSub %5 %21 %37 -%39 = OpExtInst %5 %1 Normalize %38 -OpReturnValue %39 +%31 = OpVariable %32 Function +%24 = OpLoad %5 %23 +OpBranch %33 +%33 = OpLabel +%34 = OpCompositeConstruct %12 %27 %28 %29 %30 %21 %22 +%35 = OpCompositeExtract %6 %34 0 +%36 = OpCompositeExtract %6 %34 1 +%37 = OpCompositeExtract %4 %34 2 +%38 = OpCompositeExtract %4 %34 3 +%39 = OpCompositeExtract %3 %34 4 +%40 = OpCompositeExtract %3 %34 5 +OpRayQueryInitializeKHR %31 %24 %35 %36 %39 %37 %40 %38 +OpBranch %41 +%41 = OpLabel +OpLoopMerge %42 %44 None +OpBranch %43 +%43 = OpLabel +%45 = OpRayQueryProceedKHR %8 %31 +OpSelectionMerge %46 None +OpBranchConditional %45 %46 %47 +%47 = OpLabel +OpBranch %42 +%46 = OpLabel +OpBranch %48 +%48 = OpLabel +OpBranch %49 +%49 = OpLabel +OpBranch %44 +%44 = OpLabel +OpBranch %41 +%42 = OpLabel +%51 = OpRayQueryGetIntersectionTypeKHR %6 %31 %50 +%52 = OpRayQueryGetIntersectionInstanceCustomIndexKHR %6 %31 %50 +%53 = OpRayQueryGetIntersectionInstanceIdKHR %6 %31 %50 +%54 = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR %6 %31 %50 +%55 = OpRayQueryGetIntersectionGeometryIndexKHR %6 %31 %50 +%56 = OpRayQueryGetIntersectionPrimitiveIndexKHR %6 %31 %50 +%57 = OpRayQueryGetIntersectionTKHR %4 %31 %50 +%58 = OpRayQueryGetIntersectionBarycentricsKHR %7 %31 %50 +%59 = OpRayQueryGetIntersectionFrontFaceKHR %8 %31 %50 +%60 = OpRayQueryGetIntersectionObjectToWorldKHR %9 %31 %50 +%61 = OpRayQueryGetIntersectionWorldToObjectKHR %9 %31 %50 +%62 = OpCompositeConstruct %10 %51 %57 %52 %53 %54 %55 %56 %58 %59 %60 %61 +OpReturnValue %62 OpFunctionEnd -%41 = OpFunction %2 None %42 -%40 = OpLabel -%54 = OpVariable %55 Function -%43 = OpLoad %3 %15 -%46 = OpAccessChain %44 %17 %45 -OpBranch %56 -%56 = OpLabel -%57 = OpCompositeExtract %4 %53 0 -%58 = OpCompositeExtract %4 %53 1 -%59 = OpCompositeExtract %6 %53 2 -%60 = OpCompositeExtract %6 %53 3 -%61 = OpCompositeExtract %5 %53 4 -%62 = OpCompositeExtract %5 %53 5 -OpRayQueryInitializeKHR %54 %43 %57 %58 %61 %59 %62 %60 -OpBranch %63 +%66 = OpFunction %3 None %67 +%64 = OpFunctionParameter %3 +%65 = OpFunctionParameter %10 %63 = OpLabel -OpLoopMerge %64 %66 None -OpBranch %65 -%65 = OpLabel -%67 = OpRayQueryProceedKHR %9 %54 -OpSelectionMerge %68 None -OpBranchConditional %67 %68 %69 -%69 = OpLabel -OpBranch %64 -%68 = OpLabel -OpBranch %70 -%70 = OpLabel OpBranch %71 %71 = OpLabel -OpBranch %66 -%66 = OpLabel -OpBranch %63 -%64 = OpLabel -%73 = OpRayQueryGetIntersectionTypeKHR %4 %54 %72 -%74 = OpRayQueryGetIntersectionInstanceCustomIndexKHR %4 %54 %72 -%75 = OpRayQueryGetIntersectionInstanceIdKHR %4 %54 %72 -%76 = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR %4 %54 %72 -%77 = OpRayQueryGetIntersectionGeometryIndexKHR %4 %54 %72 -%78 = OpRayQueryGetIntersectionPrimitiveIndexKHR %4 %54 %72 -%79 = OpRayQueryGetIntersectionTKHR %6 %54 %72 -%80 = OpRayQueryGetIntersectionBarycentricsKHR 
%8 %54 %72
-%81 = OpRayQueryGetIntersectionFrontFaceKHR %9 %54 %72
-%82 = OpRayQueryGetIntersectionObjectToWorldKHR %10 %54 %72
-%83 = OpRayQueryGetIntersectionWorldToObjectKHR %10 %54 %72
-%84 = OpCompositeConstruct %11 %73 %79 %74 %75 %76 %77 %78 %80 %81 %82 %83
-%86 = OpCompositeExtract %4 %84 0
-%87 = OpIEqual %9 %86 %45
-%88 = OpSelect %4 %87 %72 %45
-%89 = OpAccessChain %85 %46 %45
-OpStore %89 %88
-%91 = OpCompositeExtract %6 %84 1
-%92 = OpVectorTimesScalar %5 %47 %91
-%93 = OpFunctionCall %5 %23 %92 %84
-%94 = OpAccessChain %90 %46 %72
-OpStore %94 %93
+%72 = OpCompositeExtract %9 %65 10
+%73 = OpCompositeConstruct %14 %64 %68
+%74 = OpMatrixTimesVector %3 %72 %73
+%75 = OpVectorShuffle %7 %74 %74 0 1
+%76 = OpExtInst %7 %1 Normalize %75
+%77 = OpVectorTimesScalar %7 %76 %69
+%78 = OpCompositeExtract %9 %65 9
+%79 = OpCompositeConstruct %14 %77 %70 %68
+%80 = OpMatrixTimesVector %3 %78 %79
+%81 = OpFSub %3 %64 %80
+%82 = OpExtInst %3 %1 Normalize %81
+OpReturnValue %82
+OpFunctionEnd
+%84 = OpFunction %2 None %85
+%83 = OpLabel
+%86 = OpLoad %5 %15
+%89 = OpAccessChain %87 %17 %88
+OpBranch %92
+%92 = OpLabel
+%93 = OpFunctionCall %10 %25 %90 %91 %15
+%95 = OpCompositeExtract %6 %93 0
+%96 = OpIEqual %8 %95 %88
+%97 = OpSelect %6 %96 %50 %88
+%98 = OpAccessChain %94 %89 %88
+OpStore %98 %97
+%100 = OpCompositeExtract %4 %93 1
+%101 = OpVectorTimesScalar %3 %91 %100
+%102 = OpFunctionCall %3 %66 %101 %93
+%103 = OpAccessChain %99 %89 %50
+OpStore %103 %102
OpReturn
OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
index 37bbb680f5..126758b0b5 100644
--- a/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
+++ b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
@@ -9,17 +9,23 @@ var<storage, read_write> storage_atomic_scalar: atomic<u64>;
var<storage, read_write> storage_atomic_arr: array<atomic<u64>, 2>;
@group(0) @binding(2)
var<storage, read_write> storage_struct: Struct;
+@group(0) @binding(3)
+var<uniform> input: u64;

@compute @workgroup_size(2, 1, 1)
fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) {
-    atomicMax((&storage_atomic_scalar), 1lu);
-    atomicMax((&storage_atomic_arr[1]), 1lu);
+    let _e3 = input;
+    atomicMax((&storage_atomic_scalar), _e3);
+    let _e7 = input;
+    atomicMax((&storage_atomic_arr[1]), (1lu + _e7));
    atomicMax((&storage_struct.atomic_scalar), 1lu);
-    atomicMax((&storage_struct.atomic_arr[1]), 1lu);
+    atomicMax((&storage_struct.atomic_arr[1]), u64(id.x));
    workgroupBarrier();
-    atomicMin((&storage_atomic_scalar), 1lu);
-    atomicMin((&storage_atomic_arr[1]), 1lu);
+    let _e20 = input;
+    atomicMin((&storage_atomic_scalar), _e20);
+    let _e24 = input;
+    atomicMin((&storage_atomic_arr[1]), (1lu + _e24));
    atomicMin((&storage_struct.atomic_scalar), 1lu);
-    atomicMin((&storage_struct.atomic_arr[1]), 1lu);
+    atomicMin((&storage_struct.atomic_arr[1]), u64(id.x));
    return;
}
diff --git a/naga/tests/out/wgsl/control-flow.wgsl b/naga/tests/out/wgsl/control-flow.wgsl
index dcc3f90365..ad071af58a 100644
--- a/naga/tests/out/wgsl/control-flow.wgsl
+++ b/naga/tests/out/wgsl/control-flow.wgsl
@@ -30,6 +30,92 @@ fn loop_switch_continue(x: i32) {
    return;
}

+fn loop_switch_continue_nesting(x_1: i32, y: i32, z: i32) {
+    loop {
+        switch x_1 {
+            case 1: {
+                continue;
+            }
+            case 2: {
+                switch y {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        loop {
+                            switch z {
+                                case 1: {
+                                    continue;
+                                }
+                                default: {
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            default: {
+            }
+        }
+        switch y {
+            default: {
+                continue;
+            }
+        }
+    }
+    loop {
+        switch y {
+            case 1, default: {
+                switch z {
+                    default: {
+                        continue;
+                    }
+                }
+            }
+        }
+    }
+    return;
+}
+
+fn loop_switch_omit_continue_variable_checks(x_2: i32, y_1: i32, z_1: i32, w: i32) {
+    var pos_1: i32 = 0i;
+
+    loop {
+        switch x_2 {
+            case 1: {
+                pos_1 = 1i;
+            }
+            default: {
+            }
+        }
+    }
+    loop {
+        switch x_2 {
+            case 1: {
+            }
+            case 2: {
+                switch y_1 {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        switch z_1 {
+                            case 1: {
+                                pos_1 = 2i;
+                            }
+                            default: {
+                            }
+                        }
+                    }
+                }
+            }
+            default: {
+            }
+        }
+    }
+    return;
+}
+
@compute @workgroup_size(1, 1, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    var pos: i32;
diff --git a/naga/tests/out/wgsl/math-functions.wgsl b/naga/tests/out/wgsl/math-functions.wgsl
index 228248b3ce..2271bb9cb0 100644
--- a/naga/tests/out/wgsl/math-functions.wgsl
+++ b/naga/tests/out/wgsl/math-functions.wgsl
@@ -10,14 +10,10 @@ fn main() {
    let sign_b = vec4(-1i, -1i, -1i, -1i);
    let sign_d = vec4(-1f, -1f, -1f, -1f);
    let const_dot = dot(vec2<i32>(), vec2<i32>());
-    let first_leading_bit_abs = firstLeadingBit(0u);
-    let flb_a = firstLeadingBit(-1i);
-    let flb_b = firstLeadingBit(vec2(-1i));
-    let flb_c = firstLeadingBit(vec2(1u));
-    let ftb_a = firstTrailingBit(-1i);
-    let ftb_b = firstTrailingBit(1u);
-    let ftb_c = firstTrailingBit(vec2(-1i));
-    let ftb_d = firstTrailingBit(vec2(1u));
+    let flb_b = vec2(-1i, -1i);
+    let flb_c = vec2(0u, 0u);
+    let ftb_c = vec2(0i, 0i);
+    let ftb_d = vec2(0u, 0u);
    let ctz_e = vec2(32u, 32u);
    let ctz_f = vec2(32i, 32i);
    let ctz_g = vec2(0u, 0u);
diff --git a/naga/xtask/src/validate.rs b/naga/xtask/src/validate.rs
index d90ee8d84a..fa330f0a96 100644
--- a/naga/xtask/src/validate.rs
+++ b/naga/xtask/src/validate.rs
@@ -208,7 +208,10 @@ fn validate_spirv(path: &Path, spirv_as: &str, spirv_val: &str) -> anyhow::Resul
        buf
    };
    let expected_header_prefix = "; Version: ";
-    let Some(version) = second_line.strip_prefix(expected_header_prefix) else {
+    let Some(version) = second_line
+        .strip_prefix(expected_header_prefix)
+        .map(str::trim)
+    else {
        bail!("no {expected_header_prefix:?} header found in {path:?}");
    };
    let file = open_file(path)?;
@@ -222,7 +225,18 @@ fn validate_spirv(path: &Path, spirv_as: &str, spirv_val: &str) -> anyhow::Resul
    let child = spirv_as_cmd
        .spawn()
        .with_context(|| format!("failed to spawn {spirv_as_cmd:?}"))?;
-    EasyCommand::new(spirv_val, |cmd| cmd.stdin(child.stdout.unwrap())).success()
+    let error_message = || {
+        format!(
+            "Failed to validate {path:?}.
+Note: Labels and line numbers will not match the input file.
+      Use this command to view the corresponding spvasm:
+      '{spirv_as} --target-env spv{version} {} -o - | spirv-dis'\n",
+            path.display(),
+        )
+    };
+    EasyCommand::new(spirv_val, |cmd| cmd.stdin(child.stdout.unwrap()))
+        .success()
+        .with_context(error_message)
}

fn validate_metal(path: &Path, xcrun: &str) -> anyhow::Result<()> {
diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs
index 5c438dd20d..4726fe63a7 100644
--- a/player/src/bin/play.rs
+++ b/player/src/bin/play.rs
@@ -1,10 +1,9 @@
-/*! This is a player for WebGPU traces.
-!*/
+//! This is a player for WebGPU traces.
#[cfg(not(target_arch = "wasm32"))]
fn main() {
    use player::GlobalPlay as _;
-    use wgc::{device::trace, gfx_select};
+    use wgc::device::trace;

    use std::{
        fs,
@@ -62,7 +61,7 @@ fn main() {
    }
    .unwrap();

-    let device = match actions.pop() {
+    let (device, queue) = match actions.pop() {
        Some(trace::Action::Init { desc, backend }) => {
            log::info!("Initializing the device for backend: {:?}", backend);
            let adapter = global
                .request_adapter(
                )
                .expect("Unable to find an adapter for selected backend");

-            let info = gfx_select!(adapter => global.adapter_get_info(adapter)).unwrap();
+            let info = global.adapter_get_info(adapter).unwrap();
            log::info!("Picked '{}'", info.name);
-            let id = wgc::id::Id::zip(1, 0, backend);
-            let (_, _, error) = gfx_select!(adapter => global.adapter_request_device(
+            let device_id = wgc::id::Id::zip(1, 0, backend);
+            let queue_id = wgc::id::Id::zip(1, 0, backend);
+            let (_, _, error) = global.adapter_request_device(
                adapter,
                &desc,
                None,
-                Some(id),
-                Some(id.into_queue_id())
-            ));
+                Some(device_id),
+                Some(queue_id),
+            );
            if let Some(e) = error {
                panic!("{:?}", e);
            }
-            id
+            (device_id, queue_id)
        }
        _ => panic!("Expected Action::Init"),
    };
@@ -100,14 +100,14 @@ fn main() {
    log::info!("Executing actions");
    #[cfg(not(feature = "winit"))]
    {
-        gfx_select!(device => global.device_start_capture(device));
+        global.device_start_capture(device);
        while let Some(action) = actions.pop() {
-            gfx_select!(device => global.process(device, action, &dir, &mut command_buffer_id_manager));
+            global.process(device, queue, action, &dir, &mut command_buffer_id_manager);
        }
-        gfx_select!(device => global.device_stop_capture(device));
-        gfx_select!(device => global.device_poll(device, wgt::Maintain::wait())).unwrap();
+        global.device_stop_capture(device);
+        global.device_poll(device, wgt::Maintain::wait()).unwrap();
    }
    #[cfg(feature = "winit")]
    {
@@ -119,81 +119,92 @@ fn main() {
        let mut resize_config = None;
        let mut frame_count = 0;
        let mut done = false;
-        event_loop.run(move |event, target| {
-            target.set_control_flow(ControlFlow::Poll);
-
-            match event {
-                Event::WindowEvent { event, .. } => match event {
-                    WindowEvent::RedrawRequested if resize_config.is_none() => {
-
-                        match actions.pop() {
-                            Some(trace::Action::ConfigureSurface(_device_id, config)) => {
-                                log::info!("Configuring the surface");
-                                let current_size: (u32, u32) = window.inner_size().into();
-                                let size = (config.width, config.height);
-                                if current_size != size {
-                                    let _ = window.request_inner_size(winit::dpi::PhysicalSize::new(
-                                        config.width,
-                                        config.height,
-                                    ));
-                                    resize_config = Some(config);
-                                    target.exit();
-                                } else {
-                                    let error = gfx_select!(device => global.surface_configure(surface, device, &config));
+        event_loop
+            .run(move |event, target| {
+                target.set_control_flow(ControlFlow::Poll);
+
+                match event {
+                    Event::WindowEvent { event, .. } => match event {
+                        WindowEvent::RedrawRequested if resize_config.is_none() => {
+                            match actions.pop() {
+                                Some(trace::Action::ConfigureSurface(_device_id, config)) => {
+                                    log::info!("Configuring the surface");
+                                    let current_size: (u32, u32) = window.inner_size().into();
+                                    let size = (config.width, config.height);
+                                    if current_size != size {
+                                        let _ = window.request_inner_size(
+                                            winit::dpi::PhysicalSize::new(
+                                                config.width,
+                                                config.height,
+                                            ),
+                                        );
+                                        resize_config = Some(config);
+                                        target.exit();
+                                    } else {
+                                        let error =
+                                            global.surface_configure(surface, device, &config);
+                                        if let Some(e) = error {
+                                            panic!("{:?}", e);
+                                        }
+                                    }
+                                }
+                                Some(trace::Action::Present(id)) => {
+                                    frame_count += 1;
+                                    log::debug!("Presenting frame {}", frame_count);
+                                    global.surface_present(id).unwrap();
+                                    target.exit();
+                                }
+                                Some(trace::Action::DiscardSurfaceTexture(id)) => {
+                                    log::debug!("Discarding frame {}", frame_count);
+                                    global.surface_texture_discard(id).unwrap();
+                                    target.exit();
+                                }
+                                Some(action) => {
+                                    global.process(
+                                        device,
+                                        queue,
+                                        action,
+                                        &dir,
+                                        &mut command_buffer_id_manager,
+                                    );
+                                }
+                                None => {
+                                    if !done {
+                                        println!("Finished the end at frame {}", frame_count);
+                                        done = true;
+                                    }
+                                    target.exit();
+                                }
+                            }
+                        }
+                        WindowEvent::Resized(_) => {
+                            if let Some(config) = resize_config.take() {
+                                let error = global.surface_configure(surface, device, &config);
                                if let Some(e) = error {
                                    panic!("{:?}", e);
                                }
                            }
                        }
-                        Some(trace::Action::Present(id)) => {
-                            frame_count += 1;
-                            log::debug!("Presenting frame {}", frame_count);
-                            gfx_select!(device => global.surface_present(id)).unwrap();
-                            target.exit();
-                        }
-                        Some(trace::Action::DiscardSurfaceTexture(id)) => {
-                            log::debug!("Discarding frame {}", frame_count);
-                            gfx_select!(device => global.surface_texture_discard(id)).unwrap();
-                            target.exit();
-                        }
-                        Some(action) => {
-                            gfx_select!(device => global.process(device, action, &dir, &mut command_buffer_id_manager));
-                        }
-                        None => {
-                            if !done {
-                                println!("Finished the end at frame {}", frame_count);
-                                done = true;
-                            }
-                            target.exit();
+                        WindowEvent::KeyboardInput {
+                            event:
+                                KeyEvent {
+                                    logical_key: Key::Named(NamedKey::Escape),
+                                    state: ElementState::Pressed,
+                                    ..
+                                },
+                            ..
                        }
-                    }
+                        | WindowEvent::CloseRequested => target.exit(),
+                        _ => {}
                    },
-                    WindowEvent::Resized(_) => {
-                        if let Some(config) = resize_config.take() {
-                            let error = gfx_select!(device => global.surface_configure(surface, device, &config));
-                            if let Some(e) = error {
-                                panic!("{:?}", e);
-                            }
-                        }
-                    }
-                    WindowEvent::KeyboardInput {
-                        event: KeyEvent {
-                            logical_key: Key::Named(NamedKey::Escape),
-                            state: ElementState::Pressed,
-                            ..
-                        },
-                        ..
+                    Event::LoopExiting => {
+                        log::info!("Closing");
+                        global.device_poll(device, wgt::Maintain::wait()).unwrap();
                    }
-                    | WindowEvent::CloseRequested => target.exit(),
                    _ => {}
-                },
-                Event::LoopExiting => {
-                    log::info!("Closing");
-                    gfx_select!(device => global.device_poll(device, wgt::Maintain::wait())).unwrap();
                }
-                _ => {}
-            }
-        }).unwrap();
+            })
+            .unwrap();
    }
}
diff --git a/player/src/lib.rs b/player/src/lib.rs
index d2520e00f5..c9ee55aa6d 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -1,6 +1,5 @@
-/*! This is a player library for WebGPU traces.
- *
-!*/
+//! This is a player library for WebGPU traces.
+
#![cfg(not(target_arch = "wasm32"))]
#![warn(unsafe_op_in_unsafe_fn)]
@@ -9,14 +8,15 @@
use wgc::device::trace;

use std::{borrow::Cow, fs, path::Path};

pub trait GlobalPlay {
-    fn encode_commands<A: wgc::hal_api::HalApi>(
+    fn encode_commands(
        &self,
        encoder: wgc::id::CommandEncoderId,
        commands: Vec<trace::Command>,
    ) -> wgc::id::CommandBufferId;
-    fn process<A: wgc::hal_api::HalApi>(
+    fn process(
        &self,
        device: wgc::id::DeviceId,
+        queue: wgc::id::QueueId,
        action: trace::Action,
        dir: &Path,
        comb_manager: &mut wgc::identity::IdentityManager<wgc::id::markers::CommandBuffer>,
    );
}
@@ -24,7 +24,7 @@
impl GlobalPlay for wgc::global::Global {
-    fn encode_commands<A: wgc::hal_api::HalApi>(
+    fn encode_commands(
        &self,
        encoder: wgc::id::CommandEncoderId,
        commands: Vec<trace::Command>,
@@ -38,33 +38,33 @@ impl GlobalPlay for wgc::global::Global {
                dst_offset,
                size,
            } => self
-                .command_encoder_copy_buffer_to_buffer::<A>(
+                .command_encoder_copy_buffer_to_buffer(
                    encoder, src, src_offset, dst, dst_offset, size,
                )
                .unwrap(),
            trace::Command::CopyBufferToTexture { src, dst, size } => self
-                .command_encoder_copy_buffer_to_texture::<A>(encoder, &src, &dst, &size)
+                .command_encoder_copy_buffer_to_texture(encoder, &src, &dst, &size)
                .unwrap(),
            trace::Command::CopyTextureToBuffer { src, dst, size } => self
-                .command_encoder_copy_texture_to_buffer::<A>(encoder, &src, &dst, &size)
+                .command_encoder_copy_texture_to_buffer(encoder, &src, &dst, &size)
                .unwrap(),
            trace::Command::CopyTextureToTexture { src, dst, size } => self
-                .command_encoder_copy_texture_to_texture::<A>(encoder, &src, &dst, &size)
+                .command_encoder_copy_texture_to_texture(encoder, &src, &dst, &size)
                .unwrap(),
            trace::Command::ClearBuffer { dst, offset, size } => self
-                .command_encoder_clear_buffer::<A>(encoder, dst, offset, size)
+                .command_encoder_clear_buffer(encoder, dst, offset, size)
                .unwrap(),
            trace::Command::ClearTexture {
                dst,
                subresource_range,
            } => self
-                .command_encoder_clear_texture::<A>(encoder, dst, &subresource_range)
+                .command_encoder_clear_texture(encoder, dst, &subresource_range)
                .unwrap(),
            trace::Command::WriteTimestamp {
                query_set_id,
                query_index,
            } => self
-                .command_encoder_write_timestamp::<A>(encoder, query_set_id, query_index)
+                .command_encoder_write_timestamp(encoder, query_set_id, query_index)
                .unwrap(),
            trace::Command::ResolveQuerySet {
                query_set_id,
@@ -73,7 +73,7 @@ impl GlobalPlay for wgc::global::Global {
                destination,
                destination_offset,
            } => self
-                .command_encoder_resolve_query_set::<A>(
+                .command_encoder_resolve_query_set(
                    encoder,
                    query_set_id,
                    start_query,
                    query_count,
                    destination,
                    destination_offset,
                )
                .unwrap(),
            trace::Command::PushDebugGroup(marker) => self
-                .command_encoder_push_debug_group::<A>(encoder, &marker)
+                .command_encoder_push_debug_group(encoder, &marker)
                .unwrap(),
            trace::Command::PopDebugGroup => {
-                self.command_encoder_pop_debug_group::<A>(encoder).unwrap()
+                self.command_encoder_pop_debug_group(encoder).unwrap()
            }
            trace::Command::InsertDebugMarker(marker) => self
-                .command_encoder_insert_debug_marker::<A>(encoder, &marker)
+                .command_encoder_insert_debug_marker(encoder, &marker)
                .unwrap(),
            trace::Command::RunComputePass {
                base,
                timestamp_writes,
            } => {
-                self.compute_pass_end_with_unresolved_commands::<A>(
+                self.compute_pass_end_with_unresolved_commands(
                    encoder,
                    base,
                    timestamp_writes.as_ref(),
@@ -109,7 +109,7 @@ impl GlobalPlay for wgc::global::Global {
                timestamp_writes,
                occlusion_query_set_id,
            } => {
-                self.render_pass_end_with_unresolved_commands::<A>(
+                self.render_pass_end_with_unresolved_commands(
                    encoder,
                    base,
                    &target_colors,
@@ -150,7 +150,7 @@ impl GlobalPlay for wgc::global::Global {
                    log::error!("a trace of command_encoder_build_acceleration_structures_unsafe_tlas containing a tlas build is not replayable! skipping tlas build");
                }

-                self.command_encoder_build_acceleration_structures_unsafe_tlas::<A>(
+                self.command_encoder_build_acceleration_structures_unsafe_tlas(
                    encoder,
                    blas_iter,
                    std::iter::empty(),
@@ -202,24 +202,25 @@ impl GlobalPlay for wgc::global::Global {
                    }
                });

-                self.command_encoder_build_acceleration_structures::<A>(
+                self.command_encoder_build_acceleration_structures(
                    encoder,
                    blas_iter,
                    tlas_iter,
                )
                .unwrap();
            }
        }
    }
-    let (cmd_buf, error) = self
-        .command_encoder_finish::<A>(encoder, &wgt::CommandBufferDescriptor { label: None });
+    let (cmd_buf, error) =
+        self.command_encoder_finish(encoder, &wgt::CommandBufferDescriptor { label: None });
    if let Some(e) = error {
        panic!("{e}");
    }
    cmd_buf
}

-    fn process<A: wgc::hal_api::HalApi>(
+    fn process(
        &self,
        device: wgc::id::DeviceId,
+        queue: wgc::id::QueueId,
        action: trace::Action,
        dir: &Path,
        comb_manager: &mut wgc::identity::IdentityManager<wgc::id::markers::CommandBuffer>,
    ) {
@@ -237,83 +238,83 @@ impl GlobalPlay for wgc::global::Global {
                panic!("Unexpected Surface action: winit feature is not enabled")
            }
            Action::CreateBuffer(id, desc) => {
-                let (_, error) = self.device_create_buffer::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_buffer(device, &desc, Some(id));
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::FreeBuffer(id) => {
-                self.buffer_destroy::<A>(id).unwrap();
+                self.buffer_destroy(id).unwrap();
            }
            Action::DestroyBuffer(id) => {
-                self.buffer_drop::<A>(id, true);
+                self.buffer_drop(id);
            }
            Action::CreateTexture(id, desc) => {
-                let (_, error) = self.device_create_texture::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_texture(device, &desc, Some(id));
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::FreeTexture(id) => {
-                self.texture_destroy::<A>(id).unwrap();
+                self.texture_destroy(id).unwrap();
            }
            Action::DestroyTexture(id) => {
-                self.texture_drop::<A>(id, true);
+                self.texture_drop(id);
            }
            Action::CreateTextureView {
                id,
                parent_id,
                desc,
            } => {
-                let (_, error) = self.texture_create_view::<A>(parent_id, &desc, Some(id));
+                let (_, error) = self.texture_create_view(parent_id, &desc, Some(id));
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::DestroyTextureView(id) => {
-                self.texture_view_drop::<A>(id, true).unwrap();
+                self.texture_view_drop(id).unwrap();
            }
            Action::CreateSampler(id, desc) => {
-                let (_, error) = self.device_create_sampler::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_sampler(device, &desc, Some(id));
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::DestroySampler(id) => {
-                self.sampler_drop::<A>(id);
+                self.sampler_drop(id);
            }
            Action::GetSurfaceTexture { id, parent_id } => {
-                self.surface_get_current_texture::<A>(parent_id, Some(id))
+                self.surface_get_current_texture(parent_id, Some(id))
                    .unwrap()
                    .texture_id
                    .unwrap();
            }
            Action::CreateBindGroupLayout(id, desc) => {
-                let (_, error) = self.device_create_bind_group_layout::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_bind_group_layout(device, &desc, Some(id));
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::DestroyBindGroupLayout(id) => {
-                self.bind_group_layout_drop::<A>(id);
+                self.bind_group_layout_drop(id);
            }
            Action::CreatePipelineLayout(id, desc) => {
-                let (_, error) = self.device_create_pipeline_layout::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_pipeline_layout(device, &desc, Some(id));
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::DestroyPipelineLayout(id) => {
-                self.pipeline_layout_drop::<A>(id);
+                self.pipeline_layout_drop(id);
            }
            Action::CreateBindGroup(id, desc) => {
-                let (_, error) = self.device_create_bind_group::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_bind_group(device, &desc, Some(id));
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::DestroyBindGroup(id) => {
-                self.bind_group_drop::<A>(id);
+                self.bind_group_drop(id);
            }
            Action::CreateShaderModule { id, desc, data } => {
                log::debug!("Creating shader from {}", data);
@@ -326,14 +327,13 @@ impl GlobalPlay for wgc::global::Global {
                } else {
                    panic!("Unknown shader {}", data);
                };
-                let (_, error) =
-                    self.device_create_shader_module::<A>(device, &desc, source, Some(id));
+                let (_, error) = self.device_create_shader_module(device, &desc, source, Some(id));
                if let Some(e) = error {
                    println!("shader compilation error:\n---{code}\n---\n{e}");
                }
            }
            Action::DestroyShaderModule(id) => {
-                self.shader_module_drop::<A>(id);
+                self.shader_module_drop(id);
            }
            Action::CreateComputePipeline {
                id,
@@ -344,17 +344,17 @@ impl GlobalPlay for wgc::global::Global {
                    implicit_context
                        .as_ref()
                        .map(|ic| wgc::device::ImplicitPipelineIds {
-                            root_id: Some(ic.root_id),
-                            group_ids: wgc::id::as_option_slice(&ic.group_ids),
+                            root_id: ic.root_id,
+                            group_ids: &ic.group_ids,
                        });
                let (_, error) =
-                    self.device_create_compute_pipeline::<A>(device, &desc, Some(id), implicit_ids);
+                    self.device_create_compute_pipeline(device, &desc, Some(id), implicit_ids);
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::DestroyComputePipeline(id) => {
-                self.compute_pipeline_drop::<A>(id);
+                self.compute_pipeline_drop(id);
            }
            Action::CreateRenderPipeline {
                id,
@@ -365,28 +365,28 @@ impl GlobalPlay for wgc::global::Global {
                    implicit_context
                        .as_ref()
                        .map(|ic| wgc::device::ImplicitPipelineIds {
-                            root_id: Some(ic.root_id),
-                            group_ids: wgc::id::as_option_slice(&ic.group_ids),
+                            root_id: ic.root_id,
+                            group_ids: &ic.group_ids,
                        });
                let (_, error) =
-                    self.device_create_render_pipeline::<A>(device, &desc, Some(id), implicit_ids);
+                    self.device_create_render_pipeline(device, &desc, Some(id), implicit_ids);
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::DestroyRenderPipeline(id) => {
-                self.render_pipeline_drop::<A>(id);
+                self.render_pipeline_drop(id);
            }
            Action::CreatePipelineCache { id, desc } => {
-                let _ = unsafe { self.device_create_pipeline_cache::<A>(device, &desc, Some(id)) };
+                let _ = unsafe { self.device_create_pipeline_cache(device, &desc, Some(id)) };
            }
            Action::DestroyPipelineCache(id) => {
-                self.pipeline_cache_drop::<A>(id);
+                self.pipeline_cache_drop(id);
            }
            Action::CreateRenderBundle { id, desc, base } => {
                let bundle =
                    wgc::command::RenderBundleEncoder::new(&desc, device, Some(base)).unwrap();
-                let (_, error) = self.render_bundle_encoder_finish::<A>(
+                let (_, error) = self.render_bundle_encoder_finish(
                    bundle,
                    &wgt::RenderBundleDescriptor { label: desc.label },
                    Some(id),
@@ -396,16 +396,16 @@ impl GlobalPlay for wgc::global::Global {
                }
            }
            Action::DestroyRenderBundle(id) => {
-                self.render_bundle_drop::<A>(id);
+                self.render_bundle_drop(id);
            }
            Action::CreateQuerySet { id, desc } => {
-                let (_, error) = self.device_create_query_set::<A>(device, &desc, Some(id));
+                let (_, error) = self.device_create_query_set(device, &desc, Some(id));
                if let Some(e) = error {
                    panic!("{e}");
                }
            }
            Action::DestroyQuerySet(id) => {
-                self.query_set_drop::<A>(id);
+                self.query_set_drop(id);
            }
            Action::WriteBuffer {
                id,
@@ -416,11 +416,10 @@ impl GlobalPlay for wgc::global::Global {
                let bin = std::fs::read(dir.join(data)).unwrap();
                let size = (range.end - range.start) as usize;
                if queued {
-                    self.queue_write_buffer::<A>(device.into_queue_id(), id, range.start, &bin)
+                    self.queue_write_buffer(queue, id, range.start, &bin)
                        .unwrap();
                } else {
-                    self.device_wait_for_buffer::<A>(device, id).unwrap();
-                    self.device_set_buffer_sub_data::<A>(device, id, range.start, &bin[..size])
+                    self.device_set_buffer_data(id, range.start, &bin[..size])
                        .unwrap();
                }
            }
@@ -431,14 +430,14 @@ impl GlobalPlay for wgc::global::Global {
                size,
            } => {
                let bin = std::fs::read(dir.join(data)).unwrap();
-                self.queue_write_texture::<A>(device.into_queue_id(), &to, &bin, &layout, &size)
+                self.queue_write_texture(queue, &to, &bin, &layout, &size)
                    .unwrap();
            }
            Action::Submit(_index, ref commands) if commands.is_empty() => {
-                self.queue_submit::<A>(device.into_queue_id(), &[]).unwrap();
+                self.queue_submit(queue, &[]).unwrap();
            }
            Action::Submit(_index, commands) => {
-                let (encoder, error) = self.device_create_command_encoder::<A>(
+                let (encoder, error) = self.device_create_command_encoder(
                    device,
                    &wgt::CommandEncoderDescriptor { label: None },
                    Some(
@@ -450,27 +449,26 @@ impl GlobalPlay for wgc::global::Global {
                if let Some(e) = error {
                    panic!("{e}");
                }
-                let cmdbuf = self.encode_commands::<A>(encoder, commands);
-                self.queue_submit::<A>(device.into_queue_id(), &[cmdbuf])
-                    .unwrap();
+                let cmdbuf = self.encode_commands(encoder, commands);
+                self.queue_submit(queue, &[cmdbuf]).unwrap();
            }
            Action::CreateBlas { id, desc, sizes } => {
-                self.device_create_blas::<A>(device, &desc, sizes, Some(id));
+                self.device_create_blas(device, &desc, sizes, Some(id));
            }
            Action::FreeBlas(id) => {
-                self.blas_destroy::<A>(id).unwrap();
+                self.blas_destroy(id).unwrap();
            }
            Action::DestroyBlas(id) => {
-                self.blas_drop::<A>(id, true);
+                self.blas_drop(id);
            }
            Action::CreateTlas { id, desc } => {
-                self.device_create_tlas::<A>(device, &desc, Some(id));
+                self.device_create_tlas(device, &desc, Some(id));
            }
            Action::FreeTlas(id) => {
-                self.tlas_destroy::<A>(id).unwrap();
+                self.tlas_destroy(id).unwrap();
            }
            Action::DestroyTlas(id) => {
-                self.tlas_drop::<A>(id, true);
+                self.tlas_drop(id);
            }
        }
    }
diff --git a/player/tests/test.rs b/player/tests/test.rs
index a5aba15bd6..ee8e2ecc0d 100644
--- a/player/tests/test.rs
+++ b/player/tests/test.rs
@@ -1,13 +1,13 @@
-/*! Tester for WebGPU
- * It enumerates the available backends on the system,
- * and run the tests through them.
- *
- * Test requirements:
- *   - all IDs have the backend `Empty`
- *   - all expected buffers have `MAP_READ` usage
- *   - last action is `Submit`
- *   - no swapchain use
-!*/
+//! Tester for WebGPU
+//! It enumerates the available backends on the system,
+//! and runs the tests through them.
+//!
+//! Test requirements:
+//!   - all IDs have the backend `Empty`
+//!   - all expected buffers have `MAP_READ` usage
+//!   - last action is `Submit`
- no swapchain use + #![cfg(not(target_arch = "wasm32"))] use player::GlobalPlay; @@ -106,7 +106,8 @@ impl Test<'_> { ) { let backend = adapter.backend(); let device_id = wgc::id::Id::zip(test_num, 0, backend); - let (_, _, error) = wgc::gfx_select!(adapter => global.adapter_request_device( + let queue_id = wgc::id::Id::zip(test_num, 0, backend); + let (_, _, error) = global.adapter_request_device( adapter, &wgt::DeviceDescriptor { label: None, @@ -116,8 +117,8 @@ impl Test<'_> { }, None, Some(device_id), - Some(device_id.into_queue_id()) - )); + Some(queue_id), + ); if let Some(e) = error { panic!("{:?}", e); } @@ -125,35 +126,47 @@ impl Test<'_> { let mut command_buffer_id_manager = wgc::identity::IdentityManager::new(); println!("\t\t\tRunning..."); for action in self.actions { - wgc::gfx_select!(device_id => global.process(device_id, action, dir, &mut command_buffer_id_manager)); + global.process( + device_id, + queue_id, + action, + dir, + &mut command_buffer_id_manager, + ); } println!("\t\t\tMapping..."); for expect in &self.expectations { let buffer = wgc::id::Id::zip(expect.buffer.index, expect.buffer.epoch, backend); - wgc::gfx_select!(device_id => global.buffer_map_async( - buffer, - expect.offset, - Some(expect.data.len() as u64), - wgc::resource::BufferMapOperation { - host: wgc::device::HostMap::Read, - callback: Some(wgc::resource::BufferMapCallback::from_rust( - Box::new(map_callback) - )), - } - )) - .unwrap(); + global + .buffer_map_async( + buffer, + expect.offset, + Some(expect.data.len() as u64), + wgc::resource::BufferMapOperation { + host: wgc::device::HostMap::Read, + callback: Some(wgc::resource::BufferMapCallback::from_rust(Box::new( + map_callback, + ))), + }, + ) + .unwrap(); } println!("\t\t\tWaiting..."); - wgc::gfx_select!(device_id => global.device_poll(device_id, wgt::Maintain::wait())) + global + .device_poll(device_id, wgt::Maintain::wait()) .unwrap(); for expect in self.expectations { println!("\t\t\tChecking {}", expect.name); let buffer = wgc::id::Id::zip(expect.buffer.index, expect.buffer.epoch, backend); - let (ptr, size) = - wgc::gfx_select!(device_id => global.buffer_get_mapped_range(buffer, expect.offset, Some(expect.data.len() as wgt::BufferAddress))) - .unwrap(); + let (ptr, size) = global + .buffer_get_mapped_range( + buffer, + expect.offset, + Some(expect.data.len() as wgt::BufferAddress), + ) + .unwrap(); let contents = unsafe { slice::from_raw_parts(ptr.as_ptr(), size as usize) }; let expected_data = match expect.data { ExpectedData::Raw(vec) => vec, @@ -171,7 +184,6 @@ impl Test<'_> { .collect::>(), }; - #[allow(unknown_lints, clippy::if_then_panic)] if &expected_data[..] 
!= contents { panic!( "Test expectation is not met!\nBuffer content was:\n{:?}\nbut expected:\n{:?}", @@ -179,8 +191,6 @@ impl Test<'_> { ); } } - - wgc::gfx_select!(device_id => global.clear_backend(())); } } @@ -203,40 +213,39 @@ impl Corpus { let dir = path.parent().unwrap(); let corpus: Corpus = ron::de::from_reader(File::open(&path).unwrap()).unwrap(); - let global = wgc::global::Global::new( - "test", - wgt::InstanceDescriptor { - backends: corpus.backends, - flags: wgt::InstanceFlags::debugging(), - dx12_shader_compiler: wgt::Dx12Compiler::Fxc, - gles_minor_version: wgt::Gles3MinorVersion::default(), - }, - ); for &backend in BACKENDS { if !corpus.backends.contains(backend.into()) { continue; } - let adapter = match global.request_adapter( - &wgc::instance::RequestAdapterOptions { - power_preference: wgt::PowerPreference::None, - force_fallback_adapter: false, - compatible_surface: None, - }, - wgc::instance::AdapterInputs::IdSet(&[wgc::id::Id::zip(0, 0, backend)]), - ) { - Ok(adapter) => adapter, - Err(_) => continue, - }; - - println!("\tBackend {:?}", backend); - let supported_features = - wgc::gfx_select!(adapter => global.adapter_features(adapter)).unwrap(); - let downlevel_caps = - wgc::gfx_select!(adapter => global.adapter_downlevel_capabilities(adapter)) - .unwrap(); let mut test_num = 0; for test_path in &corpus.tests { println!("\t\tTest '{:?}'", test_path); + + let global = wgc::global::Global::new( + "test", + wgt::InstanceDescriptor { + backends: backend.into(), + flags: wgt::InstanceFlags::debugging(), + dx12_shader_compiler: wgt::Dx12Compiler::Fxc, + gles_minor_version: wgt::Gles3MinorVersion::default(), + }, + ); + let adapter = match global.request_adapter( + &wgc::instance::RequestAdapterOptions { + power_preference: wgt::PowerPreference::None, + force_fallback_adapter: false, + compatible_surface: None, + }, + wgc::instance::AdapterInputs::IdSet(&[wgc::id::Id::zip(0, 0, backend)]), + ) { + Ok(adapter) => adapter, + Err(_) => continue, + }; + + println!("\tBackend {:?}", backend); + let supported_features = global.adapter_features(adapter).unwrap(); + let downlevel_caps = global.adapter_downlevel_capabilities(adapter).unwrap(); + let test = Test::load(dir.join(test_path), adapter.backend()); if !supported_features.contains(test.features) { println!( diff --git a/rust-toolchain.toml b/rust-toolchain.toml index aa10fa14eb..45bb8d6d51 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "1.76" # Needed for deno & cts_runner. Firefox's MSRV is 1.74 +channel = "1.76" components = ["rustfmt", "clippy"] targets = ["wasm32-unknown-unknown"] diff --git a/tests/src/expectations.rs b/tests/src/expectations.rs index eb5523905d..a3c90eac0b 100644 --- a/tests/src/expectations.rs +++ b/tests/src/expectations.rs @@ -53,7 +53,7 @@ pub struct FailureCase { /// [`AdapterInfo::device`]: wgt::AdapterInfo::device pub vendor: Option, - /// Name of adaper expected to fail, or `None` for any adapter name. + /// Name of adapter expected to fail, or `None` for any adapter name. /// /// If this is `Some(s)` and `s` is a substring of /// [`AdapterInfo::name`], then this `FailureCase` applies. 
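///
/// For example (illustrative; this exact call appears in the clear_texture
/// tests later in this patch), a failure expected on any ANGLE adapter under
/// the GL backend can be written as
/// `FailureCase::backend_adapter(wgpu::Backends::GL, "ANGLE")`.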
If diff --git a/tests/src/image.rs b/tests/src/image.rs index 19bbc1a913..e72d3ee442 100644 --- a/tests/src/image.rs +++ b/tests/src/image.rs @@ -368,7 +368,7 @@ fn copy_via_compute( label: Some("pipeline read"), layout: Some(&pll), module: &sm, - entry_point: "copy_texture_to_buffer", + entry_point: Some("copy_texture_to_buffer"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/src/init.rs b/tests/src/init.rs index 3a11b3abe3..140bb202fc 100644 --- a/tests/src/init.rs +++ b/tests/src/init.rs @@ -11,7 +11,7 @@ pub fn init_logger() { } /// Initialize a wgpu instance with the options from the environment. -pub fn initialize_instance() -> Instance { +pub fn initialize_instance(force_fxc: bool) -> Instance { // We ignore `WGPU_BACKEND` for now, merely using test filtering to only run a single backend's tests. // // We can potentially work support back into the test runner in the future, but as the adapters are matched up @@ -27,7 +27,13 @@ pub fn initialize_instance() -> Instance { } else { Backends::all() }; - let dx12_shader_compiler = wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default(); + // Some tests need to be able to force demote to FXC, to specifically test workarounds for FXC + // behavior. + let dx12_shader_compiler = if force_fxc { + wgpu::Dx12Compiler::Fxc + } else { + wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default() + }; let gles_minor_version = wgpu::util::gles_minor_version_from_env().unwrap_or_default(); Instance::new(wgpu::InstanceDescriptor { backends, @@ -38,8 +44,11 @@ pub fn initialize_instance() -> Instance { } /// Initialize a wgpu adapter, taking the `n`th adapter from the instance. -pub async fn initialize_adapter(adapter_index: usize) -> (Instance, Adapter, Option) { - let instance = initialize_instance(); +pub async fn initialize_adapter( + adapter_index: usize, + force_fxc: bool, +) -> (Instance, Adapter, Option) { + let instance = initialize_instance(force_fxc); #[allow(unused_variables)] let surface: Option; let surface_guard: Option; diff --git a/tests/src/params.rs b/tests/src/params.rs index 2f54e65bbb..e5d50a4859 100644 --- a/tests/src/params.rs +++ b/tests/src/params.rs @@ -19,6 +19,11 @@ pub struct TestParameters { pub required_downlevel_caps: DownlevelCapabilities, pub required_limits: Limits, + /// On Dx12, specifically test against the Fxc compiler. + /// + /// For testing workarounds to Fxc bugs. + pub force_fxc: bool, + /// Conditions under which this test should be skipped. pub skips: Vec, @@ -32,6 +37,7 @@ impl Default for TestParameters { required_features: Features::empty(), required_downlevel_caps: LOWEST_DOWNLEVEL_PROPERTIES, required_limits: Limits::downlevel_webgl2_defaults(), + force_fxc: false, skips: Vec::new(), failures: Vec::new(), } @@ -63,6 +69,11 @@ impl TestParameters { self } + pub fn force_fxc(mut self, force_fxc: bool) -> Self { + self.force_fxc = force_fxc; + self + } + /// Mark the test as always failing, but not to be skipped. 
pub fn expect_fail(mut self, when: FailureCase) -> Self { self.failures.push(when); diff --git a/tests/src/run.rs b/tests/src/run.rs index 82ddb93399..303c4c24af 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -1,4 +1,4 @@ -use std::{panic::AssertUnwindSafe, sync::Arc}; +use std::panic::AssertUnwindSafe; use futures_lite::FutureExt; use wgpu::{Adapter, Device, Instance, Queue}; @@ -18,7 +18,7 @@ pub struct TestingContext { pub adapter: Adapter, pub adapter_info: wgpu::AdapterInfo, pub adapter_downlevel_capabilities: wgpu::DownlevelCapabilities, - pub device: Arc, + pub device: Device, pub device_features: wgpu::Features, pub device_limits: wgpu::Limits, pub queue: Queue, @@ -42,7 +42,8 @@ pub async fn execute_test( let _test_guard = isolation::OneTestPerProcessGuard::new(); - let (instance, adapter, _surface_guard) = initialize_adapter(adapter_index).await; + let (instance, adapter, _surface_guard) = + initialize_adapter(adapter_index, config.params.force_fxc).await; let adapter_info = adapter.get_info(); let adapter_downlevel_capabilities = adapter.get_downlevel_capabilities(); @@ -72,7 +73,7 @@ pub async fn execute_test( adapter, adapter_info, adapter_downlevel_capabilities, - device: Arc::new(device), + device, device_features: config.params.required_features, device_limits: config.params.required_limits.clone(), queue, diff --git a/tests/tests/bgra8unorm_storage.rs b/tests/tests/bgra8unorm_storage.rs index 7bc117f097..0859473b2f 100644 --- a/tests/tests/bgra8unorm_storage.rs +++ b/tests/tests/bgra8unorm_storage.rs @@ -95,7 +95,7 @@ static BGRA8_UNORM_STORAGE: GpuTestConfiguration = GpuTestConfiguration::new() let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { label: None, layout: Some(&pl), - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), module: &module, cache: None, diff --git a/tests/tests/bind_group_layout_dedup.rs b/tests/tests/bind_group_layout_dedup.rs index e4262ea215..591f4f9054 100644 --- a/tests/tests/bind_group_layout_dedup.rs +++ b/tests/tests/bind_group_layout_dedup.rs @@ -89,7 +89,7 @@ async fn bgl_dedupe(ctx: TestingContext) { label: None, layout: Some(&pipeline_layout), module: &module, - entry_point: "no_resources", + entry_point: Some("no_resources"), compilation_options: Default::default(), cache: None, }; @@ -219,7 +219,7 @@ fn bgl_dedupe_with_dropped_user_handle(ctx: TestingContext) { label: None, layout: Some(&pipeline_layout), module: &module, - entry_point: "no_resources", + entry_point: Some("no_resources"), compilation_options: Default::default(), cache: None, }); @@ -241,11 +241,11 @@ fn bgl_dedupe_with_dropped_user_handle(ctx: TestingContext) { } #[gpu_test] -static BIND_GROUP_LAYOUT_DEDUPLICATION_DERIVED: GpuTestConfiguration = GpuTestConfiguration::new() +static GET_DERIVED_BGL: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().test_features_limits()) - .run_sync(bgl_dedupe_derived); + .run_sync(get_derived_bgl); -fn bgl_dedupe_derived(ctx: TestingContext) { +fn get_derived_bgl(ctx: TestingContext) { let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: 4, @@ -266,7 +266,7 @@ fn bgl_dedupe_derived(ctx: TestingContext) { label: None, layout: None, module: &module, - entry_point: "resources", + entry_point: Some("resources"), compilation_options: Default::default(), cache: None, }); @@ -314,12 +314,12 @@ fn bgl_dedupe_derived(ctx: TestingContext) { } #[gpu_test] -static 
SEPARATE_PROGRAMS_HAVE_INCOMPATIBLE_DERIVED_BGLS: GpuTestConfiguration = +static SEPARATE_PIPELINES_HAVE_INCOMPATIBLE_DERIVED_BGLS: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().test_features_limits()) - .run_sync(separate_programs_have_incompatible_derived_bgls); + .run_sync(separate_pipelines_have_incompatible_derived_bgls); -fn separate_programs_have_incompatible_derived_bgls(ctx: TestingContext) { +fn separate_pipelines_have_incompatible_derived_bgls(ctx: TestingContext) { let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: 4, @@ -338,7 +338,7 @@ fn separate_programs_have_incompatible_derived_bgls(ctx: TestingContext) { label: None, layout: None, module: &module, - entry_point: "resources", + entry_point: Some("resources"), compilation_options: Default::default(), cache: None, }; @@ -405,7 +405,7 @@ fn derived_bgls_incompatible_with_regular_bgls(ctx: TestingContext) { label: None, layout: None, module: &module, - entry_point: "resources", + entry_point: Some("resources"), compilation_options: Default::default(), cache: None, }); @@ -448,3 +448,91 @@ fn derived_bgls_incompatible_with_regular_bgls(ctx: TestingContext) { None, ) } + +#[gpu_test] +static BIND_GROUP_LAYOUT_DEDUPLICATION_DERIVED: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().test_features_limits()) + .run_sync(bgl_dedupe_derived); + +fn bgl_dedupe_derived(ctx: TestingContext) { + let src = " + @group(0) @binding(0) var<uniform> u1: vec4f; + @group(1) @binding(0) var<uniform> u2: vec4f; + + @compute @workgroup_size(1, 1, 1) + fn main() { + // Just need a static use. + let _u1 = u1; + let _u2 = u2; + } + "; + let module = ctx + .device + .create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(src.into()), + }); + + let pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: None, + module: &module, + entry_point: None, + compilation_options: Default::default(), + cache: None, + }); + + let bind_group_layout_0 = pipeline.get_bind_group_layout(0); + let bind_group_layout_1 = pipeline.get_bind_group_layout(1); + + let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 16, + usage: wgpu::BufferUsages::UNIFORM, + mapped_at_creation: false, + }); + + let bind_group_0 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout_1, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &buffer, + offset: 0, + size: None, + }), + }], + }); + let bind_group_1 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout_0, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &buffer, + offset: 0, + size: None, + }), + }], + }); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind_group_0, &[]); + pass.set_bind_group(1, &bind_group_1, &[]); + pass.dispatch_workgroups(1, 1, 1); + + drop(pass); + + ctx.queue.submit(Some(encoder.finish())); +} diff --git a/tests/tests/bind_groups.rs b/tests/tests/bind_groups.rs new file mode 100644 index
0000000000..fab1c065f0 --- /dev/null +++ b/tests/tests/bind_groups.rs @@ -0,0 +1,116 @@ +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; + +/// Test `descriptor` against a bind group layout that requires non-filtering sampler. +fn try_sampler_nonfiltering_layout( + ctx: TestingContext, + descriptor: &wgpu::SamplerDescriptor, + good: bool, +) { + let label = descriptor.label; + let bind_group_layout = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering), + count: None, + }], + }); + + let sampler = ctx.device.create_sampler(descriptor); + + let create_bind_group = || { + ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label, + layout: &bind_group_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Sampler(&sampler), + }], + }); + }; + + if good { + wgpu_test::valid(&ctx.device, create_bind_group); + } else { + wgpu_test::fail( + &ctx.device, + create_bind_group, + Some("but given a sampler with filtering"), + ); + } +} + +#[gpu_test] +static BIND_GROUP_NONFILTERING_LAYOUT_NONFILTERING_SAMPLER: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_sync(|ctx| { + try_sampler_nonfiltering_layout( + ctx, + &wgpu::SamplerDescriptor { + label: Some("bind_group_non_filtering_layout_nonfiltering_sampler"), + min_filter: wgpu::FilterMode::Nearest, + mag_filter: wgpu::FilterMode::Nearest, + mipmap_filter: wgpu::FilterMode::Nearest, + ..wgpu::SamplerDescriptor::default() + }, + true, + ); + }); + +#[gpu_test] +static BIND_GROUP_NONFILTERING_LAYOUT_MIN_SAMPLER: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_sync(|ctx| { + try_sampler_nonfiltering_layout( + ctx, + &wgpu::SamplerDescriptor { + label: Some("bind_group_non_filtering_layout_min_sampler"), + min_filter: wgpu::FilterMode::Linear, + mag_filter: wgpu::FilterMode::Nearest, + mipmap_filter: wgpu::FilterMode::Nearest, + ..wgpu::SamplerDescriptor::default() + }, + false, + ); + }); + +#[gpu_test] +static BIND_GROUP_NONFILTERING_LAYOUT_MAG_SAMPLER: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_sync(|ctx| { + try_sampler_nonfiltering_layout( + ctx, + &wgpu::SamplerDescriptor { + label: Some("bind_group_non_filtering_layout_mag_sampler"), + min_filter: wgpu::FilterMode::Nearest, + mag_filter: wgpu::FilterMode::Linear, + mipmap_filter: wgpu::FilterMode::Nearest, + ..wgpu::SamplerDescriptor::default() + }, + false, + ); + }); + +#[gpu_test] +static BIND_GROUP_NONFILTERING_LAYOUT_MIPMAP_SAMPLER: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_sync(|ctx| { + try_sampler_nonfiltering_layout( + ctx, + &wgpu::SamplerDescriptor { + label: Some("bind_group_non_filtering_layout_mipmap_sampler"), + min_filter: wgpu::FilterMode::Nearest, + mag_filter: wgpu::FilterMode::Nearest, + mipmap_filter: wgpu::FilterMode::Linear, + ..wgpu::SamplerDescriptor::default() + }, + false, + ); + }); diff --git a/tests/tests/buffer.rs b/tests/tests/buffer.rs index e2316daadc..77bc9e0640 100644 --- a/tests/tests/buffer.rs +++ b/tests/tests/buffer.rs @@ -225,7 +225,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_LAYOUT: GpuTestConfiguration = GpuTestConfigu label: None, layout: 
Some(&pipeline_layout), module: &shader_module, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); @@ -297,7 +297,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_DISPATCH: GpuTestConfiguration = GpuTestConfi label: None, layout: Some(&pipeline_layout), module: &shader_module, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/clear_texture.rs b/tests/tests/clear_texture.rs index 175c642b93..f62e2be219 100644 --- a/tests/tests/clear_texture.rs +++ b/tests/tests/clear_texture.rs @@ -26,7 +26,7 @@ static TEXTURE_FORMATS_UNCOMPRESSED_GLES_COMPAT: &[wgpu::TextureFormat] = &[ wgpu::TextureFormat::Bgra8UnormSrgb, wgpu::TextureFormat::Rgb10a2Uint, wgpu::TextureFormat::Rgb10a2Unorm, - wgpu::TextureFormat::Rg11b10Float, + wgpu::TextureFormat::Rg11b10UFloat, wgpu::TextureFormat::Rg32Uint, wgpu::TextureFormat::Rg32Sint, wgpu::TextureFormat::Rg32Float, @@ -273,7 +273,7 @@ async fn clear_texture_tests(ctx: TestingContext, formats: &'static [wgpu::Textu let is_compressed_or_depth_stencil_format = format.is_compressed() || format.is_depth_stencil_format(); let supports_1d = !is_compressed_or_depth_stencil_format; - let supports_3d = !is_compressed_or_depth_stencil_format; + let supports_3d = format.is_bcn() || !is_compressed_or_depth_stencil_format; // 1D texture if supports_1d { @@ -385,7 +385,15 @@ static CLEAR_TEXTURE_DEPTH32_STENCIL8: GpuTestConfiguration = GpuTestConfigurati static CLEAR_TEXTURE_COMPRESSED_BCN: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() - .features(wgpu::Features::CLEAR_TEXTURE | wgpu::Features::TEXTURE_COMPRESSION_BC) + .features( + wgpu::Features::CLEAR_TEXTURE + | wgpu::Features::TEXTURE_COMPRESSION_BC + | wgpu::Features::TEXTURE_COMPRESSION_BC_SLICED_3D, + ) + .limits(wgpu::Limits { + max_texture_dimension_3d: 1024, + ..wgpu::Limits::downlevel_defaults() + }) // https://bugs.chromium.org/p/angleproject/issues/detail?id=7056 .expect_fail(FailureCase::backend_adapter(wgpu::Backends::GL, "ANGLE")) // compressed texture copy to buffer not yet implemented diff --git a/tests/tests/compute_pass_ownership.rs b/tests/tests/compute_pass_ownership.rs index 5c0971c6d9..80f81f4d81 100644 --- a/tests/tests/compute_pass_ownership.rs +++ b/tests/tests/compute_pass_ownership.rs @@ -317,7 +317,7 @@ fn resource_setup(ctx: &TestingContext) -> ResourceSetup { label: Some("pipeline"), layout: Some(&pipeline_layout), module: &sm, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/create_surface_error.rs b/tests/tests/create_surface_error.rs index 87aeb15726..e3b48cb757 100644 --- a/tests/tests/create_surface_error.rs +++ b/tests/tests/create_surface_error.rs @@ -6,7 +6,7 @@ #[wasm_bindgen_test::wasm_bindgen_test] fn canvas_get_context_returned_null() { // Not using the normal testing infrastructure because that goes straight to creating the canvas for us. 
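// (The `false` argument added below is the new `force_fxc` parameter on
// `initialize_instance`, introduced in tests/src/init.rs above; a plain
// surface-creation test has no reason to pin the Dx12 shader compiler to FXC.)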
- let instance = wgpu_test::initialize_instance(); + let instance = wgpu_test::initialize_instance(false); // Create canvas let canvas = wgpu_test::initialize_html_canvas(); diff --git a/tests/tests/device.rs b/tests/tests/device.rs index e2ed9f5b60..d629f1b8e6 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -1,6 +1,8 @@ use std::sync::atomic::AtomicBool; -use wgpu_test::{fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters}; +use wgpu_test::{ + fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext, +}; #[gpu_test] static CROSS_DEVICE_BIND_GROUP_USAGE: GpuTestConfiguration = GpuTestConfiguration::new() @@ -107,7 +109,7 @@ static REQUEST_DEVICE_ERROR_MESSAGE_NATIVE: GpuTestConfiguration = async fn request_device_error_message() { // Not using initialize_test() because that doesn't let us catch the error // nor .await anything - let (_instance, adapter, _surface_guard) = wgpu_test::initialize_adapter(0).await; + let (_instance, adapter, _surface_guard) = wgpu_test::initialize_adapter(0, false).await; let device_error = adapter .request_device( @@ -147,14 +149,6 @@ async fn request_device_error_message() { // This is a test of device behavior after device.destroy. Specifically, all operations // should trigger errors since the device is lost. -// -// On DX12 this test fails with a validation error in the very artificial actions taken -// after lose the device. The error is "ID3D12CommandAllocator::Reset: The command -// allocator cannot be reset because a command list is currently being recorded with the -// allocator." That may indicate that DX12 doesn't like opened command buffers staying -// open even after they return an error. For now, this test is skipped on DX12. -// -// The DX12 issue may be related to https://github.com/gfx-rs/wgpu/issues/3193. 
#[gpu_test] static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().features(wgpu::Features::CLEAR_TEXTURE)) @@ -541,7 +535,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne layout: None, vertex: wgpu::VertexState { module: &shader_module, - entry_point: "", + entry_point: Some(""), compilation_options: Default::default(), buffers: &[], }, @@ -565,7 +559,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne label: None, layout: None, module: &shader_module, - entry_point: "", + entry_point: None, compilation_options: Default::default(), cache: None, }); @@ -582,7 +576,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne label: None, layout: None, module: &shader_module, - entry_point: "", + entry_point: None, compilation_options: Default::default(), cache: None, }); @@ -831,7 +825,7 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf .create_render_pipeline(&wgpu::RenderPipelineDescriptor { fragment: Some(wgpu::FragmentState { module: &trivial_shaders_with_some_reversed_bindings, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgt::ColorTargetState { format: wgt::TextureFormat::Bgra8Unorm, @@ -845,7 +839,7 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf label: None, vertex: wgpu::VertexState { module: &trivial_shaders_with_some_reversed_bindings, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, @@ -916,3 +910,26 @@ static DEVICE_DESTROY_THEN_BUFFER_CLEANUP: GpuTestConfiguration = GpuTestConfigu // Poll the device, which should try to clean up its resources. ctx.instance.poll_all(true); }); + +#[gpu_test] +static DEVICE_AND_QUEUE_HAVE_DIFFERENT_IDS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_async(|ctx| async move { + let TestingContext { + adapter, + device_features, + device_limits, + device, + queue, + .. + } = ctx; + + drop(device); + + let (device2, queue2) = + wgpu_test::initialize_device(&adapter, device_features, device_limits).await; + + drop(queue); + drop(device2); + drop(queue2); // this would previously panic since we would try to use the Device ID to drop the Queue + }); diff --git a/tests/tests/encoder.rs b/tests/tests/encoder.rs index 337dffc2d0..e4755dcd74 100644 --- a/tests/tests/encoder.rs +++ b/tests/tests/encoder.rs @@ -19,8 +19,8 @@ static DROP_QUEUE_BEFORE_CREATING_COMMAND_ENCODER: GpuTestConfiguration = .run_sync(|ctx| { // Use the device after the queue is dropped. Currently this panics // but it probably shouldn't - let device = ctx.device.clone(); - drop(ctx); + let TestingContext { device, queue, .. 
} = ctx; + drop(queue); let _encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); }); diff --git a/tests/tests/mem_leaks.rs b/tests/tests/mem_leaks.rs index 3c59aec036..75de0776e8 100644 --- a/tests/tests/mem_leaks.rs +++ b/tests/tests/mem_leaks.rs @@ -13,7 +13,7 @@ async fn draw_test_with_reports( use wgpu::util::DeviceExt; let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.devices.num_allocated, 1); assert_eq!(report.queues.num_allocated, 1); @@ -22,7 +22,7 @@ async fn draw_test_with_reports( .create_shader_module(wgpu::include_wgsl!("./vertex_indices/draw.vert.wgsl")); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.shader_modules.num_allocated, 1); let bgl = ctx @@ -42,7 +42,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 0); assert_eq!(report.bind_groups.num_allocated, 0); assert_eq!(report.bind_group_layouts.num_allocated, 1); @@ -55,7 +55,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { @@ -68,7 +68,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.bind_groups.num_allocated, 1); assert_eq!(report.bind_group_layouts.num_allocated, 1); @@ -82,7 +82,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.pipeline_layouts.num_allocated, 1); assert_eq!(report.render_pipelines.num_allocated, 0); @@ -96,7 +96,7 @@ async fn draw_test_with_reports( vertex: wgpu::VertexState { buffers: &[], module: &shader, - entry_point: "vs_main_builtin", + entry_point: Some("vs_main_builtin"), compilation_options: Default::default(), }, primitive: wgpu::PrimitiveState::default(), @@ -104,7 +104,7 @@ async fn draw_test_with_reports( multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, @@ -117,7 +117,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.bind_groups.num_allocated, 1); assert_eq!(report.bind_group_layouts.num_allocated, 1); @@ -129,7 +129,7 @@ async fn draw_test_with_reports( drop(shader); let global_report = 
ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.shader_modules.num_allocated, 0); assert_eq!(report.shader_modules.num_kept_from_user, 0); assert_eq!(report.textures.num_allocated, 0); @@ -157,7 +157,7 @@ async fn draw_test_with_reports( let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default()); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.texture_views.num_allocated, 1); assert_eq!(report.textures.num_allocated, 1); @@ -165,7 +165,7 @@ async fn draw_test_with_reports( drop(texture); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.texture_views.num_allocated, 1); assert_eq!(report.texture_views.num_kept_from_user, 1); @@ -177,7 +177,7 @@ async fn draw_test_with_reports( .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.command_buffers.num_allocated, 1); assert_eq!(report.buffers.num_allocated, 1); @@ -197,7 +197,7 @@ async fn draw_test_with_reports( rpass.set_bind_group(0, &bg, &[]); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.bind_groups.num_allocated, 1); assert_eq!(report.bind_group_layouts.num_allocated, 1); @@ -220,7 +220,7 @@ async fn draw_test_with_reports( drop(buffer); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.command_buffers.num_kept_from_user, 1); assert_eq!(report.render_pipelines.num_kept_from_user, 0); assert_eq!(report.pipeline_layouts.num_kept_from_user, 0); @@ -242,7 +242,7 @@ async fn draw_test_with_reports( // TODO: fix in https://github.com/gfx-rs/wgpu/pull/5141 // let global_report = ctx.instance.generate_report().unwrap(); - // let report = global_report.hub_report(ctx.adapter_info.backend); + // let report = global_report.hub_report(); // assert_eq!(report.command_buffers.num_allocated, 0); ctx.async_poll(wgpu::Maintain::wait_for(submit_index)) @@ -250,7 +250,7 @@ async fn draw_test_with_reports( .panic_on_timeout(); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.render_pipelines.num_allocated, 0); assert_eq!(report.bind_groups.num_allocated, 0); @@ -265,7 +265,7 @@ async fn draw_test_with_reports( drop(ctx.adapter); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.queues.num_kept_from_user, 0); assert_eq!(report.textures.num_kept_from_user, 0); diff --git a/tests/tests/nv12_texture/mod.rs b/tests/tests/nv12_texture/mod.rs index 
6b5a4e0c6b..2f149d0148 100644 --- a/tests/tests/nv12_texture/mod.rs +++ b/tests/tests/nv12_texture/mod.rs @@ -23,13 +23,13 @@ static NV12_TEXTURE_CREATION_SAMPLING: GpuTestConfiguration = GpuTestConfigurati layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(target_format.into())], }), diff --git a/tests/tests/occlusion_query/mod.rs b/tests/tests/occlusion_query/mod.rs index a888320e28..a9b1f12649 100644 --- a/tests/tests/occlusion_query/mod.rs +++ b/tests/tests/occlusion_query/mod.rs @@ -36,7 +36,7 @@ static OCCLUSION_QUERY: GpuTestConfiguration = GpuTestConfiguration::new() layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, diff --git a/tests/tests/partially_bounded_arrays/mod.rs b/tests/tests/partially_bounded_arrays/mod.rs index 83f9cee382..195fd88dd4 100644 --- a/tests/tests/partially_bounded_arrays/mod.rs +++ b/tests/tests/partially_bounded_arrays/mod.rs @@ -68,7 +68,7 @@ static PARTIALLY_BOUNDED_ARRAY: GpuTestConfiguration = GpuTestConfiguration::new label: None, layout: Some(&pipeline_layout), module: &cs_module, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/pipeline.rs b/tests/tests/pipeline.rs index 99d0e8da4a..3cf8d13dfe 100644 --- a/tests/tests/pipeline.rs +++ b/tests/tests/pipeline.rs @@ -29,7 +29,7 @@ static PIPELINE_DEFAULT_LAYOUT_BAD_MODULE: GpuTestConfiguration = GpuTestConfigu label: Some("mandelbrot compute pipeline"), layout: None, module: &module, - entry_point: "doesn't exist", + entry_point: Some("doesn't exist"), compilation_options: Default::default(), cache: None, }); @@ -66,7 +66,7 @@ static NO_TARGETLESS_RENDER: GpuTestConfiguration = GpuTestConfiguration::new() module: &ctx .device .create_shader_module(TRIVIAL_VERTEX_SHADER_DESC), - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), buffers: &[], }, diff --git a/tests/tests/pipeline_cache.rs b/tests/tests/pipeline_cache.rs index 58dae4694f..67e9e68270 100644 --- a/tests/tests/pipeline_cache.rs +++ b/tests/tests/pipeline_cache.rs @@ -113,7 +113,7 @@ async fn pipeline_cache_test(ctx: TestingContext) { label: Some("pipeline"), layout: Some(&pipeline_layout), module: &sm, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: Some(&first_cache), }); @@ -136,7 +136,7 @@ async fn pipeline_cache_test(ctx: TestingContext) { label: Some("pipeline"), layout: Some(&pipeline_layout), module: &sm, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: Some(&second_cache), }); diff --git a/tests/tests/poll.rs b/tests/tests/poll.rs index 740618f23c..7e99cbcd7d 100644 --- a/tests/tests/poll.rs +++ b/tests/tests/poll.rs @@ -1,86 +1,71 @@ use std::num::NonZeroU64; use wgpu::{ - BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayout, BindGroupLayoutDescriptor, - BindGroupLayoutEntry, BindingResource, BindingType, Buffer, BufferBindingType, - BufferDescriptor, BufferUsages, CommandBuffer, CommandEncoderDescriptor, ComputePassDescriptor, - Maintain, ShaderStages, + BindGroupDescriptor, 
BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, + BindingResource, BindingType, BufferBindingType, BufferDescriptor, BufferUsages, CommandBuffer, + CommandEncoderDescriptor, ComputePassDescriptor, Maintain, ShaderStages, }; use wgpu_test::{gpu_test, GpuTestConfiguration, TestingContext}; -struct DummyWorkData { - _buffer: Buffer, - _bgl: BindGroupLayout, - _bg: BindGroup, - cmd_buf: CommandBuffer, -} - -impl DummyWorkData { - fn new(ctx: &TestingContext) -> Self { - let buffer = ctx.device.create_buffer(&BufferDescriptor { - label: None, - size: 16, - usage: BufferUsages::UNIFORM, - mapped_at_creation: false, - }); +fn generate_dummy_work(ctx: &TestingContext) -> CommandBuffer { + let buffer = ctx.device.create_buffer(&BufferDescriptor { + label: None, + size: 16, + usage: BufferUsages::UNIFORM, + mapped_at_creation: false, + }); - let bind_group_layout = ctx - .device - .create_bind_group_layout(&BindGroupLayoutDescriptor { - label: None, - entries: &[BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, - has_dynamic_offset: false, - min_binding_size: Some(NonZeroU64::new(16).unwrap()), - }, - count: None, - }], - }); - - let bind_group = ctx.device.create_bind_group(&BindGroupDescriptor { + let bind_group_layout = ctx + .device + .create_bind_group_layout(&BindGroupLayoutDescriptor { label: None, - layout: &bind_group_layout, - entries: &[BindGroupEntry { + entries: &[BindGroupLayoutEntry { binding: 0, - resource: BindingResource::Buffer(buffer.as_entire_buffer_binding()), + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: Some(NonZeroU64::new(16).unwrap()), + }, + count: None, }], }); - let mut cmd_buf = ctx - .device - .create_command_encoder(&CommandEncoderDescriptor::default()); - - let mut cpass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::default()); - cpass.set_bind_group(0, &bind_group, &[]); - drop(cpass); - - Self { - _buffer: buffer, - _bgl: bind_group_layout, - _bg: bind_group, - cmd_buf: cmd_buf.finish(), - } - } + let bind_group = ctx.device.create_bind_group(&BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[BindGroupEntry { + binding: 0, + resource: BindingResource::Buffer(buffer.as_entire_buffer_binding()), + }], + }); + + let mut cmd_buf = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor::default()); + + let mut cpass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::default()); + cpass.set_bind_group(0, &bind_group, &[]); + drop(cpass); + + cmd_buf.finish() } #[gpu_test] static WAIT: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { - let data = DummyWorkData::new(&ctx); + let cmd_buf = generate_dummy_work(&ctx); - ctx.queue.submit(Some(data.cmd_buf)); + ctx.queue.submit(Some(cmd_buf)); ctx.async_poll(Maintain::wait()).await.panic_on_timeout(); }); #[gpu_test] static DOUBLE_WAIT: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { - let data = DummyWorkData::new(&ctx); + let cmd_buf = generate_dummy_work(&ctx); - ctx.queue.submit(Some(data.cmd_buf)); + ctx.queue.submit(Some(cmd_buf)); ctx.async_poll(Maintain::wait()).await.panic_on_timeout(); ctx.async_poll(Maintain::wait()).await.panic_on_timeout(); }); @@ -88,9 +73,9 @@ static DOUBLE_WAIT: GpuTestConfiguration = #[gpu_test] static WAIT_ON_SUBMISSION: GpuTestConfiguration = 
GpuTestConfiguration::new().run_async(|ctx| async move { - let data = DummyWorkData::new(&ctx); + let cmd_buf = generate_dummy_work(&ctx); - let index = ctx.queue.submit(Some(data.cmd_buf)); + let index = ctx.queue.submit(Some(cmd_buf)); ctx.async_poll(Maintain::wait_for(index)) .await .panic_on_timeout(); @@ -99,9 +84,9 @@ static WAIT_ON_SUBMISSION: GpuTestConfiguration = #[gpu_test] static DOUBLE_WAIT_ON_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { - let data = DummyWorkData::new(&ctx); + let cmd_buf = generate_dummy_work(&ctx); - let index = ctx.queue.submit(Some(data.cmd_buf)); + let index = ctx.queue.submit(Some(cmd_buf)); ctx.async_poll(Maintain::wait_for(index.clone())) .await .panic_on_timeout(); @@ -113,11 +98,11 @@ static DOUBLE_WAIT_ON_SUBMISSION: GpuTestConfiguration = #[gpu_test] static WAIT_OUT_OF_ORDER: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { - let data1 = DummyWorkData::new(&ctx); - let data2 = DummyWorkData::new(&ctx); + let cmd_buf1 = generate_dummy_work(&ctx); + let cmd_buf2 = generate_dummy_work(&ctx); - let index1 = ctx.queue.submit(Some(data1.cmd_buf)); - let index2 = ctx.queue.submit(Some(data2.cmd_buf)); + let index1 = ctx.queue.submit(Some(cmd_buf1)); + let index2 = ctx.queue.submit(Some(cmd_buf2)); ctx.async_poll(Maintain::wait_for(index2)) .await .panic_on_timeout(); @@ -125,3 +110,37 @@ static WAIT_OUT_OF_ORDER: GpuTestConfiguration = .await .panic_on_timeout(); }); + +/// Submit a command buffer to the wrong device. A wait poll shouldn't hang. +/// +/// We can't catch panics on Wasm, since they get reported directly to the +/// console. +#[gpu_test] +static WAIT_AFTER_BAD_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(wgpu_test::TestParameters::default().skip(wgpu_test::FailureCase::webgl2())) + .run_async(wait_after_bad_submission); + +async fn wait_after_bad_submission(ctx: TestingContext) { + let (device2, queue2) = + wgpu_test::initialize_device(&ctx.adapter, ctx.device_features, ctx.device_limits.clone()) + .await; + + let command_buffer1 = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor::default()) + .finish(); + + // This should panic, since the command buffer belongs to the wrong + // device, and queue submission errors seem to be fatal errors? + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + queue2.submit([command_buffer1]); + })); + assert!(result.is_err()); + + // This should not hang. + // + // Specifically, the failed submission should not cause a new fence value to + // be allocated that will not be signalled until further work is + // successfully submitted, causing a greater fence value to be signalled. 
+ device2.poll(wgpu::Maintain::Wait); +} diff --git a/tests/tests/push_constants.rs b/tests/tests/push_constants.rs index a18207bef6..905578d533 100644 --- a/tests/tests/push_constants.rs +++ b/tests/tests/push_constants.rs @@ -102,7 +102,7 @@ async fn partial_update_test(ctx: TestingContext) { label: Some("pipeline"), layout: Some(&pipeline_layout), module: &sm, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/regression/issue_3349.rs b/tests/tests/regression/issue_3349.rs index 35d35e5bdf..21929bd9b7 100644 --- a/tests/tests/regression/issue_3349.rs +++ b/tests/tests/regression/issue_3349.rs @@ -101,13 +101,13 @@ async fn multi_stage_data_binding_test(ctx: TestingContext) { layout: Some(&pll), vertex: wgpu::VertexState { module: &vs_sm, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &fs_sm, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, diff --git a/tests/tests/regression/issue_3457.rs b/tests/tests/regression/issue_3457.rs index f0f7e64636..386b5c34bb 100644 --- a/tests/tests/regression/issue_3457.rs +++ b/tests/tests/regression/issue_3457.rs @@ -51,7 +51,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = layout: Some(&pipeline_layout), vertex: VertexState { module: &module, - entry_point: "double_buffer_vert", + entry_point: Some("double_buffer_vert"), compilation_options: Default::default(), buffers: &[ VertexBufferLayout { @@ -71,7 +71,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = multisample: MultisampleState::default(), fragment: Some(FragmentState { module: &module, - entry_point: "double_buffer_frag", + entry_point: Some("double_buffer_frag"), compilation_options: Default::default(), targets: &[Some(ColorTargetState { format: TextureFormat::Rgba8Unorm, @@ -90,7 +90,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = layout: Some(&pipeline_layout), vertex: VertexState { module: &module, - entry_point: "single_buffer_vert", + entry_point: Some("single_buffer_vert"), compilation_options: Default::default(), buffers: &[VertexBufferLayout { array_stride: 16, @@ -103,7 +103,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = multisample: MultisampleState::default(), fragment: Some(FragmentState { module: &module, - entry_point: "single_buffer_frag", + entry_point: Some("single_buffer_frag"), compilation_options: Default::default(), targets: &[Some(ColorTargetState { format: TextureFormat::Rgba8Unorm, diff --git a/tests/tests/regression/issue_4485.rs b/tests/tests/regression/issue_4485.rs new file mode 100644 index 0000000000..4944afe49f --- /dev/null +++ b/tests/tests/regression/issue_4485.rs @@ -0,0 +1,106 @@ +use wgpu_test::{gpu_test, image, GpuTestConfiguration, TestParameters, TestingContext}; + +/// FXC doesn't accept `continue` inside a switch. Instead we store a flag for whether +/// the loop should continue that is checked after the switch. +/// +/// See . +/// +/// The shader will fail to compile on Dx12 with FXC without this fix. +/// +/// This also tests that shaders generated with this fix execute correctly. 
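+///
+/// A sketch of the flag-based rewrite as it would appear in the generated code
+/// (illustrative pseudocode; the exact form emitted by the backend may differ):
+///
+/// ```text
+/// loop {
+///     bool should_continue = false;
+///     switch (e) {
+///         default: {
+///             should_continue = true; // in place of `continue;`
+///             break;
+///         }
+///     }
+///     if (should_continue) { continue; }
+/// }
+/// ```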
+#[gpu_test] +static CONTINUE_SWITCH: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().force_fxc(true)) + .run_async(|ctx| async move { test_impl(&ctx).await }); + +async fn test_impl(ctx: &TestingContext) { + const TEXTURE_HEIGHT: u32 = 2; + const TEXTURE_WIDTH: u32 = 2; + const BUFFER_SIZE: usize = (TEXTURE_WIDTH * TEXTURE_HEIGHT * 4) as usize; + + let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("Offscreen texture"), + size: wgpu::Extent3d { + width: TEXTURE_WIDTH, + height: TEXTURE_HEIGHT, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }); + let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default()); + + let shader = ctx + .device + .create_shader_module(wgpu::include_wgsl!("issue_4485.wgsl")); + + let pipeline = ctx + .device + .create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Pipeline"), + layout: None, + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + compilation_options: Default::default(), + buffers: &[], + }, + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + compilation_options: Default::default(), + targets: &[Some(wgpu::ColorTargetState { + format: wgpu::TextureFormat::Rgba8Unorm, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + }), + multiview: None, + cache: None, + }); + + let readback_buffer = image::ReadbackBuffers::new(&ctx.device, &texture); + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Renderpass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &texture_view, + resolve_target: None, + ops: wgpu::Operations { + // Important: this isn't the color expected below + load: wgpu::LoadOp::Clear(wgpu::Color { + r: 0.0, + g: 0.0, + b: 0.0, + a: 0.0, + }), + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + render_pass.set_pipeline(&pipeline); + render_pass.draw(0..3, 0..1); + } + readback_buffer.copy_from(&ctx.device, &mut encoder, &texture); + ctx.queue.submit(Some(encoder.finish())); + } + + let expected_data = [255; BUFFER_SIZE]; + readback_buffer + .assert_buffer_contents(ctx, &expected_data) + .await; +} diff --git a/tests/tests/regression/issue_4485.wgsl b/tests/tests/regression/issue_4485.wgsl new file mode 100644 index 0000000000..e72ed6d1ea --- /dev/null +++ b/tests/tests/regression/issue_4485.wgsl @@ -0,0 +1,108 @@ +// meant to be called with 3 vertex indices: 0, 1, 2 +// draws one large triangle over the clip space like this: +// (the asterisks represent the clip space bounds) +//-1,1 1,1 +// --------------------------------- +// | * . +// | * . +// | * . +// | * . +// | * . +// | * . +// |*************** +// | . 1,-1 +// | . +// | . +// | . +// | . +// |.
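+// (Concretely: x = vertex_index / 2 gives 0, 0, 1 and y = vertex_index & 1
+// gives 0, 1, 0, so the clip-space positions come out as (-1, 1), (-1, -3)
+// and (3, 1); the overhang beyond the clip bounds is clipped away, leaving
+// one triangle covering the whole viewport.)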
+@vertex +fn vs_main(@builtin(vertex_index) vertex_index: u32) -> @builtin(position) vec4<f32> { + let x = i32(vertex_index) / 2; + let y = i32(vertex_index) & 1; + return vec4<f32>( + f32(x) * 4.0 - 1.0, + 1.0 - f32(y) * 4.0, + 0.0, 1.0 + ); +} + + +@fragment +fn fs_main() -> @location(0) vec4<f32> { + var x = 0.0; + loop { + if x != 0.0 { break; } + x = 0.5; + // Compiled to a do-while in hlsl and glsl, + // we want to confirm that continue applies to outer loop. + switch 0 { + default { + x = 1.0; + continue; + } + } + x = 0.0; + } + // expect x == 1.0 + + var y = 0.0; + loop { + if y != 0.0 { break; } + y = 0.5; + switch 1 { + case 0 { + continue; + } + case 1 {} + } + // test that loop doesn't continue after the switch when the continue case wasn't executed + y = 1.0; + break; + } + // expect y == 1.0 + + var z = 0.0; + loop { + if z != 0.0 { break; } + switch 0 { + case 0 { + z = 0.5; + } + case 1 { + z = 0.5; + } + } + // test that loop doesn't continue after the switch that contains no continue statements + z = 1.0; + } + // expect z == 1.0 + + var w = 0.0; + loop { + if w != 0.0 { break; } + switch 0 { + case 0 { + loop { + // continue in loop->switch->loop->switch->switch should affect inner loop + switch 1 { + case 0 {} + case 1 { + switch 0 { + default { continue; } + } + } + } + w = 0.5; + } + } + case 1 { + w = 0.5; + } + } + if w == 0.0 { w = 1.0; } + } + // expect w == 1.0 + + return vec4<f32>(x, y, z, w); +} diff --git a/tests/tests/regression/issue_4514.rs b/tests/tests/regression/issue_4514.rs new file mode 100644 index 0000000000..b3609ff9ad --- /dev/null +++ b/tests/tests/regression/issue_4514.rs @@ -0,0 +1,106 @@ +use wgpu_test::{gpu_test, image, GpuTestConfiguration, TestParameters, TestingContext}; + +/// FXC and potentially some glsl consumers have a bug when handling switch statements on a constant +/// with just a default case. (not sure if the constant part is relevant) +/// See . +/// +/// This test will fail on Dx12 with FXC if this issue is not worked around. +/// +/// So far no specific buggy glsl consumers have been identified and it isn't known whether the +/// bug is avoided there.
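+///
+/// One plausible workaround (an illustration of the idea only, not necessarily
+/// the exact lowering Naga performs) wraps the lone default body in a
+/// single-iteration loop so that `break` keeps its meaning:
+///
+/// ```text
+/// // switch (expr) { default: { body } }   lowered as:
+/// do {
+///     body
+/// } while(false);
+/// ```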
+#[gpu_test] +static DEGENERATE_SWITCH: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().force_fxc(true)) + .run_async(|ctx| async move { test_impl(&ctx).await }); + +async fn test_impl(ctx: &TestingContext) { + const TEXTURE_HEIGHT: u32 = 2; + const TEXTURE_WIDTH: u32 = 2; + const BUFFER_SIZE: usize = (TEXTURE_WIDTH * TEXTURE_HEIGHT * 4) as usize; + + let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("Offscreen texture"), + size: wgpu::Extent3d { + width: TEXTURE_WIDTH, + height: TEXTURE_HEIGHT, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }); + let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default()); + + let shader = ctx + .device + .create_shader_module(wgpu::include_wgsl!("issue_4514.wgsl")); + + let pipeline = ctx + .device + .create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Pipeline"), + layout: None, + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + compilation_options: Default::default(), + buffers: &[], + }, + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + compilation_options: Default::default(), + targets: &[Some(wgpu::ColorTargetState { + format: wgpu::TextureFormat::Rgba8Unorm, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + }), + multiview: None, + cache: None, + }); + + let readback_buffer = image::ReadbackBuffers::new(&ctx.device, &texture); + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Renderpass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &texture_view, + resolve_target: None, + ops: wgpu::Operations { + // Important: this isn't the color expected below + load: wgpu::LoadOp::Clear(wgpu::Color { + r: 0.0, + g: 0.0, + b: 0.0, + a: 0.0, + }), + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + render_pass.set_pipeline(&pipeline); + render_pass.draw(0..3, 0..1); + } + readback_buffer.copy_from(&ctx.device, &mut encoder, &texture); + ctx.queue.submit(Some(encoder.finish())); + } + + let expected_data = [255; BUFFER_SIZE]; + readback_buffer + .assert_buffer_contents(ctx, &expected_data) + .await; +} diff --git a/tests/tests/regression/issue_4514.wgsl b/tests/tests/regression/issue_4514.wgsl new file mode 100644 index 0000000000..d4bd2f80c0 --- /dev/null +++ b/tests/tests/regression/issue_4514.wgsl @@ -0,0 +1,68 @@ +// meant to be called with 3 vertex indices: 0, 1, 2 +// draws one large triangle over the clip space like this: +// (the asterisks represent the clip space bounds) +//-1,1 1,1 +// --------------------------------- +// | * . +// | * . +// | * . +// | * . +// | * . +// | * . +// |*************** +// | . 1,-1 +// | . +// | . +// | . +// | . +// |. 
+@vertex
+fn vs_main(@builtin(vertex_index) vertex_index: u32) -> @builtin(position) vec4<f32> {
+    let x = i32(vertex_index) / 2;
+    let y = i32(vertex_index) & 1;
+    return vec4(
+        f32(x) * 4.0 - 1.0,
+        1.0 - f32(y) * 4.0,
+        0.0, 1.0
+    );
+}
+
+
+@fragment
+fn fs_main(@builtin(position) coord_in: vec4<f32>) -> @location(0) vec4<f32> {
+    var x = 0.0;
+    // Succeeds on FXC without workaround.
+    switch i32(coord_in.x) {
+        default {
+            x = 1.0;
+        }
+    }
+    var y = 0.0;
+    // Fails on FXC without workaround.
+    // (even if we adjust the switch above to give different x values based on the input coord)
+    switch i32(x * 30.0) {
+        default {
+            y = 1.0;
+        }
+    }
+    var z = 0.0;
+    // Multiple cases with a single body also fail on FXC without a workaround.
+    switch 0 {
+        case 0, 2, default {
+            z = 1.0;
+        }
+    }
+
+    var w = 0.0;
+    // Succeeds on FXC without workaround.
+    switch 0 {
+        case 0 {
+            w = 1.0;
+        }
+        default {
+            w = 1.0;
+        }
+    }
+
+    return vec4(x, y, z, w);
+}
diff --git a/tests/tests/regression/issue_5553.rs b/tests/tests/regression/issue_5553.rs
index 19247eec1c..6debb03485 100644
--- a/tests/tests/regression/issue_5553.rs
+++ b/tests/tests/regression/issue_5553.rs
@@ -30,7 +30,7 @@ static ALLOW_INPUT_NOT_CONSUMED: GpuTestConfiguration =
             layout: Some(&pipeline_layout),
             vertex: VertexState {
                 module: &module,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
                 compilation_options: Default::default(),
                 buffers: &[],
             },
@@ -39,7 +39,7 @@ static ALLOW_INPUT_NOT_CONSUMED: GpuTestConfiguration =
             multisample: MultisampleState::default(),
             fragment: Some(FragmentState {
                 module: &module,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(ColorTargetState {
                     format: TextureFormat::Rgba8Unorm,
diff --git a/tests/tests/render_pass_ownership.rs b/tests/tests/render_pass_ownership.rs
index 95fc0fbdc9..502375e736 100644
--- a/tests/tests/render_pass_ownership.rs
+++ b/tests/tests/render_pass_ownership.rs
@@ -498,7 +498,7 @@ fn resource_setup(ctx: &TestingContext) -> ResourceSetup {
         layout: Some(&pipeline_layout),
         vertex: wgpu::VertexState {
             module: &sm,
-            entry_point: "vs_main",
+            entry_point: Some("vs_main"),
             compilation_options: Default::default(),
             buffers: &[wgpu::VertexBufferLayout {
                 array_stride: 4,
@@ -508,7 +508,7 @@ fn resource_setup(ctx: &TestingContext) -> ResourceSetup {
         },
         fragment: Some(wgpu::FragmentState {
             module: &sm,
-            entry_point: "fs_main",
+            entry_point: Some("fs_main"),
             compilation_options: Default::default(),
             targets: &[Some(target_format.into())],
         }),
diff --git a/tests/tests/root.rs b/tests/tests/root.rs
index 5a34aea658..e9f45d7059 100644
--- a/tests/tests/root.rs
+++ b/tests/tests/root.rs
@@ -3,11 +3,14 @@ mod regression {
     mod issue_3457;
     mod issue_4024;
     mod issue_4122;
+    mod issue_4485;
+    mod issue_4514;
     mod issue_5553;
 }
 
 mod bgra8unorm_storage;
 mod bind_group_layout_dedup;
+mod bind_groups;
 mod buffer;
 mod buffer_copy;
 mod buffer_usages;
@@ -42,6 +45,7 @@ mod subgroup_operations;
 mod texture_bounds;
 mod texture_view_creation;
 mod transfer;
+mod vertex_formats;
 mod vertex_indices;
 mod write_texture;
 mod zero_init_texture_after_discard;
diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs
index 3f1e7df135..583be021f3 100644
--- a/tests/tests/scissor_tests/mod.rs
+++ b/tests/tests/scissor_tests/mod.rs
@@ -43,7 +43,7 @@ async fn scissor_test_impl(
         layout: None,
         vertex: wgpu::VertexState {
             module: &shader,
-            entry_point: "vs_main",
+            entry_point: Some("vs_main"),
             compilation_options: Default::default(),
             buffers: &[],
}, @@ -52,7 +52,7 @@ async fn scissor_test_impl( multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs index f5c2d4c96b..7d6ed7aaaa 100644 --- a/tests/tests/shader/mod.rs +++ b/tests/tests/shader/mod.rs @@ -314,7 +314,7 @@ async fn shader_input_output_test( label: Some(&format!("pipeline {test_name}")), layout: Some(&pll), module: &sm, - entry_point: "cs_main", + entry_point: Some("cs_main"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/shader/zero_init_workgroup_mem.rs b/tests/tests/shader/zero_init_workgroup_mem.rs index 0dcb81959b..beacb4fcc8 100644 --- a/tests/tests/shader/zero_init_workgroup_mem.rs +++ b/tests/tests/shader/zero_init_workgroup_mem.rs @@ -1,28 +1,21 @@ use std::num::NonZeroU64; use wgpu::{ - include_wgsl, Backends, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, + include_wgsl, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, BindingResource, BindingType, BufferBinding, BufferBindingType, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, ComputePassDescriptor, ComputePipelineDescriptor, DownlevelFlags, Limits, Maintain, MapMode, PipelineLayoutDescriptor, ShaderStages, }; -use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters}; +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters}; #[gpu_test] static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits::downlevel_defaults()) - // remove once we get to https://github.com/gfx-rs/wgpu/issues/3193 - .skip(FailureCase { - backends: Some(Backends::DX12), - vendor: Some(5140), - adapter: Some("Microsoft Basic Render Driver"), - ..FailureCase::default() - }), + .limits(Limits::downlevel_defaults()), ) .run_async(|ctx| async move { let bgl = ctx @@ -86,7 +79,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration:: label: Some("pipeline read"), layout: Some(&pll), module: &sm, - entry_point: "read", + entry_point: Some("read"), compilation_options: Default::default(), cache: None, }); @@ -97,7 +90,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration:: label: Some("pipeline write"), layout: None, module: &sm, - entry_point: "write", + entry_point: Some("write"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs index 9972f81aa1..10708a24a2 100644 --- a/tests/tests/shader_primitive_index/mod.rs +++ b/tests/tests/shader_primitive_index/mod.rs @@ -121,7 +121,7 @@ async fn pulling_common( layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: 8, @@ -138,7 +138,7 @@ async fn pulling_common( multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, 
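Note on the recurring `entry_point` edits in these test diffs: `VertexState::entry_point`,
`FragmentState::entry_point`, and the compute `entry_point` changed from `&str` to
`Option<&str>`. A minimal call-site sketch, using the same descriptor shape as the tests
above (the shader binding and entry-point name are placeholders):

    wgpu::VertexState {
        module: &shader,
        // Select the entry point by name. `None` is also accepted when the module
        // contains exactly one entry point for this stage.
        entry_point: Some("vs_main"),
        compilation_options: Default::default(),
        buffers: &[],
    }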
diff --git a/tests/tests/shader_view_format/mod.rs b/tests/tests/shader_view_format/mod.rs
index d34b8d851d..b2bc0426eb 100644
--- a/tests/tests/shader_view_format/mod.rs
+++ b/tests/tests/shader_view_format/mod.rs
@@ -92,13 +92,14 @@ async fn reinterpret(
             layout: None,
             vertex: wgpu::VertexState {
                 module: shader,
-                entry_point: "vs_main",
+                entry_point: Some("vs_main"),
+                compilation_options: Default::default(),
                 buffers: &[],
             },
             fragment: Some(wgpu::FragmentState {
                 module: shader,
-                entry_point: "fs_main",
+                entry_point: Some("fs_main"),
                 compilation_options: Default::default(),
                 targets: &[Some(src_format.into())],
             }),
diff --git a/tests/tests/subgroup_operations/mod.rs b/tests/tests/subgroup_operations/mod.rs
index 7d0aec8241..7696fb78df 100644
--- a/tests/tests/subgroup_operations/mod.rs
+++ b/tests/tests/subgroup_operations/mod.rs
@@ -73,7 +73,7 @@ static SUBGROUP_OPERATIONS: GpuTestConfiguration = GpuTestConfiguration::new()
             label: None,
             layout: Some(&pipeline_layout),
             module: &cs_module,
-            entry_point: "main",
+            entry_point: Some("main"),
             compilation_options: Default::default(),
             cache: None,
         });
diff --git a/tests/tests/vertex_formats/draw.vert.wgsl b/tests/tests/vertex_formats/draw.vert.wgsl
new file mode 100644
index 0000000000..bf6a08aac6
--- /dev/null
+++ b/tests/tests/vertex_formats/draw.vert.wgsl
@@ -0,0 +1,316 @@
+@group(0) @binding(0)
+var<storage, read_write> checksums: array<f32, 6>;
+
+const index_uint = 0u;
+const index_sint = 1u;
+const index_unorm = 2u;
+const index_snorm = 3u;
+const index_float16 = 4u;
+const index_float32 = 5u;
+
+fn init_checksums() {
+    checksums[index_uint] = 0.0;
+    checksums[index_sint] = 0.0;
+    checksums[index_unorm] = 0.0;
+    checksums[index_snorm] = 0.0;
+    checksums[index_float16] = 0.0;
+    checksums[index_float32] = 0.0;
+}
+
+// Break down the 31 vertex formats specified at
+// https://gpuweb.github.io/gpuweb/#vertex-formats into blocks
+// of 8, to keep under the limits of max locations. Each
+// AttributeBlockX structure will get a corresponding
+// vertex_block_X function to process its attributes into
+// values written to the checksums buffer.
+
+struct AttributeBlock0 {
+    // 4-byte-aligned unorm formats
+    @location(0) unorm8x4: vec4<f32>,
+    @location(1) unorm16x2: vec2<f32>,
+    @location(2) unorm16x4: vec4<f32>,
+
+    // 4-byte-aligned snorm formats
+    @location(3) snorm8x4: vec4<f32>,
+    @location(4) snorm16x2: vec2<f32>,
+    @location(5) snorm16x4: vec4<f32>,
+
+    // 2-byte-aligned formats
+    @location(6) unorm8x2: vec2<f32>,
+    @location(7) snorm8x2: vec2<f32>,
+}
+
+@vertex
+fn vertex_block_0(v_in: AttributeBlock0) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all unorm into one checksum value.
+    var all_unorm: f32 = 0.0;
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.y);
+
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.y);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.z);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.w);
+
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.y);
+
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.y);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.z);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.w);
+
+    checksums[index_unorm] = f32(all_unorm);
+
+    // Accumulate all snorm into one checksum value.
+    var all_snorm: f32 = 0.0;
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.x);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.y);
+
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.x);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.y);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.z);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.w);
+
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.x);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.y);
+
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.x);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.y);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.z);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.w);
+
+    checksums[index_snorm] = f32(all_snorm);
+
+    return vec4(0.0);
+}
+
+struct AttributeBlock1 {
+    // 4-byte-aligned uint formats
+    @location(0) uint8x4: vec4<u32>,
+    @location(1) uint16x2: vec2<u32>,
+    @location(2) uint16x4: vec4<u32>,
+
+    // 4-byte-aligned sint formats
+    @location(3) sint8x4: vec4<i32>,
+    @location(4) sint16x2: vec2<i32>,
+    @location(5) sint16x4: vec4<i32>,
+
+    // 2-byte-aligned formats
+    @location(6) uint8x2: vec2<u32>,
+    @location(7) sint8x2: vec2<i32>,
+}
+
+@vertex
+fn vertex_block_1(v_in: AttributeBlock1) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all uint into one checksum value.
+    var all_uint: u32 = 0;
+    all_uint = accumulate_uint(all_uint, v_in.uint8x2.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint8x2.y);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint8x4.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint8x4.y);
+    all_uint = accumulate_uint(all_uint, v_in.uint8x4.z);
+    all_uint = accumulate_uint(all_uint, v_in.uint8x4.w);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint16x2.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint16x2.y);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint16x4.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint16x4.y);
+    all_uint = accumulate_uint(all_uint, v_in.uint16x4.z);
+    all_uint = accumulate_uint(all_uint, v_in.uint16x4.w);
+
+    checksums[index_uint] = f32(all_uint);
+
+    // Accumulate all sint into one checksum value.
+    var all_sint: i32 = 0;
+    all_sint = accumulate_sint(all_sint, v_in.sint8x2.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint8x2.y);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint8x4.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint8x4.y);
+    all_sint = accumulate_sint(all_sint, v_in.sint8x4.z);
+    all_sint = accumulate_sint(all_sint, v_in.sint8x4.w);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint16x2.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint16x2.y);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint16x4.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint16x4.y);
+    all_sint = accumulate_sint(all_sint, v_in.sint16x4.z);
+    all_sint = accumulate_sint(all_sint, v_in.sint16x4.w);
+
+    checksums[index_sint] = f32(all_sint);
+
+    return vec4(0.0);
+}
+
+struct AttributeBlock2 {
+    @location(0) uint32: u32,
+    @location(1) uint32x2: vec2<u32>,
+    @location(2) uint32x3: vec3<u32>,
+    @location(3) uint32x4: vec4<u32>,
+}
+
+@vertex
+fn vertex_block_2(v_in: AttributeBlock2) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all uint into one checksum value.
+    var all_uint: u32 = 0;
+    all_uint = accumulate_uint(all_uint, v_in.uint32);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint32x2.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x2.y);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint32x3.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x3.y);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x3.z);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint32x4.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x4.y);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x4.z);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x4.w);
+
+    checksums[index_uint] = f32(all_uint);
+
+    return vec4(0.0);
+}
+
+struct AttributeBlock3 {
+    @location(0) sint32: i32,
+    @location(1) sint32x2: vec2<i32>,
+    @location(2) sint32x3: vec3<i32>,
+    @location(3) sint32x4: vec4<i32>,
+}
+
+@vertex
+fn vertex_block_3(v_in: AttributeBlock3) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all sint into one checksum value.
+    var all_sint: i32 = 0;
+    all_sint = accumulate_sint(all_sint, v_in.sint32);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint32x2.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x2.y);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint32x3.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x3.y);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x3.z);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint32x4.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x4.y);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x4.z);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x4.w);
+
+    checksums[index_sint] = f32(all_sint);
+
+    return vec4(0.0);
+}
+
+struct AttributeBlock4 {
+    @location(0) float32: f32,
+    @location(1) float32x2: vec2<f32>,
+    @location(2) float32x3: vec3<f32>,
+    @location(3) float32x4: vec4<f32>,
+    @location(4) float16x2: vec2<f32>,
+    @location(5) float16x4: vec4<f32>,
+}
+
+@vertex
+fn vertex_block_4(v_in: AttributeBlock4) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all float32 into one checksum value.
+    var all_float32: f32 = 0.0;
+    all_float32 = accumulate_float32(all_float32, v_in.float32);
+
+    all_float32 = accumulate_float32(all_float32, v_in.float32x2.x);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x2.y);
+
+    all_float32 = accumulate_float32(all_float32, v_in.float32x3.x);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x3.y);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x3.z);
+
+    all_float32 = accumulate_float32(all_float32, v_in.float32x4.x);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x4.y);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x4.z);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x4.w);
+
+    checksums[index_float32] = f32(all_float32);
+
+    // Accumulate all float16 into one checksum value.
+    var all_float16: f32 = 0.0;
+    all_float16 = accumulate_float16(all_float16, v_in.float16x2.x);
+    all_float16 = accumulate_float16(all_float16, v_in.float16x2.y);
+
+    all_float16 = accumulate_float16(all_float16, v_in.float16x4.x);
+    all_float16 = accumulate_float16(all_float16, v_in.float16x4.y);
+    all_float16 = accumulate_float16(all_float16, v_in.float16x4.z);
+    all_float16 = accumulate_float16(all_float16, v_in.float16x4.w);
+
+    checksums[index_float16] = f32(all_float16);
+
+    return vec4(0.0);
+}
+
+struct AttributeBlock5 {
+    @location(0) unorm10_10_10_2: vec4<f32>,
+}
+
+@vertex
+fn vertex_block_5(v_in: AttributeBlock5) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all unorm into one checksum value.
+    var all_unorm: f32 = 0.0;
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.y);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.z);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.w);
+
+    checksums[index_unorm] = f32(all_unorm);
+
+    return vec4(0.0);
+}
+
+fn accumulate_uint(accum: u32, val: u32) -> u32 {
+    return accum + val;
+}
+
+fn accumulate_sint(accum: i32, val: i32) -> i32 {
+    return accum + val;
+}
+
+fn accumulate_unorm(accum: f32, val: f32) -> f32 {
+    return accum + val;
+}
+
+fn accumulate_snorm(accum: f32, val: f32) -> f32 {
+    return accum + val;
+}
+
+fn accumulate_float16(accum: f32, val: f32) -> f32 {
+    return accum + val;
+}
+
+fn accumulate_float32(accum: f32, val: f32) -> f32 {
+    return accum + val;
+}
+
+@fragment
+fn fragment_main() -> @location(0) vec4<f32> {
+    return vec4(0.0);
+}
diff --git a/tests/tests/vertex_formats/mod.rs b/tests/tests/vertex_formats/mod.rs
new file mode 100644
index 0000000000..60ef177efa
--- /dev/null
+++ b/tests/tests/vertex_formats/mod.rs
@@ -0,0 +1,388 @@
+//! Tests that vertex formats pass through to vertex shaders accurately.
+
+use std::num::NonZeroU64;
+
+use wgpu::util::{BufferInitDescriptor, DeviceExt};
+
+use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext};
+
+#[derive(Debug, Copy, Clone)]
+enum TestCase {
+    UnormsAndSnorms,
+    UintsAndSintsSmall,
+    UintsBig,
+    SintsBig,
+    Floats,
+    Unorm1010102,
+}
+
+struct Test<'a> {
+    case: TestCase,
+    entry_point: &'a str,
+    attributes: &'a [wgt::VertexAttribute],
+    input: &'a [u8],
+    checksums: &'a [f32],
+}
+
+async fn vertex_formats_all(ctx: TestingContext) {
+    let attributes_block_0 = &wgpu::vertex_attr_array![
+        0 => Unorm8x4,
+        1 => Unorm16x2,
+        2 => Unorm16x4,
+        3 => Snorm8x4,
+        4 => Snorm16x2,
+        5 => Snorm16x4,
+        6 => Unorm8x2,
+        7 => Snorm8x2,
+    ];
+
+    let attributes_block_1 = &wgpu::vertex_attr_array![
+        0 => Uint8x4,
+        1 => Uint16x2,
+        2 => Uint16x4,
+        3 => Sint8x4,
+        4 => Sint16x2,
+        5 => Sint16x4,
+        6 => Uint8x2,
+        7 => Sint8x2,
+    ];
+
+    let attributes_block_2 = &wgpu::vertex_attr_array![
+        0 => Uint32,
+        1 => Uint32x2,
+        2 => Uint32x3,
+        3 => Uint32x4,
+    ];
+
+    let attributes_block_3 = &wgpu::vertex_attr_array![
+        0 => Sint32,
+        1 => Sint32x2,
+        2 => Sint32x3,
+        3 => Sint32x4,
+    ];
+
+    let attributes_block_4 = &wgpu::vertex_attr_array![
+        0 => Float32,
+        1 => Float32x2,
+        2 => Float32x3,
+        3 => Float32x4,
+        4 => Float16x2,
+        5 => Float16x4,
+    ];
+
+    let tests = vec![
+        Test {
+            case: TestCase::UnormsAndSnorms,
+            entry_point: "vertex_block_0",
+            attributes: attributes_block_0,
+            input: &[
+                128u8, 128u8, 128u8, 128u8, // Unorm8x4 (0.5, 0.5, 0.5, 0.5)
+                0u8, 128u8, 0u8, 128u8, // Unorm16x2 (0.5, 0.5)
+                0u8, 64u8, 0u8, 64u8, 0u8, 64u8, 0u8,
+                64u8, // Unorm16x4 (0.25, 0.25, 0.25, 0.25)
+                127u8, 127u8, 127u8, 127u8, // Snorm8x4 (1, 1, 1, 1)
+                0u8, 128u8, 0u8, 128u8, // Snorm16x2 (-1, -1)
+                255u8, 127u8, 255u8, 127u8, 255u8, 127u8, 255u8,
+                127u8, // Snorm16x4 (1, 1, 1, 1)
+                255u8, 255u8, // Unorm8x2 (1, 1)
+                128u8, 128u8, // Snorm8x2 (-1, -1)
+            ],
+            checksums: &[0.0, 0.0, 6.0, 4.0, 0.0, 0.0],
+        },
+        Test {
+            case: TestCase::UintsAndSintsSmall,
+            entry_point: "vertex_block_1",
+            attributes: attributes_block_1,
+            input: &[
+                4u8, 8u8, 16u8, 32u8, // Uint8x4 (4, 8, 16, 32)
+                64u8, 0u8, 128u8, 0u8, // Uint16x2 (64, 128)
+                0u8, 1u8, 0u8, 2u8, 0u8, 4u8, 0u8, 8u8, // Uint16x4 (256, 512, 1024, 2048)
+                127u8, 127u8, 2u8, 0u8, // Sint8x4 (127, 127, 2, 
0)
+                255u8, 255u8, 1u8, 0u8, // Sint16x2 (-1, 1)
+                128u8, 255u8, 128u8, 255u8, 0u8, 1u8, 240u8,
+                255u8, // Sint16x4 (-128, -128, 256, -16)
+                1u8, 2u8, // Uint8x2 (1, 2)
+                128u8, 128u8, // Sint8x2 (-128, -128)
+            ],
+            checksums: &[4095.0, -16.0, 0.0, 0.0, 0.0, 0.0],
+        },
+        Test {
+            case: TestCase::UintsBig,
+            entry_point: "vertex_block_2",
+            attributes: attributes_block_2,
+            input: &[
+                1u8, 0u8, 0u8, 0u8, // Uint32 (1)
+                2u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, // Uint32x2 (2, 4)
+                8u8, 0u8, 0u8, 0u8, 16u8, 0u8, 0u8, 0u8, 32u8, 0u8, 0u8,
+                0u8, // Uint32x3 (8, 16, 32)
+                64u8, 0u8, 0u8, 0u8, 128u8, 0u8, 0u8, 0u8, 0u8, 1u8, 0u8, 0u8, 0u8, 2u8, 0u8,
+                0u8, // Uint32x4 (64, 128, 256, 512)
+            ],
+            checksums: &[1023.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+        },
+        Test {
+            case: TestCase::SintsBig,
+            entry_point: "vertex_block_3",
+            attributes: attributes_block_3,
+            input: &[
+                128u8, 255u8, 255u8, 255u8, // Sint32 (-128)
+                120u8, 0u8, 0u8, 0u8, 8u8, 0u8, 0u8, 0u8, // Sint32x2 (120, 8)
+                252u8, 255u8, 255u8, 255u8, 2u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8,
+                0u8, // Sint32x3 (-4, 2, 2)
+                24u8, 252u8, 255u8, 255u8, 88u8, 2u8, 0u8, 0u8, 44u8, 1u8, 0u8, 0u8, 99u8, 0u8,
+                0u8, 0u8, // Sint32x4 (-1000, 600, 300, 99)
+            ],
+            checksums: &[0.0, -1.0, 0.0, 0.0, 0.0, 0.0],
+        },
+        Test {
+            case: TestCase::Floats,
+            entry_point: "vertex_block_4",
+            attributes: attributes_block_4,
+            input: &[
+                0u8, 0u8, 0u8, 63u8, // Float32 (0.5)
+                0u8, 0u8, 0u8, 191u8, 0u8, 0u8, 128u8, 64u8, // Float32x2 (-0.5, 4.0)
+                0u8, 0u8, 0u8, 192u8, 0u8, 0u8, 204u8, 194u8, 0u8, 0u8, 200u8,
+                66u8, // Float32x3 (-2.0, -102.0, 100.0)
+                0u8, 0u8, 92u8, 66u8, 0u8, 0u8, 72u8, 194u8, 0u8, 0u8, 32u8, 65u8, 0u8, 0u8, 128u8,
+                63u8, // Float32x4 (55.0, -50.0, 10.0, 1.0)
+                0u8, 60u8, 72u8, 53u8, // Float16x2 (1.0, 0.33)
+                72u8, 57u8, 0u8, 192u8, 0u8, 188u8, 0u8,
+                184u8, // Float16x4 (0.66, -2.0, -1.0, -0.5)
+            ],
+            checksums: &[0.0, 0.0, 0.0, 0.0, -1.5, 16.0],
+        },
+    ];
+
+    vertex_formats_common(ctx, &tests).await;
+}
+
+async fn vertex_formats_10_10_10_2(ctx: TestingContext) {
+    let attributes_block_5 = &wgpu::vertex_attr_array![
+        0 => Unorm10_10_10_2,
+    ];
+
+    let tests = vec![Test {
+        case: TestCase::Unorm1010102,
+        entry_point: "vertex_block_5",
+        attributes: attributes_block_5,
+        input: &[
+            // We are aiming for rgba of (0.5, 0.5, 0.5, 0.66)
+            // Packing AA BB BBBB BBBB GGGG GGGG GG RR RRRR RRRR
+            // Binary  10 10 0000 0000 1000 0000 00 10 0000 0000
+            // Hex     A0 08 02 00
+            // Decimal 160 8 2 0
+            // unorm   0.66 0.5 0.5 0.5 = 2.16
+            0u8, 2u8, 8u8, 160u8, // Unorm10_10_10_2
+        ],
+        checksums: &[0.0, 0.0, 2.16, 0.0, 0.0, 0.0],
+    }];
+
+    vertex_formats_common(ctx, &tests).await;
+}
+
+async fn vertex_formats_common(ctx: TestingContext, tests: &[Test<'_>]) {
+    let shader = ctx
+        .device
+        .create_shader_module(wgpu::include_wgsl!("draw.vert.wgsl"));
+
+    let bgl = ctx
+        .device
+        .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: None,
+            entries: &[wgpu::BindGroupLayoutEntry {
+                binding: 0,
+                ty: wgpu::BindingType::Buffer {
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                    has_dynamic_offset: false,
+                    min_binding_size: NonZeroU64::new(4),
+                },
+                visibility: wgpu::ShaderStages::VERTEX,
+                count: None,
+            }],
+        });
+
+    let ppl = ctx
+        .device
+        .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: None,
+            bind_group_layouts: &[&bgl],
+            push_constant_ranges: &[],
+        });
+
+    let dummy = ctx
+        .device
+        .create_texture_with_data(
+            &ctx.queue,
+            &wgpu::TextureDescriptor {
+                label: Some("dummy"),
+                size: wgpu::Extent3d {
+                    width:
1,
+                    height: 1,
+                    depth_or_array_layers: 1,
+                },
+                mip_level_count: 1,
+                sample_count: 1,
+                dimension: wgpu::TextureDimension::D2,
+                format: wgpu::TextureFormat::Rgba8Unorm,
+                usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_DST,
+                view_formats: &[],
+            },
+            wgpu::util::TextureDataOrder::LayerMajor,
+            &[0, 0, 0, 1],
+        )
+        .create_view(&wgpu::TextureViewDescriptor::default());
+
+    let mut failed = false;
+    for test in tests {
+        let buffer_input = ctx.device.create_buffer_init(&BufferInitDescriptor {
+            label: None,
+            contents: bytemuck::cast_slice(test.input),
+            usage: wgpu::BufferUsages::VERTEX,
+        });
+
+        let pipeline_desc = wgpu::RenderPipelineDescriptor {
+            label: None,
+            layout: Some(&ppl),
+            vertex: wgpu::VertexState {
+                buffers: &[wgpu::VertexBufferLayout {
+                    array_stride: 0, // Calculate, please!
+                    step_mode: wgpu::VertexStepMode::Vertex,
+                    attributes: test.attributes,
+                }],
+                module: &shader,
+                entry_point: Some(test.entry_point),
+                compilation_options: Default::default(),
+            },
+            primitive: wgpu::PrimitiveState::default(),
+            depth_stencil: None,
+            multisample: wgpu::MultisampleState::default(),
+            fragment: Some(wgpu::FragmentState {
+                module: &shader,
+                entry_point: Some("fragment_main"),
+                compilation_options: Default::default(),
+                targets: &[Some(wgpu::ColorTargetState {
+                    format: wgpu::TextureFormat::Rgba8Unorm,
+                    blend: None,
+                    write_mask: wgpu::ColorWrites::ALL,
+                })],
+            }),
+            multiview: None,
+            cache: None,
+        };
+
+        let pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
+
+        let expected = test.checksums;
+        let buffer_size = (std::mem::size_of_val(&expected[0]) * expected.len()) as u64;
+        let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+            label: None,
+            size: buffer_size,
+            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+            mapped_at_creation: false,
+        });
+
+        let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+            label: None,
+            size: buffer_size,
+            usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::STORAGE,
+            mapped_at_creation: false,
+        });
+
+        let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: None,
+            layout: &bgl,
+            entries: &[wgpu::BindGroupEntry {
+                binding: 0,
+                resource: gpu_buffer.as_entire_binding(),
+            }],
+        });
+
+        let mut encoder1 = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+        let mut rpass = encoder1.begin_render_pass(&wgpu::RenderPassDescriptor {
+            label: None,
+            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                ops: wgpu::Operations::default(),
+                resolve_target: None,
+                view: &dummy,
+            })],
+            depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
+        });
+
+        rpass.set_vertex_buffer(0, buffer_input.slice(..));
+        rpass.set_pipeline(&pipeline);
+        rpass.set_bind_group(0, &bg, &[]);
+
+        // Draw three vertices and one instance, which is enough to generate the
+        // checksums.
+        rpass.draw(0..3, 0..1);
+
+        drop(rpass);
+
+        let mut encoder2 = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+        encoder2.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, buffer_size);
+
+        // See https://github.com/gfx-rs/wgpu/issues/4732 for why this is split between two submissions
+        // with a hard wait in between.
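+        // The sequence below is: submit the render work, block until it completes,
+        // submit the buffer-to-buffer copy, then map `cpu_buffer` and compare the
+        // checksums on the CPU.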
+        ctx.queue.submit([encoder1.finish()]);
+        ctx.async_poll(wgpu::Maintain::wait())
+            .await
+            .panic_on_timeout();
+        ctx.queue.submit([encoder2.finish()]);
+        let slice = cpu_buffer.slice(..);
+        slice.map_async(wgpu::MapMode::Read, |_| ());
+        ctx.async_poll(wgpu::Maintain::wait())
+            .await
+            .panic_on_timeout();
+        let data: Vec<f32> = bytemuck::cast_slice(&slice.get_mapped_range()).to_vec();
+
+        let case_name = format!("Case {:?}", test.case);
+
+        // Calculate the difference between data and expected. Since the data is
+        // a bunch of float checksums, we allow a fairly large epsilon, which helps
+        // with the accumulation of float rounding errors.
+        const EPSILON: f32 = 0.01;
+
+        let mut deltas = data.iter().zip(expected.iter()).map(|(d, e)| (d - e).abs());
+        if deltas.any(|x| x > EPSILON) {
+            eprintln!(
+                "Failed: Got: {:?} Expected: {:?} - {case_name}",
+                data, expected,
+            );
+            failed = true;
+            continue;
+        }
+
+        eprintln!("Passed: {case_name}");
+    }
+
+    assert!(!failed);
+}
+
+#[gpu_test]
+static VERTEX_FORMATS_ALL: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .test_features_limits()
+            .features(wgpu::Features::VERTEX_WRITABLE_STORAGE),
+    )
+    .run_async(vertex_formats_all);
+
+// Some backends can handle Unorm10_10_10_2, but GL backends seem to throw this error:
+// Validation Error: GL_INVALID_ENUM in glVertexAttribFormat(type = GL_UNSIGNED_INT_10_10_10_2)
+#[gpu_test]
+static VERTEX_FORMATS_10_10_10_2: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .expect_fail(FailureCase::backend(wgpu::Backends::GL))
+            .test_features_limits()
+            .features(wgpu::Features::VERTEX_WRITABLE_STORAGE),
+    )
+    .run_async(vertex_formats_10_10_10_2);
diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs
index 59048ef31c..5a847d0fbb 100644
--- a/tests/tests/vertex_indices/mod.rs
+++ b/tests/tests/vertex_indices/mod.rs
@@ -166,7 +166,6 @@ struct Test {
     id_source: IdSource,
     draw_call_kind: DrawCallKind,
     encoder_kind: EncoderKind,
-    vertex_pulling_transform: bool,
 }
 
 impl Test {
@@ -260,7 +259,7 @@ async fn vertex_index_common(ctx: TestingContext) {
         vertex: wgpu::VertexState {
             buffers: &[],
             module: &shader,
-            entry_point: "vs_main_builtin",
+            entry_point: Some("vs_main_builtin"),
             compilation_options: Default::default(),
         },
         primitive: wgpu::PrimitiveState::default(),
@@ -268,7 +267,7 @@ async fn vertex_index_common(ctx: TestingContext) {
         multisample: wgpu::MultisampleState::default(),
         fragment: Some(wgpu::FragmentState {
             module: &shader,
-            entry_point: "fs_main",
+            entry_point: Some("fs_main"),
             compilation_options: Default::default(),
             targets: &[Some(wgpu::ColorTargetState {
                 format: wgpu::TextureFormat::Rgba8Unorm,
@@ -280,17 +279,8 @@ async fn vertex_index_common(ctx: TestingContext) {
         cache: None,
     };
     let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
-    pipeline_desc
-        .vertex
-        .compilation_options
-        .vertex_pulling_transform = true;
-    let builtin_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc);
-    pipeline_desc
-        .vertex
-        .compilation_options
-        .vertex_pulling_transform = false;
-
-    pipeline_desc.vertex.entry_point = "vs_main_buffers";
+
+    pipeline_desc.vertex.entry_point = Some("vs_main_buffers");
     pipeline_desc.vertex.buffers = &[
         wgpu::VertexBufferLayout {
             array_stride: 4,
@@ -304,15 +294,6 @@ async fn vertex_index_common(ctx: TestingContext) {
         },
     ];
     let buffer_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
-    pipeline_desc
-        .vertex
-        .compilation_options
-        .vertex_pulling_transform = true;
-    let buffer_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc);
-    pipeline_desc
-        .vertex
-        .compilation_options
-        .vertex_pulling_transform = false;
 
     let dummy = ctx
         .device
@@ -341,18 +322,12 @@ async fn vertex_index_common(ctx: TestingContext) {
         .cartesian_product(IdSource::iter())
         .cartesian_product(DrawCallKind::iter())
         .cartesian_product(EncoderKind::iter())
-        .cartesian_product([false, true])
-        .map(
-            |((((case, id_source), draw_call_kind), encoder_kind), vertex_pulling_transform)| {
-                Test {
-                    case,
-                    id_source,
-                    draw_call_kind,
-                    encoder_kind,
-                    vertex_pulling_transform,
-                }
-            },
-        )
+        .map(|(((case, id_source), draw_call_kind), encoder_kind)| Test {
+            case,
+            id_source,
+            draw_call_kind,
+            encoder_kind,
+        })
         .collect::<Vec<_>>();
 
     let features = ctx.adapter.features();
 
     let mut failed = false;
     for test in tests {
         let pipeline = match test.id_source {
-            IdSource::Buffers => {
-                if test.vertex_pulling_transform {
-                    &buffer_pipeline_vpt
-                } else {
-                    &buffer_pipeline
-                }
-            }
-            IdSource::Builtins => {
-                if test.vertex_pulling_transform {
-                    &builtin_pipeline_vpt
-                } else {
-                    &builtin_pipeline
-                }
-            }
+            IdSource::Buffers => &buffer_pipeline,
+            IdSource::Builtins => &builtin_pipeline,
         };
 
         let expected = test.expectation(&ctx);
diff --git a/tests/tests/write_texture.rs b/tests/tests/write_texture.rs
index f8d99d6d14..fbb0485918 100644
--- a/tests/tests/write_texture.rs
+++ b/tests/tests/write_texture.rs
@@ -32,7 +32,7 @@ static WRITE_TEXTURE_SUBSET_2D: GpuTestConfiguration =
                 origin: wgpu::Origin3d::ZERO,
                 aspect: wgpu::TextureAspect::All,
             },
-            bytemuck::cast_slice(&data),
+            &data,
             wgpu::ImageDataLayout {
                 offset: 0,
                 bytes_per_row: Some(size),
@@ -127,7 +127,7 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration =
                 origin: wgpu::Origin3d::ZERO,
                 aspect: wgpu::TextureAspect::All,
             },
-            bytemuck::cast_slice(&data),
+            &data,
             wgpu::ImageDataLayout {
                 offset: 0,
                 bytes_per_row: Some(size),
@@ -191,3 +191,44 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration =
             assert_eq!(*byte, 0);
         }
     });
+
+#[gpu_test]
+static WRITE_TEXTURE_NO_OOB: GpuTestConfiguration =
+    GpuTestConfiguration::new().run_async(|ctx| async move {
+        let size = 256;
+
+        let tex = ctx.device.create_texture(&wgpu::TextureDescriptor {
+            label: None,
+            dimension: wgpu::TextureDimension::D2,
+            size: wgpu::Extent3d {
+                width: size,
+                height: size,
+                depth_or_array_layers: 1,
+            },
+            format: wgpu::TextureFormat::R8Uint,
+            usage: wgpu::TextureUsages::COPY_DST,
+            mip_level_count: 1,
+            sample_count: 1,
+            view_formats: &[],
+        });
+        let data = vec![1u8; size as usize * 2 + 100]; // check that we don't attempt to copy OOB internally by adding 100 bytes here
+        ctx.queue.write_texture(
+            wgpu::ImageCopyTexture {
+                texture: &tex,
+                mip_level: 0,
+                origin: wgpu::Origin3d::ZERO,
+                aspect: wgpu::TextureAspect::All,
+            },
+            &data,
+            wgpu::ImageDataLayout {
+                offset: 0,
+                bytes_per_row: Some(size),
+                rows_per_image: Some(size),
+            },
+            wgpu::Extent3d {
+                width: size,
+                height: 2,
+                depth_or_array_layers: 1,
+            },
+        );
+    });
diff --git a/typos.toml b/typos.toml
index cb33d95bd9..47406a8074 100644
--- a/typos.toml
+++ b/typos.toml
@@ -1,5 +1,8 @@
 [files]
+# Include .github, .cargo, etc.
+ignore-hidden = false extend-exclude = [ + '/.git', # spirv-asm isn't real source code '*.spvasm', 'etc/big-picture.xml', @@ -13,15 +16,22 @@ extend-exclude = [ [default.extend-words] # Things that aren't typos lod = "lod" -inout = "inout" -derivate = "derivate" -implace = "implace" -Ded = "Ded" # This shows up in "ANDed" -pn = "pn" # used as a normal name in debug-symbol-terrain.wgsl # Usernames Healthire = "Healthire" REASY = "REASY" [type.rust.extend-identifiers] +ANDed = "ANDed" D3DCOLORtoUBYTE4 = "D3DCOLORtoUBYTE4" +Derivate = "Derivate" +inout = "inout" + +[type.wgsl] +extend-glob = ["*.wgsl"] + +[type.wgsl.extend-identifiers] +pn = "pn" + +[type.yaml.extend-words] +dota = "dota" diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index 7a2b9ae15c..22d813c4cb 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-core" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU core logic on wgpu-hal" @@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0" # copy the crates it actually uses out of the workspace, so it's meaningful for # them to have less restrictive MSRVs individually than the workspace as a # whole, if their code permits. See `../README.md` for details. -rust-version = "1.74" +rust-version = "1.76" [package.metadata.docs.rs] all-features = true @@ -60,6 +60,9 @@ trace = ["dep:ron", "serde", "naga/serialize"] ## Enable API replaying replay = ["serde", "naga/deserialize"] +## Enable creating instances using raw-window-handle +raw-window-handle = ["dep:raw-window-handle"] + ## Enable `ShaderModuleSource::Wgsl` wgsl = ["naga/wgsl-in"] @@ -100,37 +103,36 @@ gles = ["hal/gles"] dx12 = ["hal/dx12"] [dependencies] -arrayvec = "0.7" -bit-vec = "0.7" -bitflags = "2" -bytemuck = { version = "1.16", optional = true } +arrayvec.workspace = true +bit-vec.workspace = true +bitflags.workspace = true +bytemuck = { workspace = true, optional = true } document-features.workspace = true -indexmap = "2" -log = "0.4" -once_cell = "1" -# parking_lot 0.12 switches from `winapi` to `windows`; permit either -parking_lot = ">=0.11, <0.13" -profiling = { version = "1", default-features = false } -raw-window-handle = { version = "0.6", optional = true } -ron = { version = "0.8", optional = true } -rustc-hash = "1.1" -serde = { version = "1", features = ["serde_derive"], optional = true } -smallvec = "1" -thiserror = "1" +indexmap.workspace = true +log.workspace = true +once_cell.workspace = true +parking_lot.workspace = true +profiling = { workspace = true, default-features = false } +raw-window-handle = { workspace = true, optional = true } +ron = { workspace = true, optional = true } +rustc-hash.workspace = true +serde = { workspace = true, features = ["derive"], optional = true } +smallvec.workspace = true +thiserror.workspace = true [dependencies.naga] path = "../naga" -version = "0.20.0" +version = "22.0.0" [dependencies.wgt] package = "wgpu-types" path = "../wgpu-types" -version = "0.20.0" +version = "22.0.0" [dependencies.hal] package = "wgpu-hal" path = "../wgpu-hal" -version = "0.20.0" +version = "22.0.0" default-features = false [build-dependencies] diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index fe20c1a929..9fd344c48c 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -2,7 +2,6 @@ use crate::{ device::{ bgl, Device, DeviceError, MissingDownlevelFlags, MissingFeatures, SHADER_STAGE_COUNT, }, - hal_api::HalApi, 
    id::{BindGroupLayoutId, BufferId, SamplerId, TextureViewId, TlasId},
     init_tracker::{BufferInitTrackerAction, TextureInitTrackerAction},
     pipeline::{ComputePipeline, RenderPipeline},
@@ -26,6 +25,7 @@ use serde::Serialize;
 
 use std::{
     borrow::Cow,
+    mem::ManuallyDrop,
     ops::Range,
     sync::{Arc, Weak},
 };
@@ -67,7 +67,7 @@ pub enum CreateBindGroupLayoutError {
     },
     #[error(transparent)]
     TooManyBindings(BindingTypeMaxCountError),
-    #[error("Binding index {binding} is greater than the maximum index {maximum}")]
+    #[error("Binding index {binding} is greater than the maximum number {maximum}")]
     InvalidBindingIndex { binding: u32, maximum: u32 },
     #[error("Invalid visibility {0:?}")]
     InvalidVisibility(wgt::ShaderStages),
@@ -422,12 +422,12 @@ pub struct BindGroupEntry<'a> {
 
 /// Bindable resource and the slot to bind it to.
 #[derive(Clone, Debug)]
-pub struct ResolvedBindGroupEntry<'a, A: HalApi> {
+pub struct ResolvedBindGroupEntry<'a> {
     /// Slot for which binding provides resource. Corresponds to an entry of the same
     /// binding index in the [`BindGroupLayoutDescriptor`].
     pub binding: u32,
     /// Resource to attach to the binding
-    pub resource: ResolvedBindingResource<'a, A>,
+    pub resource: ResolvedBindingResource<'a>,
 }
 
 /// Describes a group of bindings and the resources to be bound.
@@ -446,15 +446,15 @@ pub struct BindGroupDescriptor<'a> {
 
 /// Describes a group of bindings and the resources to be bound.
 #[derive(Clone, Debug)]
-pub struct ResolvedBindGroupDescriptor<'a, A: HalApi> {
+pub struct ResolvedBindGroupDescriptor<'a> {
     /// Debug label of the bind group.
     ///
     /// This will show up in graphics debuggers for easy identification.
     pub label: Label<'a>,
     /// The [`BindGroupLayout`] that corresponds to this bind group.
-    pub layout: Arc<BindGroupLayout<A>>,
+    pub layout: Arc<BindGroupLayout>,
     /// The resources to bind to this bind group.
-    pub entries: Cow<'a, [ResolvedBindGroupEntry<'a, A>]>,
+    pub entries: Cow<'a, [ResolvedBindGroupEntry<'a>]>,
 }
 
 /// Describes a [`BindGroupLayout`].
@@ -473,13 +473,13 @@ pub struct BindGroupLayoutDescriptor<'a> {
 /// used with a specific pipeline. This constraint only happens when
 /// the BGLs have been derived from a pipeline without a layout.
 #[derive(Debug)]
-pub(crate) enum ExclusivePipeline<A: HalApi> {
+pub(crate) enum ExclusivePipeline {
     None,
-    Render(Weak<RenderPipeline<A>>),
-    Compute(Weak<ComputePipeline<A>>),
+    Render(Weak<RenderPipeline>),
+    Compute(Weak<ComputePipeline>),
 }
 
-impl<A: HalApi> std::fmt::Display for ExclusivePipeline<A> {
+impl std::fmt::Display for ExclusivePipeline {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             ExclusivePipeline::None => f.write_str("None"),
@@ -503,9 +503,9 @@ impl<A: HalApi> std::fmt::Display for ExclusivePipeline<A> {
 
 /// Bind group layout.
 #[derive(Debug)]
-pub struct BindGroupLayout<A: HalApi> {
-    pub(crate) raw: Option<A::BindGroupLayout>,
-    pub(crate) device: Arc<Device<A>>,
+pub struct BindGroupLayout {
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynBindGroupLayout>>,
+    pub(crate) device: Arc<Device>,
     pub(crate) entries: bgl::EntryMap,
     /// It is very important that we know if the bind group comes from the BGL pool.
     ///
@@ -514,25 +514,23 @@ pub struct BindGroupLayout<A: HalApi> {
     /// We cannot unconditionally remove from the pool, as BGLs that don't come from the pool
     /// (derived BGLs) must not be removed.
     pub(crate) origin: bgl::Origin,
-    pub(crate) exclusive_pipeline: OnceCell<ExclusivePipeline<A>>,
+    pub(crate) exclusive_pipeline: OnceCell<ExclusivePipeline>,
     #[allow(unused)]
     pub(crate) binding_count_validator: BindingTypeMaxCountValidator,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
-    pub(crate) tracking_data: TrackingData,
 }
 
-impl<A: HalApi> Drop for BindGroupLayout<A> {
+impl Drop for BindGroupLayout {
     fn drop(&mut self) {
+        resource_log!("Destroy raw {}", self.error_ident());
         if matches!(self.origin, bgl::Origin::Pool) {
             self.device.bgl_pool.remove(&self.entries);
         }
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_bind_group_layout(raw);
-            }
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            self.device.raw().destroy_bind_group_layout(raw);
         }
     }
 }
@@ -541,11 +539,10 @@ crate::impl_resource_type!(BindGroupLayout);
 crate::impl_labeled!(BindGroupLayout);
 crate::impl_parent_device!(BindGroupLayout);
 crate::impl_storage_item!(BindGroupLayout);
-crate::impl_trackable!(BindGroupLayout);
 
-impl<A: HalApi> BindGroupLayout<A> {
-    pub(crate) fn raw(&self) -> &A::BindGroupLayout {
-        self.raw.as_ref().unwrap()
+impl BindGroupLayout {
+    pub(crate) fn raw(&self) -> &dyn hal::DynBindGroupLayout {
+        self.raw.as_ref()
     }
 }
 
@@ -639,14 +636,14 @@ pub struct PipelineLayoutDescriptor<'a> {
 ///
 /// A `PipelineLayoutDescriptor` can be used to create a pipeline layout.
 #[derive(Debug)]
-pub struct ResolvedPipelineLayoutDescriptor<'a, A: HalApi> {
+pub struct ResolvedPipelineLayoutDescriptor<'a> {
     /// Debug label of the pipeline layout.
     ///
     /// This will show up in graphics debuggers for easy identification.
     pub label: Label<'a>,
     /// Bind groups that this pipeline uses. The first entry will provide all the bindings for
     /// "set = 0", second entry will provide all the bindings for "set = 1" etc.
-    pub bind_group_layouts: Cow<'a, [Arc<BindGroupLayout<A>>]>,
+    pub bind_group_layouts: Cow<'a, [Arc<BindGroupLayout>]>,
     /// Set of push constant ranges this pipeline uses. Each shader stage that
    /// uses push constants must define the range in push constant memory that
     /// corresponds to its single `layout(push_constant)` uniform block.
@@ -658,31 +655,29 @@ pub struct ResolvedPipelineLayoutDescriptor<'a, A: HalApi> {
 }
 
 #[derive(Debug)]
-pub struct PipelineLayout<A: HalApi> {
-    pub(crate) raw: Option<A::PipelineLayout>,
-    pub(crate) device: Arc<Device<A>>,
+pub struct PipelineLayout {
+    pub(crate) raw: ManuallyDrop<Box<dyn hal::DynPipelineLayout>>,
+    pub(crate) device: Arc<Device>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
-    pub(crate) tracking_data: TrackingData,
-    pub(crate) bind_group_layouts: ArrayVec<Arc<BindGroupLayout<A>>, { hal::MAX_BIND_GROUPS }>,
+    pub(crate) bind_group_layouts: ArrayVec<Arc<BindGroupLayout>, { hal::MAX_BIND_GROUPS }>,
     pub(crate) push_constant_ranges: ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT }>,
 }
 
-impl<A: HalApi> Drop for PipelineLayout<A> {
+impl Drop for PipelineLayout {
     fn drop(&mut self) {
-        if let Some(raw) = self.raw.take() {
-            resource_log!("Destroy raw {}", self.error_ident());
-            unsafe {
-                use hal::Device;
-                self.device.raw().destroy_pipeline_layout(raw);
-            }
+        resource_log!("Destroy raw {}", self.error_ident());
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        unsafe {
+            self.device.raw().destroy_pipeline_layout(raw);
         }
     }
 }
 
-impl<A: HalApi> PipelineLayout<A> {
-    pub(crate) fn raw(&self) -> &A::PipelineLayout {
-        self.raw.as_ref().unwrap()
+impl PipelineLayout {
+    pub(crate) fn raw(&self) -> &dyn hal::DynPipelineLayout {
+        self.raw.as_ref()
     }
 
     pub(crate) fn get_binding_maps(&self) -> ArrayVec<&bgl::EntryMap, { hal::MAX_BIND_GROUPS }> {
@@ -775,7 +770,6 @@ crate::impl_resource_type!(PipelineLayout);
 crate::impl_labeled!(PipelineLayout);
 crate::impl_parent_device!(PipelineLayout);
 crate::impl_storage_item!(PipelineLayout);
-crate::impl_trackable!(PipelineLayout);
 
 #[repr(C)]
 #[derive(Clone, Debug, Hash, Eq, PartialEq)]
@@ -787,8 +781,8 @@ pub struct BufferBinding {
 }
 
 #[derive(Clone, Debug)]
-pub struct ResolvedBufferBinding<A: HalApi> {
-    pub buffer: Arc<Buffer<A>>,
+pub struct ResolvedBufferBinding {
+    pub buffer: Arc<Buffer>,
     pub offset: wgt::BufferAddress,
     pub size: Option<wgt::BufferSize>,
 }
@@ -810,14 +804,14 @@ pub enum BindingResource<'a> {
 // Note: Duplicated in `wgpu-rs` as `BindingResource`
 // They're different enough that it doesn't make sense to share a common type
 #[derive(Debug, Clone)]
-pub enum ResolvedBindingResource<'a, A: HalApi> {
-    Buffer(ResolvedBufferBinding<A>),
-    BufferArray(Cow<'a, [ResolvedBufferBinding<A>]>),
-    Sampler(Arc<Sampler<A>>),
-    SamplerArray(Cow<'a, [Arc<Sampler<A>>]>),
-    TextureView(Arc<TextureView<A>>),
-    TextureViewArray(Cow<'a, [Arc<TextureView<A>>]>),
-    AccelerationStructure(Arc<Tlas<A>>),
+pub enum ResolvedBindingResource<'a> {
+    Buffer(ResolvedBufferBinding),
+    BufferArray(Cow<'a, [ResolvedBufferBinding]>),
+    Sampler(Arc<Sampler>),
+    SamplerArray(Cow<'a, [Arc<Sampler>]>),
+    TextureView(Arc<TextureView>),
+    TextureViewArray(Cow<'a, [Arc<TextureView>]>),
+    AccelerationStructure(Arc<Tlas>),
 }
 
 #[derive(Clone, Debug, Error)]
@@ -899,39 +893,38 @@ pub(crate) fn buffer_binding_type_alignment(
 }
 
 #[derive(Debug)]
-pub struct BindGroup<A: HalApi> {
-    pub(crate) raw: Snatchable<A::BindGroup>,
-    pub(crate) device: Arc<Device<A>>,
-    pub(crate) layout: Arc<BindGroupLayout<A>>,
+pub struct BindGroup {
+    pub(crate) raw: Snatchable<Box<dyn hal::DynBindGroup>>,
+    pub(crate) device: Arc<Device>,
+    pub(crate) layout: Arc<BindGroupLayout>,
     /// The `label` from the descriptor used to create the resource.
     pub(crate) label: String,
     pub(crate) tracking_data: TrackingData,
-    pub(crate) used: BindGroupStates<A>,
-    pub(crate) used_buffer_ranges: Vec<BufferInitTrackerAction<A>>,
-    pub(crate) used_texture_ranges: Vec<TextureInitTrackerAction<A>>,
+    pub(crate) used: BindGroupStates,
+    pub(crate) used_buffer_ranges: Vec<BufferInitTrackerAction>,
+    pub(crate) used_texture_ranges: Vec<TextureInitTrackerAction>,
     pub(crate) dynamic_binding_info: Vec<BindGroupDynamicBindingData>,
     /// Actual binding sizes for buffers that don't have `min_binding_size`
     /// specified in BGL. Listed in the order of iteration of `BGL.entries`.
     pub(crate) late_buffer_binding_sizes: Vec<wgt::BufferSize>,
 }
 
-impl<A: HalApi> Drop for BindGroup<A> {
+impl Drop for BindGroup {
     fn drop(&mut self) {
         if let Some(raw) = self.raw.take() {
             resource_log!("Destroy raw {}", self.error_ident());
             unsafe {
-                use hal::Device;
                 self.device.raw().destroy_bind_group(raw);
             }
         }
     }
 }
 
-impl<A: HalApi> BindGroup<A> {
+impl BindGroup {
     pub(crate) fn try_raw<'a>(
         &'a self,
         guard: &'a SnatchGuard,
-    ) -> Result<&A::BindGroup, DestroyedResourceError> {
+    ) -> Result<&dyn hal::DynBindGroup, DestroyedResourceError> {
         // Clippy insists on writing it this way. The idea is to return None
         // if any of the raw buffers is no longer valid.
        for buffer in &self.used_buffer_ranges {
@@ -943,6 +936,7 @@ impl<A: HalApi> BindGroup<A> {
 
         self.raw
             .get(guard)
+            .map(|raw| raw.as_ref())
             .ok_or_else(|| DestroyedResourceError(self.error_ident()))
     }
 
diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs
index e17fd08d76..b05898a577 100644
--- a/wgpu-core/src/command/allocator.rs
+++ b/wgpu-core/src/command/allocator.rs
@@ -1,6 +1,4 @@
-use crate::hal_api::HalApi;
 use crate::resource_log;
-use hal::Device as _;
 
 use crate::lock::{rank, Mutex};
 
@@ -14,11 +12,11 @@ use crate::lock::{rank, Mutex};
 /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
 /// [ce]: hal::CommandEncoder
 /// [cb]: hal::Api::CommandBuffer
-pub(crate) struct CommandAllocator<A: HalApi> {
-    free_encoders: Mutex<Vec<A::CommandEncoder>>,
+pub(crate) struct CommandAllocator {
+    free_encoders: Mutex<Vec<Box<dyn hal::DynCommandEncoder>>>,
 }
 
-impl<A: HalApi> CommandAllocator<A> {
+impl CommandAllocator {
     pub(crate) fn new() -> Self {
         Self {
             free_encoders: Mutex::new(rank::COMMAND_ALLOCATOR_FREE_ENCODERS, Vec::new()),
@@ -33,9 +31,9 @@ impl<A: HalApi> CommandAllocator<A> {
     /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
     pub(crate) fn acquire_encoder(
         &self,
-        device: &A::Device,
-        queue: &A::Queue,
-    ) -> Result<A::CommandEncoder, hal::DeviceError> {
+        device: &dyn hal::DynDevice,
+        queue: &dyn hal::DynQueue,
+    ) -> Result<Box<dyn hal::DynCommandEncoder>, hal::DeviceError> {
         let mut free_encoders = self.free_encoders.lock();
         match free_encoders.pop() {
             Some(encoder) => Ok(encoder),
@@ -47,7 +45,7 @@ impl<A: HalApi> CommandAllocator<A> {
     }
 
     /// Add `encoder` back to the free pool.
-    pub(crate) fn release_encoder(&self, encoder: A::CommandEncoder) {
+    pub(crate) fn release_encoder(&self, encoder: Box<dyn hal::DynCommandEncoder>) {
         let mut free_encoders = self.free_encoders.lock();
         free_encoders.push(encoder);
     }
@@ -55,7 +53,7 @@ impl<A: HalApi> CommandAllocator<A> {
     /// Free the pool of command encoders.
     ///
     /// This is only called when the `Device` is dropped.
-    pub(crate) fn dispose(&self, device: &A::Device) {
+    pub(crate) fn dispose(&self, device: &dyn hal::DynDevice) {
         let mut free_encoders = self.free_encoders.lock();
         resource_log!("CommandAllocator::dispose encoders {}", free_encoders.len());
         for cmd_encoder in free_encoders.drain(..) 
{
diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index a6176ac4c9..620027994f 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -3,7 +3,6 @@ use std::sync::Arc;
 use crate::{
     binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PipelineLayout},
     device::SHADER_STAGE_COUNT,
-    hal_api::HalApi,
     pipeline::LateSizedBufferGroup,
     resource::{Labeled, ResourceErrorIdent},
 };
@@ -19,7 +18,6 @@ mod compat {
     use crate::{
         binding_model::BindGroupLayout,
         error::MultiError,
-        hal_api::HalApi,
         resource::{Labeled, ParentDevice, ResourceErrorIdent},
     };
     use std::{
@@ -38,12 +36,12 @@ mod compat {
     }
 
     #[derive(Debug, Clone)]
-    struct Entry<A: HalApi> {
-        assigned: Option<Arc<BindGroupLayout<A>>>,
-        expected: Option<Arc<BindGroupLayout<A>>>,
+    struct Entry {
+        assigned: Option<Arc<BindGroupLayout>>,
+        expected: Option<Arc<BindGroupLayout>>,
     }
 
-    impl<A: HalApi> Entry<A> {
+    impl Entry {
         fn empty() -> Self {
             Self {
                 assigned: None,
@@ -142,40 +140,38 @@
 
             let mut errors = Vec::new();
 
-            let mut expected_bgl_entries = expected_bgl.entries.iter();
-            let mut assigned_bgl_entries = assigned_bgl.entries.iter();
-            let zipped = (&mut expected_bgl_entries).zip(&mut assigned_bgl_entries);
-
-            for ((&binding, expected_entry), (_, assigned_entry)) in zipped {
-                if assigned_entry.visibility != expected_entry.visibility {
-                    errors.push(EntryError::Visibility {
-                        binding,
-                        expected: expected_entry.visibility,
-                        assigned: assigned_entry.visibility,
-                    });
+            for (&binding, expected_entry) in expected_bgl.entries.iter() {
+                if let Some(assigned_entry) = assigned_bgl.entries.get(binding) {
+                    if assigned_entry.visibility != expected_entry.visibility {
+                        errors.push(EntryError::Visibility {
+                            binding,
+                            expected: expected_entry.visibility,
+                            assigned: assigned_entry.visibility,
+                        });
+                    }
+                    if assigned_entry.ty != expected_entry.ty {
+                        errors.push(EntryError::Type {
+                            binding,
+                            expected: expected_entry.ty,
+                            assigned: assigned_entry.ty,
+                        });
+                    }
+                    if assigned_entry.count != expected_entry.count {
+                        errors.push(EntryError::Count {
+                            binding,
+                            expected: expected_entry.count,
+                            assigned: assigned_entry.count,
+                        });
+                    }
+                } else {
+                    errors.push(EntryError::ExtraExpected { binding });
                 }
-                if assigned_entry.ty != expected_entry.ty {
-                    errors.push(EntryError::Type {
-                        binding,
-                        expected: expected_entry.ty,
-                        assigned: assigned_entry.ty,
-                    });
-                }
-                if assigned_entry.count != expected_entry.count {
-                    errors.push(EntryError::Count {
-                        binding,
-                        expected: expected_entry.count,
-                        assigned: assigned_entry.count,
-                    });
-                }
-            }
-
-            for (&binding, _) in expected_bgl_entries {
-                errors.push(EntryError::ExtraExpected { binding });
             }
 
-            for (&binding, _) in assigned_bgl_entries {
-                errors.push(EntryError::ExtraAssigned { binding });
+            for (&binding, _) in assigned_bgl.entries.iter() {
+                if !expected_bgl.entries.contains_key(binding) {
+                    errors.push(EntryError::ExtraAssigned { binding });
+                }
             }
 
             Err(Error::Incompatible {
@@ -194,11 +190,11 @@
     }
 
     #[derive(Debug, Default)]
-    pub(crate) struct BoundBindGroupLayouts<A: HalApi> {
-        entries: ArrayVec<Entry<A>, { hal::MAX_BIND_GROUPS }>,
+    pub(crate) struct BoundBindGroupLayouts {
+        entries: ArrayVec<Entry, { hal::MAX_BIND_GROUPS }>,
     }
 
-    impl<A: HalApi> BoundBindGroupLayouts<A> {
+    impl BoundBindGroupLayouts {
         pub fn new() -> Self {
             Self {
                 entries: (0..hal::MAX_BIND_GROUPS).map(|_| Entry::empty()).collect(),
@@ -216,7 +212,7 @@
 
         pub fn update_expectations(
             &mut self,
-            expectations: &[Arc<BindGroupLayout<A>>],
+            expectations: &[Arc<BindGroupLayout>],
         ) -> Range<usize> {
             let start_index = self
                 .entries
@@ -238,7 +234,7 @@
             self.make_range(start_index)
         }
 
-        pub fn assign(&mut self, index: usize, value: Arc<BindGroupLayout<A>>) -> Range<usize> {
+        pub fn assign(&mut self, index: usize, value: Arc<BindGroupLayout>) -> Range<usize> {
             self.entries[index].assigned = Some(value);
             self.make_range(index)
         }
@@ -250,6 +246,7 @@
                 .filter_map(|(i, e)| if e.is_active() { Some(i) } else { None })
         }
 
+        #[allow(clippy::result_large_err)]
         pub fn get_invalid(&self) -> Result<(), (usize, Error)> {
             for (index, entry) in self.entries.iter().enumerate() {
                 entry.check().map_err(|e| (index, e))?;
@@ -284,9 +281,9 @@ struct LateBufferBinding {
     bound_size: wgt::BufferAddress,
 }
 
-#[derive(Debug)]
-pub(super) struct EntryPayload<A: HalApi> {
-    pub(super) group: Option<Arc<BindGroup<A>>>,
+#[derive(Debug, Default)]
+pub(super) struct EntryPayload {
+    pub(super) group: Option<Arc<BindGroup>>,
     pub(super) dynamic_offsets: Vec<wgt::DynamicOffset>,
     late_buffer_bindings: Vec<LateBufferBinding>,
     /// Since `LateBufferBinding` may contain information about the bindings
@@ -294,18 +291,7 @@
     pub(super) late_bindings_effective_count: usize,
 }
 
-impl<A: HalApi> Default for EntryPayload<A> {
-    fn default() -> Self {
-        Self {
-            group: None,
-            dynamic_offsets: Default::default(),
-            late_buffer_bindings: Default::default(),
-            late_bindings_effective_count: Default::default(),
-        }
-    }
-}
-
-impl<A: HalApi> EntryPayload<A> {
+impl EntryPayload {
     fn reset(&mut self) {
         self.group = None;
         self.dynamic_offsets.clear();
@@ -315,13 +301,13 @@
 }
 
 #[derive(Debug, Default)]
-pub(super) struct Binder<A: HalApi> {
-    pub(super) pipeline_layout: Option<Arc<PipelineLayout<A>>>,
-    manager: compat::BoundBindGroupLayouts<A>,
-    payloads: [EntryPayload<A>; hal::MAX_BIND_GROUPS],
+pub(super) struct Binder {
+    pub(super) pipeline_layout: Option<Arc<PipelineLayout>>,
+    manager: compat::BoundBindGroupLayouts,
+    payloads: [EntryPayload; hal::MAX_BIND_GROUPS],
 }
 
-impl<A: HalApi> Binder<A> {
+impl Binder {
     pub(super) fn new() -> Self {
         Self {
             pipeline_layout: None,
@@ -339,9 +325,9 @@
 
     pub(super) fn change_pipeline_layout<'a>(
         &'a mut self,
-        new: &Arc<PipelineLayout<A>>,
+        new: &Arc<PipelineLayout>,
         late_sized_buffer_groups: &[LateSizedBufferGroup],
-    ) -> (usize, &'a [EntryPayload<A>]) {
+    ) -> (usize, &'a [EntryPayload]) {
         let old_id_opt = self.pipeline_layout.replace(new.clone());
 
         let mut bind_range = self.manager.update_expectations(&new.bind_group_layouts);
@@ -381,11 +367,9 @@
     pub(super) fn assign_group<'a>(
         &'a mut self,
         index: usize,
-        bind_group: &Arc<BindGroup<A>>,
+        bind_group: &Arc<BindGroup>,
         offsets: &[wgt::DynamicOffset],
-    ) -> &'a [EntryPayload<A>] {
-        log::trace!("\tBinding [{}] = group {}", index, bind_group.error_ident());
-
+    ) -> &'a [EntryPayload] {
         let payload = &mut self.payloads[index];
         payload.group = Some(bind_group.clone());
         payload.dynamic_offsets.clear();
@@ -415,7 +399,7 @@
         &self.payloads[bind_range]
     }
 
-    pub(super) fn list_active<'a>(&'a self) -> impl Iterator<Item = &'a Arc<BindGroup<A>>> + '_ {
+    pub(super) fn list_active<'a>(&'a self) -> impl Iterator<Item = &'a Arc<BindGroup>> + '_ {
         let payloads = &self.payloads;
         self.manager
             .list_active()
diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index 20ff40efef..56f7d551b0 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -88,7 +88,6 @@ use crate::{
     device::{
         AttachmentData, Device, DeviceError, MissingDownlevelFlags, RenderPassContext,
         SHADER_STAGE_COUNT,
     },
-    hal_api::HalApi,
     hub::Hub,
     id,
     init_tracker::{BufferInitTrackerAction, MemoryInitKind, TextureInitTrackerAction},
@@ -104,16 +103,14 @@ use arrayvec::ArrayVec;
 use std::{borrow::Cow, mem, num::NonZeroU32, ops::Range, sync::Arc};
 use thiserror::Error;
 
-use hal::CommandEncoder as _;
-
 use super::{
     render_command::{ArcRenderCommand, RenderCommand},
     DrawKind,
 };
 
 /// <https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-draw>
-fn validate_draw<A: HalApi>(
-    vertex: &[Option<VertexState<A>>],
+fn validate_draw(
+    
vertex: &[Option], step: &[VertexStep], first_vertex: u32, vertex_count: u32, @@ -153,10 +150,10 @@ fn validate_draw( } // See https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-drawindexed -fn validate_indexed_draw( - vertex: &[Option>], +fn validate_indexed_draw( + vertex: &[Option], step: &[VertexStep], - index_state: &IndexState, + index_state: &IndexState, first_index: u32, index_count: u32, first_instance: u32, @@ -341,12 +338,12 @@ impl RenderBundleEncoder { /// and accumulate buffer and texture initialization actions. /// /// [`ExecuteBundle`]: RenderCommand::ExecuteBundle - pub(crate) fn finish( + pub(crate) fn finish( self, desc: &RenderBundleDescriptor, - device: &Arc>, - hub: &Hub, - ) -> Result>, RenderBundleError> { + device: &Arc, + hub: &Hub, + ) -> Result, RenderBundleError> { let scope = PassErrorScope::Bundle; device.check_is_valid().map_pass_err(scope)?; @@ -370,31 +367,8 @@ impl RenderBundleEncoder { }; let indices = &state.device.tracker_indices; - state - .trackers - .buffers - .write() - .set_size(indices.buffers.size()); - state - .trackers - .textures - .write() - .set_size(indices.textures.size()); - state - .trackers - .bind_groups - .write() - .set_size(indices.bind_groups.size()); - state - .trackers - .render_pipelines - .write() - .set_size(indices.render_pipelines.size()); - state - .trackers - .query_sets - .write() - .set_size(indices.query_sets.size()); + state.trackers.buffers.set_size(indices.buffers.size()); + state.trackers.textures.set_size(indices.textures.size()); let base = &self.base; @@ -602,9 +576,9 @@ impl RenderBundleEncoder { } } -fn set_bind_group( - state: &mut State, - bind_group_guard: &crate::lock::RwLockReadGuard>>, +fn set_bind_group( + state: &mut State, + bind_group_guard: &crate::lock::RwLockReadGuard>, dynamic_offsets: &[u32], index: u32, num_dynamic_offsets: usize, @@ -641,15 +615,15 @@ fn set_bind_group( state.set_bind_group(index, &bind_group, offsets_range); unsafe { state.trackers.merge_bind_group(&bind_group.used)? }; - state.trackers.bind_groups.write().insert_single(bind_group); + state.trackers.bind_groups.insert_single(bind_group); // Note: stateless trackers are not merged: the lifetime reference // is held to the bind group itself. 
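
An aside on the compatibility-check rework in `bind.rs` earlier: zipping two sorted entry maps only lines bindings up correctly when both layouts contain exactly the same binding numbers, so one missing binding shifts every later comparison. A keyed lookup reports per-binding mismatches no matter how the two key sets differ. A minimal standalone sketch of that pattern, using plain `BTreeMap`s and a simplified entry/error shape rather than wgpu's actual `BindGroupLayoutEntry`/`EntryError` types:

```rust
use std::collections::BTreeMap;

#[derive(Debug, PartialEq)]
struct Entry {
    visibility: u32, // stand-in for wgt::ShaderStages
}

#[derive(Debug)]
enum EntryError {
    Mismatch { binding: u32 },
    ExtraExpected { binding: u32 },
    ExtraAssigned { binding: u32 },
}

/// Compare by key instead of zipping two iterators, so differing key sets
/// cannot shift every subsequent comparison off by one.
fn check(expected: &BTreeMap<u32, Entry>, assigned: &BTreeMap<u32, Entry>) -> Vec<EntryError> {
    let mut errors = Vec::new();
    for (&binding, expected_entry) in expected {
        match assigned.get(&binding) {
            Some(assigned_entry) if assigned_entry != expected_entry => {
                errors.push(EntryError::Mismatch { binding })
            }
            Some(_) => {}
            None => errors.push(EntryError::ExtraExpected { binding }),
        }
    }
    // Anything assigned that was never expected is its own error class.
    for &binding in assigned.keys() {
        if !expected.contains_key(&binding) {
            errors.push(EntryError::ExtraAssigned { binding });
        }
    }
    errors
}
```
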
Ok(()) } -fn set_pipeline( - state: &mut State, - pipeline_guard: &crate::lock::RwLockReadGuard>>, +fn set_pipeline( + state: &mut State, + pipeline_guard: &crate::lock::RwLockReadGuard>, context: &RenderPassContext, is_depth_read_only: bool, is_stencil_read_only: bool, @@ -686,17 +660,13 @@ fn set_pipeline( state.invalidate_bind_groups(&pipeline_state, &pipeline.layout); state.pipeline = Some(pipeline_state); - state - .trackers - .render_pipelines - .write() - .insert_single(pipeline); + state.trackers.render_pipelines.insert_single(pipeline); Ok(()) } -fn set_index_buffer( - state: &mut State, - buffer_guard: &crate::lock::RwLockReadGuard>>, +fn set_index_buffer( + state: &mut State, + buffer_guard: &crate::lock::RwLockReadGuard>, buffer_id: id::Id, index_format: wgt::IndexFormat, offset: u64, @@ -709,7 +679,6 @@ fn set_index_buffer( state .trackers .buffers - .write() .merge_single(&buffer, hal::BufferUses::INDEX)?; buffer.same_device(&state.device)?; @@ -730,9 +699,9 @@ fn set_index_buffer( Ok(()) } -fn set_vertex_buffer( - state: &mut State, - buffer_guard: &crate::lock::RwLockReadGuard>>, +fn set_vertex_buffer( + state: &mut State, + buffer_guard: &crate::lock::RwLockReadGuard>, slot: u32, buffer_id: id::Id, offset: u64, @@ -754,7 +723,6 @@ fn set_vertex_buffer( state .trackers .buffers - .write() .merge_single(&buffer, hal::BufferUses::VERTEX)?; buffer.same_device(&state.device)?; @@ -775,8 +743,8 @@ fn set_vertex_buffer( Ok(()) } -fn set_push_constant( - state: &mut State, +fn set_push_constant( + state: &mut State, stages: wgt::ShaderStages, offset: u32, size_bytes: u32, @@ -800,8 +768,8 @@ fn set_push_constant( Ok(()) } -fn draw( - state: &mut State, +fn draw( + state: &mut State, dynamic_offsets: &[u32], vertex_count: u32, instance_count: u32, @@ -833,8 +801,8 @@ fn draw( Ok(()) } -fn draw_indexed( - state: &mut State, +fn draw_indexed( + state: &mut State, dynamic_offsets: &[u32], index_count: u32, instance_count: u32, @@ -874,10 +842,10 @@ fn draw_indexed( Ok(()) } -fn multi_draw_indirect( - state: &mut State, +fn multi_draw_indirect( + state: &mut State, dynamic_offsets: &[u32], - buffer_guard: &crate::lock::RwLockReadGuard>>, + buffer_guard: &crate::lock::RwLockReadGuard>, buffer_id: id::Id, offset: u64, indexed: bool, @@ -896,7 +864,6 @@ fn multi_draw_indirect( state .trackers .buffers - .write() .merge_single(&buffer, hal::BufferUses::INDIRECT)?; buffer.same_device(&state.device)?; @@ -955,16 +922,16 @@ pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor>; // The plan is to back it by an actual Vulkan secondary buffer, D3D12 Bundle, // or Metal indirect command buffer. #[derive(Debug)] -pub struct RenderBundle { +pub struct RenderBundle { // Normalized command stream. It can be executed verbatim, // without re-binding anything on the pipeline change. - base: BasePass>, + base: BasePass, pub(super) is_depth_read_only: bool, pub(super) is_stencil_read_only: bool, - pub(crate) device: Arc>, - pub(crate) used: RenderBundleScope, - pub(super) buffer_memory_init_actions: Vec>, - pub(super) texture_memory_init_actions: Vec>, + pub(crate) device: Arc, + pub(crate) used: RenderBundleScope, + pub(super) buffer_memory_init_actions: Vec, + pub(super) texture_memory_init_actions: Vec, pub(super) context: RenderPassContext, /// The `label` from the descriptor used to create the resource. 
label: String, @@ -972,18 +939,18 @@ pub struct RenderBundle { discard_hal_labels: bool, } -impl Drop for RenderBundle { +impl Drop for RenderBundle { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); } } #[cfg(send_sync)] -unsafe impl Send for RenderBundle {} +unsafe impl Send for RenderBundle {} #[cfg(send_sync)] -unsafe impl Sync for RenderBundle {} +unsafe impl Sync for RenderBundle {} -impl RenderBundle { +impl RenderBundle { /// Actually encode the contents into a native command buffer. /// /// This is partially duplicating the logic of `render_pass_end`. @@ -995,11 +962,11 @@ impl RenderBundle { /// The only failure condition is if some of the used buffers are destroyed. pub(super) unsafe fn execute( &self, - raw: &mut A::CommandEncoder, + raw: &mut dyn hal::DynCommandEncoder, snatch_guard: &SnatchGuard, ) -> Result<(), ExecutionError> { let mut offsets = self.base.dynamic_offsets.as_slice(); - let mut pipeline_layout = None::>>; + let mut pipeline_layout = None::>; if !self.discard_hal_labels { if let Some(ref label) = self.base.label { unsafe { raw.begin_debug_marker(label) }; @@ -1036,7 +1003,7 @@ impl RenderBundle { offset, size, } => { - let buffer: &A::Buffer = buffer.try_raw(snatch_guard)?; + let buffer = buffer.try_raw(snatch_guard)?; let bb = hal::BufferBinding { buffer, offset: *offset, @@ -1190,14 +1157,14 @@ crate::impl_trackable!(RenderBundle); /// and calls [`State::flush_index`] before any indexed draw command to produce /// a `SetIndexBuffer` command if one is necessary. #[derive(Debug)] -struct IndexState { - buffer: Arc>, +struct IndexState { + buffer: Arc, format: wgt::IndexFormat, range: Range, is_dirty: bool, } -impl IndexState { +impl IndexState { /// Return the number of entries in the current index buffer. /// /// Panic if no index buffer has been set. @@ -1212,7 +1179,7 @@ impl IndexState { /// Generate a `SetIndexBuffer` command to prepare for an indexed draw /// command, if needed. - fn flush(&mut self) -> Option> { + fn flush(&mut self) -> Option { if self.is_dirty { self.is_dirty = false; Some(ArcRenderCommand::SetIndexBuffer { @@ -1237,14 +1204,14 @@ impl IndexState { /// /// [`flush`]: IndexState::flush #[derive(Debug)] -struct VertexState { - buffer: Arc>, +struct VertexState { + buffer: Arc, range: Range, is_dirty: bool, } -impl VertexState { - fn new(buffer: Arc>, range: Range) -> Self { +impl VertexState { + fn new(buffer: Arc, range: Range) -> Self { Self { buffer, range, @@ -1255,7 +1222,7 @@ impl VertexState { /// Generate a `SetVertexBuffer` command for this slot, if necessary. /// /// `slot` is the index of the vertex buffer slot that `self` tracks. - fn flush(&mut self, slot: u32) -> Option> { + fn flush(&mut self, slot: u32) -> Option { if self.is_dirty { self.is_dirty = false; Some(ArcRenderCommand::SetVertexBuffer { @@ -1272,9 +1239,9 @@ impl VertexState { /// A bind group that has been set at a particular index during render bundle encoding. #[derive(Debug)] -struct BindState { +struct BindState { /// The id of the bind group set at this index. - bind_group: Arc>, + bind_group: Arc, /// The range of dynamic offsets for this bind group, in the original /// command stream's `BassPass::dynamic_offsets` array. @@ -1286,9 +1253,9 @@ struct BindState { } /// The bundle's current pipeline, and some cached information needed for validation. 
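
The `IndexState` and `VertexState` types above share one dirty-flag pattern: remember the latest binding, and only emit a `Set*Buffer` command when a draw actually needs it, so redundant rebinds never reach the hal encoder. A condensed sketch of the idea; the `Command` enum here is a hypothetical stand-in for `ArcRenderCommand`:

```rust
#[derive(Debug)]
enum Command {
    SetVertexBuffer { slot: u32, buffer: u32 },
}

struct VertexSlot {
    buffer: u32,
    is_dirty: bool,
}

impl VertexSlot {
    fn set(&mut self, buffer: u32) {
        self.buffer = buffer;
        self.is_dirty = true;
    }

    /// Emit a command only if the slot changed since the last flush;
    /// `flush` is called right before each draw is recorded.
    fn flush(&mut self, slot: u32) -> Option<Command> {
        if self.is_dirty {
            self.is_dirty = false;
            Some(Command::SetVertexBuffer { slot, buffer: self.buffer })
        } else {
            None
        }
    }
}
```
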
-struct PipelineState { +struct PipelineState { /// The pipeline - pipeline: Arc>, + pipeline: Arc, /// How this pipeline's vertex shader traverses each vertex buffer, indexed /// by vertex buffer slot number. @@ -1302,8 +1269,8 @@ struct PipelineState { used_bind_groups: usize, } -impl PipelineState { - fn new(pipeline: &Arc>) -> Self { +impl PipelineState { + fn new(pipeline: &Arc) -> Self { Self { pipeline: pipeline.clone(), steps: pipeline.vertex_steps.to_vec(), @@ -1319,7 +1286,7 @@ impl PipelineState { /// Return a sequence of commands to zero the push constant ranges this /// pipeline uses. If no initialization is necessary, return `None`. - fn zero_push_constants(&self) -> Option>> { + fn zero_push_constants(&self) -> Option> { if !self.push_constant_ranges.is_empty() { let nonoverlapping_ranges = super::bind::compute_nonoverlapping_ranges(&self.push_constant_ranges); @@ -1350,22 +1317,22 @@ impl PipelineState { /// /// [`SetBindGroup`]: RenderCommand::SetBindGroup /// [`SetIndexBuffer`]: RenderCommand::SetIndexBuffer -struct State { +struct State { /// Resources used by this bundle. This will become [`RenderBundle::used`]. - trackers: RenderBundleScope, + trackers: RenderBundleScope, /// The currently set pipeline, if any. - pipeline: Option>, + pipeline: Option, /// The bind group set at each index, if any. - bind: ArrayVec>, { hal::MAX_BIND_GROUPS }>, + bind: ArrayVec, { hal::MAX_BIND_GROUPS }>, /// The state of each vertex buffer slot. - vertex: ArrayVec>, { hal::MAX_VERTEX_BUFFERS }>, + vertex: ArrayVec, { hal::MAX_VERTEX_BUFFERS }>, /// The current index buffer, if one has been set. We flush this state /// before indexed draw commands. - index: Option>, + index: Option, /// Dynamic offset values used by the cleaned-up command sequence. /// @@ -1375,16 +1342,16 @@ struct State { /// [`dynamic_offsets`]: BasePass::dynamic_offsets flat_dynamic_offsets: Vec, - device: Arc>, - commands: Vec>, - buffer_memory_init_actions: Vec>, - texture_memory_init_actions: Vec>, + device: Arc, + commands: Vec, + buffer_memory_init_actions: Vec, + texture_memory_init_actions: Vec, next_dynamic_offset: usize, } -impl State { +impl State { /// Return the current pipeline state. Return an error if none is set. - fn pipeline(&self) -> Result<&PipelineState, RenderBundleErrorInner> { + fn pipeline(&self) -> Result<&PipelineState, RenderBundleErrorInner> { self.pipeline .as_ref() .ok_or(DrawError::MissingPipeline.into()) @@ -1400,7 +1367,7 @@ impl State { fn set_bind_group( &mut self, slot: u32, - bind_group: &Arc>, + bind_group: &Arc, dynamic_offsets: Range, ) { // If this call wouldn't actually change this index's state, we can @@ -1439,7 +1406,7 @@ impl State { /// /// - Changing the push constant ranges at all requires re-establishing /// all bind groups. - fn invalidate_bind_groups(&mut self, new: &PipelineState, layout: &PipelineLayout) { + fn invalidate_bind_groups(&mut self, new: &PipelineState, layout: &PipelineLayout) { match self.pipeline { None => { // Establishing entirely new pipeline state. @@ -1473,7 +1440,7 @@ impl State { /// Set the bundle's current index buffer and its associated parameters. 
fn set_index_buffer( &mut self, - buffer: Arc>, + buffer: Arc, format: wgt::IndexFormat, range: Range, ) { diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index 6f51f73d57..944dd40af4 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -8,7 +8,6 @@ use crate::{ device::DeviceError, get_lowest_common_denom, global::Global, - hal_api::HalApi, id::{BufferId, CommandEncoderId, TextureId}, init_tracker::{MemoryInitKind, TextureInitRange}, resource::{ @@ -19,7 +18,6 @@ use crate::{ track::{TextureSelector, TextureTrackerSetSingle}, }; -use hal::CommandEncoder as _; use thiserror::Error; use wgt::{math::align_to, BufferAddress, BufferUsages, ImageSubresourceRange, TextureAspect}; @@ -80,7 +78,7 @@ whereas subesource range specified start {subresource_base_array_layer} and coun } impl Global { - pub fn command_encoder_clear_buffer( + pub fn command_encoder_clear_buffer( &self, command_encoder_id: CommandEncoderId, dst: BufferId, @@ -90,7 +88,7 @@ impl Global { profiling::scope!("CommandEncoder::clear_buffer"); api_log!("CommandEncoder::clear_buffer {dst:?}"); - let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -167,13 +165,13 @@ impl Global { let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); let cmd_buf_raw = cmd_buf_data.encoder.open()?; unsafe { - cmd_buf_raw.transition_buffers(dst_barrier.into_iter()); + cmd_buf_raw.transition_buffers(dst_barrier.as_slice()); cmd_buf_raw.clear_buffer(dst_raw, offset..end_offset); } Ok(()) } - pub fn command_encoder_clear_texture( + pub fn command_encoder_clear_texture( &self, command_encoder_id: CommandEncoderId, dst: TextureId, @@ -182,7 +180,7 @@ impl Global { profiling::scope!("CommandEncoder::clear_texture"); api_log!("CommandEncoder::clear_texture {dst:?}"); - let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -263,25 +261,25 @@ impl Global { encoder, &mut tracker.textures, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), &snatch_guard, ) } } -pub(crate) fn clear_texture>( - dst_texture: &Arc>, +pub(crate) fn clear_texture( + dst_texture: &Arc, range: TextureInitRange, - encoder: &mut A::CommandEncoder, + encoder: &mut dyn hal::DynCommandEncoder, texture_tracker: &mut T, alignments: &hal::Alignments, - zero_buffer: &A::Buffer, + zero_buffer: &dyn hal::DynBuffer, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), ClearError> { let dst_raw = dst_texture.try_raw(snatch_guard)?; // Issue the right barrier. - let clear_usage = match *dst_texture.clear_mode.read() { + let clear_usage = match dst_texture.clear_mode { TextureClearMode::BufferCopy => hal::TextureUses::COPY_DST, TextureClearMode::RenderPass { is_color: false, .. @@ -316,14 +314,15 @@ pub(crate) fn clear_texture>( // change_replace_tracked whenever possible. 
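
The signature changes running through `bundle.rs` and `clear.rs` above are all instances of one refactor: functions that were generic over `A: HalApi` now take trait objects, so a single compiled body serves every backend instead of one monomorphized copy per enabled backend. A toy before/after, with invented `Encoder`/`VkEncoder` types standing in for the real wgpu-hal traits:

```rust
trait Encoder {
    fn clear(&mut self, offset: u64, size: u64);
}

struct VkEncoder;
impl Encoder for VkEncoder {
    fn clear(&mut self, offset: u64, size: u64) {
        println!("vkCmdFillBuffer({offset}, {size})");
    }
}

// Before: one monomorphized copy per backend enabled at compile time.
fn clear_generic<E: Encoder>(enc: &mut E) {
    enc.clear(0, 256);
}

// After: a single body, dispatched through a vtable at runtime.
fn clear_dyn(enc: &mut dyn Encoder) {
    enc.clear(0, 256);
}

fn main() {
    let mut enc = VkEncoder;
    clear_generic(&mut enc);
    clear_dyn(&mut enc); // unsizing coercion from &mut VkEncoder
}
```

This trades a vtable call per hal operation for smaller binaries and faster compiles, which is the bargain described in the changelog entry for this PR.
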
let dst_barrier = texture_tracker .set_single(dst_texture, selector, clear_usage) - .map(|pending| pending.into_hal(dst_raw)); + .map(|pending| pending.into_hal(dst_raw)) + .collect::>(); unsafe { - encoder.transition_textures(dst_barrier.into_iter()); + encoder.transition_textures(&dst_barrier); } // Record actual clearing - match *dst_texture.clear_mode.read() { - TextureClearMode::BufferCopy => clear_texture_via_buffer_copies::( + match dst_texture.clear_mode { + TextureClearMode::BufferCopy => clear_texture_via_buffer_copies( &dst_texture.desc, alignments, zero_buffer, @@ -346,13 +345,13 @@ pub(crate) fn clear_texture>( Ok(()) } -fn clear_texture_via_buffer_copies( +fn clear_texture_via_buffer_copies( texture_desc: &wgt::TextureDescriptor<(), Vec>, alignments: &hal::Alignments, - zero_buffer: &A::Buffer, // Buffer of size device::ZERO_BUFFER_SIZE + zero_buffer: &dyn hal::DynBuffer, // Buffer of size device::ZERO_BUFFER_SIZE range: TextureInitRange, - encoder: &mut A::CommandEncoder, - dst_raw: &A::Texture, + encoder: &mut dyn hal::DynCommandEncoder, + dst_raw: &dyn hal::DynTexture, ) { assert!(!texture_desc.format.is_depth_stencil_format()); @@ -436,15 +435,15 @@ fn clear_texture_via_buffer_copies( } unsafe { - encoder.copy_buffer_to_texture(zero_buffer, dst_raw, zero_buffer_copy_regions.into_iter()); + encoder.copy_buffer_to_texture(zero_buffer, dst_raw, &zero_buffer_copy_regions); } } -fn clear_texture_via_render_passes( - dst_texture: &Texture, +fn clear_texture_via_render_passes( + dst_texture: &Texture, range: TextureInitRange, is_color: bool, - encoder: &mut A::CommandEncoder, + encoder: &mut dyn hal::DynCommandEncoder, ) { assert_eq!(dst_texture.desc.dimension, wgt::TextureDimension::D2); @@ -453,7 +452,6 @@ fn clear_texture_via_render_passes( height: dst_texture.desc.size.height, depth_or_array_layers: 1, // Only one layer is cleared at a time. }; - let clear_mode = &dst_texture.clear_mode.read(); for mip_level in range.mip_range { let extent = extent_base.mip_level_size(mip_level, dst_texture.desc.dimension); @@ -463,7 +461,7 @@ fn clear_texture_via_render_passes( color_attachments_tmp = [Some(hal::ColorAttachment { target: hal::Attachment { view: Texture::get_clear_view( - clear_mode, + &dst_texture.clear_mode, &dst_texture.desc, mip_level, depth_or_layer, @@ -481,7 +479,7 @@ fn clear_texture_via_render_passes( Some(hal::DepthStencilAttachment { target: hal::Attachment { view: Texture::get_clear_view( - clear_mode, + &dst_texture.clear_mode, &dst_texture.desc, mip_level, depth_or_layer, diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index 613e10f7a3..2c1d62cbb7 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -13,7 +13,6 @@ use crate::{ }, device::{Device, DeviceError, MissingDownlevelFlags, MissingFeatures}, global::Global, - hal_api::HalApi, hal_label, id, init_tracker::{BufferInitTrackerAction, MemoryInitKind}, pipeline::ComputePipeline, @@ -26,38 +25,36 @@ use crate::{ Label, }; -use hal::CommandEncoder as _; - use thiserror::Error; use wgt::{BufferAddress, DynamicOffset}; -use super::{bind::BinderError, memory_init::CommandBufferTextureMemoryActions, DynComputePass}; +use super::{bind::BinderError, memory_init::CommandBufferTextureMemoryActions}; use crate::ray_tracing::TlasAction; use std::sync::Arc; use std::{fmt, mem, str}; -pub struct ComputePass { +pub struct ComputePass { /// All pass data & records is stored here. 
/// /// If this is `None`, the pass is in the 'ended' state and can no longer be used. /// Any attempt to record more commands will result in a validation error. - base: Option>>, + base: Option>, /// Parent command buffer that this pass records commands into. /// /// If it is none, this pass is invalid and any operation on it will return an error. - parent: Option>>, + parent: Option>, - timestamp_writes: Option>, + timestamp_writes: Option, // Resource binding dedupe state. current_bind_groups: BindGroupStateChange, current_pipeline: StateChange, } -impl ComputePass { +impl ComputePass { /// If the parent command buffer is invalid, the returned pass will be invalid. - fn new(parent: Option>>, desc: ArcComputePassDescriptor) -> Self { + fn new(parent: Option>, desc: ArcComputePassDescriptor) -> Self { let ArcComputePassDescriptor { label, timestamp_writes, @@ -81,7 +78,7 @@ impl ComputePass { fn base_mut<'a>( &'a mut self, scope: PassErrorScope, - ) -> Result<&'a mut BasePass>, ComputePassError> { + ) -> Result<&'a mut BasePass, ComputePassError> { self.base .as_mut() .ok_or(ComputePassErrorInner::PassEnded) @@ -89,7 +86,7 @@ impl ComputePass { } } -impl fmt::Debug for ComputePass { +impl fmt::Debug for ComputePass { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.parent { Some(ref cmd_buf) => write!(f, "ComputePass {{ parent: {} }}", cmd_buf.error_ident()), @@ -105,10 +102,10 @@ pub struct ComputePassDescriptor<'a> { pub timestamp_writes: Option<&'a PassTimestampWrites>, } -struct ArcComputePassDescriptor<'a, A: HalApi> { +struct ArcComputePassDescriptor<'a> { pub label: &'a Label<'a>, /// Defines where and when timestamp values will be written for this pass. - pub timestamp_writes: Option>, + pub timestamp_writes: Option, } #[derive(Clone, Debug, Error)] @@ -202,37 +199,37 @@ where } } -struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi> { - binder: Binder, - pipeline: Option>>, - scope: UsageScope<'scope, A>, +struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> { + binder: Binder, + pipeline: Option>, + scope: UsageScope<'scope>, debug_scope_depth: u32, snatch_guard: SnatchGuard<'snatch_guard>, - device: &'cmd_buf Arc>, + device: &'cmd_buf Arc, - raw_encoder: &'raw_encoder mut A::CommandEncoder, + raw_encoder: &'raw_encoder mut dyn hal::DynCommandEncoder, - tracker: &'cmd_buf mut Tracker, - buffer_memory_init_actions: &'cmd_buf mut Vec>, - texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions, - tlas_actions: &'cmd_buf mut Vec>, + tracker: &'cmd_buf mut Tracker, + buffer_memory_init_actions: &'cmd_buf mut Vec, + texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions, + tlas_actions: &'cmd_buf mut Vec, temp_offsets: Vec, dynamic_offset_count: usize, string_offset: usize, - active_query: Option<(Arc>, u32)>, + active_query: Option<(Arc, u32)>, - intermediate_trackers: Tracker, + intermediate_trackers: Tracker, /// Immediate texture inits required because of prior discards. Need to /// be inserted before texture reads. 
- pending_discard_init_fixups: SurfacesInDiscardState, + pending_discard_init_fixups: SurfacesInDiscardState, } -impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi> - State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A> +impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> + State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> { fn is_ready(&self) -> Result<(), DispatchError> { if let Some(pipeline) = self.pipeline.as_ref() { @@ -270,8 +267,6 @@ impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi> .set_and_remove_from_usage_scope_sparse(&mut self.scope.buffers, indirect_buffer); } - log::trace!("Encoding dispatch barriers"); - CommandBuffer::drain_barriers( self.raw_encoder, &mut self.intermediate_trackers, @@ -290,12 +285,12 @@ impl Global { /// Any operation on an invalid pass will return an error. /// /// If successful, puts the encoder into the [`CommandEncoderStatus::Locked`] state. - pub fn command_encoder_create_compute_pass( + pub fn command_encoder_create_compute_pass( &self, encoder_id: id::CommandEncoderId, desc: &ComputePassDescriptor<'_>, - ) -> (ComputePass, Option) { - let hub = A::hub(self); + ) -> (ComputePass, Option) { + let hub = &self.hub; let mut arc_desc = ArcComputePassDescriptor { label: &desc.label, @@ -322,10 +317,6 @@ impl Global { ); }; - if let Err(e) = query_set.same_device_as(cmd_buf.as_ref()) { - return make_err(e.into(), arc_desc); - } - Some(ArcPassTimestampWrites { query_set, beginning_of_pass_write_index: tw.beginning_of_pass_write_index, @@ -338,23 +329,7 @@ impl Global { (ComputePass::new(Some(cmd_buf), arc_desc), None) } - /// Creates a type erased compute pass. - /// - /// If creation fails, an invalid pass is returned. - /// Any operation on an invalid pass will return an error. - pub fn command_encoder_create_compute_pass_dyn( - &self, - encoder_id: id::CommandEncoderId, - desc: &ComputePassDescriptor, - ) -> (Box, Option) { - let (pass, err) = self.command_encoder_create_compute_pass::(encoder_id, desc); - (Box::new(pass), err) - } - - pub fn compute_pass_end( - &self, - pass: &mut ComputePass, - ) -> Result<(), ComputePassError> { + pub fn compute_pass_end(&self, pass: &mut ComputePass) -> Result<(), ComputePassError> { let scope = PassErrorScope::Pass; let cmd_buf = pass @@ -375,13 +350,13 @@ impl Global { #[doc(hidden)] #[cfg(any(feature = "serde", feature = "replay"))] - pub fn compute_pass_end_with_unresolved_commands( + pub fn compute_pass_end_with_unresolved_commands( &self, encoder_id: id::CommandEncoderId, base: BasePass, timestamp_writes: Option<&PassTimestampWrites>, ) -> Result<(), ComputePassError> { - let hub = A::hub(self); + let hub = &self.hub; let scope = PassErrorScope::Pass; let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) { @@ -409,7 +384,7 @@ impl Global { } let commands = - super::ComputeCommand::resolve_compute_command_ids(A::hub(self), &base.commands)?; + super::ComputeCommand::resolve_compute_command_ids(&self.hub, &base.commands)?; let timestamp_writes = if let Some(tw) = timestamp_writes { Some(ArcPassTimestampWrites { @@ -425,7 +400,7 @@ impl Global { None }; - self.compute_pass_end_impl::( + self.compute_pass_end_impl( &cmd_buf, BasePass { label: base.label, @@ -438,11 +413,11 @@ impl Global { ) } - fn compute_pass_end_impl( + fn compute_pass_end_impl( &self, - cmd_buf: &CommandBuffer, - base: BasePass>, - mut timestamp_writes: Option>, + cmd_buf: &CommandBuffer, + base: BasePass, + mut timestamp_writes: Option, ) -> Result<(), ComputePassError> { 
profiling::scope!("CommandEncoder::run_compute_pass"); let pass_scope = PassErrorScope::Pass; @@ -492,48 +467,42 @@ impl Global { let indices = &state.device.tracker_indices; state.tracker.buffers.set_size(indices.buffers.size()); state.tracker.textures.set_size(indices.textures.size()); - state - .tracker - .bind_groups - .set_size(indices.bind_groups.size()); - state - .tracker - .compute_pipelines - .set_size(indices.compute_pipelines.size()); - state.tracker.query_sets.set_size(indices.query_sets.size()); - - let timestamp_writes = if let Some(tw) = timestamp_writes.take() { - let query_set = state.tracker.query_sets.insert_single(tw.query_set); - - // Unlike in render passes we can't delay resetting the query sets since - // there is no auxiliary pass. - let range = if let (Some(index_a), Some(index_b)) = - (tw.beginning_of_pass_write_index, tw.end_of_pass_write_index) - { - Some(index_a.min(index_b)..index_a.max(index_b) + 1) - } else { - tw.beginning_of_pass_write_index - .or(tw.end_of_pass_write_index) - .map(|i| i..i + 1) - }; - // Range should always be Some, both values being None should lead to a validation error. - // But no point in erroring over that nuance here! - if let Some(range) = range { - unsafe { - state - .raw_encoder - .reset_queries(query_set.raw.as_ref().unwrap(), range); + + let timestamp_writes: Option> = + if let Some(tw) = timestamp_writes.take() { + tw.query_set + .same_device_as(cmd_buf) + .map_pass_err(pass_scope)?; + + let query_set = state.tracker.query_sets.insert_single(tw.query_set); + + // Unlike in render passes we can't delay resetting the query sets since + // there is no auxiliary pass. + let range = if let (Some(index_a), Some(index_b)) = + (tw.beginning_of_pass_write_index, tw.end_of_pass_write_index) + { + Some(index_a.min(index_b)..index_a.max(index_b) + 1) + } else { + tw.beginning_of_pass_write_index + .or(tw.end_of_pass_write_index) + .map(|i| i..i + 1) + }; + // Range should always be Some, both values being None should lead to a validation error. + // But no point in erroring over that nuance here! 
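
The begin/end index handling above collapses two optional timestamp-query indices into the single contiguous range that must be reset. As a self-contained function with a small test (a sketch mirroring the logic, not the wgpu-core source):

```rust
use std::ops::Range;

/// Collapse optional begin/end timestamp indices into the query range
/// that needs resetting; `None` for both yields no reset at all.
fn reset_range(begin: Option<u32>, end: Option<u32>) -> Option<Range<u32>> {
    match (begin, end) {
        (Some(a), Some(b)) => Some(a.min(b)..a.max(b) + 1),
        (one, other) => one.or(other).map(|i| i..i + 1),
    }
}

#[test]
fn collapses_as_expected() {
    assert_eq!(reset_range(Some(3), Some(1)), Some(1..4));
    assert_eq!(reset_range(None, Some(5)), Some(5..6));
    assert_eq!(reset_range(None, None), None);
}
```
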
+ if let Some(range) = range { + unsafe { + state.raw_encoder.reset_queries(query_set.raw(), range); + } } - } - Some(hal::ComputePassTimestampWrites { - query_set: query_set.raw.as_ref().unwrap(), - beginning_of_pass_write_index: tw.beginning_of_pass_write_index, - end_of_pass_write_index: tw.end_of_pass_write_index, - }) - } else { - None - }; + Some(hal::PassTimestampWrites { + query_set: query_set.raw(), + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, + }) + } else { + None + }; let hal_desc = hal::ComputePassDescriptor { label: hal_label(base.label.as_deref(), self.instance.flags), @@ -676,13 +645,13 @@ impl Global { } } -fn set_bind_group( - state: &mut State, - cmd_buf: &CommandBuffer, +fn set_bind_group( + state: &mut State, + cmd_buf: &CommandBuffer, dynamic_offsets: &[DynamicOffset], index: u32, num_dynamic_offsets: usize, - bind_group: Arc>, + bind_group: Arc, ) -> Result<(), ComputePassErrorInner> { bind_group.same_device_as(cmd_buf)?; @@ -723,9 +692,9 @@ fn set_bind_group( let used_resource = bind_group .used .acceleration_structures - .used_resources() + .into_iter() .map(|tlas| TlasAction { - tlas, + tlas: tlas.clone(), kind: crate::ray_tracing::TlasActionKind::Use, }); @@ -754,10 +723,10 @@ fn set_bind_group( Ok(()) } -fn set_pipeline( - state: &mut State, - cmd_buf: &CommandBuffer, - pipeline: Arc>, +fn set_pipeline( + state: &mut State, + cmd_buf: &CommandBuffer, + pipeline: Arc, ) -> Result<(), ComputePassErrorInner> { pipeline.same_device_as(cmd_buf)?; @@ -816,8 +785,8 @@ fn set_pipeline( Ok(()) } -fn set_push_constant( - state: &mut State, +fn set_push_constant( + state: &mut State, push_constant_data: &[u32], offset: u32, size_bytes: u32, @@ -853,10 +822,7 @@ fn set_push_constant( Ok(()) } -fn dispatch( - state: &mut State, - groups: [u32; 3], -) -> Result<(), ComputePassErrorInner> { +fn dispatch(state: &mut State, groups: [u32; 3]) -> Result<(), ComputePassErrorInner> { state.is_ready()?; state.flush_states(None)?; @@ -881,10 +847,10 @@ fn dispatch( Ok(()) } -fn dispatch_indirect( - state: &mut State, - cmd_buf: &CommandBuffer, - buffer: Arc>, +fn dispatch_indirect( + state: &mut State, + cmd_buf: &CommandBuffer, + buffer: Arc, offset: u64, ) -> Result<(), ComputePassErrorInner> { buffer.same_device_as(cmd_buf)?; @@ -929,7 +895,7 @@ fn dispatch_indirect( Ok(()) } -fn push_debug_group(state: &mut State, string_data: &[u8], len: usize) { +fn push_debug_group(state: &mut State, string_data: &[u8], len: usize) { state.debug_scope_depth += 1; if !state .device @@ -945,7 +911,7 @@ fn push_debug_group(state: &mut State, string_data: &[u8], len: us state.string_offset += len; } -fn pop_debug_group(state: &mut State) -> Result<(), ComputePassErrorInner> { +fn pop_debug_group(state: &mut State) -> Result<(), ComputePassErrorInner> { if state.debug_scope_depth == 0 { return Err(ComputePassErrorInner::InvalidPopDebugGroup); } @@ -962,7 +928,7 @@ fn pop_debug_group(state: &mut State) -> Result<(), ComputePassErr Ok(()) } -fn insert_debug_marker(state: &mut State, string_data: &[u8], len: usize) { +fn insert_debug_marker(state: &mut State, string_data: &[u8], len: usize) { if !state .device .instance_flags @@ -975,10 +941,10 @@ fn insert_debug_marker(state: &mut State, string_data: &[u8], len: state.string_offset += len; } -fn write_timestamp( - state: &mut State, - cmd_buf: &CommandBuffer, - query_set: Arc>, +fn write_timestamp( + state: &mut State, + cmd_buf: &CommandBuffer, + query_set: Arc, query_index: 
u32, ) -> Result<(), ComputePassErrorInner> { query_set.same_device_as(cmd_buf)?; @@ -995,9 +961,9 @@ fn write_timestamp( // Recording a compute pass. impl Global { - pub fn compute_pass_set_bind_group( + pub fn compute_pass_set_bind_group( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, index: u32, bind_group_id: id::BindGroupId, offsets: &[DynamicOffset], @@ -1020,7 +986,7 @@ impl Global { return Ok(()); } - let hub = A::hub(self); + let hub = &self.hub; let bind_group = hub .bind_groups .get(bind_group_id) @@ -1036,9 +1002,9 @@ impl Global { Ok(()) } - pub fn compute_pass_set_pipeline( + pub fn compute_pass_set_pipeline( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, pipeline_id: id::ComputePipelineId, ) -> Result<(), ComputePassError> { let redundant = pass.current_pipeline.set_and_check_redundant(pipeline_id); @@ -1051,7 +1017,7 @@ impl Global { return Ok(()); } - let hub = A::hub(self); + let hub = &self.hub; let pipeline = hub .compute_pipelines .get(pipeline_id) @@ -1063,9 +1029,9 @@ impl Global { Ok(()) } - pub fn compute_pass_set_push_constants( + pub fn compute_pass_set_push_constants( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, offset: u32, data: &[u8], ) -> Result<(), ComputePassError> { @@ -1091,7 +1057,7 @@ impl Global { .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), ); - base.commands.push(ArcComputeCommand::::SetPushConstant { + base.commands.push(ArcComputeCommand::SetPushConstant { offset, size_bytes: data.len() as u32, values_offset: value_offset, @@ -1100,9 +1066,9 @@ impl Global { Ok(()) } - pub fn compute_pass_dispatch_workgroups( + pub fn compute_pass_dispatch_workgroups( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, groups_x: u32, groups_y: u32, groups_z: u32, @@ -1110,20 +1076,19 @@ impl Global { let scope = PassErrorScope::Dispatch { indirect: false }; let base = pass.base_mut(scope)?; - base.commands.push(ArcComputeCommand::::Dispatch([ - groups_x, groups_y, groups_z, - ])); + base.commands + .push(ArcComputeCommand::Dispatch([groups_x, groups_y, groups_z])); Ok(()) } - pub fn compute_pass_dispatch_workgroups_indirect( + pub fn compute_pass_dispatch_workgroups_indirect( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, buffer_id: id::BufferId, offset: BufferAddress, ) -> Result<(), ComputePassError> { - let hub = A::hub(self); + let hub = &self.hub; let scope = PassErrorScope::Dispatch { indirect: true }; let base = pass.base_mut(scope)?; @@ -1134,14 +1099,14 @@ impl Global { .map_pass_err(scope)?; base.commands - .push(ArcComputeCommand::::DispatchIndirect { buffer, offset }); + .push(ArcComputeCommand::DispatchIndirect { buffer, offset }); Ok(()) } - pub fn compute_pass_push_debug_group( + pub fn compute_pass_push_debug_group( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, label: &str, color: u32, ) -> Result<(), ComputePassError> { @@ -1150,7 +1115,7 @@ impl Global { let bytes = label.as_bytes(); base.string_data.extend_from_slice(bytes); - base.commands.push(ArcComputeCommand::::PushDebugGroup { + base.commands.push(ArcComputeCommand::PushDebugGroup { color, len: bytes.len(), }); @@ -1158,20 +1123,20 @@ impl Global { Ok(()) } - pub fn compute_pass_pop_debug_group( + pub fn compute_pass_pop_debug_group( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, ) -> Result<(), ComputePassError> { let base = pass.base_mut(PassErrorScope::PopDebugGroup)?; - base.commands.push(ArcComputeCommand::::PopDebugGroup); + base.commands.push(ArcComputeCommand::PopDebugGroup); 
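
All of the `compute_pass_*` recording methods above share one shape: validate cheaply, push an `ArcComputeCommand` variant onto the pass, and defer real hal encoding to `compute_pass_end`. A stripped-down model of that record-then-replay design, with illustrative names rather than wgpu's:

```rust
#[derive(Debug)]
enum Cmd {
    SetPipeline(u32),
    Dispatch([u32; 3]),
}

struct Pass {
    // `None` once ended; further recording becomes an error, much like
    // the `PassEnded` validation error above.
    commands: Option<Vec<Cmd>>,
}

impl Pass {
    fn new() -> Self {
        Pass { commands: Some(Vec::new()) }
    }

    fn record(&mut self, cmd: Cmd) -> Result<(), &'static str> {
        self.commands.as_mut().ok_or("pass already ended")?.push(cmd);
        Ok(())
    }

    /// Replay the recorded commands; wgpu-core would translate each
    /// variant into hal encoder calls at this point.
    fn end(&mut self) -> Result<(), &'static str> {
        for cmd in self.commands.take().ok_or("pass already ended")? {
            println!("encode {cmd:?}");
        }
        Ok(())
    }
}
```
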
Ok(()) } - pub fn compute_pass_insert_debug_marker( + pub fn compute_pass_insert_debug_marker( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, label: &str, color: u32, ) -> Result<(), ComputePassError> { @@ -1180,25 +1145,24 @@ impl Global { let bytes = label.as_bytes(); base.string_data.extend_from_slice(bytes); - base.commands - .push(ArcComputeCommand::::InsertDebugMarker { - color, - len: bytes.len(), - }); + base.commands.push(ArcComputeCommand::InsertDebugMarker { + color, + len: bytes.len(), + }); Ok(()) } - pub fn compute_pass_write_timestamp( + pub fn compute_pass_write_timestamp( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, query_set_id: id::QuerySetId, query_index: u32, ) -> Result<(), ComputePassError> { let scope = PassErrorScope::WriteTimestamp; let base = pass.base_mut(scope)?; - let hub = A::hub(self); + let hub = &self.hub; let query_set = hub .query_sets .get(query_set_id) @@ -1213,16 +1177,16 @@ impl Global { Ok(()) } - pub fn compute_pass_begin_pipeline_statistics_query( + pub fn compute_pass_begin_pipeline_statistics_query( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, query_set_id: id::QuerySetId, query_index: u32, ) -> Result<(), ComputePassError> { let scope = PassErrorScope::BeginPipelineStatisticsQuery; let base = pass.base_mut(scope)?; - let hub = A::hub(self); + let hub = &self.hub; let query_set = hub .query_sets .get(query_set_id) @@ -1238,14 +1202,14 @@ impl Global { Ok(()) } - pub fn compute_pass_end_pipeline_statistics_query( + pub fn compute_pass_end_pipeline_statistics_query( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, ) -> Result<(), ComputePassError> { let scope = PassErrorScope::EndPipelineStatisticsQuery; let base = pass.base_mut(scope)?; base.commands - .push(ArcComputeCommand::::EndPipelineStatisticsQuery); + .push(ArcComputeCommand::EndPipelineStatisticsQuery); Ok(()) } diff --git a/wgpu-core/src/command/compute_command.rs b/wgpu-core/src/command/compute_command.rs index 761827b85a..e16487b7ea 100644 --- a/wgpu-core/src/command/compute_command.rs +++ b/wgpu-core/src/command/compute_command.rs @@ -2,7 +2,6 @@ use std::sync::Arc; use crate::{ binding_model::BindGroup, - hal_api::HalApi, id, pipeline::ComputePipeline, resource::{Buffer, QuerySet}, @@ -71,10 +70,10 @@ pub enum ComputeCommand { impl ComputeCommand { /// Resolves all ids in a list of commands into the corresponding resource Arc. #[cfg(any(feature = "serde", feature = "replay"))] - pub fn resolve_compute_command_ids( - hub: &crate::hub::Hub, + pub fn resolve_compute_command_ids( + hub: &crate::hub::Hub, commands: &[ComputeCommand], - ) -> Result>, super::ComputePassError> { + ) -> Result, super::ComputePassError> { use super::{ComputePassError, ComputePassErrorInner, PassErrorScope}; let buffers_guard = hub.buffers.read(); @@ -82,9 +81,9 @@ impl ComputeCommand { let query_set_guard = hub.query_sets.read(); let pipelines_guard = hub.compute_pipelines.read(); - let resolved_commands: Vec> = commands + let resolved_commands: Vec = commands .iter() - .map(|c| -> Result, ComputePassError> { + .map(|c| -> Result { Ok(match *c { ComputeCommand::SetBindGroup { index, @@ -182,14 +181,14 @@ impl ComputeCommand { /// Equivalent to `ComputeCommand` but the Ids resolved into resource Arcs. 
#[derive(Clone, Debug)] -pub enum ArcComputeCommand { +pub enum ArcComputeCommand { SetBindGroup { index: u32, num_dynamic_offsets: usize, - bind_group: Arc>, + bind_group: Arc, }, - SetPipeline(Arc>), + SetPipeline(Arc), /// Set a range of push constants to values stored in `push_constant_data`. SetPushConstant { @@ -211,7 +210,7 @@ pub enum ArcComputeCommand { Dispatch([u32; 3]), DispatchIndirect { - buffer: Arc>, + buffer: Arc, offset: wgt::BufferAddress, }, @@ -228,12 +227,12 @@ pub enum ArcComputeCommand { }, WriteTimestamp { - query_set: Arc>, + query_set: Arc, query_index: u32, }, BeginPipelineStatisticsQuery { - query_set: Arc>, + query_set: Arc, query_index: u32, }, diff --git a/wgpu-core/src/command/dyn_compute_pass.rs b/wgpu-core/src/command/dyn_compute_pass.rs deleted file mode 100644 index ea15e2667d..0000000000 --- a/wgpu-core/src/command/dyn_compute_pass.rs +++ /dev/null @@ -1,178 +0,0 @@ -use wgt::WasmNotSendSync; - -use crate::{global, hal_api::HalApi, id}; - -use super::{ComputePass, ComputePassError}; - -/// Trait for type erasing ComputePass. -// TODO(#5124): wgpu-core's ComputePass trait should not be hal type dependent. -// Practically speaking this allows us merge gfx_select with type erasure: -// The alternative would be to introduce ComputePassId which then first needs to be looked up and then dispatch via gfx_select. -pub trait DynComputePass: std::fmt::Debug + WasmNotSendSync { - fn set_bind_group( - &mut self, - context: &global::Global, - index: u32, - bind_group_id: id::BindGroupId, - offsets: &[wgt::DynamicOffset], - ) -> Result<(), ComputePassError>; - fn set_pipeline( - &mut self, - context: &global::Global, - pipeline_id: id::ComputePipelineId, - ) -> Result<(), ComputePassError>; - fn set_push_constants( - &mut self, - context: &global::Global, - offset: u32, - data: &[u8], - ) -> Result<(), ComputePassError>; - fn dispatch_workgroups( - &mut self, - context: &global::Global, - groups_x: u32, - groups_y: u32, - groups_z: u32, - ) -> Result<(), ComputePassError>; - fn dispatch_workgroups_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - ) -> Result<(), ComputePassError>; - fn push_debug_group( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), ComputePassError>; - fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), ComputePassError>; - fn insert_debug_marker( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), ComputePassError>; - fn write_timestamp( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), ComputePassError>; - fn begin_pipeline_statistics_query( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), ComputePassError>; - fn end_pipeline_statistics_query( - &mut self, - context: &global::Global, - ) -> Result<(), ComputePassError>; - fn end(&mut self, context: &global::Global) -> Result<(), ComputePassError>; - - fn label(&self) -> Option<&str>; -} - -impl DynComputePass for ComputePass { - fn set_bind_group( - &mut self, - context: &global::Global, - index: u32, - bind_group_id: id::BindGroupId, - offsets: &[wgt::DynamicOffset], - ) -> Result<(), ComputePassError> { - context.compute_pass_set_bind_group(self, index, bind_group_id, offsets) - } - - fn set_pipeline( - &mut self, - context: &global::Global, - pipeline_id: id::ComputePipelineId, - ) -> Result<(), ComputePassError> { - 
context.compute_pass_set_pipeline(self, pipeline_id) - } - - fn set_push_constants( - &mut self, - context: &global::Global, - offset: u32, - data: &[u8], - ) -> Result<(), ComputePassError> { - context.compute_pass_set_push_constants(self, offset, data) - } - - fn dispatch_workgroups( - &mut self, - context: &global::Global, - groups_x: u32, - groups_y: u32, - groups_z: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_dispatch_workgroups(self, groups_x, groups_y, groups_z) - } - - fn dispatch_workgroups_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - ) -> Result<(), ComputePassError> { - context.compute_pass_dispatch_workgroups_indirect(self, buffer_id, offset) - } - - fn push_debug_group( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_push_debug_group(self, label, color) - } - - fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), ComputePassError> { - context.compute_pass_pop_debug_group(self) - } - - fn insert_debug_marker( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_insert_debug_marker(self, label, color) - } - - fn write_timestamp( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_write_timestamp(self, query_set_id, query_index) - } - - fn begin_pipeline_statistics_query( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_begin_pipeline_statistics_query(self, query_set_id, query_index) - } - - fn end_pipeline_statistics_query( - &mut self, - context: &global::Global, - ) -> Result<(), ComputePassError> { - context.compute_pass_end_pipeline_statistics_query(self) - } - - fn end(&mut self, context: &global::Global) -> Result<(), ComputePassError> { - context.compute_pass_end(self) - } - - fn label(&self) -> Option<&str> { - self.label() - } -} diff --git a/wgpu-core/src/command/dyn_render_pass.rs b/wgpu-core/src/command/dyn_render_pass.rs deleted file mode 100644 index 7ad79262b3..0000000000 --- a/wgpu-core/src/command/dyn_render_pass.rs +++ /dev/null @@ -1,458 +0,0 @@ -use wgt::WasmNotSendSync; - -use crate::{global, hal_api::HalApi, id}; - -use super::{RenderPass, RenderPassError}; - -/// Trait for type erasing RenderPass. -// TODO(#5124): wgpu-core's RenderPass trait should not be hal type dependent. -// Practically speaking this allows us merge gfx_select with type erasure: -// The alternative would be to introduce RenderPassId which then first needs to be looked up and then dispatch via gfx_select. 
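
The two files deleted here existed only to hide the `A: HalApi` parameter of `ComputePass<A>`/`RenderPass<A>` behind a trait object so callers did not have to name a backend. Once the passes themselves stop being generic, callers can hold the concrete type directly and the whole erasure layer disappears. Schematically, with toy types rather than the real API:

```rust
use std::marker::PhantomData;

// Before: the concrete pass type depended on the backend...
struct GenericPass<A> {
    backend: PhantomData<A>,
}
trait DynPass {
    fn end(&mut self);
}
impl<A> DynPass for GenericPass<A> {
    fn end(&mut self) {}
}
// ...so backend-agnostic code had to store `Box<dyn DynPass>`.
fn before() -> Box<dyn DynPass> {
    Box::new(GenericPass::<()> { backend: PhantomData })
}

// After: one concrete type, no erasure trait needed at all.
struct Pass;
impl Pass {
    fn end(&mut self) {}
}
fn after() -> Pass {
    Pass
}
```
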
-pub trait DynRenderPass: std::fmt::Debug + WasmNotSendSync { - fn set_bind_group( - &mut self, - context: &global::Global, - index: u32, - bind_group_id: id::BindGroupId, - offsets: &[wgt::DynamicOffset], - ) -> Result<(), RenderPassError>; - fn set_index_buffer( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - index_format: wgt::IndexFormat, - offset: wgt::BufferAddress, - size: Option, - ) -> Result<(), RenderPassError>; - fn set_vertex_buffer( - &mut self, - context: &global::Global, - slot: u32, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - size: Option, - ) -> Result<(), RenderPassError>; - fn set_pipeline( - &mut self, - context: &global::Global, - pipeline_id: id::RenderPipelineId, - ) -> Result<(), RenderPassError>; - fn set_push_constants( - &mut self, - context: &global::Global, - stages: wgt::ShaderStages, - offset: u32, - data: &[u8], - ) -> Result<(), RenderPassError>; - fn draw( - &mut self, - context: &global::Global, - vertex_count: u32, - instance_count: u32, - first_vertex: u32, - first_instance: u32, - ) -> Result<(), RenderPassError>; - fn draw_indexed( - &mut self, - context: &global::Global, - index_count: u32, - instance_count: u32, - first_index: u32, - base_vertex: i32, - first_instance: u32, - ) -> Result<(), RenderPassError>; - fn draw_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - ) -> Result<(), RenderPassError>; - fn draw_indexed_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - ) -> Result<(), RenderPassError>; - fn multi_draw_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - count: u32, - ) -> Result<(), RenderPassError>; - fn multi_draw_indexed_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - count: u32, - ) -> Result<(), RenderPassError>; - fn multi_draw_indirect_count( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - count_buffer_id: id::BufferId, - count_buffer_offset: wgt::BufferAddress, - max_count: u32, - ) -> Result<(), RenderPassError>; - fn multi_draw_indexed_indirect_count( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - count_buffer_id: id::BufferId, - count_buffer_offset: wgt::BufferAddress, - max_count: u32, - ) -> Result<(), RenderPassError>; - fn set_blend_constant( - &mut self, - context: &global::Global, - color: wgt::Color, - ) -> Result<(), RenderPassError>; - fn set_scissor_rect( - &mut self, - context: &global::Global, - x: u32, - y: u32, - width: u32, - height: u32, - ) -> Result<(), RenderPassError>; - fn set_viewport( - &mut self, - context: &global::Global, - x: f32, - y: f32, - width: f32, - height: f32, - min_depth: f32, - max_depth: f32, - ) -> Result<(), RenderPassError>; - fn set_stencil_reference( - &mut self, - context: &global::Global, - reference: u32, - ) -> Result<(), RenderPassError>; - fn push_debug_group( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), RenderPassError>; - fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), RenderPassError>; - fn insert_debug_marker( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), RenderPassError>; - fn write_timestamp( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) 
-> Result<(), RenderPassError>; - fn begin_occlusion_query( - &mut self, - context: &global::Global, - query_index: u32, - ) -> Result<(), RenderPassError>; - fn end_occlusion_query(&mut self, context: &global::Global) -> Result<(), RenderPassError>; - fn begin_pipeline_statistics_query( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), RenderPassError>; - fn end_pipeline_statistics_query( - &mut self, - context: &global::Global, - ) -> Result<(), RenderPassError>; - fn execute_bundles( - &mut self, - context: &global::Global, - bundles: &[id::RenderBundleId], - ) -> Result<(), RenderPassError>; - fn end(&mut self, context: &global::Global) -> Result<(), RenderPassError>; - - fn label(&self) -> Option<&str>; -} - -impl DynRenderPass for RenderPass { - fn set_index_buffer( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - index_format: wgt::IndexFormat, - offset: wgt::BufferAddress, - size: Option, - ) -> Result<(), RenderPassError> { - context.render_pass_set_index_buffer(self, buffer_id, index_format, offset, size) - } - - fn set_vertex_buffer( - &mut self, - context: &global::Global, - slot: u32, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - size: Option, - ) -> Result<(), RenderPassError> { - context.render_pass_set_vertex_buffer(self, slot, buffer_id, offset, size) - } - - fn set_bind_group( - &mut self, - context: &global::Global, - index: u32, - bind_group_id: id::BindGroupId, - offsets: &[wgt::DynamicOffset], - ) -> Result<(), RenderPassError> { - context.render_pass_set_bind_group(self, index, bind_group_id, offsets) - } - - fn set_pipeline( - &mut self, - context: &global::Global, - pipeline_id: id::RenderPipelineId, - ) -> Result<(), RenderPassError> { - context.render_pass_set_pipeline(self, pipeline_id) - } - - fn set_push_constants( - &mut self, - context: &global::Global, - stages: wgt::ShaderStages, - offset: u32, - data: &[u8], - ) -> Result<(), RenderPassError> { - context.render_pass_set_push_constants(self, stages, offset, data) - } - - fn draw( - &mut self, - context: &global::Global, - vertex_count: u32, - instance_count: u32, - first_vertex: u32, - first_instance: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_draw( - self, - vertex_count, - instance_count, - first_vertex, - first_instance, - ) - } - - fn draw_indexed( - &mut self, - context: &global::Global, - index_count: u32, - instance_count: u32, - first_index: u32, - base_vertex: i32, - first_instance: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_draw_indexed( - self, - index_count, - instance_count, - first_index, - base_vertex, - first_instance, - ) - } - - fn draw_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - ) -> Result<(), RenderPassError> { - context.render_pass_draw_indirect(self, buffer_id, offset) - } - - fn draw_indexed_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - ) -> Result<(), RenderPassError> { - context.render_pass_draw_indexed_indirect(self, buffer_id, offset) - } - - fn multi_draw_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - count: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_multi_draw_indirect(self, buffer_id, offset, count) - } - - fn multi_draw_indexed_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: 
wgt::BufferAddress, - count: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_multi_draw_indexed_indirect(self, buffer_id, offset, count) - } - - fn multi_draw_indirect_count( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - count_buffer_id: id::BufferId, - count_buffer_offset: wgt::BufferAddress, - max_count: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_multi_draw_indirect_count( - self, - buffer_id, - offset, - count_buffer_id, - count_buffer_offset, - max_count, - ) - } - - fn multi_draw_indexed_indirect_count( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - count_buffer_id: id::BufferId, - count_buffer_offset: wgt::BufferAddress, - max_count: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_multi_draw_indexed_indirect_count( - self, - buffer_id, - offset, - count_buffer_id, - count_buffer_offset, - max_count, - ) - } - - fn set_blend_constant( - &mut self, - context: &global::Global, - color: wgt::Color, - ) -> Result<(), RenderPassError> { - context.render_pass_set_blend_constant(self, color) - } - - fn set_scissor_rect( - &mut self, - context: &global::Global, - x: u32, - y: u32, - width: u32, - height: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_set_scissor_rect(self, x, y, width, height) - } - - fn set_viewport( - &mut self, - context: &global::Global, - x: f32, - y: f32, - width: f32, - height: f32, - min_depth: f32, - max_depth: f32, - ) -> Result<(), RenderPassError> { - context.render_pass_set_viewport(self, x, y, width, height, min_depth, max_depth) - } - - fn set_stencil_reference( - &mut self, - context: &global::Global, - reference: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_set_stencil_reference(self, reference) - } - - fn push_debug_group( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_push_debug_group(self, label, color) - } - - fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), RenderPassError> { - context.render_pass_pop_debug_group(self) - } - - fn insert_debug_marker( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_insert_debug_marker(self, label, color) - } - - fn write_timestamp( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_write_timestamp(self, query_set_id, query_index) - } - - fn begin_occlusion_query( - &mut self, - context: &global::Global, - query_index: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_begin_occlusion_query(self, query_index) - } - - fn end_occlusion_query(&mut self, context: &global::Global) -> Result<(), RenderPassError> { - context.render_pass_end_occlusion_query(self) - } - - fn begin_pipeline_statistics_query( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), RenderPassError> { - context.render_pass_begin_pipeline_statistics_query(self, query_set_id, query_index) - } - - fn end_pipeline_statistics_query( - &mut self, - context: &global::Global, - ) -> Result<(), RenderPassError> { - context.render_pass_end_pipeline_statistics_query(self) - } - - fn execute_bundles( - &mut self, - context: &global::Global, - bundles: &[id::RenderBundleId], - ) -> Result<(), RenderPassError> { - 
context.render_pass_execute_bundles(self, bundles) - } - - fn end(&mut self, context: &global::Global) -> Result<(), RenderPassError> { - context.render_pass_end(self) - } - - fn label(&self) -> Option<&str> { - self.label() - } -} diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs index 895901d92f..a4711998b2 100644 --- a/wgpu-core/src/command/memory_init.rs +++ b/wgpu-core/src/command/memory_init.rs @@ -1,10 +1,7 @@ use std::{collections::hash_map::Entry, ops::Range, sync::Arc, vec::Drain}; -use hal::CommandEncoder; - use crate::{ device::Device, - hal_api::HalApi, init_tracker::*, resource::{DestroyedResourceError, ParentDevice, Texture, Trackable}, snatch::SnatchGuard, @@ -17,39 +14,31 @@ use super::{clear::clear_texture, BakedCommands, ClearError}; /// Surface that was discarded by `StoreOp::Discard` of a preceding renderpass. /// Any read access to this surface needs to be preceded by a texture initialization. #[derive(Clone)] -pub(crate) struct TextureSurfaceDiscard { - pub texture: Arc>, +pub(crate) struct TextureSurfaceDiscard { + pub texture: Arc, pub mip_level: u32, pub layer: u32, } -pub(crate) type SurfacesInDiscardState = Vec>; +pub(crate) type SurfacesInDiscardState = Vec; -pub(crate) struct CommandBufferTextureMemoryActions { +#[derive(Default)] +pub(crate) struct CommandBufferTextureMemoryActions { /// The tracker actions that we need to be executed before the command /// buffer is executed. - init_actions: Vec>, + init_actions: Vec, /// All the discards that haven't been followed by init again within the /// command buffer i.e. everything in this list resets the texture init /// state *after* the command buffer execution - discards: Vec>, -} - -impl Default for CommandBufferTextureMemoryActions { - fn default() -> Self { - Self { - init_actions: Default::default(), - discards: Default::default(), - } - } + discards: Vec, } -impl CommandBufferTextureMemoryActions { - pub(crate) fn drain_init_actions(&mut self) -> Drain> { +impl CommandBufferTextureMemoryActions { + pub(crate) fn drain_init_actions(&mut self) -> Drain { self.init_actions.drain(..) } - pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard) { + pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard) { self.discards.push(discard); } @@ -59,8 +48,8 @@ impl CommandBufferTextureMemoryActions { #[must_use] pub(crate) fn register_init_action( &mut self, - action: &TextureInitTrackerAction, - ) -> SurfacesInDiscardState { + action: &TextureInitTrackerAction, + ) -> SurfacesInDiscardState { let mut immediately_necessary_clears = SurfacesInDiscardState::new(); // Note that within a command buffer we may stack arbitrary memory init @@ -119,7 +108,7 @@ impl CommandBufferTextureMemoryActions { // implicit init, not requiring any immediate resource init. pub(crate) fn register_implicit_init( &mut self, - texture: &Arc>, + texture: &Arc, range: TextureInitRange, ) { let must_be_empty = self.register_init_action(&TextureInitTrackerAction { @@ -135,14 +124,11 @@ impl CommandBufferTextureMemoryActions { // register_init_action and initializes them on the spot. // // Takes care of barriers as well! 
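
A detail visible in the `memory_init.rs` hunk above (and in `EntryPayload` earlier): the hand-written `Default` impls existed because `#[derive(Default)]` on a generic type adds a `Default` bound to every type parameter, which `A: HalApi` could not satisfy; with the parameter gone, the derive works. A small demonstration of the gotcha:

```rust
use std::marker::PhantomData;

// `#[derive(Default)]` expands to `impl<A: Default> Default for Holder<A>`,
// so the impl only exists when `A: Default`, even though every field is
// defaultable for any `A`.
#[derive(Default)]
struct Holder<A> {
    items: Vec<u32>,
    marker: PhantomData<A>,
}

#[allow(dead_code)]
struct NotDefault; // e.g. a hal backend marker type

fn main() {
    // Fine: u32 is Default.
    let _a: Holder<u32> = Default::default();
    // Does not compile: NotDefault lacks a Default impl, which is why the
    // generic versions of these structs needed manual `impl Default` blocks.
    // let _b: Holder<NotDefault> = Default::default();
}
```
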
-pub(crate) fn fixup_discarded_surfaces< - A: HalApi, - InitIter: Iterator>, ->( +pub(crate) fn fixup_discarded_surfaces>( inits: InitIter, - encoder: &mut A::CommandEncoder, - texture_tracker: &mut TextureTracker, - device: &Device, + encoder: &mut dyn hal::DynCommandEncoder, + texture_tracker: &mut TextureTracker, + device: &Device, snatch_guard: &SnatchGuard<'_>, ) { for init in inits { @@ -155,19 +141,19 @@ pub(crate) fn fixup_discarded_surfaces< encoder, texture_tracker, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), snatch_guard, ) .unwrap(); } } -impl BakedCommands { +impl BakedCommands { // inserts all buffer initializations that are going to be needed for // executing the commands and updates resource init states accordingly pub(crate) fn initialize_buffer_memory( &mut self, - device_tracker: &mut DeviceTracker, + device_tracker: &mut DeviceTracker, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), DestroyedResourceError> { profiling::scope!("initialize_buffer_memory"); @@ -233,7 +219,7 @@ impl BakedCommands { self.encoder.transition_buffers( transition .map(|pending| pending.into_hal(&buffer, snatch_guard)) - .into_iter(), + .as_slice(), ); } @@ -267,8 +253,8 @@ impl BakedCommands { // uninitialized pub(crate) fn initialize_texture_memory( &mut self, - device_tracker: &mut DeviceTracker, - device: &Device, + device_tracker: &mut DeviceTracker, + device: &Device, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), DestroyedResourceError> { profiling::scope!("initialize_texture_memory"); @@ -307,10 +293,10 @@ impl BakedCommands { let clear_result = clear_texture( &texture_use.texture, range, - &mut self.encoder, + self.encoder.as_mut(), &mut device_tracker.textures, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), snatch_guard, ); diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index b246abf6d3..d31a41bd8a 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -5,8 +5,6 @@ mod clear; mod compute; mod compute_command; mod draw; -mod dyn_compute_pass; -mod dyn_render_pass; mod memory_init; mod query; mod ray_tracing; @@ -19,9 +17,8 @@ use std::sync::Arc; pub(crate) use self::clear::clear_texture; pub use self::{ - bundle::*, clear::ClearError, compute::*, compute_command::ComputeCommand, draw::*, - dyn_compute_pass::DynComputePass, dyn_render_pass::DynRenderPass, query::*, render::*, - render_command::RenderCommand, transfer::*, + bundle::*, clear::ClearError, compute::*, compute_command::ComputeCommand, draw::*, query::*, + render::*, render_command::RenderCommand, transfer::*, }; pub(crate) use allocator::CommandAllocator; @@ -39,9 +36,8 @@ use crate::ray_tracing::{BlasAction, TlasAction}; use crate::resource::Labeled; use crate::track::{DeviceTracker, Tracker, UsageScope}; use crate::LabelHelpers; -use crate::{api_log, global::Global, hal_api::HalApi, id, resource_log, Label}; +use crate::{api_log, global::Global, id, resource_log, Label}; -use hal::CommandEncoder as _; use thiserror::Error; #[cfg(feature = "trace")] @@ -117,7 +113,7 @@ pub(crate) enum CommandEncoderStatus { /// [rce]: hal::Api::CommandEncoder /// [rcb]: hal::Api::CommandBuffer /// [`CommandEncoderId`]: crate::id::CommandEncoderId -pub(crate) struct CommandEncoder { +pub(crate) struct CommandEncoder { /// The underlying `wgpu_hal` [`CommandEncoder`]. 
/// /// Successfully executed command buffers' encoders are saved in a @@ -125,7 +121,7 @@ pub(crate) struct CommandEncoder { /// /// [`CommandEncoder`]: hal::Api::CommandEncoder /// [`CommandAllocator`]: crate::command::CommandAllocator - raw: A::CommandEncoder, + raw: Box, /// All the raw command buffers for our owning [`CommandBuffer`], in /// submission order. @@ -138,7 +134,7 @@ pub(crate) struct CommandEncoder { /// /// [CE::ra]: hal::CommandEncoder::reset_all /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder - list: Vec, + list: Vec>, /// True if `raw` is in the "recording" state. /// @@ -152,7 +148,7 @@ pub(crate) struct CommandEncoder { } //TODO: handle errors better -impl CommandEncoder { +impl CommandEncoder { /// Finish the current command buffer, if any, and place it /// at the second-to-last position in our list. /// @@ -221,14 +217,14 @@ impl CommandEncoder { /// Begin recording a new command buffer, if we haven't already. /// /// The underlying hal encoder is put in the "recording" state. - pub(crate) fn open(&mut self) -> Result<&mut A::CommandEncoder, DeviceError> { + pub(crate) fn open(&mut self) -> Result<&mut dyn hal::DynCommandEncoder, DeviceError> { if !self.is_open { self.is_open = true; let hal_label = self.hal_label.as_deref(); unsafe { self.raw.begin_encoding(hal_label)? }; } - Ok(&mut self.raw) + Ok(self.raw.as_mut()) } /// Begin recording a new command buffer for a render pass, with @@ -243,29 +239,29 @@ impl CommandEncoder { } } -pub(crate) struct BakedCommands { - pub(crate) encoder: A::CommandEncoder, - pub(crate) list: Vec, - pub(crate) trackers: Tracker, - buffer_memory_init_actions: Vec>, - texture_memory_actions: CommandBufferTextureMemoryActions, - blas_actions: Vec>, - tlas_actions: Vec>, +pub(crate) struct BakedCommands { + pub(crate) encoder: Box, + pub(crate) list: Vec>, + pub(crate) trackers: Tracker, + buffer_memory_init_actions: Vec, + texture_memory_actions: CommandBufferTextureMemoryActions, + blas_actions: Vec, + tlas_actions: Vec, } /// The mutable state of a [`CommandBuffer`]. -pub struct CommandBufferMutable { +pub struct CommandBufferMutable { /// The [`wgpu_hal::Api::CommandBuffer`]s we've built so far, and the encoder /// they belong to. /// /// [`wgpu_hal::Api::CommandBuffer`]: hal::Api::CommandBuffer - pub(crate) encoder: CommandEncoder, + pub(crate) encoder: CommandEncoder, /// The current state of this command buffer's encoder. status: CommandEncoderStatus, /// All the resources that the commands recorded so far have referred to. - pub(crate) trackers: Tracker, + pub(crate) trackers: Tracker, /// The regions of buffers and textures these commands will read and write. /// @@ -273,20 +269,20 @@ pub struct CommandBufferMutable { /// buffers/textures we actually need to initialize. If we're /// definitely going to write to something before we read from it, /// we don't need to clear its contents. 
- buffer_memory_init_actions: Vec>, - texture_memory_actions: CommandBufferTextureMemoryActions, + buffer_memory_init_actions: Vec, + texture_memory_actions: CommandBufferTextureMemoryActions, - pub(crate) pending_query_resets: QueryResetMap, - blas_actions: Vec>, - tlas_actions: Vec>, + pub(crate) pending_query_resets: QueryResetMap, + blas_actions: Vec, + tlas_actions: Vec, #[cfg(feature = "trace")] pub(crate) commands: Option>, } -impl CommandBufferMutable { +impl CommandBufferMutable { pub(crate) fn open_encoder_and_tracker( &mut self, - ) -> Result<(&mut A::CommandEncoder, &mut Tracker), DeviceError> { + ) -> Result<(&mut dyn hal::DynCommandEncoder, &mut Tracker), DeviceError> { let encoder = self.encoder.open()?; let tracker = &mut self.trackers; @@ -312,8 +308,8 @@ impl CommandBufferMutable { /// - Once a command buffer is submitted to the queue, it is removed from the id /// registry, and its contents are taken to construct a [`BakedCommands`], /// whose contents eventually become the property of the submission queue. -pub struct CommandBuffer { - pub(crate) device: Arc>, +pub struct CommandBuffer { + pub(crate) device: Arc, support_clear_texture: bool, /// The `label` from the descriptor used to create the resource. label: String, @@ -324,10 +320,10 @@ pub struct CommandBuffer { /// When this is submitted, dropped, or destroyed, its contents are /// extracted into a [`BakedCommands`] by /// [`CommandBuffer::extract_baked_commands`]. - pub(crate) data: Mutex>>, + pub(crate) data: Mutex>, } -impl Drop for CommandBuffer { +impl Drop for CommandBuffer { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); if self.data.lock().is_none() { @@ -335,20 +331,18 @@ impl Drop for CommandBuffer { } let mut baked = self.extract_baked_commands(); unsafe { - baked.encoder.reset_all(baked.list.into_iter()); + baked.encoder.reset_all(baked.list); } unsafe { - use hal::Device; self.device.raw().destroy_command_encoder(baked.encoder); } } } -impl CommandBuffer { +impl CommandBuffer { pub(crate) fn new( - encoder: A::CommandEncoder, - device: &Arc>, - #[cfg(feature = "trace")] enable_tracing: bool, + encoder: Box, + device: &Arc, label: &Label, ) -> Self { CommandBuffer { @@ -372,7 +366,7 @@ impl CommandBuffer { blas_actions: Default::default(), tlas_actions: Default::default(), #[cfg(feature = "trace")] - commands: if enable_tracing { + commands: if device.trace.lock().is_some() { Some(Vec::new()) } else { None @@ -383,9 +377,9 @@ impl CommandBuffer { } pub(crate) fn insert_barriers_from_tracker( - raw: &mut A::CommandEncoder, - base: &mut Tracker, - head: &Tracker, + raw: &mut dyn hal::DynCommandEncoder, + base: &mut Tracker, + head: &Tracker, snatch_guard: &SnatchGuard, ) { profiling::scope!("insert_barriers"); @@ -397,9 +391,9 @@ impl CommandBuffer { } pub(crate) fn insert_barriers_from_scope( - raw: &mut A::CommandEncoder, - base: &mut Tracker, - head: &UsageScope, + raw: &mut dyn hal::DynCommandEncoder, + base: &mut Tracker, + head: &UsageScope, snatch_guard: &SnatchGuard, ) { profiling::scope!("insert_barriers"); @@ -411,49 +405,55 @@ impl CommandBuffer { } pub(crate) fn drain_barriers( - raw: &mut A::CommandEncoder, - base: &mut Tracker, + raw: &mut dyn hal::DynCommandEncoder, + base: &mut Tracker, snatch_guard: &SnatchGuard, ) { profiling::scope!("drain_barriers"); - let buffer_barriers = base.buffers.drain_transitions(snatch_guard); + let buffer_barriers = base + .buffers + .drain_transitions(snatch_guard) + .collect::>(); let (transitions, textures) = 
base.textures.drain_transitions(snatch_guard); let texture_barriers = transitions .into_iter() .enumerate() - .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap())); + .map(|(i, p)| p.into_hal(textures[i].unwrap().raw())) + .collect::>(); unsafe { - raw.transition_buffers(buffer_barriers); - raw.transition_textures(texture_barriers); + raw.transition_buffers(&buffer_barriers); + raw.transition_textures(&texture_barriers); } } pub(crate) fn insert_barriers_from_device_tracker( - raw: &mut A::CommandEncoder, - base: &mut DeviceTracker, - head: &Tracker, + raw: &mut dyn hal::DynCommandEncoder, + base: &mut DeviceTracker, + head: &Tracker, snatch_guard: &SnatchGuard, ) { profiling::scope!("insert_barriers_from_device_tracker"); let buffer_barriers = base .buffers - .set_from_tracker_and_drain_transitions(&head.buffers, snatch_guard); + .set_from_tracker_and_drain_transitions(&head.buffers, snatch_guard) + .collect::>(); let texture_barriers = base .textures - .set_from_tracker_and_drain_transitions(&head.textures, snatch_guard); + .set_from_tracker_and_drain_transitions(&head.textures, snatch_guard) + .collect::>(); unsafe { - raw.transition_buffers(buffer_barriers); - raw.transition_textures(texture_barriers); + raw.transition_buffers(&buffer_barriers); + raw.transition_textures(&texture_barriers); } } } -impl CommandBuffer { +impl CommandBuffer { fn lock_encoder_impl(&self, lock: bool) -> Result<(), CommandEncoderError> { let mut cmd_buf_data_guard = self.data.lock(); let cmd_buf_data = cmd_buf_data_guard.as_mut().unwrap(); @@ -513,8 +513,7 @@ impl CommandBuffer { } } - pub(crate) fn extract_baked_commands(&mut self) -> BakedCommands { - log::trace!("Extracting BakedCommands from {}", self.error_ident()); + pub(crate) fn extract_baked_commands(&mut self) -> BakedCommands { let data = self.data.lock().take().unwrap(); BakedCommands { encoder: data.encoder.raw, @@ -527,7 +526,7 @@ impl CommandBuffer { } } - pub(crate) fn from_arc_into_baked(self: Arc) -> BakedCommands { + pub(crate) fn from_arc_into_baked(self: Arc) -> BakedCommands { let mut command_buffer = Arc::into_inner(self) .expect("CommandBuffer cannot be destroyed because it is still in use"); command_buffer.extract_baked_commands() @@ -606,6 +605,8 @@ pub enum CommandEncoderError { InvalidTimestampWritesQuerySetId(id::QuerySetId), #[error("Attachment TextureViewId {0:?} is invalid")] InvalidAttachmentId(id::TextureViewId), + #[error(transparent)] + InvalidColorAttachment(#[from] ColorAttachmentError), #[error("Resolve attachment TextureViewId {0:?} is invalid")] InvalidResolveTargetId(id::TextureViewId), #[error("Depth stencil attachment TextureViewId {0:?} is invalid")] @@ -615,14 +616,14 @@ } impl Global { - pub fn command_encoder_finish( + pub fn command_encoder_finish( &self, encoder_id: id::CommandEncoderId, _desc: &wgt::CommandBufferDescriptor { +impl QueryResetMap { pub fn new() -> Self { Self { map: FastHashMap::default(), } } - pub fn use_query_set(&mut self, query_set: &Arc>, query: u32) -> bool { + pub fn use_query_set(&mut self, query_set: &Arc, query: u32) -> bool { let vec_pair = self .map .entry(query_set.tracker_index()) @@ -44,7 +41,7 @@ impl QueryResetMap { std::mem::replace(&mut vec_pair.0[query as usize], true) } - pub fn reset_queries(&mut self, raw_encoder: &mut A::CommandEncoder) { + pub fn reset_queries(&mut self, raw_encoder: &mut dyn hal::DynCommandEncoder) { for (_, (state, query_set)) in self.map.drain() { debug_assert_eq!(state.len(), query_set.desc.count as usize); @@
-163,12 +160,12 @@ pub enum ResolveError { }, } -impl QuerySet { +impl QuerySet { fn validate_query( self: &Arc, query_type: SimplifiedQueryType, query_index: u32, - reset_state: Option<&mut QueryResetMap>, + reset_state: Option<&mut QueryResetMap>, ) -> Result<(), QueryUseError> { // We need to defer our resets because we are in a renderpass, // add the usage to the reset map. @@ -199,9 +196,9 @@ impl QuerySet { pub(super) fn validate_and_write_timestamp( self: &Arc, - raw_encoder: &mut A::CommandEncoder, + raw_encoder: &mut dyn hal::DynCommandEncoder, query_index: u32, - reset_state: Option<&mut QueryResetMap>, + reset_state: Option<&mut QueryResetMap>, ) -> Result<(), QueryUseError> { let needs_reset = reset_state.is_none(); self.validate_query(SimplifiedQueryType::Timestamp, query_index, reset_state)?; @@ -218,13 +215,13 @@ impl QuerySet { } } -pub(super) fn validate_and_begin_occlusion_query( - query_set: Arc>, - raw_encoder: &mut A::CommandEncoder, - tracker: &mut StatelessTracker>, +pub(super) fn validate_and_begin_occlusion_query( + query_set: Arc, + raw_encoder: &mut dyn hal::DynCommandEncoder, + tracker: &mut StatelessTracker, query_index: u32, - reset_state: Option<&mut QueryResetMap>, - active_query: &mut Option<(Arc>, u32)>, + reset_state: Option<&mut QueryResetMap>, + active_query: &mut Option<(Arc, u32)>, ) -> Result<(), QueryUseError> { let needs_reset = reset_state.is_none(); query_set.validate_query(SimplifiedQueryType::Occlusion, query_index, reset_state)?; @@ -250,26 +247,26 @@ pub(super) fn validate_and_begin_occlusion_query( Ok(()) } -pub(super) fn end_occlusion_query( - raw_encoder: &mut A::CommandEncoder, - active_query: &mut Option<(Arc>, u32)>, +pub(super) fn end_occlusion_query( + raw_encoder: &mut dyn hal::DynCommandEncoder, + active_query: &mut Option<(Arc, u32)>, ) -> Result<(), QueryUseError> { if let Some((query_set, query_index)) = active_query.take() { - unsafe { raw_encoder.end_query(query_set.raw.as_ref().unwrap(), query_index) }; + unsafe { raw_encoder.end_query(query_set.raw(), query_index) }; Ok(()) } else { Err(QueryUseError::AlreadyStopped) } } -pub(super) fn validate_and_begin_pipeline_statistics_query( - query_set: Arc>, - raw_encoder: &mut A::CommandEncoder, - tracker: &mut StatelessTracker>, - cmd_buf: &CommandBuffer, +pub(super) fn validate_and_begin_pipeline_statistics_query( + query_set: Arc, + raw_encoder: &mut dyn hal::DynCommandEncoder, + tracker: &mut StatelessTracker, + cmd_buf: &CommandBuffer, query_index: u32, - reset_state: Option<&mut QueryResetMap>, - active_query: &mut Option<(Arc>, u32)>, + reset_state: Option<&mut QueryResetMap>, + active_query: &mut Option<(Arc, u32)>, ) -> Result<(), QueryUseError> { query_set.same_device_as(cmd_buf)?; @@ -301,9 +298,9 @@ pub(super) fn validate_and_begin_pipeline_statistics_query( Ok(()) } -pub(super) fn end_pipeline_statistics_query( - raw_encoder: &mut A::CommandEncoder, - active_query: &mut Option<(Arc>, u32)>, +pub(super) fn end_pipeline_statistics_query( + raw_encoder: &mut dyn hal::DynCommandEncoder, + active_query: &mut Option<(Arc, u32)>, ) -> Result<(), QueryUseError> { if let Some((query_set, query_index)) = active_query.take() { unsafe { raw_encoder.end_query(query_set.raw(), query_index) }; @@ -314,13 +311,13 @@ pub(super) fn end_pipeline_statistics_query( } impl Global { - pub fn command_encoder_write_timestamp( + pub fn command_encoder_write_timestamp( &self, command_encoder_id: id::CommandEncoderId, query_set_id: id::QuerySetId, query_index: u32, ) -> Result<(), QueryError> { - 
let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -363,7 +360,7 @@ impl Global { Ok(()) } - pub fn command_encoder_resolve_query_set( + pub fn command_encoder_resolve_query_set( &self, command_encoder_id: id::CommandEncoderId, query_set_id: id::QuerySetId, @@ -372,7 +369,7 @@ impl Global { destination: id::BufferId, destination_offset: BufferAddress, ) -> Result<(), QueryError> { - let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -477,7 +474,7 @@ impl Global { let raw_dst_buffer = dst_buffer.try_raw(&snatch_guard)?; unsafe { - raw_encoder.transition_buffers(dst_barrier.into_iter()); + raw_encoder.transition_buffers(dst_barrier.as_slice()); raw_encoder.copy_query_results( query_set.raw(), start_query..end_query, diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs index 640853a632..038ff0ece2 100644 --- a/wgpu-core/src/command/ray_tracing.rs +++ b/wgpu-core/src/command/ray_tracing.rs @@ -1,10 +1,9 @@ use crate::{ device::queue::TempResource, global::Global, - hal_api::HalApi, id::CommandEncoderId, init_tracker::MemoryInitKind, - lock::{Mutex, RwLockReadGuard}, + lock::RwLockReadGuard, ray_tracing::{ tlas_instance_into_bytes, BlasAction, BlasBuildEntry, BlasGeometries, BuildAccelerationStructureError, TlasAction, TlasBuildEntry, TlasPackage, @@ -17,33 +16,38 @@ use crate::{ use wgt::{math::align_to, BufferAddress, BufferUsages}; use super::{BakedCommands, CommandBufferMutable, CommandEncoderError}; -use crate::lock::rank; use crate::ray_tracing::BlasTriangleGeometry; -use crate::resource::{Buffer, Labeled, StagingBuffer, Trackable}; +use crate::resource::{ + AccelerationStructure, Buffer, Labeled, ScratchBuffer, StagingBuffer, Trackable, +}; use crate::snatch::SnatchGuard; use crate::storage::Storage; use crate::track::PendingTransition; -use hal::{Api, BufferUses, CommandEncoder, Device}; +use hal::BufferUses; use std::ops::Deref; use std::sync::Arc; -use std::{cmp::max, iter, num::NonZeroU64, ops::Range, ptr}; +use std::{cmp::max, num::NonZeroU64, ops::Range}; -type BufferStorage<'a, A> = Vec<( - Arc>, +type BufferStorage<'a> = Vec<( + Arc, Option>, - Option<(Arc>, Option>)>, - Option<(Arc>, Option>)>, + Option<(Arc, Option>)>, + Option<(Arc, Option>)>, BlasTriangleGeometry<'a>, - Option>>, + Option>, )>; -type BlasStorage<'a, A> = Vec<(Arc>, hal::AccelerationStructureEntries<'a, A>, u64)>; +type BlasStorage<'a> = Vec<( + Arc, + hal::AccelerationStructureEntries<'a, dyn hal::DynBuffer>, + u64, +)>; // This should be queried from the device; maybe the hal api should pre-align it, since I am unsure how else we can idiomatically get this value.
const SCRATCH_BUFFER_ALIGNMENT: u32 = 256; impl Global { - pub fn command_encoder_build_acceleration_structures_unsafe_tlas<'a, A: HalApi>( + pub fn command_encoder_build_acceleration_structures_unsafe_tlas<'a>( &self, command_encoder_id: CommandEncoderId, blas_iter: impl Iterator>, @@ -51,7 +55,7 @@ impl Global { ) -> Result<(), BuildAccelerationStructureError> { profiling::scope!("CommandEncoder::build_acceleration_structures_unsafe_tlas"); - let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -147,7 +151,7 @@ impl Global { #[cfg(feature = "trace")] let tlas_iter = trace_tlas.iter(); - let mut input_barriers = Vec::>::new(); + let mut input_barriers = Vec::>::new(); let mut buf_storage = BufferStorage::new(); let mut scratch_buffer_blas_size = 0; @@ -176,9 +180,13 @@ impl Global { )?; let mut scratch_buffer_tlas_size = 0; - let mut tlas_storage = Vec::<(&Tlas, hal::AccelerationStructureEntries, u64)>::new(); + let mut tlas_storage = Vec::<( + &Tlas, + hal::AccelerationStructureEntries, + u64, + )>::new(); let mut tlas_buf_storage = Vec::<( - Arc>, + Arc, Option>, TlasBuildEntry, )>::new(); @@ -221,13 +229,7 @@ impl Global { let tlas = tlas_guard .get(entry.tlas_id) .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?; - cmd_buf_data.trackers.tlas_s.insert_single(tlas.clone()); - - if tlas.raw.is_none() { - return Err(BuildAccelerationStructureError::InvalidTlas( - tlas.error_ident(), - )); - } + cmd_buf_data.trackers.tlas_s.set_single(tlas.clone()); cmd_buf_data.tlas_actions.push(TlasAction { tlas: tlas.clone(), @@ -246,7 +248,7 @@ impl Global { tlas_storage.push(( tlas, hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { - buffer: Some(instance_buffer), + buffer: Some(instance_buffer.as_ref()), offset: 0, count: entry.instance_count, }), @@ -254,31 +256,24 @@ impl Global { )); } - if max(scratch_buffer_blas_size, scratch_buffer_tlas_size) == 0 { - return Ok(()); - } + let scratch_size = + match wgt::BufferSize::new(max(scratch_buffer_blas_size, scratch_buffer_tlas_size)) { + None => return Ok(()), + Some(size) => size, + }; - let scratch_buffer = unsafe { - device - .raw() - .create_buffer(&hal::BufferDescriptor { - label: Some("(wgpu) scratch buffer"), - size: max(scratch_buffer_blas_size, scratch_buffer_tlas_size), - usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH | BufferUses::MAP_WRITE, - memory_flags: hal::MemoryFlags::empty(), - }) - .map_err(crate::device::DeviceError::from)? 
- }; + let scratch_buffer = + ScratchBuffer::new(device, scratch_size).map_err(crate::device::DeviceError::from)?; - let scratch_buffer_barrier = hal::BufferBarrier:: { - buffer: &scratch_buffer, + let scratch_buffer_barrier = hal::BufferBarrier:: { + buffer: scratch_buffer.raw(), usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH ..BufferUses::ACCELERATION_STRUCTURE_SCRATCH, }; let blas_descriptors = blas_storage .iter() - .map(|storage| map_blas(storage, &scratch_buffer)); + .map(|storage| map_blas(storage, scratch_buffer.raw())); let tlas_descriptors = tlas_storage @@ -292,8 +287,8 @@ impl Global { mode: hal::AccelerationStructureBuildMode::Build, flags: tlas.flags, source_acceleration_structure: None, - destination_acceleration_structure: tlas.raw.as_ref().unwrap(), - scratch_buffer: &scratch_buffer, + destination_acceleration_structure: tlas.raw(), + scratch_buffer: scratch_buffer.raw(), scratch_buffer_offset: *scratch_buffer_offset, } }); @@ -308,15 +303,13 @@ impl Global { blas_present, tlas_present, input_barriers, - blas_storage.len() as u32, - blas_descriptors, + &blas_descriptors.collect::>(), scratch_buffer_barrier, ); if tlas_present { unsafe { - cmd_buf_raw - .build_acceleration_structures(tlas_storage.len() as u32, tlas_descriptors); + cmd_buf_raw.build_acceleration_structures(&tlas_descriptors.collect::>()); cmd_buf_raw.place_acceleration_structure_barrier( hal::AccelerationStructureBarrier { @@ -327,31 +320,15 @@ impl Global { } } - let scratch_mapping = unsafe { - device - .raw() - .map_buffer( - &scratch_buffer, - 0..max(scratch_buffer_blas_size, scratch_buffer_tlas_size), - ) - .map_err(crate::device::DeviceError::from)? - }; device .pending_writes .lock() - .as_mut() - .unwrap() - .consume_temp(TempResource::StagingBuffer(StagingBuffer { - raw: Mutex::new(rank::BLAS, Some(scratch_buffer)), - device: device.clone(), - size: max(scratch_buffer_blas_size, scratch_buffer_tlas_size), - is_coherent: scratch_mapping.is_coherent, - })); + .consume_temp(TempResource::ScratchBuffer(scratch_buffer)); Ok(()) } - pub fn command_encoder_build_acceleration_structures<'a, A: HalApi>( + pub fn command_encoder_build_acceleration_structures<'a>( &self, command_encoder_id: CommandEncoderId, blas_iter: impl Iterator>, @@ -359,7 +336,7 @@ impl Global { ) -> Result<(), BuildAccelerationStructureError> { profiling::scope!("CommandEncoder::build_acceleration_structures"); - let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -486,7 +463,7 @@ impl Global { } }); - let mut input_barriers = Vec::>::new(); + let mut input_barriers = Vec::>::new(); let mut buf_storage = BufferStorage::new(); let mut scratch_buffer_blas_size = 0; @@ -513,25 +490,22 @@ impl Global { &mut scratch_buffer_blas_size, &mut blas_storage, )?; - let mut tlas_lock_store = Vec::<( - RwLockReadGuard>, - Option, - Arc>, - )>::new(); + let mut tlas_lock_store = + Vec::<(&dyn hal::DynBuffer, Option, Arc)>::new(); for package in tlas_iter { let tlas = tlas_guard .get(package.tlas_id) .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?; - cmd_buf_data.trackers.tlas_s.insert_single(tlas.clone()); - tlas_lock_store.push((tlas.instance_buffer.read(), Some(package), tlas.clone())) + cmd_buf_data.trackers.tlas_s.set_single(tlas.clone()); + tlas_lock_store.push((tlas.instance_buffer.as_ref(), Some(package), tlas.clone())) } let mut scratch_buffer_tlas_size = 0; let mut tlas_storage = Vec::<( - &Tlas, - hal::AccelerationStructureEntries, + &Tlas, + hal::AccelerationStructureEntries, u64, 
Range, )>::new(); @@ -540,11 +514,6 @@ impl Global { for entry in &mut tlas_lock_store { let package = entry.1.take().unwrap(); let tlas = &entry.2; - if tlas.raw.is_none() { - return Err(BuildAccelerationStructureError::InvalidTlas( - tlas.error_ident(), - )); - } let scratch_buffer_offset = scratch_buffer_tlas_size; scratch_buffer_tlas_size += align_to( @@ -568,10 +537,13 @@ impl Global { .map_err(|_| BuildAccelerationStructureError::InvalidBlasIdForInstance)? .clone(); - cmd_buf_data.trackers.blas_s.insert_single(blas.clone()); + cmd_buf_data.trackers.blas_s.set_single(blas.clone()); - instance_buffer_staging_source - .extend(tlas_instance_into_bytes::(&instance, blas.handle)); + instance_buffer_staging_source.extend(tlas_instance_into_bytes( + &instance, + blas.handle, + device.backend(), + )); instance_count += 1; @@ -602,7 +574,7 @@ impl Global { tlas_storage.push(( tlas, hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { - buffer: Some(entry.0.as_ref().unwrap()), + buffer: Some(entry.0), offset: 0, count: instance_count, }), @@ -611,106 +583,43 @@ impl Global { )); } - if max(scratch_buffer_blas_size, scratch_buffer_tlas_size) == 0 { - return Ok(()); - } - - let staging_buffer = if !instance_buffer_staging_source.is_empty() { - unsafe { - let staging_buffer = device - .raw() - .create_buffer(&hal::BufferDescriptor { - label: Some("(wgpu) instance staging buffer"), - size: instance_buffer_staging_source.len() as u64, - usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, - memory_flags: hal::MemoryFlags::empty(), - }) - .map_err(crate::device::DeviceError::from)?; - let mapping = device - .raw() - .map_buffer( - &staging_buffer, - 0..instance_buffer_staging_source.len() as u64, - ) - .map_err(crate::device::DeviceError::from)?; - ptr::copy_nonoverlapping( - instance_buffer_staging_source.as_ptr(), - mapping.ptr.as_ptr(), - instance_buffer_staging_source.len(), - ); - device - .raw() - .unmap_buffer(&staging_buffer) - .map_err(crate::device::DeviceError::from)?; - assert!(mapping.is_coherent); - Some(StagingBuffer { - raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(staging_buffer)), - device: device.clone(), - size: instance_buffer_staging_source.len() as u64, - is_coherent: mapping.is_coherent, - }) - } - } else { - None - }; + let scratch_size = + match wgt::BufferSize::new(max(scratch_buffer_blas_size, scratch_buffer_tlas_size)) { + // if the size is zero there is nothing to build + None => return Ok(()), + Some(size) => size, + }; - let scratch_buffer = unsafe { - device - .raw() - .create_buffer(&hal::BufferDescriptor { - label: Some("(wgpu) scratch buffer"), - size: max(scratch_buffer_blas_size, scratch_buffer_tlas_size), - usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH | BufferUses::MAP_WRITE, - memory_flags: hal::MemoryFlags::empty(), - }) - .map_err(crate::device::DeviceError::from)? 
- }; + let scratch_buffer = + ScratchBuffer::new(device, scratch_size).map_err(crate::device::DeviceError::from)?; - let scratch_buffer_barrier = hal::BufferBarrier:: { - buffer: &scratch_buffer, + let scratch_buffer_barrier = hal::BufferBarrier:: { + buffer: scratch_buffer.raw(), usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH ..BufferUses::ACCELERATION_STRUCTURE_SCRATCH, }; let blas_descriptors = blas_storage .iter() - .map(|storage| map_blas(storage, &scratch_buffer)); - - let tlas_descriptors = tlas_storage.iter().map( - |&(tlas, ref entries, ref scratch_buffer_offset, ref _range)| { - if tlas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate { - log::info!("only rebuild implemented") - } - hal::BuildAccelerationStructureDescriptor { - entries, - mode: hal::AccelerationStructureBuildMode::Build, - flags: tlas.flags, - source_acceleration_structure: None, - destination_acceleration_structure: tlas.raw.as_ref().unwrap(), - scratch_buffer: &scratch_buffer, - scratch_buffer_offset: *scratch_buffer_offset, - } - }, - ); + .map(|storage| map_blas(storage, scratch_buffer.raw())); - let mut lock_vec = Vec::::Buffer>>>>::new(); + let mut tlas_descriptors = Vec::with_capacity(tlas_storage.len()); - for tlas in &tlas_storage { - let size = (tlas.3.end - tlas.3.start) as u64; - lock_vec.push(if size == 0 { - None - } else { - Some(tlas.0.instance_buffer.read()) + for &(tlas, ref entries, ref scratch_buffer_offset, _) in &tlas_storage { + if tlas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate { + log::info!("only rebuild implemented") + } + tlas_descriptors.push(hal::BuildAccelerationStructureDescriptor { + entries, + mode: hal::AccelerationStructureBuildMode::Build, + flags: tlas.flags, + source_acceleration_structure: None, + destination_acceleration_structure: tlas.raw.as_ref(), + scratch_buffer: scratch_buffer.raw(), + scratch_buffer_offset: *scratch_buffer_offset, }) } - let instance_buffer_barriers = lock_vec.iter().filter_map(|lock| { - lock.as_ref().map(|lock| hal::BufferBarrier:: { - buffer: lock.as_ref().unwrap(), - usage: BufferUses::COPY_DST..BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, - }) - }); - let blas_present = !blas_storage.is_empty(); let tlas_present = !tlas_storage.is_empty(); @@ -721,55 +630,67 @@ impl Global { blas_present, tlas_present, input_barriers, - blas_storage.len() as u32, - blas_descriptors, + &blas_descriptors.collect::>(), scratch_buffer_barrier, ); if tlas_present { + let staging_buffer = if !instance_buffer_staging_source.is_empty() { + let mut staging_buffer = StagingBuffer::new( + device, + wgt::BufferSize::new(instance_buffer_staging_source.len() as u64).unwrap(), + ) + .map_err(crate::device::DeviceError::from)?; + staging_buffer.write(&instance_buffer_staging_source); + let flushed = staging_buffer.flush(); + Some(flushed) + } else { + None + }; + unsafe { if let Some(ref staging_buffer) = staging_buffer { - cmd_buf_raw.transition_buffers(iter::once(hal::BufferBarrier:: { - buffer: staging_buffer.raw.lock().as_ref().unwrap(), + cmd_buf_raw.transition_buffers(&[hal::BufferBarrier:: { + buffer: staging_buffer.raw(), usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, - })); + }]); } } - for &(tlas, ref _entries, ref _scratch_buffer_offset, ref range) in &tlas_storage { - let size = (range.end - range.start) as u64; - if size == 0 { - continue; - } + let mut instance_buffer_barriers = Vec::new(); + for &(tlas, _, _, ref range) in &tlas_storage { + let size = match wgt::BufferSize::new((range.end - 
range.start) as u64) { + None => continue, + Some(size) => size, + }; + instance_buffer_barriers.push(hal::BufferBarrier:: { + buffer: tlas.instance_buffer.as_ref(), + usage: BufferUses::COPY_DST..BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }); unsafe { - cmd_buf_raw.transition_buffers(iter::once(hal::BufferBarrier:: { - buffer: tlas.instance_buffer.read().as_ref().unwrap(), + cmd_buf_raw.transition_buffers(&[hal::BufferBarrier:: { + buffer: tlas.instance_buffer.as_ref(), usage: hal::BufferUses::MAP_READ..hal::BufferUses::COPY_DST, - })); + }]); let temp = hal::BufferCopy { src_offset: range.start as u64, dst_offset: 0, - size: NonZeroU64::new(size).unwrap(), + size, }; cmd_buf_raw.copy_buffer_to_buffer( - staging_buffer - .as_ref() - .unwrap() - .raw - .lock() - .as_ref() - .unwrap(), - tlas.instance_buffer.read().as_ref().unwrap(), - iter::once(temp), + // the range whose size we just checked ends at (at that point in time) instance_buffer_staging_source.len(), + // and since instance_buffer_staging_source doesn't shrink we can unwrap this without a panic + staging_buffer.as_ref().unwrap().raw(), + tlas.instance_buffer.as_ref(), + &[temp], ); } } unsafe { - cmd_buf_raw.transition_buffers(instance_buffer_barriers); + cmd_buf_raw.transition_buffers(&instance_buffer_barriers); - cmd_buf_raw - .build_acceleration_structures(tlas_storage.len() as u32, tlas_descriptors); + cmd_buf_raw.build_acceleration_structures(&tlas_descriptors); cmd_buf_raw.place_acceleration_structure_barrier( hal::AccelerationStructureBarrier { @@ -783,40 +704,20 @@ device .pending_writes .lock() - .as_mut() - .unwrap() .consume_temp(TempResource::StagingBuffer(staging_buffer)); } } - let scratch_mapping = unsafe { - device - .raw() - .map_buffer( - &scratch_buffer, - 0..max(scratch_buffer_blas_size, scratch_buffer_tlas_size), - ) - .map_err(crate::device::DeviceError::from)? - }; - - let buf = StagingBuffer { - raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(scratch_buffer)), - device: device.clone(), - size: max(scratch_buffer_blas_size, scratch_buffer_tlas_size), - is_coherent: scratch_mapping.is_coherent, - }; device .pending_writes .lock() - .as_mut() - .unwrap() - .consume_temp(TempResource::StagingBuffer(buf)); + .consume_temp(TempResource::ScratchBuffer(scratch_buffer)); Ok(()) } } -impl BakedCommands { +impl BakedCommands { // makes sure a blas is built before it is used pub(crate) fn validate_blas_actions(&mut self) -> Result<(), ValidateBlasActionsError> { profiling::scope!("CommandEncoder::[submission]::validate_blas_actions"); @@ -884,25 +785,19 @@ } /// Iterates over the blas iterator and its geometry, pushing the buffers into a storage vector (and also some validation).
-fn iter_blas<'a, A: HalApi>( +fn iter_blas<'a>( blas_iter: impl Iterator>, - cmd_buf_data: &mut CommandBufferMutable, + cmd_buf_data: &mut CommandBufferMutable, build_command_index: NonZeroU64, - buffer_guard: &RwLockReadGuard>>, - blas_guard: &RwLockReadGuard>>, - buf_storage: &mut BufferStorage<'a, A>, + buffer_guard: &RwLockReadGuard>, + blas_guard: &RwLockReadGuard>, + buf_storage: &mut BufferStorage<'a>, ) -> Result<(), BuildAccelerationStructureError> { for entry in blas_iter { let blas = blas_guard .get(entry.blas_id) .map_err(|_| BuildAccelerationStructureError::InvalidBlasId)?; - cmd_buf_data.trackers.blas_s.insert_single(blas.clone()); - - if blas.raw.is_none() { - return Err(BuildAccelerationStructureError::InvalidBlas( - blas.error_ident(), - )); - } + cmd_buf_data.trackers.blas_s.set_single(blas.clone()); cmd_buf_data.blas_actions.push(BlasAction { blas: blas.clone(), @@ -1009,16 +904,17 @@ fn iter_blas<'a, A: HalApi>( } /// Iterates over the buffers generated by [iter_blas] and converts the barriers into hal barriers and the triangles into hal [AccelerationStructureEntries] (and also some validation). -fn iter_buffers<'a, 'b, A: HalApi>( - buf_storage: &'a mut BufferStorage<'b, A>, +fn iter_buffers<'a, 'b>( + buf_storage: &'a mut BufferStorage<'b>, snatch_guard: &'a SnatchGuard, - input_barriers: &mut Vec>, - cmd_buf_data: &mut CommandBufferMutable, - buffer_guard: &RwLockReadGuard>>, + input_barriers: &mut Vec>, + cmd_buf_data: &mut CommandBufferMutable, + buffer_guard: &RwLockReadGuard>, scratch_buffer_blas_size: &mut u64, - blas_storage: &mut BlasStorage<'a, A>, + blas_storage: &mut BlasStorage<'a>, ) -> Result<(), BuildAccelerationStructureError> { - let mut triangle_entries = Vec::>::new(); + let mut triangle_entries = + Vec::>::new(); for buf in buf_storage { let mesh = &buf.4; let vertex_buffer = { @@ -1164,22 +1060,22 @@ fn iter_buffers<'a, 'b, A: HalApi>( }; let triangles = hal::AccelerationStructureTriangles { - vertex_buffer: Some(vertex_buffer), + vertex_buffer: Some(vertex_buffer.as_ref()), vertex_format: mesh.size.vertex_format, first_vertex: mesh.first_vertex, vertex_count: mesh.size.vertex_count, vertex_stride: mesh.vertex_stride, indices: index_buffer.map(|index_buffer| hal::AccelerationStructureTriangleIndices::< - A, + dyn hal::DynBuffer, > { format: mesh.size.index_format.unwrap(), - buffer: Some(index_buffer), + buffer: Some(index_buffer.as_ref()), offset: mesh.index_buffer_offset.unwrap() as u32, count: mesh.size.index_count.unwrap(), }), transform: transform_buffer.map(|transform_buffer| { hal::AccelerationStructureTriangleTransform { - buffer: transform_buffer, + buffer: transform_buffer.as_ref(), offset: mesh.transform_buffer_offset.unwrap() as u32, } }), @@ -1204,14 +1100,18 @@ fn iter_buffers<'a, 'b, A: HalApi>( Ok(()) } -fn map_blas<'a, A: HalApi>( +fn map_blas<'a>( storage: &'a ( - Arc>, - hal::AccelerationStructureEntries, + Arc, + hal::AccelerationStructureEntries, BufferAddress, ), - scratch_buffer: &'a ::Buffer, -) -> hal::BuildAccelerationStructureDescriptor<'a, A> { + scratch_buffer: &'a dyn hal::DynBuffer, +) -> hal::BuildAccelerationStructureDescriptor< + 'a, + dyn hal::DynBuffer, + dyn hal::DynAccelerationStructure, +> { let (blas, entries, scratch_buffer_offset) = storage; if blas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate { log::info!("only rebuild implemented") } @@ -1221,23 +1121,26 @@ fn map_blas<'a, A: HalApi>( mode: hal::AccelerationStructureBuildMode::Build, flags: blas.flags, source_acceleration_structure:
None, - destination_acceleration_structure: blas.raw.as_ref().unwrap(), + destination_acceleration_structure: blas.raw.as_ref(), scratch_buffer, scratch_buffer_offset: *scratch_buffer_offset, } } -fn build_blas<'a, A: HalApi>( - cmd_buf_raw: &mut A::CommandEncoder, +fn build_blas<'a>( + cmd_buf_raw: &mut dyn hal::DynCommandEncoder, blas_present: bool, tlas_present: bool, - input_barriers: Vec>, - desc_len: u32, - blas_descriptors: impl Iterator>, - scratch_buffer_barrier: hal::BufferBarrier, + input_barriers: Vec>, + blas_descriptors: &[hal::BuildAccelerationStructureDescriptor< + 'a, + dyn hal::DynBuffer, + dyn hal::DynAccelerationStructure, + >], + scratch_buffer_barrier: hal::BufferBarrier, ) { unsafe { - cmd_buf_raw.transition_buffers(input_barriers.into_iter()); + cmd_buf_raw.transition_buffers(&input_barriers); } if blas_present { @@ -1247,13 +1150,13 @@ fn build_blas<'a, A: HalApi>( ..hal::AccelerationStructureUses::BUILD_OUTPUT, }); - cmd_buf_raw.build_acceleration_structures(desc_len, blas_descriptors); + cmd_buf_raw.build_acceleration_structures(blas_descriptors); } } if blas_present && tlas_present { unsafe { - cmd_buf_raw.transition_buffers(iter::once(scratch_buffer_barrier)); + cmd_buf_raw.transition_buffers(&[scratch_buffer_barrier]); } } diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 3305db890c..1f11ba0937 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -21,7 +21,6 @@ use crate::{ RenderPassCompatibilityError, RenderPassContext, }, global::Global, - hal_api::HalApi, hal_label, id, init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, pipeline::{self, PipelineFlags}, @@ -34,7 +33,6 @@ use crate::{ }; use arrayvec::ArrayVec; -use hal::CommandEncoder as _; use thiserror::Error; use wgt::{ BufferAddress, BufferSize, BufferUsages, Color, DynamicOffset, IndexFormat, ShaderStages, @@ -54,7 +52,7 @@ use super::{ memory_init::TextureSurfaceDiscard, CommandBufferTextureMemoryActions, CommandEncoder, QueryResetMap, }; -use super::{DrawKind, DynRenderPass, Rect}; +use super::{DrawKind, Rect}; /// Operation to perform to the output attachment at the start of a renderpass. #[repr(C)] @@ -134,11 +132,11 @@ pub struct RenderPassColorAttachment { /// Describes a color attachment to a render pass. #[derive(Debug)] -struct ArcRenderPassColorAttachment { +struct ArcRenderPassColorAttachment { /// The view to use as an attachment. - pub view: Arc>, + pub view: Arc, /// The view that will receive the resolved output if multisampling is used. - pub resolve_target: Option>>, + pub resolve_target: Option>, /// What operations will be performed on this color attachment. pub channel: PassChannel, } @@ -157,16 +155,16 @@ pub struct RenderPassDepthStencilAttachment { } /// Describes a depth/stencil attachment to a render pass. #[derive(Debug)] -pub struct ArcRenderPassDepthStencilAttachment { +pub struct ArcRenderPassDepthStencilAttachment { /// The view to use as an attachment. - pub view: Arc>, + pub view: Arc, /// What operations will be performed on the depth part of the attachment. pub depth: PassChannel, /// What operations will be performed on the stencil part of the attachment. pub stencil: PassChannel, } -impl ArcRenderPassDepthStencilAttachment { +impl ArcRenderPassDepthStencilAttachment { /// Validate the given aspects' read-only flags against their load /// and store ops. /// @@ -219,45 +217,45 @@ pub struct RenderPassDescriptor<'a> { } /// Describes the attachments of a render pass. 
-struct ArcRenderPassDescriptor<'a, A: HalApi> { +struct ArcRenderPassDescriptor<'a> { pub label: &'a Label<'a>, /// The color attachments of the render pass. pub color_attachments: - ArrayVec>, { hal::MAX_COLOR_ATTACHMENTS }>, + ArrayVec, { hal::MAX_COLOR_ATTACHMENTS }>, /// The depth and stencil attachment of the render pass, if any. - pub depth_stencil_attachment: Option>, + pub depth_stencil_attachment: Option, /// Defines where and when timestamp values will be written for this pass. - pub timestamp_writes: Option>, + pub timestamp_writes: Option, /// Defines where the occlusion query results will be stored for this pass. - pub occlusion_query_set: Option>>, + pub occlusion_query_set: Option>, } -pub struct RenderPass { +pub struct RenderPass { /// All pass data & records is stored here. /// /// If this is `None`, the pass is in the 'ended' state and can no longer be used. /// Any attempt to record more commands will result in a validation error. - base: Option>>, + base: Option>, /// Parent command buffer that this pass records commands into. /// /// If it is none, this pass is invalid and any operation on it will return an error. - parent: Option>>, + parent: Option>, color_attachments: - ArrayVec>, { hal::MAX_COLOR_ATTACHMENTS }>, - depth_stencil_attachment: Option>, - timestamp_writes: Option>, - occlusion_query_set: Option>>, + ArrayVec, { hal::MAX_COLOR_ATTACHMENTS }>, + depth_stencil_attachment: Option, + timestamp_writes: Option, + occlusion_query_set: Option>, // Resource binding dedupe state. current_bind_groups: BindGroupStateChange, current_pipeline: StateChange, } -impl RenderPass { +impl RenderPass { /// If the parent command buffer is invalid, the returned pass will be invalid. - fn new(parent: Option>>, desc: ArcRenderPassDescriptor) -> Self { + fn new(parent: Option>, desc: ArcRenderPassDescriptor) -> Self { let ArcRenderPassDescriptor { label, timestamp_writes, @@ -287,7 +285,7 @@ impl RenderPass { fn base_mut<'a>( &'a mut self, scope: PassErrorScope, - ) -> Result<&'a mut BasePass>, RenderPassError> { + ) -> Result<&'a mut BasePass, RenderPassError> { self.base .as_mut() .ok_or(RenderPassErrorInner::PassEnded) @@ -295,7 +293,7 @@ impl RenderPass { } } -impl fmt::Debug for RenderPass { +impl fmt::Debug for RenderPass { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RenderPass") .field("label", &self.label()) @@ -445,38 +443,38 @@ impl VertexState { } } -struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi> { +struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> { pipeline_flags: PipelineFlags, - binder: Binder, + binder: Binder, blend_constant: OptionalState, stencil_reference: u32, - pipeline: Option>>, + pipeline: Option>, index: IndexState, vertex: VertexState, debug_scope_depth: u32, - info: RenderPassInfo<'scope, A>, + info: RenderPassInfo<'scope>, snatch_guard: &'snatch_guard SnatchGuard<'snatch_guard>, - device: &'cmd_buf Arc>, + device: &'cmd_buf Arc, - raw_encoder: &'raw_encoder mut A::CommandEncoder, + raw_encoder: &'raw_encoder mut dyn hal::DynCommandEncoder, - tracker: &'cmd_buf mut Tracker, - buffer_memory_init_actions: &'cmd_buf mut Vec>, - texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions, + tracker: &'cmd_buf mut Tracker, + buffer_memory_init_actions: &'cmd_buf mut Vec, + texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions, temp_offsets: Vec, dynamic_offset_count: usize, string_offset: usize, - active_occlusion_query: Option<(Arc>, u32)>, - 
active_pipeline_statistics_query: Option<(Arc>, u32)>, + active_occlusion_query: Option<(Arc, u32)>, + active_pipeline_statistics_query: Option<(Arc, u32)>, } -impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A: HalApi> - State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder, A> +impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> + State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> { fn is_ready(&self, indexed: bool) -> Result<(), DrawError> { if let Some(pipeline) = self.pipeline.as_ref() { @@ -748,14 +746,14 @@ where } } -struct RenderAttachment { - texture: Arc>, +struct RenderAttachment { + texture: Arc, selector: TextureSelector, usage: hal::TextureUses, } -impl TextureView { - fn to_render_attachment(&self, usage: hal::TextureUses) -> RenderAttachment { +impl TextureView { + fn to_render_attachment(&self, usage: hal::TextureUses) -> RenderAttachment { RenderAttachment { texture: self.parent.clone(), selector: self.selector.clone(), @@ -767,26 +765,26 @@ impl TextureView { const MAX_TOTAL_ATTACHMENTS: usize = hal::MAX_COLOR_ATTACHMENTS + hal::MAX_COLOR_ATTACHMENTS + 1; type AttachmentDataVec = ArrayVec; -struct RenderPassInfo<'d, A: HalApi> { +struct RenderPassInfo<'d> { context: RenderPassContext, - usage_scope: UsageScope<'d, A>, + usage_scope: UsageScope<'d>, /// All render attachments, including depth/stencil - render_attachments: AttachmentDataVec>, + render_attachments: AttachmentDataVec, is_depth_read_only: bool, is_stencil_read_only: bool, extent: wgt::Extent3d, - pending_discard_init_fixups: SurfacesInDiscardState, - divergent_discarded_depth_stencil_aspect: Option<(wgt::TextureAspect, Arc>)>, + pending_discard_init_fixups: SurfacesInDiscardState, + divergent_discarded_depth_stencil_aspect: Option<(wgt::TextureAspect, Arc)>, multiview: Option, } -impl<'d, A: HalApi> RenderPassInfo<'d, A> { +impl<'d> RenderPassInfo<'d> { fn add_pass_texture_init_actions( channel: &PassChannel, - texture_memory_actions: &mut CommandBufferTextureMemoryActions, - view: &TextureView, - pending_discard_init_fixups: &mut SurfacesInDiscardState, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + view: &TextureView, + pending_discard_init_fixups: &mut SurfacesInDiscardState, ) { if channel.load_op == LoadOp::Load { pending_discard_init_fixups.extend(texture_memory_actions.register_init_action( @@ -817,19 +815,19 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { } fn start( - device: &'d Device, + device: &'d Arc, hal_label: Option<&str>, color_attachments: ArrayVec< - Option>, + Option, { hal::MAX_COLOR_ATTACHMENTS }, >, - mut depth_stencil_attachment: Option>, - mut timestamp_writes: Option>, - mut occlusion_query_set: Option>>, - encoder: &mut CommandEncoder, - trackers: &mut Tracker, - texture_memory_actions: &mut CommandBufferTextureMemoryActions, - pending_query_resets: &mut QueryResetMap, + mut depth_stencil_attachment: Option, + mut timestamp_writes: Option, + mut occlusion_query_set: Option>, + encoder: &mut CommandEncoder, + trackers: &mut Tracker, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + pending_query_resets: &mut QueryResetMap, snatch_guard: &SnatchGuard<'_>, ) -> Result { profiling::scope!("RenderPassInfo::start"); @@ -840,7 +838,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { let mut is_depth_read_only = false; let mut is_stencil_read_only = false; - let mut render_attachments = AttachmentDataVec::>::new(); + let mut render_attachments = AttachmentDataVec::::new(); let mut discarded_surfaces = AttachmentDataVec::new(); let mut 
pending_discard_init_fixups = SurfacesInDiscardState::new(); let mut divergent_discarded_depth_stencil_aspect = None; @@ -854,7 +852,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { let mut detected_multiview: Option> = None; - let mut check_multiview = |view: &TextureView| { + let mut check_multiview = |view: &TextureView| { // Get the multiview configuration for this texture view let layers = view.selector.layers.end - view.selector.layers.start; let this_multiview = if layers >= 2 { @@ -885,7 +883,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { Ok(()) }; - let mut add_view = |view: &TextureView, location| { + let mut add_view = |view: &TextureView, location| { let render_extent = view.render_extent.map_err(|reason| { RenderPassErrorInner::TextureViewIsNotRenderable { location, reason } })?; @@ -919,6 +917,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { if let Some(at) = depth_stencil_attachment.as_ref() { let view = &at.view; + view.same_device(device)?; check_multiview(view)?; add_view(view, AttachmentErrorLocation::Depth)?; @@ -1040,7 +1039,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { } let mut color_attachments_hal = - ArrayVec::>, { hal::MAX_COLOR_ATTACHMENTS }>::new(); + ArrayVec::>, { hal::MAX_COLOR_ATTACHMENTS }>::new(); for (index, attachment) in color_attachments.iter().enumerate() { let at = if let Some(attachment) = attachment.as_ref() { attachment @@ -1048,7 +1047,8 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { color_attachments_hal.push(None); continue; }; - let color_view: &TextureView = &at.view; + let color_view: &TextureView = &at.view; + color_view.same_device(device)?; check_multiview(color_view)?; add_view( color_view, @@ -1079,6 +1079,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { let mut hal_resolve_target = None; if let Some(resolve_view) = &at.resolve_target { + resolve_view.same_device(device)?; check_multiview(resolve_view)?; let resolve_location = AttachmentErrorLocation::Color { @@ -1155,7 +1156,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { let attachment_formats = AttachmentData { colors: color_attachments .iter() - .map(|at| at.as_ref().map(|at| at.view.desc.texture_format)) + .map(|at| at.as_ref().map(|at| at.view.desc.format)) .collect(), resolves: color_attachments .iter() @@ -1178,8 +1179,9 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { multiview, }; - let timestamp_writes_hal = timestamp_writes.as_ref().map(|tw| { + let timestamp_writes_hal = if let Some(tw) = timestamp_writes.as_ref() { let query_set = &tw.query_set; + query_set.same_device(device)?; if let Some(index) = tw.beginning_of_pass_write_index { pending_query_resets.use_query_set(query_set, index); @@ -1188,16 +1190,21 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { pending_query_resets.use_query_set(query_set, index); } - hal::RenderPassTimestampWrites { - query_set: query_set.raw.as_ref().unwrap(), + Some(hal::PassTimestampWrites { + query_set: query_set.raw(), beginning_of_pass_write_index: tw.beginning_of_pass_write_index, end_of_pass_write_index: tw.end_of_pass_write_index, - } - }); + }) + } else { + None + }; - let occlusion_query_set_hal = occlusion_query_set - .as_ref() - .map(|query_set| query_set.raw.as_ref().unwrap()); + let occlusion_query_set_hal = if let Some(query_set) = occlusion_query_set.as_ref() { + query_set.same_device(device)?; + Some(query_set.raw()) + } else { + None + }; let hal_desc = hal::RenderPassDescriptor { label: hal_label, @@ -1246,9 +1253,9 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { fn finish( mut self, - raw: &mut A::CommandEncoder, + raw: 
&mut dyn hal::DynCommandEncoder, snatch_guard: &SnatchGuard, - ) -> Result<(UsageScope<'d, A>, SurfacesInDiscardState), RenderPassErrorInner> { + ) -> Result<(UsageScope<'d>, SurfacesInDiscardState), RenderPassErrorInner> { profiling::scope!("RenderPassInfo::finish"); unsafe { raw.end_render_pass(); @@ -1289,7 +1296,7 @@ impl<'d, A: HalApi> RenderPassInfo<'d, A> { hal::AttachmentOps::STORE, // clear depth ) }; - let desc = hal::RenderPassDescriptor { + let desc = hal::RenderPassDescriptor::<'_, _, dyn hal::DynTextureView> { label: Some("(wgpu internal) Zero init discarded depth/stencil aspect"), extent: view.render_extent.unwrap(), sample_count: view.samples, @@ -1324,20 +1331,30 @@ impl Global { /// Any operation on an invalid pass will return an error. /// /// If successful, puts the encoder into the [`CommandEncoderStatus::Locked`] state. - pub fn command_encoder_create_render_pass( + pub fn command_encoder_create_render_pass( &self, encoder_id: id::CommandEncoderId, desc: &RenderPassDescriptor<'_>, - ) -> (RenderPass, Option) { - fn fill_arc_desc( - hub: &crate::hub::Hub, - device: &Arc>, + ) -> (RenderPass, Option) { + fn fill_arc_desc( + hub: &crate::hub::Hub, desc: &RenderPassDescriptor<'_>, - arc_desc: &mut ArcRenderPassDescriptor, + arc_desc: &mut ArcRenderPassDescriptor, + device: &Device, ) -> Result<(), CommandEncoderError> { let query_sets = hub.query_sets.read(); let texture_views = hub.texture_views.read(); + let max_color_attachments = device.limits.max_color_attachments as usize; + if desc.color_attachments.len() > max_color_attachments { + return Err(CommandEncoderError::InvalidColorAttachment( + ColorAttachmentError::TooMany { + given: desc.color_attachments.len(), + limit: max_color_attachments, + }, + )); + } + for color_attachment in desc.color_attachments.iter() { if let Some(RenderPassColorAttachment { view: view_id, @@ -1348,13 +1365,11 @@ impl Global { let view = texture_views .get_owned(*view_id) .map_err(|_| CommandEncoderError::InvalidAttachmentId(*view_id))?; - view.same_device(device)?; let resolve_target = if let Some(resolve_target_id) = resolve_target { let rt_arc = texture_views.get_owned(*resolve_target_id).map_err(|_| { CommandEncoderError::InvalidResolveTargetId(*resolve_target_id) })?; - rt_arc.same_device(device)?; Some(rt_arc) } else { @@ -1382,7 +1397,6 @@ impl Global { depth_stencil_attachment.view, ) })?; - view.same_device(device)?; Some(ArcRenderPassDepthStencilAttachment { view, @@ -1397,7 +1411,6 @@ impl Global { let query_set = query_sets.get_owned(tw.query_set).map_err(|_| { CommandEncoderError::InvalidTimestampWritesQuerySetId(tw.query_set) })?; - query_set.same_device(device)?; Some(ArcPassTimestampWrites { query_set, @@ -1413,7 +1426,6 @@ impl Global { let query_set = query_sets.get_owned(occlusion_query_set).map_err(|_| { CommandEncoderError::InvalidOcclusionQuerySetId(occlusion_query_set) })?; - query_set.same_device(device)?; Some(query_set) } else { @@ -1423,7 +1435,7 @@ impl Global { Ok(()) } - let hub = A::hub(self); + let hub = &self.hub; let mut arc_desc = ArcRenderPassDescriptor { label: &desc.label, timestamp_writes: None, @@ -1444,27 +1456,14 @@ impl Global { Err(e) => return make_err(e, arc_desc), }; - let err = fill_arc_desc(hub, &cmd_buf.device, desc, &mut arc_desc).err(); + let err = fill_arc_desc(hub, desc, &mut arc_desc, &cmd_buf.device).err(); (RenderPass::new(Some(cmd_buf), arc_desc), err) } - /// Creates a type erased render pass. - /// - /// If creation fails, an invalid pass is returned. 
-    /// Any operation on an invalid pass will return an error.
-    pub fn command_encoder_create_render_pass_dyn<A: HalApi>(
-        &self,
-        encoder_id: id::CommandEncoderId,
-        desc: &RenderPassDescriptor<'_>,
-    ) -> (Box<dyn DynRenderPass>, Option<CommandEncoderError>) {
-        let (pass, err) = self.command_encoder_create_render_pass::<A>(encoder_id, desc);
-        (Box::new(pass), err)
-    }
-
     #[doc(hidden)]
     #[cfg(any(feature = "serde", feature = "replay"))]
-    pub fn render_pass_end_with_unresolved_commands<A: HalApi>(
+    pub fn render_pass_end_with_unresolved_commands(
         &self,
         encoder_id: id::CommandEncoderId,
         base: BasePass<super::RenderCommand>,
@@ -1477,7 +1476,7 @@ impl Global {

         #[cfg(feature = "trace")]
         {
-            let hub = A::hub(self);
+            let hub = &self.hub;

             let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) {
                 Ok(cmd_buf) => cmd_buf,
@@ -1512,7 +1511,7 @@ impl Global {
             push_constant_data,
         } = base;

-        let (mut render_pass, encoder_error) = self.command_encoder_create_render_pass::<A>(
+        let (mut render_pass, encoder_error) = self.command_encoder_create_render_pass(
             encoder_id,
             &RenderPassDescriptor {
                 label: label.as_deref().map(Cow::Borrowed),
@@ -1529,7 +1528,7 @@
             });
         };

-        let hub = A::hub(self);
+        let hub = &self.hub;
         render_pass.base = Some(BasePass {
             label,
             commands: super::RenderCommand::resolve_render_command_ids(hub, &commands)?,
@@ -1549,10 +1548,7 @@ impl Global {
     }

     #[doc(hidden)]
-    pub fn render_pass_end<A: HalApi>(
-        &self,
-        pass: &mut RenderPass<A>,
-    ) -> Result<(), RenderPassError> {
+    pub fn render_pass_end(&self, pass: &mut RenderPass) -> Result<(), RenderPassError> {
         let pass_scope = PassErrorScope::Pass;

         let base = pass
@@ -1563,7 +1559,7 @@ impl Global {

         profiling::scope!(
             "CommandEncoder::run_render_pass {}",
-            base.label.unwrap_or("")
+            base.label.as_deref().unwrap_or("")
         );

         let Some(cmd_buf) = pass.parent.as_ref() else {
@@ -1597,8 +1593,6 @@ impl Global {
         *status = CommandEncoderStatus::Error;
         encoder.open_pass(hal_label).map_pass_err(pass_scope)?;

-        log::trace!("Encoding render pass begin in {}", cmd_buf.error_ident());
-
         let info = RenderPassInfo::start(
             device,
             hal_label,
@@ -1619,16 +1613,6 @@ impl Global {
         let indices = &device.tracker_indices;
         tracker.buffers.set_size(indices.buffers.size());
         tracker.textures.set_size(indices.textures.size());
-        tracker.views.set_size(indices.texture_views.size());
-        tracker.bind_groups.set_size(indices.bind_groups.size());
-        tracker
-            .render_pipelines
-            .set_size(indices.render_pipelines.size());
-        tracker.bundles.set_size(indices.bundles.size());
-        tracker.query_sets.set_size(indices.query_sets.size());
-        tracker.tlas_s.set_size(indices.tlas_s.size());
-
-        let raw = &mut encoder.raw;

         let mut state = State {
             pipeline_flags: PipelineFlags::empty(),
@@ -1645,7 +1629,7 @@ impl Global {
             snatch_guard,

             device,
-            raw_encoder: raw,
+            raw_encoder: encoder.raw.as_mut(),
             tracker,
             buffer_memory_init_actions,
             texture_memory_actions,
@@ -1789,7 +1773,7 @@ impl Global {
                         },
                         indexed,
                     };
-                    multi_draw_indirect(&mut state, buffer, offset, count, indexed)
+                    multi_draw_indirect(&mut state, cmd_buf, buffer, offset, count, indexed)
                         .map_pass_err(scope)?;
                 }
                 ArcRenderCommand::MultiDrawIndirectCount {
@@ -1806,6 +1790,7 @@ impl Global {
                     };
                     multi_draw_indirect_count(
                         &mut state,
+                        cmd_buf,
                         buffer,
                         offset,
                         count_buffer,
@@ -1832,6 +1817,7 @@ impl Global {
                     let scope = PassErrorScope::WriteTimestamp;
                     write_timestamp(
                         &mut state,
+                        cmd_buf,
                         &mut cmd_buf_data.pending_query_resets,
                         query_set,
                         query_index,
@@ -1903,7 +1889,6 @@ impl Global {
             }
         }

-        log::trace!("Merging renderpass into {}", cmd_buf.error_ident());
         let (trackers, pending_discard_init_fixups) = state
             .info
             .finish(state.raw_encoder, state.snatch_guard)
@@ -1943,13 +1928,13 @@ impl Global {
     }
 }

-fn set_bind_group<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
+fn set_bind_group(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
     dynamic_offsets: &[DynamicOffset],
     index: u32,
     num_dynamic_offsets: usize,
-    bind_group: Arc<BindGroup<A>>,
+    bind_group: Arc<BindGroup>,
 ) -> Result<(), RenderPassErrorInner> {
     api_log!(
         "RenderPass::set_bind_group {index} {}",
@@ -2024,10 +2009,10 @@ fn set_bind_group(
     Ok(())
 }

-fn set_pipeline<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
-    pipeline: Arc<RenderPipeline<A>>,
+fn set_pipeline(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    pipeline: Arc<RenderPipeline>,
 ) -> Result<(), RenderPassErrorInner> {
     api_log!("RenderPass::set_pipeline {}", pipeline.error_ident());

@@ -2133,10 +2118,10 @@ fn set_pipeline(
     Ok(())
 }

-fn set_index_buffer<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
-    buffer: Arc<Buffer<A>>,
+fn set_index_buffer(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    buffer: Arc<Buffer>,
     index_format: IndexFormat,
     offset: u64,
     size: Option<BufferSize>,
@@ -2174,16 +2159,16 @@ fn set_index_buffer(
         size,
     };
     unsafe {
-        state.raw_encoder.set_index_buffer(bb, index_format);
+        hal::DynCommandEncoder::set_index_buffer(state.raw_encoder, bb, index_format);
     }
     Ok(())
 }

-fn set_vertex_buffer<A: HalApi>(
-    state: &mut State<A>,
-    cmd_buf: &Arc<CommandBuffer<A>>,
+fn set_vertex_buffer(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
     slot: u32,
-    buffer: Arc<Buffer<A>>,
+    buffer: Arc<Buffer>,
     offset: u64,
     size: Option<BufferSize>,
 ) -> Result<(), RenderPassErrorInner> {
@@ -2239,13 +2224,13 @@ fn set_vertex_buffer(
         size,
     };
     unsafe {
-        state.raw_encoder.set_vertex_buffer(slot, bb);
+        hal::DynCommandEncoder::set_vertex_buffer(state.raw_encoder, slot, bb);
     }
     state.vertex.update_limits();
     Ok(())
 }

-fn set_blend_constant<A: HalApi>(state: &mut State<A>, color: &Color) {
+fn set_blend_constant(state: &mut State, color: &Color) {
     api_log!("RenderPass::set_blend_constant");

     state.blend_constant = OptionalState::Set;
@@ -2260,7 +2245,7 @@ fn set_blend_constant(state: &mut State, color: &Color) {
     }
 }

-fn set_stencil_reference<A: HalApi>(state: &mut State<A>, value: u32) {
+fn set_stencil_reference(state: &mut State, value: u32) {
     api_log!("RenderPass::set_stencil_reference {value}");

     state.stencil_reference = value;
@@ -2274,8 +2259,8 @@ fn set_stencil_reference(state: &mut State, value: u32) {
     }
 }

-fn set_viewport<A: HalApi>(
-    state: &mut State<A>,
+fn set_viewport(
+    state: &mut State,
     rect: Rect<f32>,
     depth_min: f32,
     depth_max: f32,
@@ -2305,8 +2290,8 @@ fn set_viewport(
     Ok(())
 }

-fn set_push_constant<A: HalApi>(
-    state: &mut State<A>,
+fn set_push_constant(
+    state: &mut State,
     push_constant_data: &[u32],
     stages: ShaderStages,
     offset: u32,
@@ -2339,10 +2324,7 @@ fn set_push_constant(
     Ok(())
 }

-fn set_scissor<A: HalApi>(
-    state: &mut State<A>,
-    rect: Rect<u32>,
-) -> Result<(), RenderPassErrorInner> {
+fn set_scissor(state: &mut State, rect: Rect<u32>) -> Result<(), RenderPassErrorInner> {
     api_log!("RenderPass::set_scissor_rect {rect:?}");

     if rect.x + rect.w > state.info.extent.width || rect.y + rect.h > state.info.extent.height {
@@ -2360,8 +2342,8 @@ fn set_scissor(
     Ok(())
 }

-fn draw<A: HalApi>(
-    state: &mut State<A>,
+fn draw(
+    state: &mut State,
     vertex_count: u32,
     instance_count: u32,
     first_vertex: u32,
@@ -2400,8 +2382,8 @@ fn draw(
     Ok(())
 }

-fn draw_indexed<A: HalApi>(
-    state: &mut State<A>,
+fn draw_indexed(
+    state: &mut State,
     index_count: u32,
     instance_count: u32,
     first_index: u32,
@@ -2444,9 +2426,10 @@ fn draw_indexed(
     Ok(())
 }

-fn multi_draw_indirect<A: HalApi>(
-    state: &mut State<A>,
-    indirect_buffer: Arc<Buffer<A>>,
+fn multi_draw_indirect(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    indirect_buffer: Arc<Buffer>,
     offset: u64,
     count: Option<NonZeroU32>,
     indexed: bool,
@@ -2472,6 +2455,8 @@ fn multi_draw_indirect( .device .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)?; + indirect_buffer.same_device_as(cmd_buf.as_ref())?; + state .info .usage_scope @@ -2516,11 +2501,12 @@ fn multi_draw_indirect( Ok(()) } -fn multi_draw_indirect_count( - state: &mut State, - indirect_buffer: Arc>, +fn multi_draw_indirect_count( + state: &mut State, + cmd_buf: &Arc, + indirect_buffer: Arc, offset: u64, - count_buffer: Arc>, + count_buffer: Arc, count_buffer_offset: u64, max_count: u32, indexed: bool, @@ -2545,6 +2531,9 @@ fn multi_draw_indirect_count( .device .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)?; + indirect_buffer.same_device_as(cmd_buf.as_ref())?; + count_buffer.same_device_as(cmd_buf.as_ref())?; + state .info .usage_scope @@ -2620,7 +2609,7 @@ fn multi_draw_indirect_count( Ok(()) } -fn push_debug_group(state: &mut State, string_data: &[u8], len: usize) { +fn push_debug_group(state: &mut State, string_data: &[u8], len: usize) { state.debug_scope_depth += 1; if !state .device @@ -2638,7 +2627,7 @@ fn push_debug_group(state: &mut State, string_data: &[u8], len: us state.string_offset += len; } -fn pop_debug_group(state: &mut State) -> Result<(), RenderPassErrorInner> { +fn pop_debug_group(state: &mut State) -> Result<(), RenderPassErrorInner> { api_log!("RenderPass::pop_debug_group"); if state.debug_scope_depth == 0 { @@ -2657,7 +2646,7 @@ fn pop_debug_group(state: &mut State) -> Result<(), RenderPassErro Ok(()) } -fn insert_debug_marker(state: &mut State, string_data: &[u8], len: usize) { +fn insert_debug_marker(state: &mut State, string_data: &[u8], len: usize) { if !state .device .instance_flags @@ -2673,10 +2662,11 @@ fn insert_debug_marker(state: &mut State, string_data: &[u8], len: state.string_offset += len; } -fn write_timestamp( - state: &mut State, - pending_query_resets: &mut QueryResetMap, - query_set: Arc>, +fn write_timestamp( + state: &mut State, + cmd_buf: &CommandBuffer, + pending_query_resets: &mut QueryResetMap, + query_set: Arc, query_index: u32, ) -> Result<(), RenderPassErrorInner> { api_log!( @@ -2684,6 +2674,8 @@ fn write_timestamp( query_set.error_ident() ); + query_set.same_device_as(cmd_buf)?; + state .device .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES)?; @@ -2698,16 +2690,14 @@ fn write_timestamp( Ok(()) } -fn execute_bundle( - state: &mut State, - cmd_buf: &Arc>, - bundle: Arc>, +fn execute_bundle( + state: &mut State, + cmd_buf: &Arc, + bundle: Arc, ) -> Result<(), RenderPassErrorInner> { api_log!("RenderPass::execute_bundle {}", bundle.error_ident()); - // Have to clone the bundle arc, otherwise we keep a mutable reference to the bundle - // while later trying to add the bundle's resources to the tracker. 
- let bundle = state.tracker.bundles.insert_single(bundle).clone(); + let bundle = state.tracker.bundles.insert_single(bundle); bundle.same_device_as(cmd_buf.as_ref())?; @@ -2758,19 +2748,18 @@ fn execute_bundle( unsafe { state.info.usage_scope.merge_render_bundle(&bundle.used)?; - state.tracker.add_from_render_bundle(&bundle.used)?; }; state.reset_bundle(); Ok(()) } impl Global { - fn resolve_render_pass_buffer_id( + fn resolve_render_pass_buffer_id( &self, scope: PassErrorScope, buffer_id: id::Id, - ) -> Result>, RenderPassError> { - let hub = A::hub(self); + ) -> Result, RenderPassError> { + let hub = &self.hub; let buffer = hub .buffers .get(buffer_id) @@ -2780,12 +2769,12 @@ impl Global { Ok(buffer) } - fn resolve_render_pass_query_set( + fn resolve_render_pass_query_set( &self, scope: PassErrorScope, query_set_id: id::Id, - ) -> Result>, RenderPassError> { - let hub = A::hub(self); + ) -> Result, RenderPassError> { + let hub = &self.hub; let query_set = hub .query_sets .get(query_set_id) @@ -2795,9 +2784,9 @@ impl Global { Ok(query_set) } - pub fn render_pass_set_bind_group( + pub fn render_pass_set_bind_group( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, index: u32, bind_group_id: id::BindGroupId, offsets: &[DynamicOffset], @@ -2819,7 +2808,7 @@ impl Global { return Ok(()); } - let hub = A::hub(self); + let hub = &self.hub; let bind_group = hub .bind_groups .get(bind_group_id) @@ -2835,9 +2824,9 @@ impl Global { Ok(()) } - pub fn render_pass_set_pipeline( + pub fn render_pass_set_pipeline( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, pipeline_id: id::RenderPipelineId, ) -> Result<(), RenderPassError> { let scope = PassErrorScope::SetPipelineRender; @@ -2850,7 +2839,7 @@ impl Global { return Ok(()); } - let hub = A::hub(self); + let hub = &self.hub; let pipeline = hub .render_pipelines .get(pipeline_id) @@ -2862,9 +2851,9 @@ impl Global { Ok(()) } - pub fn render_pass_set_index_buffer( + pub fn render_pass_set_index_buffer( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, buffer_id: id::BufferId, index_format: IndexFormat, offset: BufferAddress, @@ -2883,9 +2872,9 @@ impl Global { Ok(()) } - pub fn render_pass_set_vertex_buffer( + pub fn render_pass_set_vertex_buffer( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, slot: u32, buffer_id: id::BufferId, offset: BufferAddress, @@ -2904,9 +2893,9 @@ impl Global { Ok(()) } - pub fn render_pass_set_blend_constant( + pub fn render_pass_set_blend_constant( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, color: Color, ) -> Result<(), RenderPassError> { let scope = PassErrorScope::SetBlendConstant; @@ -2918,9 +2907,9 @@ impl Global { Ok(()) } - pub fn render_pass_set_stencil_reference( + pub fn render_pass_set_stencil_reference( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, value: u32, ) -> Result<(), RenderPassError> { let scope = PassErrorScope::SetStencilReference; @@ -2932,9 +2921,9 @@ impl Global { Ok(()) } - pub fn render_pass_set_viewport( + pub fn render_pass_set_viewport( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, x: f32, y: f32, w: f32, @@ -2954,9 +2943,9 @@ impl Global { Ok(()) } - pub fn render_pass_set_scissor_rect( + pub fn render_pass_set_scissor_rect( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, x: u32, y: u32, w: u32, @@ -2971,9 +2960,9 @@ impl Global { Ok(()) } - pub fn render_pass_set_push_constants( + pub fn render_pass_set_push_constants( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, stages: ShaderStages, offset: u32, data: 
&[u8], @@ -3010,9 +2999,9 @@ impl Global { Ok(()) } - pub fn render_pass_draw( + pub fn render_pass_draw( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, vertex_count: u32, instance_count: u32, first_vertex: u32, @@ -3034,9 +3023,9 @@ impl Global { Ok(()) } - pub fn render_pass_draw_indexed( + pub fn render_pass_draw_indexed( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, index_count: u32, instance_count: u32, first_index: u32, @@ -3060,9 +3049,9 @@ impl Global { Ok(()) } - pub fn render_pass_draw_indirect( + pub fn render_pass_draw_indirect( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, ) -> Result<(), RenderPassError> { @@ -3082,9 +3071,9 @@ impl Global { Ok(()) } - pub fn render_pass_draw_indexed_indirect( + pub fn render_pass_draw_indexed_indirect( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, ) -> Result<(), RenderPassError> { @@ -3104,9 +3093,9 @@ impl Global { Ok(()) } - pub fn render_pass_multi_draw_indirect( + pub fn render_pass_multi_draw_indirect( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, count: u32, @@ -3127,9 +3116,9 @@ impl Global { Ok(()) } - pub fn render_pass_multi_draw_indexed_indirect( + pub fn render_pass_multi_draw_indexed_indirect( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, count: u32, @@ -3150,9 +3139,9 @@ impl Global { Ok(()) } - pub fn render_pass_multi_draw_indirect_count( + pub fn render_pass_multi_draw_indirect_count( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, count_buffer_id: id::BufferId, @@ -3166,7 +3155,7 @@ impl Global { let base = pass.base_mut(scope)?; // Don't use resolve_render_pass_buffer_id here, because we don't want to take the read-lock twice. - let hub = A::hub(self); + let hub = &self.hub; let buffers = hub.buffers.read(); let buffer = buffers .get_owned(buffer_id) @@ -3190,9 +3179,9 @@ impl Global { Ok(()) } - pub fn render_pass_multi_draw_indexed_indirect_count( + pub fn render_pass_multi_draw_indexed_indirect_count( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, count_buffer_id: id::BufferId, @@ -3206,7 +3195,7 @@ impl Global { let base = pass.base_mut(scope)?; // Don't use resolve_render_pass_buffer_id here, because we don't want to take the read-lock twice. 
- let hub = A::hub(self); + let hub = &self.hub; let buffers = hub.buffers.read(); let buffer = buffers .get_owned(buffer_id) @@ -3231,9 +3220,9 @@ impl Global { Ok(()) } - pub fn render_pass_push_debug_group( + pub fn render_pass_push_debug_group( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, label: &str, color: u32, ) -> Result<(), RenderPassError> { @@ -3250,9 +3239,9 @@ impl Global { Ok(()) } - pub fn render_pass_pop_debug_group( + pub fn render_pass_pop_debug_group( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, ) -> Result<(), RenderPassError> { let base = pass.base_mut(PassErrorScope::PopDebugGroup)?; @@ -3261,9 +3250,9 @@ impl Global { Ok(()) } - pub fn render_pass_insert_debug_marker( + pub fn render_pass_insert_debug_marker( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, label: &str, color: u32, ) -> Result<(), RenderPassError> { @@ -3280,9 +3269,9 @@ impl Global { Ok(()) } - pub fn render_pass_write_timestamp( + pub fn render_pass_write_timestamp( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, query_set_id: id::QuerySetId, query_index: u32, ) -> Result<(), RenderPassError> { @@ -3297,9 +3286,9 @@ impl Global { Ok(()) } - pub fn render_pass_begin_occlusion_query( + pub fn render_pass_begin_occlusion_query( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, query_index: u32, ) -> Result<(), RenderPassError> { let scope = PassErrorScope::BeginOcclusionQuery; @@ -3311,9 +3300,9 @@ impl Global { Ok(()) } - pub fn render_pass_end_occlusion_query( + pub fn render_pass_end_occlusion_query( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, ) -> Result<(), RenderPassError> { let scope = PassErrorScope::EndOcclusionQuery; let base = pass.base_mut(scope)?; @@ -3323,9 +3312,9 @@ impl Global { Ok(()) } - pub fn render_pass_begin_pipeline_statistics_query( + pub fn render_pass_begin_pipeline_statistics_query( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, query_set_id: id::QuerySetId, query_index: u32, ) -> Result<(), RenderPassError> { @@ -3341,9 +3330,9 @@ impl Global { Ok(()) } - pub fn render_pass_end_pipeline_statistics_query( + pub fn render_pass_end_pipeline_statistics_query( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, ) -> Result<(), RenderPassError> { let scope = PassErrorScope::EndPipelineStatisticsQuery; let base = pass.base_mut(scope)?; @@ -3354,15 +3343,15 @@ impl Global { Ok(()) } - pub fn render_pass_execute_bundles( + pub fn render_pass_execute_bundles( &self, - pass: &mut RenderPass, + pass: &mut RenderPass, render_bundle_ids: &[id::RenderBundleId], ) -> Result<(), RenderPassError> { let scope = PassErrorScope::ExecuteBundle; let base = pass.base_mut(scope)?; - let hub = A::hub(self); + let hub = &self.hub; let bundles = hub.render_bundles.read(); for &bundle_id in render_bundle_ids { diff --git a/wgpu-core/src/command/render_command.rs b/wgpu-core/src/command/render_command.rs index 287aa888f1..891ee3cfbc 100644 --- a/wgpu-core/src/command/render_command.rs +++ b/wgpu-core/src/command/render_command.rs @@ -1,6 +1,5 @@ use crate::{ binding_model::BindGroup, - hal_api::HalApi, id, pipeline::RenderPipeline, resource::{Buffer, QuerySet}, @@ -126,10 +125,10 @@ pub enum RenderCommand { impl RenderCommand { /// Resolves all ids in a list of commands into the corresponding resource Arc. 
    #[cfg(any(feature = "serde", feature = "replay"))]
-    pub fn resolve_render_command_ids<A: HalApi>(
-        hub: &crate::hub::Hub<A>,
+    pub fn resolve_render_command_ids(
+        hub: &crate::hub::Hub,
         commands: &[RenderCommand],
-    ) -> Result<Vec<ArcRenderCommand<A>>, super::RenderPassError> {
+    ) -> Result<Vec<ArcRenderCommand>, super::RenderPassError> {
         use super::{
             DrawKind, PassErrorScope, RenderCommandError, RenderPassError, RenderPassErrorInner,
         };
@@ -140,9 +139,9 @@ impl RenderCommand {
         let pipelines_guard = hub.render_pipelines.read();
         let render_bundles_guard = hub.render_bundles.read();

-        let resolved_commands: Vec<ArcRenderCommand<A>> = commands
+        let resolved_commands: Vec<ArcRenderCommand> = commands
             .iter()
-            .map(|c| -> Result<ArcRenderCommand<A>, RenderPassError> {
+            .map(|c| -> Result<ArcRenderCommand, RenderPassError> {
                 Ok(match *c {
                     RenderCommand::SetBindGroup {
                         index,
@@ -381,22 +380,22 @@ impl RenderCommand {
 /// Equivalent to `RenderCommand` with the Ids resolved into resource Arcs.
 #[doc(hidden)]
 #[derive(Clone, Debug)]
-pub enum ArcRenderCommand<A: HalApi> {
+pub enum ArcRenderCommand {
     SetBindGroup {
         index: u32,
         num_dynamic_offsets: usize,
-        bind_group: Arc<BindGroup<A>>,
+        bind_group: Arc<BindGroup>,
     },
-    SetPipeline(Arc<RenderPipeline<A>>),
+    SetPipeline(Arc<RenderPipeline>),
     SetIndexBuffer {
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         index_format: wgt::IndexFormat,
         offset: BufferAddress,
         size: Option<BufferSize>,
     },
     SetVertexBuffer {
         slot: u32,
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         offset: BufferAddress,
         size: Option<BufferSize>,
     },
@@ -450,16 +449,16 @@ pub enum ArcRenderCommand {
         first_instance: u32,
     },
     MultiDrawIndirect {
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         offset: BufferAddress,
         /// Count of `None` represents a non-multi call.
         count: Option<NonZeroU32>,
         indexed: bool,
     },
     MultiDrawIndirectCount {
-        buffer: Arc<Buffer<A>>,
+        buffer: Arc<Buffer>,
         offset: BufferAddress,
-        count_buffer: Arc<Buffer<A>>,
+        count_buffer: Arc<Buffer>,
         count_buffer_offset: BufferAddress,
         max_count: u32,
         indexed: bool,
@@ -474,7 +473,7 @@ pub enum ArcRenderCommand {
         len: usize,
     },
     WriteTimestamp {
-        query_set: Arc<QuerySet<A>>,
+        query_set: Arc<QuerySet>,
         query_index: u32,
     },
     BeginOcclusionQuery {
@@ -482,9 +481,9 @@ pub enum ArcRenderCommand {
     },
     EndOcclusionQuery,
     BeginPipelineStatisticsQuery {
-        query_set: Arc<QuerySet<A>>,
+        query_set: Arc<QuerySet>,
         query_index: u32,
     },
     EndPipelineStatisticsQuery,
-    ExecuteBundle(Arc<RenderBundle<A>>),
+    ExecuteBundle(Arc<RenderBundle>),
 }
diff --git a/wgpu-core/src/command/timestamp_writes.rs b/wgpu-core/src/command/timestamp_writes.rs
index 82ab13c6dd..e91b48534d 100644
--- a/wgpu-core/src/command/timestamp_writes.rs
+++ b/wgpu-core/src/command/timestamp_writes.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;

-use crate::{hal_api::HalApi, id};
+use crate::id;

 /// Describes the writing of timestamp values in a render or compute pass.
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -15,9 +15,9 @@ pub struct PassTimestampWrites {
 }

 /// Describes the writing of timestamp values in a render or compute pass with the query set resolved.
-pub struct ArcPassTimestampWrites<A: HalApi> {
+pub struct ArcPassTimestampWrites {
     /// The query set to write the timestamps to.
-    pub query_set: Arc<QuerySet<A>>,
+    pub query_set: Arc<QuerySet>,
     /// The index of the query set at which a start timestamp of this pass is written, if any.
     pub beginning_of_pass_write_index: Option<u32>,
     /// The index of the query set at which an end timestamp of this pass is written, if any.
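The `resolve_render_command_ids` change above keeps the same id-to-`Arc` resolution step and only drops the `A: HalApi` parameter from the resolved commands: commands recorded with plain ids are looked up once, up front, against the resource registry, and execution later works purely with owned `Arc`s. A minimal, self-contained sketch of that pattern follows; the `Command`, `ArcCommand`, and registry names here are hypothetical stand-ins for illustration, not wgpu-core's actual types.

```rust
use std::collections::HashMap;
use std::sync::Arc;

// Hypothetical resource type standing in for e.g. a bind group.
struct BindGroup {
    label: String,
}

// Id-based command, as recorded or deserialized (analogous to `RenderCommand`).
enum Command {
    SetBindGroup { index: u32, bind_group_id: u64 },
}

// Arc-based command, ready for execution (analogous to `ArcRenderCommand`).
enum ArcCommand {
    SetBindGroup { index: u32, bind_group: Arc<BindGroup> },
}

fn resolve(
    registry: &HashMap<u64, Arc<BindGroup>>,
    commands: &[Command],
) -> Result<Vec<ArcCommand>, String> {
    commands
        .iter()
        .map(|c| match *c {
            Command::SetBindGroup { index, bind_group_id } => {
                // One registry lookup per recorded command; execution later
                // touches only the Arcs and never consults an id table.
                let bind_group = registry
                    .get(&bind_group_id)
                    .cloned()
                    .ok_or_else(|| format!("invalid bind group id {bind_group_id}"))?;
                Ok(ArcCommand::SetBindGroup { index, bind_group })
            }
        })
        .collect()
}

fn main() {
    let mut registry = HashMap::new();
    registry.insert(1, Arc::new(BindGroup { label: "globals".into() }));
    let commands = [Command::SetBindGroup { index: 0, bind_group_id: 1 }];
    let resolved = resolve(&registry, &commands).unwrap();
    if let ArcCommand::SetBindGroup { bind_group, .. } = &resolved[0] {
        println!("resolved to {}", bind_group.label);
    }
}
```

Resolving every id eagerly means an invalid id surfaces as a single error before execution starts, and the hot replay loop holds strong references that keep the resources alive for the duration of the pass.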
diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 4379777eb5..de5ef9ed84 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -6,7 +6,6 @@ use crate::{ conv, device::{Device, DeviceError, MissingDownlevelFlags}, global::Global, - hal_api::HalApi, id::{BufferId, CommandEncoderId, TextureId}, init_tracker::{ has_copy_partial_init_tracker_coverage, MemoryInitKind, TextureInitRange, @@ -21,11 +20,10 @@ use crate::{ }; use arrayvec::ArrayVec; -use hal::CommandEncoder as _; use thiserror::Error; use wgt::{BufferAddress, BufferUsages, Extent3d, TextureUsages}; -use std::{iter, sync::Arc}; +use std::sync::Arc; use super::{memory_init::CommandBufferTextureMemoryActions, ClearError, CommandEncoder}; @@ -160,10 +158,10 @@ impl From for CopyError { } } -pub(crate) fn extract_texture_selector( +pub(crate) fn extract_texture_selector( copy_texture: &ImageCopyTexture, copy_size: &Extent3d, - texture: &Texture, + texture: &Texture, ) -> Result<(TextureSelector, hal::TextureCopyBase), TransferError> { let format = texture.desc.format; let copy_aspect = hal::FormatAspects::new(format, copy_texture.aspect); @@ -225,7 +223,7 @@ pub(crate) fn validate_linear_texture_data( // the copy size before calling this function (for example via `validate_texture_copy_range`). let copy_width = copy_size.width as BufferAddress; let copy_height = copy_size.height as BufferAddress; - let copy_depth = copy_size.depth_or_array_layers as BufferAddress; + let depth_or_array_layers = copy_size.depth_or_array_layers as BufferAddress; let offset = layout.offset; @@ -253,19 +251,19 @@ pub(crate) fn validate_linear_texture_data( } bytes_per_row } else { - if copy_depth > 1 || height_in_blocks > 1 { + if depth_or_array_layers > 1 || height_in_blocks > 1 { return Err(TransferError::UnspecifiedBytesPerRow); } 0 }; - let block_rows_per_image = if let Some(rows_per_image) = layout.rows_per_image { + let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image { let rows_per_image = rows_per_image as BufferAddress; if rows_per_image < height_in_blocks { return Err(TransferError::InvalidRowsPerImage); } rows_per_image } else { - if copy_depth > 1 { + if depth_or_array_layers > 1 { return Err(TransferError::UnspecifiedRowsPerImage); } 0 @@ -287,12 +285,12 @@ pub(crate) fn validate_linear_texture_data( } } - let bytes_per_image = bytes_per_row * block_rows_per_image; + let bytes_per_image = bytes_per_row * rows_per_image; - let required_bytes_in_copy = if copy_depth == 0 { + let required_bytes_in_copy = if depth_or_array_layers == 0 { 0 } else { - let mut required_bytes_in_copy = bytes_per_image * (copy_depth - 1); + let mut required_bytes_in_copy = bytes_per_image * (depth_or_array_layers - 1); if height_in_blocks > 0 { required_bytes_in_copy += bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row; } @@ -408,15 +406,15 @@ pub(crate) fn validate_texture_copy_range( Ok((copy_extent, array_layer_count)) } -fn handle_texture_init( +fn handle_texture_init( init_kind: MemoryInitKind, - encoder: &mut CommandEncoder, - trackers: &mut Tracker, - texture_memory_actions: &mut CommandBufferTextureMemoryActions, - device: &Device, + encoder: &mut CommandEncoder, + trackers: &mut Tracker, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + device: &Device, copy_texture: &ImageCopyTexture, copy_size: &Extent3d, - texture: &Arc>, + texture: &Arc, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), ClearError> { let init_action = TextureInitTrackerAction 
{ @@ -445,7 +443,7 @@ fn handle_texture_init( cmd_buf_raw, &mut trackers.textures, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), snatch_guard, )?; } @@ -458,14 +456,14 @@ fn handle_texture_init( /// /// Ensure the source texture of a transfer is in the right initialization /// state, and record the state for after the transfer operation. -fn handle_src_texture_init( - encoder: &mut CommandEncoder, - trackers: &mut Tracker, - texture_memory_actions: &mut CommandBufferTextureMemoryActions, - device: &Device, +fn handle_src_texture_init( + encoder: &mut CommandEncoder, + trackers: &mut Tracker, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + device: &Device, source: &ImageCopyTexture, copy_size: &Extent3d, - texture: &Arc>, + texture: &Arc, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), TransferError> { handle_texture_init( @@ -486,14 +484,14 @@ fn handle_src_texture_init( /// /// Ensure the destination texture of a transfer is in the right initialization /// state, and record the state for after the transfer operation. -fn handle_dst_texture_init( - encoder: &mut CommandEncoder, - trackers: &mut Tracker, - texture_memory_actions: &mut CommandBufferTextureMemoryActions, - device: &Device, +fn handle_dst_texture_init( + encoder: &mut CommandEncoder, + trackers: &mut Tracker, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + device: &Device, destination: &ImageCopyTexture, copy_size: &Extent3d, - texture: &Arc>, + texture: &Arc, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), TransferError> { // Attention: If we don't write full texture subresources, we need to a full @@ -525,7 +523,7 @@ fn handle_dst_texture_init( } impl Global { - pub fn command_encoder_copy_buffer_to_buffer( + pub fn command_encoder_copy_buffer_to_buffer( &self, command_encoder_id: CommandEncoderId, source: BufferId, @@ -542,7 +540,7 @@ impl Global { if source == destination { return Err(TransferError::SameSourceDestinationBuffer.into()); } - let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -687,14 +685,18 @@ impl Global { size: wgt::BufferSize::new(size).unwrap(), }; let cmd_buf_raw = cmd_buf_data.encoder.open()?; + let barriers = src_barrier + .into_iter() + .chain(dst_barrier) + .collect::>(); unsafe { - cmd_buf_raw.transition_buffers(src_barrier.into_iter().chain(dst_barrier)); - cmd_buf_raw.copy_buffer_to_buffer(src_raw, dst_raw, iter::once(region)); + cmd_buf_raw.transition_buffers(&barriers); + cmd_buf_raw.copy_buffer_to_buffer(src_raw, dst_raw, &[region]); } Ok(()) } - pub fn command_encoder_copy_buffer_to_texture( + pub fn command_encoder_copy_buffer_to_texture( &self, command_encoder_id: CommandEncoderId, source: &ImageCopyBuffer, @@ -708,7 +710,7 @@ impl Global { destination.texture ); - let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -801,7 +803,9 @@ impl Global { dst_texture .check_usage(TextureUsages::COPY_DST) .map_err(TransferError::MissingTextureUsage)?; - let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_raw)); + let dst_barrier = dst_pending + .map(|pending| pending.into_hal(dst_raw)) + .collect::>(); if !dst_base.aspect.is_one() { return Err(TransferError::CopyAspectNotOne.into()); @@ -837,28 +841,30 @@ impl Global { MemoryInitKind::NeedsInitializedMemory, )); - let regions = (0..array_layer_count).map(|rel_array_layer| { - let mut texture_base = dst_base.clone(); - texture_base.array_layer += rel_array_layer; - let mut 
buffer_layout = source.layout; - buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; - hal::BufferTextureCopy { - buffer_layout, - texture_base, - size: hal_copy_size, - } - }); + let regions = (0..array_layer_count) + .map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = source.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }) + .collect::>(); let cmd_buf_raw = encoder.open()?; unsafe { - cmd_buf_raw.transition_textures(dst_barrier.into_iter()); - cmd_buf_raw.transition_buffers(src_barrier.into_iter()); - cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, regions); + cmd_buf_raw.transition_textures(&dst_barrier); + cmd_buf_raw.transition_buffers(src_barrier.as_slice()); + cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, ®ions); } Ok(()) } - pub fn command_encoder_copy_texture_to_buffer( + pub fn command_encoder_copy_texture_to_buffer( &self, command_encoder_id: CommandEncoderId, source: &ImageCopyTexture, @@ -872,7 +878,7 @@ impl Global { destination.buffer ); - let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -956,7 +962,9 @@ impl Global { } .into()); } - let src_barrier = src_pending.map(|pending| pending.into_hal(src_raw)); + let src_barrier = src_pending + .map(|pending| pending.into_hal(src_raw)) + .collect::>(); let dst_buffer = hub .buffers @@ -1009,32 +1017,34 @@ impl Global { MemoryInitKind::ImplicitlyInitialized, )); - let regions = (0..array_layer_count).map(|rel_array_layer| { - let mut texture_base = src_base.clone(); - texture_base.array_layer += rel_array_layer; - let mut buffer_layout = destination.layout; - buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; - hal::BufferTextureCopy { - buffer_layout, - texture_base, - size: hal_copy_size, - } - }); + let regions = (0..array_layer_count) + .map(|rel_array_layer| { + let mut texture_base = src_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = destination.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }) + .collect::>(); let cmd_buf_raw = encoder.open()?; unsafe { - cmd_buf_raw.transition_buffers(dst_barrier.into_iter()); - cmd_buf_raw.transition_textures(src_barrier.into_iter()); + cmd_buf_raw.transition_buffers(dst_barrier.as_slice()); + cmd_buf_raw.transition_textures(&src_barrier); cmd_buf_raw.copy_texture_to_buffer( src_raw, hal::TextureUses::COPY_SRC, dst_raw, - regions, + ®ions, ); } Ok(()) } - pub fn command_encoder_copy_texture_to_texture( + pub fn command_encoder_copy_texture_to_texture( &self, command_encoder_id: CommandEncoderId, source: &ImageCopyTexture, @@ -1048,7 +1058,7 @@ impl Global { destination.texture ); - let hub = A::hub(self); + let hub = &self.hub; let cmd_buf = match hub .command_buffers @@ -1186,25 +1196,27 @@ impl Global { height: src_copy_size.height.min(dst_copy_size.height), depth: src_copy_size.depth.min(dst_copy_size.depth), }; - let regions = (0..array_layer_count).map(|rel_array_layer| { - let mut src_base = src_tex_base.clone(); - let mut dst_base = dst_tex_base.clone(); - src_base.array_layer += rel_array_layer; - dst_base.array_layer += rel_array_layer; - hal::TextureCopy { - src_base, - dst_base, - size: hal_copy_size, - } - }); + 
let regions = (0..array_layer_count)
+            .map(|rel_array_layer| {
+                let mut src_base = src_tex_base.clone();
+                let mut dst_base = dst_tex_base.clone();
+                src_base.array_layer += rel_array_layer;
+                dst_base.array_layer += rel_array_layer;
+                hal::TextureCopy {
+                    src_base,
+                    dst_base,
+                    size: hal_copy_size,
+                }
+            })
+            .collect::<Vec<_>>();

         let cmd_buf_raw = cmd_buf_data.encoder.open()?;
         unsafe {
-            cmd_buf_raw.transition_textures(barriers.into_iter());
+            cmd_buf_raw.transition_textures(&barriers);
             cmd_buf_raw.copy_texture_to_texture(
                 src_raw,
                 hal::TextureUses::COPY_SRC,
                 dst_raw,
-                regions,
+                &regions,
             );
         }
diff --git a/wgpu-core/src/device/any_device.rs b/wgpu-core/src/device/any_device.rs
deleted file mode 100644
index 9e459c1a94..0000000000
--- a/wgpu-core/src/device/any_device.rs
+++ /dev/null
@@ -1,102 +0,0 @@
-use wgt::Backend;
-
-use super::Device;
-/// The `AnyDevice` type: a pointer to a `Device` for any backend `A`.
-use crate::hal_api::HalApi;
-
-use std::fmt;
-use std::mem::ManuallyDrop;
-use std::ptr::NonNull;
-use std::sync::Arc;
-
-struct AnyDeviceVtable {
-    // We opportunistically store the backend here, since we know it will be used
-    // with backend selection and it can be stored in static memory.
-    backend: Backend,
-    // Drop glue which knows how to drop the stored data.
-    drop: unsafe fn(*mut ()),
-}
-
-/// A pointer to a `Device`, for any backend `A`.
-///
-/// Any `AnyDevice` is just like an `Arc<Device<A>>`, except that the `A` type
-/// parameter is erased. To access the `Device`, you must downcast to a
-/// particular backend with the [`downcast_ref`] or [`downcast_clone`]
-/// methods.
-pub struct AnyDevice {
-    data: NonNull<()>,
-    vtable: &'static AnyDeviceVtable,
-}
-
-impl AnyDevice {
-    /// Return an `AnyDevice` that holds an owning `Arc` pointer to `device`.
-    pub fn new<A: HalApi>(device: Arc<Device<A>>) -> AnyDevice {
-        unsafe fn drop_glue<A: HalApi>(ptr: *mut ()) {
-            // Drop the arc this instance is holding.
-            unsafe {
-                _ = Arc::from_raw(ptr.cast::<Device<A>>());
-            }
-        }
-
-        // SAFETY: The pointer returned by Arc::into_raw is guaranteed to be
-        // non-null.
-        let data = unsafe { NonNull::new_unchecked(Arc::into_raw(device).cast_mut()) };
-
-        AnyDevice {
-            data: data.cast(),
-            vtable: &AnyDeviceVtable {
-                backend: A::VARIANT,
-                drop: drop_glue::<A>,
-            },
-        }
-    }
-
-    /// If `self` is an `Arc<Device<A>>`, return a reference to the
-    /// device.
-    pub fn downcast_ref<A: HalApi>(&self) -> Option<&Device<A>> {
-        if self.vtable.backend != A::VARIANT {
-            return None;
-        }
-
-        // SAFETY: We just checked the instance above implicitly by the backend
-        // that it was statically constructed through.
-        Some(unsafe { &*(self.data.as_ptr().cast::<Device<A>>()) })
-    }
-
-    /// If `self` is an `Arc<Device<A>>`, return a clone of that.
-    pub fn downcast_clone<A: HalApi>(&self) -> Option<Arc<Device<A>>> {
-        if self.vtable.backend != A::VARIANT {
-            return None;
-        }
-
-        // We need to prevent the destructor of the arc from running, since it
-        // refers to the instance held by this object. Dropping it would
-        // invalidate this object.
-        //
-        // SAFETY: We just checked the instance above implicitly by the backend
-        // that it was statically constructed through.
-        let this =
-            ManuallyDrop::new(unsafe { Arc::from_raw(self.data.as_ptr().cast::<Device<A>>()) });
-
-        // Cloning it increases the reference count, and we return a new arc
-        // instance.
- Some((*this).clone()) - } -} - -impl Drop for AnyDevice { - fn drop(&mut self) { - unsafe { (self.vtable.drop)(self.data.as_ptr()) } - } -} - -impl fmt::Debug for AnyDevice { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "AnyDevice<{}>", self.vtable.backend) - } -} - -#[cfg(send_sync)] -unsafe impl Send for AnyDevice {} -#[cfg(send_sync)] -unsafe impl Sync for AnyDevice {} diff --git a/wgpu-core/src/device/bgl.rs b/wgpu-core/src/device/bgl.rs index 911ac8a435..9b7bdc0fee 100644 --- a/wgpu-core/src/device/bgl.rs +++ b/wgpu-core/src/device/bgl.rs @@ -126,4 +126,9 @@ impl EntryMap { self.sorted = false; self.inner.entry(key) } + + pub fn sort(&mut self) { + self.inner.sort_unstable_keys(); + self.sorted = true; + } } diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 2f0cbc5842..2eb1466d65 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -7,7 +7,7 @@ use crate::{ ResolvedBindGroupEntry, ResolvedBindingResource, ResolvedBufferBinding, }, command, conv, - device::{bgl, life::WaitIdleError, queue, DeviceError, DeviceLostClosure, DeviceLostReason}, + device::{bgl, life::WaitIdleError, DeviceError, DeviceLostClosure, DeviceLostReason}, global::Global, hal_api::HalApi, id::{self, AdapterId, DeviceId, QueueId, SurfaceId}, @@ -19,32 +19,24 @@ use crate::{ present, resource::{ self, BufferAccessError, BufferAccessResult, BufferMapOperation, CreateBufferError, - Trackable, }, storage::Storage, Label, }; -use hal::Device as _; - use wgt::{BufferAddress, TextureFormat}; -use std::{ - borrow::Cow, - iter, - ptr::{self, NonNull}, - sync::{atomic::Ordering, Arc}, -}; +use std::{borrow::Cow, ptr::NonNull, sync::atomic::Ordering}; use super::{ImplicitPipelineIds, UserClosures}; impl Global { - pub fn adapter_is_surface_supported( + pub fn adapter_is_surface_supported( &self, adapter_id: AdapterId, surface_id: SurfaceId, ) -> Result { - let hub = A::hub(self); + let hub = &self.hub; let surface_guard = self.surfaces.read(); let adapter_guard = hub.adapters.read(); @@ -57,13 +49,13 @@ impl Global { Ok(adapter.is_surface_supported(surface)) } - pub fn surface_get_capabilities( + pub fn surface_get_capabilities( &self, surface_id: SurfaceId, adapter_id: AdapterId, ) -> Result { profiling::scope!("Surface::get_capabilities"); - self.fetch_adapter_and_surface::(surface_id, adapter_id, |adapter, surface| { + self.fetch_adapter_and_surface::<_, _>(surface_id, adapter_id, |adapter, surface| { let mut hal_caps = surface.get_capabilities(adapter)?; hal_caps.formats.sort_by_key(|f| !f.is_srgb()); @@ -80,8 +72,7 @@ impl Global { } fn fetch_adapter_and_surface< - A: HalApi, - F: FnOnce(&Adapter, &Surface) -> Result, + F: FnOnce(&Adapter, &Surface) -> Result, B, >( &self, @@ -89,7 +80,7 @@ impl Global { adapter_id: AdapterId, get_supported_callback: F, ) -> Result { - let hub = A::hub(self); + let hub = &self.hub; let surface_guard = self.surfaces.read(); let adapter_guard = hub.adapters.read(); @@ -103,11 +94,8 @@ impl Global { get_supported_callback(adapter, surface) } - pub fn device_features( - &self, - device_id: DeviceId, - ) -> Result { - let hub = A::hub(self); + pub fn device_features(&self, device_id: DeviceId) -> Result { + let hub = &self.hub; let device = hub .devices @@ -117,11 +105,8 @@ impl Global { Ok(device.features) } - pub fn device_limits( - &self, - device_id: DeviceId, - ) -> Result { - let hub = A::hub(self); + pub fn device_limits(&self, device_id: DeviceId) -> Result { + let hub = &self.hub; let device = 
hub .devices @@ -131,11 +116,11 @@ impl Global { Ok(device.limits.clone()) } - pub fn device_downlevel_properties( + pub fn device_downlevel_properties( &self, device_id: DeviceId, ) -> Result { - let hub = A::hub(self); + let hub = &self.hub; let device = hub .devices @@ -145,7 +130,7 @@ impl Global { Ok(device.downlevel.clone()) } - pub fn device_create_buffer( + pub fn device_create_buffer( &self, device_id: DeviceId, desc: &resource::BufferDescriptor, @@ -153,8 +138,8 @@ impl Global { ) -> (id::BufferId, Option) { profiling::scope!("Device::create_buffer"); - let hub = A::hub(self); - let fid = hub.buffers.prepare(id_in); + let hub = &self.hub; + let fid = hub.buffers.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -228,16 +213,20 @@ impl Global { /// [`device_create_buffer`]: Global::device_create_buffer /// [`usage`]: https://www.w3.org/TR/webgpu/#dom-gputexturedescriptor-usage /// [`wgpu_types::BufferUsages`]: wgt::BufferUsages - pub fn create_buffer_error(&self, id_in: Option) { - let hub = A::hub(self); - let fid = hub.buffers.prepare(id_in); + pub fn create_buffer_error(&self, backend: wgt::Backend, id_in: Option) { + let hub = &self.hub; + let fid = hub.buffers.prepare(backend, id_in); fid.assign_error(); } - pub fn create_render_bundle_error(&self, id_in: Option) { - let hub = A::hub(self); - let fid = hub.render_bundles.prepare(id_in); + pub fn create_render_bundle_error( + &self, + backend: wgt::Backend, + id_in: Option, + ) { + let hub = &self.hub; + let fid = hub.render_bundles.prepare(backend, id_in); fid.assign_error(); } @@ -245,148 +234,65 @@ impl Global { /// Assign `id_in` an error with the given `label`. /// /// See `create_buffer_error` for more context and explanation. - pub fn create_texture_error(&self, id_in: Option) { - let hub = A::hub(self); - let fid = hub.textures.prepare(id_in); + pub fn create_texture_error(&self, backend: wgt::Backend, id_in: Option) { + let hub = &self.hub; + let fid = hub.textures.prepare(backend, id_in); fid.assign_error(); } #[cfg(feature = "replay")] - pub fn device_wait_for_buffer( - &self, - device_id: DeviceId, - buffer_id: id::BufferId, - ) -> Result<(), WaitIdleError> { - let hub = A::hub(self); - - let last_submission = match hub.buffers.read().get(buffer_id) { - Ok(buffer) => buffer.submission_index(), - Err(_) => return Ok(()), - }; - - hub.devices - .get(device_id) - .map_err(|_| DeviceError::InvalidDeviceId)? 
- .wait_for_submit(last_submission) - } - - #[doc(hidden)] - pub fn device_set_buffer_sub_data( + pub fn device_set_buffer_data( &self, - device_id: DeviceId, buffer_id: id::BufferId, offset: BufferAddress, data: &[u8], ) -> BufferAccessResult { - profiling::scope!("Device::set_buffer_sub_data"); - - let hub = A::hub(self); - - let device = hub - .devices - .get(device_id) - .map_err(|_| DeviceError::InvalidDeviceId)?; + let hub = &self.hub; let buffer = hub .buffers .get(buffer_id) .map_err(|_| BufferAccessError::InvalidBufferId(buffer_id))?; - #[cfg(feature = "trace")] - if let Some(ref mut trace) = *device.trace.lock() { - let data_path = trace.make_binary("bin", data); - trace.add(trace::Action::WriteBuffer { - id: buffer_id, - data: data_path, - range: offset..offset + data.len() as BufferAddress, - queued: false, - }); - } + let device = &buffer.device; device.check_is_valid()?; buffer.check_usage(wgt::BufferUsages::MAP_WRITE)?; - //assert!(buffer isn't used by the GPU); - - let snatch_guard = device.snatchable_lock.read(); - let raw_buf = buffer.try_raw(&snatch_guard)?; - unsafe { - let mapping = device - .raw() - .map_buffer(raw_buf, offset..offset + data.len() as u64) - .map_err(DeviceError::from)?; - ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()); - if !mapping.is_coherent { - device - .raw() - .flush_mapped_ranges(raw_buf, iter::once(offset..offset + data.len() as u64)); - } - device - .raw() - .unmap_buffer(raw_buf) - .map_err(DeviceError::from)?; - } - - Ok(()) - } - #[doc(hidden)] - pub fn device_get_buffer_sub_data( - &self, - device_id: DeviceId, - buffer_id: id::BufferId, - offset: BufferAddress, - data: &mut [u8], - ) -> BufferAccessResult { - profiling::scope!("Device::get_buffer_sub_data"); + let last_submission = device + .lock_life() + .get_buffer_latest_submission_index(&buffer); - let hub = A::hub(self); - - let device = hub - .devices - .get(device_id) - .map_err(|_| DeviceError::InvalidDeviceId)?; - device.check_is_valid()?; + if let Some(last_submission) = last_submission { + device.wait_for_submit(last_submission)?; + } let snatch_guard = device.snatchable_lock.read(); - - let buffer = hub - .buffers - .get(buffer_id) - .map_err(|_| BufferAccessError::InvalidBufferId(buffer_id))?; - buffer.check_usage(wgt::BufferUsages::MAP_READ)?; - //assert!(buffer isn't used by the GPU); - let raw_buf = buffer.try_raw(&snatch_guard)?; unsafe { let mapping = device .raw() .map_buffer(raw_buf, offset..offset + data.len() as u64) .map_err(DeviceError::from)?; + std::ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()); if !mapping.is_coherent { - device.raw().invalidate_mapped_ranges( - raw_buf, - iter::once(offset..offset + data.len() as u64), - ); + #[allow(clippy::single_range_in_vec_init)] + device + .raw() + .flush_mapped_ranges(raw_buf, &[offset..offset + data.len() as u64]); } - ptr::copy_nonoverlapping(mapping.ptr.as_ptr(), data.as_mut_ptr(), data.len()); - device - .raw() - .unmap_buffer(raw_buf) - .map_err(DeviceError::from)?; + device.raw().unmap_buffer(raw_buf); } Ok(()) } - pub fn buffer_destroy( - &self, - buffer_id: id::BufferId, - ) -> Result<(), resource::DestroyError> { + pub fn buffer_destroy(&self, buffer_id: id::BufferId) -> Result<(), resource::DestroyError> { profiling::scope!("Buffer::destroy"); api_log!("Buffer::destroy {buffer_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; let buffer = hub .buffers @@ -406,11 +312,11 @@ impl Global { buffer.destroy() } - pub fn buffer_drop(&self, buffer_id: 
id::BufferId, wait: bool) { + pub fn buffer_drop(&self, buffer_id: id::BufferId) { profiling::scope!("Buffer::drop"); api_log!("Buffer::drop {buffer_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; let buffer = match hub.buffers.unregister(buffer_id) { Some(buffer) => buffer, @@ -428,17 +334,9 @@ impl Global { #[cfg(feature = "trace")] buffer_id, ); - - if wait { - let last_submit_index = buffer.submission_index(); - match buffer.device.wait_for_submit(last_submit_index) { - Ok(()) => (), - Err(e) => log::error!("Failed to wait for buffer {:?}: {}", buffer_id, e), - } - } } - pub fn device_create_texture( + pub fn device_create_texture( &self, device_id: DeviceId, desc: &resource::TextureDescriptor, @@ -446,9 +344,9 @@ impl Global { ) -> (id::TextureId, Option) { profiling::scope!("Device::create_texture"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.textures.prepare(id_in); + let fid = hub.textures.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -483,18 +381,18 @@ impl Global { /// - `hal_texture` must be created from `device_id` corresponding raw handle. /// - `hal_texture` must be created respecting `desc` /// - `hal_texture` must be initialized - pub unsafe fn create_texture_from_hal( + pub unsafe fn create_texture_from_hal( &self, - hal_texture: A::Texture, + hal_texture: Box, device_id: DeviceId, desc: &resource::TextureDescriptor, id_in: Option, ) -> (id::TextureId, Option) { profiling::scope!("Device::create_texture_from_hal"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.textures.prepare(id_in); + let fid = hub.textures.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -540,8 +438,8 @@ impl Global { ) -> (id::BufferId, Option) { profiling::scope!("Device::create_buffer"); - let hub = A::hub(self); - let fid = hub.buffers.prepare(id_in); + let hub = &self.hub; + let fid = hub.buffers.prepare(A::VARIANT, id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -556,7 +454,7 @@ impl Global { trace.add(trace::Action::CreateBuffer(fid.id(), desc.clone())); } - let buffer = device.create_buffer_from_hal(hal_buffer, desc); + let buffer = device.create_buffer_from_hal(Box::new(hal_buffer), desc); let id = fid.assign(buffer); api_log!("Device::create_buffer -> {id:?}"); @@ -570,14 +468,11 @@ impl Global { (id, Some(error)) } - pub fn texture_destroy( - &self, - texture_id: id::TextureId, - ) -> Result<(), resource::DestroyError> { + pub fn texture_destroy(&self, texture_id: id::TextureId) -> Result<(), resource::DestroyError> { profiling::scope!("Texture::destroy"); api_log!("Texture::destroy {texture_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; let texture = hub .textures @@ -592,29 +487,21 @@ impl Global { texture.destroy() } - pub fn texture_drop(&self, texture_id: id::TextureId, wait: bool) { + pub fn texture_drop(&self, texture_id: id::TextureId) { profiling::scope!("Texture::drop"); api_log!("Texture::drop {texture_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; - if let Some(texture) = hub.textures.unregister(texture_id) { + if let Some(_texture) = hub.textures.unregister(texture_id) { #[cfg(feature = "trace")] - if let Some(t) = texture.device.trace.lock().as_mut() { + if let Some(t) = _texture.device.trace.lock().as_mut() { t.add(trace::Action::DestroyTexture(texture_id)); } - - if wait { - let last_submit_index = texture.submission_index(); - match 
texture.device.wait_for_submit(last_submit_index) { - Ok(()) => (), - Err(e) => log::error!("Failed to wait for texture {texture_id:?}: {e}"), - } - } } } - pub fn texture_create_view( + pub fn texture_create_view( &self, texture_id: id::TextureId, desc: &resource::TextureViewDescriptor, @@ -622,9 +509,9 @@ impl Global { ) -> (id::TextureViewId, Option) { profiling::scope!("Texture::create_view"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.texture_views.prepare(id_in); + let fid = hub.texture_views.prepare(texture_id.backend(), id_in); let error = 'error: { let texture = match hub.textures.get(texture_id) { @@ -661,36 +548,25 @@ impl Global { (id, Some(error)) } - pub fn texture_view_drop( + pub fn texture_view_drop( &self, texture_view_id: id::TextureViewId, - wait: bool, ) -> Result<(), resource::TextureViewDestroyError> { profiling::scope!("TextureView::drop"); api_log!("TextureView::drop {texture_view_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; - if let Some(view) = hub.texture_views.unregister(texture_view_id) { + if let Some(_view) = hub.texture_views.unregister(texture_view_id) { #[cfg(feature = "trace")] - if let Some(t) = view.device.trace.lock().as_mut() { + if let Some(t) = _view.device.trace.lock().as_mut() { t.add(trace::Action::DestroyTextureView(texture_view_id)); } - - if wait { - let last_submit_index = view.submission_index(); - match view.device.wait_for_submit(last_submit_index) { - Ok(()) => (), - Err(e) => { - log::error!("Failed to wait for texture view {texture_view_id:?}: {e}") - } - } - } } Ok(()) } - pub fn device_create_sampler( + pub fn device_create_sampler( &self, device_id: DeviceId, desc: &resource::SamplerDescriptor, @@ -698,8 +574,8 @@ impl Global { ) -> (id::SamplerId, Option) { profiling::scope!("Device::create_sampler"); - let hub = A::hub(self); - let fid = hub.samplers.prepare(id_in); + let hub = &self.hub; + let fid = hub.samplers.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -727,11 +603,11 @@ impl Global { (id, Some(error)) } - pub fn sampler_drop(&self, sampler_id: id::SamplerId) { + pub fn sampler_drop(&self, sampler_id: id::SamplerId) { profiling::scope!("Sampler::drop"); api_log!("Sampler::drop {sampler_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(_sampler) = hub.samplers.unregister(sampler_id) { #[cfg(feature = "trace")] @@ -741,7 +617,7 @@ impl Global { } } - pub fn device_create_bind_group_layout( + pub fn device_create_bind_group_layout( &self, device_id: DeviceId, desc: &binding_model::BindGroupLayoutDescriptor, @@ -752,8 +628,8 @@ impl Global { ) { profiling::scope!("Device::create_bind_group_layout"); - let hub = A::hub(self); - let fid = hub.bind_group_layouts.prepare(id_in); + let hub = &self.hub; + let fid = hub.bind_group_layouts.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -782,9 +658,6 @@ impl Global { bgl.exclusive_pipeline .set(binding_model::ExclusivePipeline::None) .unwrap(); - - let bgl = Arc::new(bgl); - Ok(bgl) }); @@ -799,16 +672,16 @@ impl Global { return (id, None); }; - let fid = hub.bind_group_layouts.prepare(id_in); + let fid = hub.bind_group_layouts.prepare(device_id.backend(), id_in); let id = fid.assign_error(); (id, Some(error)) } - pub fn bind_group_layout_drop(&self, bind_group_layout_id: id::BindGroupLayoutId) { + pub fn bind_group_layout_drop(&self, bind_group_layout_id: id::BindGroupLayoutId) { 
profiling::scope!("BindGroupLayout::drop"); api_log!("BindGroupLayout::drop {bind_group_layout_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(_layout) = hub.bind_group_layouts.unregister(bind_group_layout_id) { #[cfg(feature = "trace")] @@ -818,7 +691,7 @@ impl Global { } } - pub fn device_create_pipeline_layout( + pub fn device_create_pipeline_layout( &self, device_id: DeviceId, desc: &binding_model::PipelineLayoutDescriptor, @@ -829,8 +702,8 @@ impl Global { ) { profiling::scope!("Device::create_pipeline_layout"); - let hub = A::hub(self); - let fid = hub.pipeline_layouts.prepare(id_in); + let hub = &self.hub; + let fid = hub.pipeline_layouts.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -873,7 +746,7 @@ impl Global { Err(e) => break 'error e, }; - let id = fid.assign(Arc::new(layout)); + let id = fid.assign(layout); api_log!("Device::create_pipeline_layout -> {id:?}"); return (id, None); }; @@ -882,11 +755,11 @@ impl Global { (id, Some(error)) } - pub fn pipeline_layout_drop(&self, pipeline_layout_id: id::PipelineLayoutId) { + pub fn pipeline_layout_drop(&self, pipeline_layout_id: id::PipelineLayoutId) { profiling::scope!("PipelineLayout::drop"); api_log!("PipelineLayout::drop {pipeline_layout_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(_layout) = hub.pipeline_layouts.unregister(pipeline_layout_id) { #[cfg(feature = "trace")] if let Some(t) = _layout.device.trace.lock().as_mut() { @@ -895,7 +768,7 @@ impl Global { } } - pub fn device_create_bind_group( + pub fn device_create_bind_group( &self, device_id: DeviceId, desc: &binding_model::BindGroupDescriptor, @@ -903,8 +776,8 @@ impl Global { ) -> (id::BindGroupId, Option) { profiling::scope!("Device::create_bind_group"); - let hub = A::hub(self); - let fid = hub.bind_groups.prepare(id_in); + let hub = &self.hub; + let fid = hub.bind_groups.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -922,13 +795,13 @@ impl Global { Err(..) => break 'error binding_model::CreateBindGroupError::InvalidLayout, }; - fn map_entry<'a, A: HalApi>( + fn map_entry<'a>( e: &BindGroupEntry<'a>, - buffer_storage: &Storage>, - sampler_storage: &Storage>, - texture_view_storage: &Storage>, - tlas_storage: &Storage>, - ) -> Result, binding_model::CreateBindGroupError> + buffer_storage: &Storage, + sampler_storage: &Storage, + texture_view_storage: &Storage, + tlas_storage: &Storage, + ) -> Result, binding_model::CreateBindGroupError> { let map_buffer = |bb: &BufferBinding| { buffer_storage @@ -1040,11 +913,11 @@ impl Global { (id, Some(error)) } - pub fn bind_group_drop(&self, bind_group_id: id::BindGroupId) { + pub fn bind_group_drop(&self, bind_group_id: id::BindGroupId) { profiling::scope!("BindGroup::drop"); api_log!("BindGroup::drop {bind_group_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(_bind_group) = hub.bind_groups.unregister(bind_group_id) { #[cfg(feature = "trace")] @@ -1068,7 +941,7 @@ impl Global { /// input. 
/// /// - pub fn device_create_shader_module( + pub fn device_create_shader_module( &self, device_id: DeviceId, desc: &pipeline::ShaderModuleDescriptor, @@ -1080,8 +953,8 @@ impl Global { ) { profiling::scope!("Device::create_shader_module"); - let hub = A::hub(self); - let fid = hub.shader_modules.prepare(id_in); + let hub = &self.hub; + let fid = hub.shader_modules.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -1126,7 +999,7 @@ impl Global { Err(e) => break 'error e, }; - let id = fid.assign(Arc::new(shader)); + let id = fid.assign(shader); api_log!("Device::create_shader_module -> {id:?}"); return (id, None); }; @@ -1143,7 +1016,7 @@ impl Global { /// /// This function passes SPIR-V binary to the backend as-is and can potentially result in a /// driver crash. - pub unsafe fn device_create_shader_module_spirv( + pub unsafe fn device_create_shader_module_spirv( &self, device_id: DeviceId, desc: &pipeline::ShaderModuleDescriptor, @@ -1155,8 +1028,8 @@ impl Global { ) { profiling::scope!("Device::create_shader_module"); - let hub = A::hub(self); - let fid = hub.shader_modules.prepare(id_in); + let hub = &self.hub; + let fid = hub.shader_modules.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -1167,7 +1040,7 @@ impl Global { #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { let data = trace.make_binary("spv", unsafe { - std::slice::from_raw_parts(source.as_ptr() as *const u8, source.len() * 4) + std::slice::from_raw_parts(source.as_ptr().cast::(), source.len() * 4) }); trace.add(trace::Action::CreateShaderModule { id: fid.id(), @@ -1180,7 +1053,7 @@ impl Global { Ok(shader) => shader, Err(e) => break 'error e, }; - let id = fid.assign(Arc::new(shader)); + let id = fid.assign(shader); api_log!("Device::create_shader_module_spirv -> {id:?}"); return (id, None); }; @@ -1191,11 +1064,11 @@ impl Global { (id, Some(error)) } - pub fn shader_module_drop(&self, shader_module_id: id::ShaderModuleId) { + pub fn shader_module_drop(&self, shader_module_id: id::ShaderModuleId) { profiling::scope!("ShaderModule::drop"); api_log!("ShaderModule::drop {shader_module_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(shader_module) = hub.shader_modules.unregister(shader_module_id) { #[cfg(feature = "trace")] @@ -1206,7 +1079,7 @@ impl Global { } } - pub fn device_create_command_encoder( + pub fn device_create_command_encoder( &self, device_id: DeviceId, desc: &wgt::CommandEncoderDescriptor(command_buffer_id.into_command_encoder_id()) + self.command_encoder_drop(command_buffer_id.into_command_encoder_id()) } pub fn device_create_render_bundle_encoder( @@ -1276,7 +1150,7 @@ impl Global { (Box::into_raw(Box::new(encoder)), error) } - pub fn render_bundle_encoder_finish( + pub fn render_bundle_encoder_finish( &self, bundle_encoder: command::RenderBundleEncoder, desc: &command::RenderBundleDescriptor, @@ -1284,9 +1158,11 @@ impl Global { ) -> (id::RenderBundleId, Option) { profiling::scope!("RenderBundleEncoder::finish"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.render_bundles.prepare(id_in); + let fid = hub + .render_bundles + .prepare(bundle_encoder.parent().backend(), id_in); let error = 'error: { let device = match hub.devices.get(bundle_encoder.parent()) { @@ -1327,11 +1203,11 @@ impl Global { (id, Some(error)) } - pub fn render_bundle_drop(&self, render_bundle_id: id::RenderBundleId) { + pub fn 
render_bundle_drop(&self, render_bundle_id: id::RenderBundleId) { profiling::scope!("RenderBundle::drop"); api_log!("RenderBundle::drop {render_bundle_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(_bundle) = hub.render_bundles.unregister(render_bundle_id) { #[cfg(feature = "trace")] @@ -1341,7 +1217,7 @@ impl Global { } } - pub fn device_create_query_set( + pub fn device_create_query_set( &self, device_id: DeviceId, desc: &resource::QuerySetDescriptor, @@ -1349,8 +1225,8 @@ impl Global { ) -> (id::QuerySetId, Option) { profiling::scope!("Device::create_query_set"); - let hub = A::hub(self); - let fid = hub.query_sets.prepare(id_in); + let hub = &self.hub; + let fid = hub.query_sets.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -1381,11 +1257,11 @@ impl Global { (id, Some(error)) } - pub fn query_set_drop(&self, query_set_id: id::QuerySetId) { + pub fn query_set_drop(&self, query_set_id: id::QuerySetId) { profiling::scope!("QuerySet::drop"); api_log!("QuerySet::drop {query_set_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(_query_set) = hub.query_sets.unregister(query_set_id) { #[cfg(feature = "trace")] @@ -1395,7 +1271,7 @@ impl Global { } } - pub fn device_create_render_pipeline( + pub fn device_create_render_pipeline( &self, device_id: DeviceId, desc: &pipeline::RenderPipelineDescriptor, @@ -1407,14 +1283,20 @@ impl Global { ) { profiling::scope!("Device::create_render_pipeline"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.render_pipelines.prepare(id_in); - let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub)); + let missing_implicit_pipeline_ids = + desc.layout.is_none() && id_in.is_some() && implicit_pipeline_ids.is_none(); - let is_auto_layout = desc.layout.is_none(); + let fid = hub.render_pipelines.prepare(device_id.backend(), id_in); + let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub)); let error = 'error: { + if missing_implicit_pipeline_ids { + // TODO: categorize this error as API misuse + break 'error pipeline::ImplicitLayoutError::MissingImplicitPipelineIds.into(); + } + let device = match hub.devices.get(device_id) { Ok(device) => device, Err(_) => break 'error DeviceError::InvalidDeviceId.into(), @@ -1475,7 +1357,6 @@ impl Global { .vertex .stage .zero_initialize_workgroup_memory, - vertex_pulling_transform: desc.vertex.stage.vertex_pulling_transform, }; ResolvedVertexState { stage, @@ -1502,7 +1383,6 @@ impl Global { .vertex .stage .zero_initialize_workgroup_memory, - vertex_pulling_transform: state.stage.vertex_pulling_transform, }; Some(ResolvedFragmentState { stage, @@ -1529,29 +1409,33 @@ impl Global { Err(e) => break 'error e, }; - if is_auto_layout { - // TODO: categorize the errors below as API misuse - let ids = if let Some(ids) = implicit_context.as_ref() { - let group_count = pipeline.layout.bind_group_layouts.len(); - if ids.group_ids.len() < group_count { - log::error!( - "Not enough bind group IDs ({}) specified for the implicit layout ({})", - ids.group_ids.len(), - group_count - ); - break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _) - .into(); - } - ids - } else { - break 'error pipeline::ImplicitLayoutError::MissingIds(0).into(); - }; + if let Some(ids) = implicit_context.as_ref() { + let group_count = pipeline.layout.bind_group_layouts.len(); + if ids.group_ids.len() < group_count { + log::error!( + "Not enough bind group IDs ({}) specified for the implicit layout 
({})", + ids.group_ids.len(), + group_count + ); + // TODO: categorize this error as API misuse + break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _) + .into(); + } let mut pipeline_layout_guard = hub.pipeline_layouts.write(); let mut bgl_guard = hub.bind_group_layouts.write(); pipeline_layout_guard.insert(ids.root_id, pipeline.layout.clone()); - let group_ids = &mut ids.group_ids.iter(); - for (bgl_id, bgl) in group_ids.zip(pipeline.layout.bind_group_layouts.iter()) { + let mut group_ids = ids.group_ids.iter(); + // NOTE: If the first iterator is longer than the second, the `.zip()` impl will still advance + // the first iterator before realizing that the second iterator has finished. + // The `pipeline.layout.bind_group_layouts` iterator will always be shorter than `ids.group_ids`, + // so using it as the first iterator for `.zip()` will work properly. + for (bgl, bgl_id) in pipeline + .layout + .bind_group_layouts + .iter() + .zip(&mut group_ids) + { bgl_guard.insert(*bgl_id, bgl.clone()); } for bgl_id in group_ids { @@ -1567,16 +1451,14 @@ impl Global { let id = fid.assign_error(); - if is_auto_layout { - // We also need to assign errors to the implicit pipeline layout and the - // implicit bind group layouts. - if let Some(ids) = implicit_context { - let mut pipeline_layout_guard = hub.pipeline_layouts.write(); - let mut bgl_guard = hub.bind_group_layouts.write(); - pipeline_layout_guard.insert_error(ids.root_id); - for bgl_id in ids.group_ids { - bgl_guard.insert_error(bgl_id); - } + // We also need to assign errors to the implicit pipeline layout and the + // implicit bind group layouts. + if let Some(ids) = implicit_context { + let mut pipeline_layout_guard = hub.pipeline_layouts.write(); + let mut bgl_guard = hub.bind_group_layouts.write(); + pipeline_layout_guard.insert_error(ids.root_id); + for bgl_id in ids.group_ids { + bgl_guard.insert_error(bgl_id); } } @@ -1587,7 +1469,7 @@ impl Global { /// Get an ID of one of the bind group layouts. The ID adds a refcount, /// which needs to be released by calling `bind_group_layout_drop`.
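The `.zip()` ordering note in the hunk above is subtle. As a minimal standalone sketch (plain Rust with made-up values, not wgpu code): zipping the shorter iterator first consumes exactly as many IDs as there are layouts, leaving the remainder intact for the error-marking loop.

```rust
fn main() {
    // Stand-ins: more pre-reserved IDs than bind group layouts, as in the
    // implicit-layout path above.
    let ids = [10u32, 11, 12, 13]; // `ids.group_ids`
    let layouts = ["bgl_a", "bgl_b"]; // `pipeline.layout.bind_group_layouts`

    let mut group_ids = ids.iter();

    // Shorter iterator first: `zip` stops as soon as `layouts` is exhausted,
    // without pulling (and losing) an extra element from `group_ids`.
    for (layout, id) in layouts.iter().zip(&mut group_ids) {
        println!("insert {layout} at id {id}");
    }

    // The remaining IDs are still available and get marked as errors.
    for id in group_ids {
        println!("insert error at id {id}");
    }
}
```

Reversing the zip order would silently drop one leftover ID per run, which is exactly the hazard the NOTE guards against.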
- pub fn render_pipeline_get_bind_group_layout( + pub fn render_pipeline_get_bind_group_layout( &self, pipeline_id: id::RenderPipelineId, index: u32, @@ -1596,7 +1478,7 @@ impl Global { id::BindGroupLayoutId, Option, ) { - let hub = A::hub(self); + let hub = &self.hub; let error = 'error: { let pipeline = match hub.render_pipelines.get(pipeline_id) { @@ -1604,7 +1486,10 @@ impl Global { Err(_) => break 'error binding_model::GetBindGroupLayoutError::InvalidPipeline, }; let id = match pipeline.layout.bind_group_layouts.get(index as usize) { - Some(bg) => hub.bind_group_layouts.prepare(id_in).assign(bg.clone()), + Some(bg) => hub + .bind_group_layouts + .prepare(pipeline_id.backend(), id_in) + .assign(bg.clone()), None => { break 'error binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index) } @@ -1612,15 +1497,18 @@ impl Global { return (id, None); }; - let id = hub.bind_group_layouts.prepare(id_in).assign_error(); + let id = hub + .bind_group_layouts + .prepare(pipeline_id.backend(), id_in) + .assign_error(); (id, Some(error)) } - pub fn render_pipeline_drop(&self, render_pipeline_id: id::RenderPipelineId) { + pub fn render_pipeline_drop(&self, render_pipeline_id: id::RenderPipelineId) { profiling::scope!("RenderPipeline::drop"); api_log!("RenderPipeline::drop {render_pipeline_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(_pipeline) = hub.render_pipelines.unregister(render_pipeline_id) { #[cfg(feature = "trace")] @@ -1630,7 +1518,7 @@ impl Global { } } - pub fn device_create_compute_pipeline( + pub fn device_create_compute_pipeline( &self, device_id: DeviceId, desc: &pipeline::ComputePipelineDescriptor, @@ -1642,14 +1530,20 @@ impl Global { ) { profiling::scope!("Device::create_compute_pipeline"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.compute_pipelines.prepare(id_in); - let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub)); + let missing_implicit_pipeline_ids = + desc.layout.is_none() && id_in.is_some() && implicit_pipeline_ids.is_none(); - let is_auto_layout = desc.layout.is_none(); + let fid = hub.compute_pipelines.prepare(device_id.backend(), id_in); + let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub)); let error = 'error: { + if missing_implicit_pipeline_ids { + // TODO: categorize this error as API misuse + break 'error pipeline::ImplicitLayoutError::MissingImplicitPipelineIds.into(); + } + let device = match hub.devices.get(device_id) { Ok(device) => device, Err(_) => break 'error DeviceError::InvalidDeviceId.into(), @@ -1703,7 +1597,6 @@ impl Global { entry_point: desc.stage.entry_point.clone(), constants: desc.stage.constants.clone(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, - vertex_pulling_transform: desc.stage.vertex_pulling_transform, }; let desc = ResolvedComputePipelineDescriptor { @@ -1718,29 +1611,33 @@ impl Global { Err(e) => break 'error e, }; - if is_auto_layout { - // TODO: categorize the errors below as API misuse - let ids = if let Some(ids) = implicit_context.as_ref() { - let group_count = pipeline.layout.bind_group_layouts.len(); - if ids.group_ids.len() < group_count { - log::error!( - "Not enough bind group IDs ({}) specified for the implicit layout ({})", - ids.group_ids.len(), - group_count - ); - break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _) - .into(); - } - ids - } else { - break 'error pipeline::ImplicitLayoutError::MissingIds(0).into(); - }; + if let Some(ids) = implicit_context.as_ref() { + let 
group_count = pipeline.layout.bind_group_layouts.len(); + if ids.group_ids.len() < group_count { + log::error!( + "Not enough bind group IDs ({}) specified for the implicit layout ({})", + ids.group_ids.len(), + group_count + ); + // TODO: categorize this error as API misuse + break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _) + .into(); + } let mut pipeline_layout_guard = hub.pipeline_layouts.write(); let mut bgl_guard = hub.bind_group_layouts.write(); pipeline_layout_guard.insert(ids.root_id, pipeline.layout.clone()); - let group_ids = &mut ids.group_ids.iter(); - for (bgl_id, bgl) in group_ids.zip(pipeline.layout.bind_group_layouts.iter()) { + let mut group_ids = ids.group_ids.iter(); + // NOTE: If the first iterator is longer than the second, the `.zip()` impl will still advance + // the first iterator before realizing that the second iterator has finished. + // The `pipeline.layout.bind_group_layouts` iterator will always be shorter than `ids.group_ids`, + // so using it as the first iterator for `.zip()` will work properly. + for (bgl, bgl_id) in pipeline + .layout + .bind_group_layouts + .iter() + .zip(&mut group_ids) + { bgl_guard.insert(*bgl_id, bgl.clone()); } for bgl_id in group_ids { @@ -1756,16 +1653,14 @@ impl Global { let id = fid.assign_error(); - if is_auto_layout { - // We also need to assign errors to the implicit pipeline layout and the - // implicit bind group layouts. - if let Some(ids) = implicit_context { - let mut pipeline_layout_guard = hub.pipeline_layouts.write(); - let mut bgl_guard = hub.bind_group_layouts.write(); - pipeline_layout_guard.insert_error(ids.root_id); - for bgl_id in ids.group_ids { - bgl_guard.insert_error(bgl_id); - } + // We also need to assign errors to the implicit pipeline layout and the + // implicit bind group layouts. + if let Some(ids) = implicit_context { + let mut pipeline_layout_guard = hub.pipeline_layouts.write(); + let mut bgl_guard = hub.bind_group_layouts.write(); + pipeline_layout_guard.insert_error(ids.root_id); + for bgl_id in ids.group_ids { + bgl_guard.insert_error(bgl_id); } } @@ -1774,7 +1669,7 @@ impl Global { /// Get an ID of one of the bind group layouts. The ID adds a refcount, /// which needs to be released by calling `bind_group_layout_drop`.
- pub fn compute_pipeline_get_bind_group_layout( + pub fn compute_pipeline_get_bind_group_layout( &self, pipeline_id: id::ComputePipelineId, index: u32, @@ -1783,7 +1678,7 @@ impl Global { id::BindGroupLayoutId, Option, ) { - let hub = A::hub(self); + let hub = &self.hub; let error = 'error: { let pipeline = match hub.compute_pipelines.get(pipeline_id) { @@ -1792,7 +1687,10 @@ impl Global { }; let id = match pipeline.layout.bind_group_layouts.get(index as usize) { - Some(bg) => hub.bind_group_layouts.prepare(id_in).assign(bg.clone()), + Some(bg) => hub + .bind_group_layouts + .prepare(pipeline_id.backend(), id_in) + .assign(bg.clone()), None => { break 'error binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index) } @@ -1801,15 +1699,18 @@ impl Global { return (id, None); }; - let id = hub.bind_group_layouts.prepare(id_in).assign_error(); + let id = hub + .bind_group_layouts + .prepare(pipeline_id.backend(), id_in) + .assign_error(); (id, Some(error)) } - pub fn compute_pipeline_drop(&self, compute_pipeline_id: id::ComputePipelineId) { + pub fn compute_pipeline_drop(&self, compute_pipeline_id: id::ComputePipelineId) { profiling::scope!("ComputePipeline::drop"); api_log!("ComputePipeline::drop {compute_pipeline_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(_pipeline) = hub.compute_pipelines.unregister(compute_pipeline_id) { #[cfg(feature = "trace")] @@ -1822,7 +1723,7 @@ impl Global { /// # Safety /// The `data` argument of `desc` must have been returned by /// [Self::pipeline_cache_get_data] for the same adapter - pub unsafe fn device_create_pipeline_cache( + pub unsafe fn device_create_pipeline_cache( &self, device_id: DeviceId, desc: &pipeline::PipelineCacheDescriptor<'_>, @@ -1833,9 +1734,9 @@ impl Global { ) { profiling::scope!("Device::create_pipeline_cache"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.pipeline_caches.prepare(id_in); + let fid = hub.pipeline_caches.prepare(device_id.backend(), id_in); let error: pipeline::CreatePipelineCacheError = 'error: { let device = match hub.devices.get(device_id) { Ok(device) => device, @@ -1854,7 +1755,7 @@ impl Global { let cache = unsafe { device.create_pipeline_cache(desc) }; match cache { Ok(cache) => { - let id = fid.assign(Arc::new(cache)); + let id = fid.assign(cache); api_log!("Device::create_pipeline_cache -> {id:?}"); return (id, None); } @@ -1867,11 +1768,11 @@ impl Global { (id, Some(error)) } - pub fn pipeline_cache_drop(&self, pipeline_cache_id: id::PipelineCacheId) { + pub fn pipeline_cache_drop(&self, pipeline_cache_id: id::PipelineCacheId) { profiling::scope!("PipelineCache::drop"); api_log!("PipelineCache::drop {pipeline_cache_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(cache) = hub.pipeline_caches.unregister(pipeline_cache_id) { #[cfg(feature = "trace")] @@ -1882,13 +1783,12 @@ impl Global { } } - pub fn surface_configure( + pub fn surface_configure( &self, surface_id: SurfaceId, device_id: DeviceId, config: &wgt::SurfaceConfiguration>, ) -> Option { - use hal::Surface as _; use present::ConfigureSurfaceError as E; profiling::scope!("surface_configure"); @@ -1993,7 +1893,10 @@ impl Global { config.composite_alpha_mode = new_alpha_mode; } if !caps.usage.contains(config.usage) { - return Err(E::UnsupportedUsage); + return Err(E::UnsupportedUsage { + requested: config.usage, + available: caps.usage, + }); } if width == 0 || height == 0 { return Err(E::ZeroArea); @@ -2007,7 +1910,7 @@ impl Global { // User callbacks must not be called while we are 
holding locks. let user_callbacks; { - let hub = A::hub(self); + let hub = &self.hub; let surface_guard = self.surfaces.read(); let device = match hub.devices.get(device_id) { @@ -2110,11 +2013,8 @@ impl Global { // // https://github.com/gfx-rs/wgpu/issues/4105 - match unsafe { - A::surface_as_hal(surface) - .unwrap() - .configure(device.raw(), &hal_config) - } { + let surface_raw = surface.raw(device_id.backend()).unwrap(); + match unsafe { surface_raw.configure(device.raw(), &hal_config) } { Ok(()) => (), Err(error) => { break 'error match error { @@ -2132,7 +2032,7 @@ impl Global { let mut presentation = surface.presentation.lock(); *presentation = Some(present::Presentation { - device: super::any_device::AnyDevice::new(device), + device, config: config.clone(), acquired_texture: None, }); @@ -2148,28 +2048,19 @@ impl Global { /// Check `device_id` for freeable resources and completed buffer mappings. /// /// Return `queue_empty` indicating whether there are more queue submissions still in flight. - pub fn device_poll( + pub fn device_poll( &self, device_id: DeviceId, - maintain: wgt::Maintain, + maintain: wgt::Maintain, ) -> Result { api_log!("Device::poll {maintain:?}"); - let hub = A::hub(self); + let hub = &self.hub; let device = hub .devices .get(device_id) .map_err(|_| DeviceError::InvalidDeviceId)?; - if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain { - if submission_index.queue_id != device_id.into_queue_id() { - return Err(WaitIdleError::WrongSubmissionIndex( - submission_index.queue_id, - device_id, - )); - } - } - let DevicePoll { closures, queue_empty, @@ -2180,9 +2071,9 @@ impl Global { Ok(queue_empty) } - fn poll_single_device( - device: &crate::device::Device, - maintain: wgt::Maintain, + fn poll_single_device( + device: &crate::device::Device, + maintain: wgt::Maintain, ) -> Result { let snatch_guard = device.snatchable_lock.read(); let fence = device.fence.read(); @@ -2198,25 +2089,26 @@ impl Global { }) } - /// Poll all devices belonging to the backend `A`. + /// Poll all devices belonging to the specified backend. /// /// If `force_wait` is true, block until all buffer mappings are done. /// /// Return `all_queue_empty` indicating whether there are more queue /// submissions still in flight. 
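The hunks below replace a polling function that was generic over a `wgpu-hal` backend with one that takes the backend as a runtime value, matching the PR's overall move from monomorphization to dynamic dispatch. A minimal sketch of that pattern, using hypothetical `Backend`/`Hub`/`Device` types rather than the real wgpu ones:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Backend {
    Vulkan,
    Metal,
    Dx12,
    Gl,
}

struct Device {
    backend: Backend,
    queue_empty: bool,
}

struct Hub {
    devices: Vec<Device>,
}

impl Hub {
    // Before: `fn poll_all_devices_of_api<A: HalApi>(&self, ...)` was compiled
    // once per enabled backend. After: one function, selected per call with a
    // runtime value, so only one copy ends up in the binary.
    fn poll_all_devices_of_api(&self, backend: Backend) -> bool {
        let mut all_queue_empty = true;
        for device in self.devices.iter().filter(|d| d.backend == backend) {
            all_queue_empty &= device.queue_empty;
        }
        all_queue_empty
    }
}

fn main() {
    let hub = Hub {
        devices: vec![
            Device { backend: Backend::Vulkan, queue_empty: true },
            Device { backend: Backend::Gl, queue_empty: false },
        ],
    };
    // The call sites in the diff below do exactly this, once per enabled backend.
    for backend in [Backend::Vulkan, Backend::Metal, Backend::Dx12, Backend::Gl] {
        println!("{backend:?}: all queues empty = {}", hub.poll_all_devices_of_api(backend));
    }
}
```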
- fn poll_all_devices_of_api( + fn poll_all_devices_of_api( &self, + backend: wgt::Backend, force_wait: bool, closures: &mut UserClosures, ) -> Result { profiling::scope!("poll_device"); - let hub = A::hub(self); + let hub = &self.hub; let mut all_queue_empty = true; { let device_guard = hub.devices.read(); - for (_id, device) in device_guard.iter(A::VARIANT) { + for (_id, device) in device_guard.iter(backend) { let maintain = if force_wait { wgt::Maintain::Wait } else { @@ -2251,22 +2143,22 @@ impl Global { #[cfg(vulkan)] { all_queue_empty &= - self.poll_all_devices_of_api::(force_wait, &mut closures)?; + self.poll_all_devices_of_api(wgt::Backend::Vulkan, force_wait, &mut closures)?; } #[cfg(metal)] { all_queue_empty &= - self.poll_all_devices_of_api::(force_wait, &mut closures)?; + self.poll_all_devices_of_api(wgt::Backend::Metal, force_wait, &mut closures)?; } #[cfg(dx12)] { all_queue_empty &= - self.poll_all_devices_of_api::(force_wait, &mut closures)?; + self.poll_all_devices_of_api(wgt::Backend::Dx12, force_wait, &mut closures)?; } #[cfg(gles)] { all_queue_empty &= - self.poll_all_devices_of_api::(force_wait, &mut closures)?; + self.poll_all_devices_of_api(wgt::Backend::Gl, force_wait, &mut closures)?; } closures.fire(); @@ -2274,10 +2166,10 @@ impl Global { Ok(all_queue_empty) } - pub fn device_start_capture(&self, id: DeviceId) { + pub fn device_start_capture(&self, id: DeviceId) { api_log!("Device::start_capture"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(id) { if !device.is_valid() { @@ -2287,10 +2179,10 @@ impl Global { } } - pub fn device_stop_capture(&self, id: DeviceId) { + pub fn device_stop_capture(&self, id: DeviceId) { api_log!("Device::stop_capture"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(id) { if !device.is_valid() { @@ -2303,47 +2195,45 @@ impl Global { // This is a test-only function to force the device into an // invalid state by inserting an error value in its place in // the registry. - pub fn device_make_invalid(&self, device_id: DeviceId) { - let hub = A::hub(self); + pub fn device_make_invalid(&self, device_id: DeviceId) { + let hub = &self.hub; hub.devices.force_replace_with_error(device_id); } - pub fn pipeline_cache_get_data(&self, id: id::PipelineCacheId) -> Option> { + pub fn pipeline_cache_get_data(&self, id: id::PipelineCacheId) -> Option> { use crate::pipeline_cache; api_log!("PipelineCache::get_data"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(cache) = hub.pipeline_caches.get(id) { // TODO: Is this check needed? 
if !cache.device.is_valid() { return None; } - if let Some(raw_cache) = cache.raw.as_ref() { - let mut vec = unsafe { cache.device.raw().pipeline_cache_get_data(raw_cache) }?; - let validation_key = cache.device.raw().pipeline_cache_validation_key()?; - - let mut header_contents = [0; pipeline_cache::HEADER_LENGTH]; - pipeline_cache::add_cache_header( - &mut header_contents, - &vec, - &cache.device.adapter.raw.info, - validation_key, - ); + let mut vec = unsafe { cache.device.raw().pipeline_cache_get_data(cache.raw()) }?; + let validation_key = cache.device.raw().pipeline_cache_validation_key()?; + + let mut header_contents = [0; pipeline_cache::HEADER_LENGTH]; + pipeline_cache::add_cache_header( + &mut header_contents, + &vec, + &cache.device.adapter.raw.info, + validation_key, + ); - let deleted = vec.splice(..0, header_contents).collect::>(); - debug_assert!(deleted.is_empty()); + let deleted = vec.splice(..0, header_contents).collect::>(); + debug_assert!(deleted.is_empty()); - return Some(vec); - } + return Some(vec); } None } - pub fn device_drop(&self, device_id: DeviceId) { + pub fn device_drop(&self, device_id: DeviceId) { profiling::scope!("Device::drop"); api_log!("Device::drop {device_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(device) = hub.devices.unregister(device_id) { let device_lost_closure = device.lock_life().device_lost_closure.take(); if let Some(closure) = device_lost_closure { @@ -2355,11 +2245,7 @@ impl Global { // need to wait for submissions or triage them. We know we were // just polled, so `life_tracker.free_resources` is empty. debug_assert!(device.lock_life().queue_empty()); - { - let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); - pending_writes.deactivate(); - } + device.pending_writes.lock().deactivate(); drop(device); } @@ -2367,12 +2253,12 @@ impl Global { // This closure will be called exactly once during "lose the device", // or when it is replaced. 
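For reference, the `Vec::splice(..0, …)` idiom used by `pipeline_cache_get_data` above prepends the header in place and removes nothing, since the spliced range `..0` is empty. A tiny self-contained sketch with stand-in bytes:

```rust
fn main() {
    // Stand-ins for the raw pipeline-cache bytes and the validation header
    // assembled in the diff above.
    let mut data: Vec<u8> = vec![0xAA, 0xBB, 0xCC];
    let header: [u8; 4] = [b'W', b'G', b'P', b'U'];

    // Splicing over the empty range `..0` inserts the header at the front;
    // the iterator of removed elements is therefore empty.
    let deleted = data.splice(..0, header).collect::<Vec<_>>();
    debug_assert!(deleted.is_empty());

    assert_eq!(data, [b'W', b'G', b'P', b'U', 0xAA, 0xBB, 0xCC]);
    println!("{data:?}");
}
```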
- pub fn device_set_device_lost_closure( + pub fn device_set_device_lost_closure( &self, device_id: DeviceId, device_lost_closure: DeviceLostClosure, ) { - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(device_id) { let mut life_tracker = device.lock_life(); @@ -2392,10 +2278,10 @@ impl Global { } } - pub fn device_destroy(&self, device_id: DeviceId) { + pub fn device_destroy(&self, device_id: DeviceId) { api_log!("Device::destroy {device_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(device_id) { // Follow the steps at @@ -2418,21 +2304,18 @@ impl Global { } } - pub fn device_mark_lost(&self, device_id: DeviceId, message: &str) { + pub fn device_mark_lost(&self, device_id: DeviceId, message: &str) { api_log!("Device::mark_lost {device_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(device_id) { device.lose(message); } } - pub fn device_get_internal_counters( - &self, - device_id: DeviceId, - ) -> wgt::InternalCounters { - let hub = A::hub(self); + pub fn device_get_internal_counters(&self, device_id: DeviceId) -> wgt::InternalCounters { + let hub = &self.hub; if let Ok(device) = hub.devices.get(device_id) { wgt::InternalCounters { hal: device.get_hal_counters(), @@ -2443,17 +2326,28 @@ impl Global { } } - pub fn queue_drop(&self, queue_id: QueueId) { + pub fn device_generate_allocator_report( + &self, + device_id: DeviceId, + ) -> Option { + let hub = &self.hub; + hub.devices + .get(device_id) + .ok() + .and_then(|device| device.generate_allocator_report()) + } + + pub fn queue_drop(&self, queue_id: QueueId) { profiling::scope!("Queue::drop"); api_log!("Queue::drop {queue_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(queue) = hub.queues.unregister(queue_id) { drop(queue); } } - pub fn buffer_map_async( + pub fn buffer_map_async( &self, buffer_id: id::BufferId, offset: BufferAddress, @@ -2463,7 +2357,7 @@ impl Global { profiling::scope!("Buffer::map_async"); api_log!("Buffer::map_async {buffer_id:?} offset {offset:?} size {size:?} op: {op:?}"); - let hub = A::hub(self); + let hub = &self.hub; let op_and_err = 'error: { let buffer = match hub.buffers.get(buffer_id) { @@ -2488,7 +2382,7 @@ impl Global { Ok(()) } - pub fn buffer_get_mapped_range( + pub fn buffer_get_mapped_range( &self, buffer_id: id::BufferId, offset: BufferAddress, @@ -2497,7 +2391,7 @@ impl Global { profiling::scope!("Buffer::get_mapped_range"); api_log!("Buffer::get_mapped_range {buffer_id:?} offset {offset:?} size {size:?}"); - let hub = A::hub(self); + let hub = &self.hub; let buffer = hub .buffers @@ -2525,7 +2419,7 @@ impl Global { } let map_state = &*buffer.map_state.lock(); match *map_state { - resource::BufferMapState::Init { ref ptr, .. } => { + resource::BufferMapState::Init { ref staging_buffer } => { // offset (u64) can not be < 0, so no need to validate the lower bound if offset + range_size > buffer.size { return Err(BufferAccessError::OutOfBoundsOverrun { @@ -2533,15 +2427,14 @@ impl Global { max: buffer.size, }); } - unsafe { - Ok(( - NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)), - range_size, - )) - } + let ptr = unsafe { staging_buffer.ptr() }; + let ptr = unsafe { NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)) }; + Ok((ptr, range_size)) } resource::BufferMapState::Active { - ref ptr, ref range, .. + ref mapping, + ref range, + .. 
} => { if offset < range.start { return Err(BufferAccessError::OutOfBoundsUnderrun { @@ -2560,7 +2453,7 @@ impl Global { let relative_offset = (offset - range.start) as isize; unsafe { Ok(( - NonNull::new_unchecked(ptr.as_ptr().offset(relative_offset)), + NonNull::new_unchecked(mapping.ptr.as_ptr().offset(relative_offset)), range_size, )) } @@ -2570,11 +2463,11 @@ impl Global { } } } - pub fn buffer_unmap(&self, buffer_id: id::BufferId) -> BufferAccessResult { + pub fn buffer_unmap(&self, buffer_id: id::BufferId) -> BufferAccessResult { profiling::scope!("unmap", "Buffer"); api_log!("Buffer::unmap {buffer_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; let buffer = hub .buffers diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 4ef57e4d16..588f962000 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -3,14 +3,13 @@ use crate::{ queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource}, DeviceError, DeviceLostClosure, }, - hal_api::HalApi, - id, - resource::{self, Buffer, Labeled, Trackable}, + resource::{self, Buffer, Texture, Trackable}, snatch::SnatchGuard, SubmissionIndex, }; use smallvec::SmallVec; +use crate::resource::{Blas, Tlas}; use std::sync::Arc; use thiserror::Error; @@ -23,7 +22,7 @@ use thiserror::Error; /// /// [`wgpu_hal`]: hal /// [`ResourceInfo::submission_index`]: crate::resource::ResourceInfo -struct ActiveSubmission { +struct ActiveSubmission { /// The index of the submission we track. /// /// When `Device::fence`'s value is greater than or equal to this, our queue @@ -31,10 +30,10 @@ struct ActiveSubmission { index: SubmissionIndex, /// Temporary resources to be freed once this queue submission has completed. - temp_resources: Vec>, + temp_resources: Vec, /// Buffers to be mapped once this submission has completed. - mapped: Vec>>, + mapped: Vec>, /// Command buffers used by this submission, and the encoder that owns them. /// @@ -48,20 +47,110 @@ struct ActiveSubmission { /// the command encoder is recycled. /// /// [`wgpu_hal::Queue::submit`]: hal::Queue::submit - encoders: Vec>, + encoders: Vec, /// List of queue "on_submitted_work_done" closures to be called once this /// submission has completed. work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>, } +impl ActiveSubmission { + /// Returns true if this submission contains the given buffer. + /// + /// This only uses constant-time operations. + pub fn contains_buffer(&self, buffer: &Buffer) -> bool { + for encoder in &self.encoders { + // The ownership location of buffers depends on where the command encoder + // came from. If it is the staging command encoder on the queue, it is + // in the pending buffer list. If it came from a user command encoder, + // it is in the tracker. + + if encoder.trackers.buffers.contains(buffer) { + return true; + } + + if encoder + .pending_buffers + .contains_key(&buffer.tracker_index()) + { + return true; + } + } + + false + } + + /// Returns true if this submission contains the given texture. + /// + /// This only uses constant-time operations. + pub fn contains_texture(&self, texture: &Texture) -> bool { + for encoder in &self.encoders { + // The ownership location of textures depends on where the command encoder + // came from. If it is the staging command encoder on the queue, it is + // in the pending buffer list. If it came from a user command encoder, + // it is in the tracker. 
+ + if encoder.trackers.textures.contains(texture) { + return true; + } + + if encoder + .pending_textures + .contains_key(&texture.tracker_index()) + { + return true; + } + } + + false + } + + pub fn contains_blas(&self, blas: &Blas) -> bool { + for encoder in &self.encoders { + // The ownership location of blas's depends on where the command encoder + // came from. If it is the staging command encoder on the queue, it is + // in the pending buffer list. If it came from a user command encoder, + // it is in the tracker. + + if encoder.trackers.blas_s.contains(blas) { + return true; + } + + if encoder.pending_buffers.contains_key(&blas.tracker_index()) { + return true; + } + } + + false + } + + pub fn contains_tlas(&self, tlas: &Tlas) -> bool { + for encoder in &self.encoders { + // The ownership location of tlas's depends on where the command encoder + // came from. If it is the staging command encoder on the queue, it is + // in the pending buffer list. If it came from a user command encoder, + // it is in the tracker. + + if encoder.trackers.tlas_s.contains(tlas) { + return true; + } + + if encoder.pending_buffers.contains_key(&tlas.tracker_index()) { + return true; + } + } + + false + } +} + #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum WaitIdleError { #[error(transparent)] Device(#[from] DeviceError), - #[error("Tried to wait using a submission index from the wrong device. Submission index is from device {0:?}. Called poll on device {1:?}.")] - WrongSubmissionIndex(id::QueueId, id::DeviceId), + #[error("Tried to wait using a submission index ({0}) that has not been returned by a successful submission (last successful submission: {1})")] + WrongSubmissionIndex(SubmissionIndex, SubmissionIndex), #[error("GPU got stuck :(")] StuckGpu, } @@ -95,16 +184,15 @@ pub enum WaitIdleError { /// submission index. /// /// 3) `handle_mapping` drains `self.ready_to_map` and actually maps the -/// buffers, collecting a list of notification closures to call. But any -/// buffers that were dropped by the user get moved to -/// `self.free_resources`. +/// buffers, collecting a list of notification closures to call. /// /// Only calling `Global::buffer_map_async` clones a new `Arc` for the /// buffer. This new `Arc` is only dropped by `handle_mapping`. -pub(crate) struct LifetimeTracker { - /// Resources that the user has requested be mapped, but which are used by - /// queue submissions still in flight. - mapped: Vec>>, +pub(crate) struct LifetimeTracker { + /// Buffers for which a call to [`Buffer::map_async`] has succeeded, but + /// which haven't been examined by `triage_mapped` yet to decide when they + /// can be mapped. + mapped: Vec>, /// Resources used by queue submissions still in flight. One entry per /// submission, with older submissions appearing before younger. @@ -112,11 +200,11 @@ pub(crate) struct LifetimeTracker { /// Entries are added by `track_submission` and drained by /// `LifetimeTracker::triage_submissions`. Lots of methods contribute data /// to particular entries. - active: Vec>, + active: Vec, /// Buffers the user has asked us to map, and which are not used by any /// queue submission still in flight. - ready_to_map: Vec>>, + ready_to_map: Vec>, /// Queue "on_submitted_work_done" closures that were initiated for while there is no /// currently pending submissions. 
These cannot be immediately invoked as they @@ -130,7 +218,7 @@ pub(crate) struct LifetimeTracker { pub device_lost_closure: Option, } -impl LifetimeTracker { +impl LifetimeTracker { pub fn new() -> Self { Self { mapped: Vec::new(), @@ -150,8 +238,8 @@ impl LifetimeTracker { pub fn track_submission( &mut self, index: SubmissionIndex, - temp_resources: impl Iterator>, - encoders: Vec>, + temp_resources: impl Iterator, + encoders: Vec, ) { self.active.push(ActiveSubmission { index, @@ -162,10 +250,69 @@ impl LifetimeTracker { }); } - pub(crate) fn map(&mut self, value: &Arc>) { + pub(crate) fn map(&mut self, value: &Arc) { self.mapped.push(value.clone()); } + /// Returns the submission index of the most recent submission that uses the + /// given buffer. + pub fn get_buffer_latest_submission_index(&self, buffer: &Buffer) -> Option { + // We iterate in reverse order, so that we can bail out early as soon + // as we find a hit. + self.active.iter().rev().find_map(|submission| { + if submission.contains_buffer(buffer) { + Some(submission.index) + } else { + None + } + }) + } + + /// Returns the submission index of the most recent submission that uses the + /// given blas. + pub fn get_blas_latest_submission_index(&self, blas: &Blas) -> Option { + // We iterate in reverse order, so that we can bail out early as soon + // as we find a hit. + self.active.iter().rev().find_map(|submission| { + if submission.contains_blas(blas) { + Some(submission.index) + } else { + None + } + }) + } + + /// Returns the submission index of the most recent submission that uses the + /// given blas. + pub fn get_tlas_latest_submission_index(&self, tlas: &Tlas) -> Option { + // We iterate in reverse order, so that we can bail out early as soon + // as we find a hit. + self.active.iter().rev().find_map(|submission| { + if submission.contains_tlas(tlas) { + Some(submission.index) + } else { + None + } + }) + } + + /// Returns the submission index of the most recent submission that uses the + /// given texture. + pub fn get_texture_latest_submission_index( + &self, + texture: &Texture, + ) -> Option { + // We iterate in reverse order, so that we can bail out early as soon + // as we find a hit. + self.active.iter().rev().find_map(|submission| { + if submission.contains_texture(texture) { + Some(submission.index) + } else { + None + } + }) + } + /// Sort out the consequences of completed submissions. /// /// Assume that all submissions up through `last_done` have completed. @@ -184,7 +331,7 @@ impl LifetimeTracker { pub fn triage_submissions( &mut self, last_done: SubmissionIndex, - command_allocator: &crate::command::CommandAllocator, + command_allocator: &crate::command::CommandAllocator, ) -> SmallVec<[SubmittedWorkDoneClosure; 1]> { profiling::scope!("triage_submissions"); @@ -198,7 +345,6 @@ impl LifetimeTracker { let mut work_done_closures: SmallVec<_> = self.work_done_closures.drain(..).collect(); for a in self.active.drain(..done_count) { - log::debug!("Active submission {} is done", a.index); self.ready_to_map.extend(a.mapped); for encoder in a.encoders { let raw = unsafe { encoder.land() }; @@ -212,7 +358,7 @@ impl LifetimeTracker { pub fn schedule_resource_destruction( &mut self, - temp_resource: TempResource, + temp_resource: TempResource, last_submit_index: SubmissionIndex, ) { let resources = self @@ -237,9 +383,7 @@ impl LifetimeTracker { } } } -} -impl LifetimeTracker { /// Determine which buffers are ready to map, and which must wait for the /// GPU. 
/// @@ -250,17 +394,13 @@ impl LifetimeTracker { } for buffer in self.mapped.drain(..) { - let submit_index = buffer.submission_index(); - log::trace!( - "Mapping of {} at submission {:?} gets assigned to active {:?}", - buffer.error_ident(), - submit_index, - self.active.iter().position(|a| a.index == submit_index) - ); - - self.active + let submission = self + .active .iter_mut() - .find(|a| a.index == submit_index) + .rev() + .find(|a| a.contains_buffer(&buffer)); + + submission .map_or(&mut self.ready_to_map, |a| &mut a.mapped) .push(buffer); } @@ -274,7 +414,7 @@ impl LifetimeTracker { #[must_use] pub(crate) fn handle_mapping( &mut self, - raw: &A::Device, + raw: &dyn hal::DynDevice, snatch_guard: &SnatchGuard, ) -> Vec { if self.ready_to_map.is_empty() { @@ -284,8 +424,6 @@ impl LifetimeTracker { Vec::with_capacity(self.ready_to_map.len()); for buffer in self.ready_to_map.drain(..) { - let tracker_index = buffer.tracker_index(); - // This _cannot_ be inlined into the match. If it is, the lock will be held // open through the whole match, resulting in a deadlock when we try to re-lock // the buffer back to active. @@ -306,7 +444,6 @@ impl LifetimeTracker { _ => panic!("No pending mapping."), }; let status = if pending_mapping.range.start != pending_mapping.range.end { - log::debug!("Buffer {tracker_index:?} map state -> Active"); let host = pending_mapping.op.host; let size = pending_mapping.range.end - pending_mapping.range.start; match super::map_buffer( @@ -317,10 +454,10 @@ impl LifetimeTracker { host, snatch_guard, ) { - Ok(ptr) => { + Ok(mapping) => { *buffer.map_state.lock() = resource::BufferMapState::Active { - ptr, - range: pending_mapping.range.start..pending_mapping.range.start + size, + mapping, + range: pending_mapping.range.clone(), host, }; Ok(()) @@ -332,7 +469,10 @@ impl LifetimeTracker { } } else { *buffer.map_state.lock() = resource::BufferMapState::Active { - ptr: std::ptr::NonNull::dangling(), + mapping: hal::BufferMapping { + ptr: std::ptr::NonNull::dangling(), + is_coherent: true, + }, range: pending_mapping.range, host: pending_mapping.op.host, }; diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index b97b5e617c..c89f04d74b 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -1,6 +1,5 @@ use crate::{ binding_model, - hal_api::HalApi, hub::Hub, id::{BindGroupLayoutId, PipelineLayoutId}, resource::{ @@ -12,15 +11,13 @@ use crate::{ }; use arrayvec::ArrayVec; -use hal::Device as _; use smallvec::SmallVec; use std::os::raw::c_char; use thiserror::Error; use wgt::{BufferAddress, DeviceLostReason, TextureFormat}; -use std::{iter, num::NonZeroU32, ptr}; +use std::num::NonZeroU32; -pub mod any_device; pub(crate) mod bgl; pub mod global; mod life; @@ -301,28 +298,26 @@ impl DeviceLostClosure { } } -fn map_buffer( - raw: &A::Device, - buffer: &Buffer, +fn map_buffer( + raw: &dyn hal::DynDevice, + buffer: &Buffer, offset: BufferAddress, size: BufferAddress, kind: HostMap, snatch_guard: &SnatchGuard, -) -> Result, BufferAccessError> { +) -> Result { let raw_buffer = buffer.try_raw(snatch_guard)?; let mapping = unsafe { raw.map_buffer(raw_buffer, offset..offset + size) .map_err(DeviceError::from)? 
}; - *buffer.sync_mapped_writes.lock() = match kind { - HostMap::Read if !mapping.is_coherent => unsafe { - raw.invalidate_mapped_ranges(raw_buffer, iter::once(offset..offset + size)); - None - }, - HostMap::Write if !mapping.is_coherent => Some(offset..offset + size), - _ => None, - }; + if !mapping.is_coherent && kind == HostMap::Read { + #[allow(clippy::single_range_in_vec_init)] + unsafe { + raw.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]); + } + } assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0); assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0); @@ -340,9 +335,6 @@ fn map_buffer( // If this is a write mapping zeroing out the memory here is the only // reasonable way as all data is pushed to GPU anyways. - // No need to flush if it is flushed later anyways. - let zero_init_needs_flush_now = - mapping.is_coherent && buffer.sync_mapped_writes.lock().is_none(); let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) }; for uninitialized in buffer @@ -356,15 +348,16 @@ fn map_buffer( (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize; mapped[fill_range].fill(0); - if zero_init_needs_flush_now { - unsafe { raw.flush_mapped_ranges(raw_buffer, iter::once(uninitialized)) }; + if !mapping.is_coherent && kind == HostMap::Read { + unsafe { raw.flush_mapped_ranges(raw_buffer, &[uninitialized]) }; } } - Ok(mapping.ptr) + Ok(mapping) } #[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct DeviceMismatch { pub(super) res: ResourceErrorIdent, pub(super) res_device: ResourceErrorIdent, @@ -389,6 +382,7 @@ impl std::fmt::Display for DeviceMismatch { impl std::error::Error for DeviceMismatch {} #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[non_exhaustive] pub enum DeviceError { #[error("{0} is invalid.")] @@ -434,18 +428,26 @@ pub struct ImplicitPipelineContext { } pub struct ImplicitPipelineIds<'a> { - pub root_id: Option, - pub group_ids: &'a [Option], + pub root_id: PipelineLayoutId, + pub group_ids: &'a [BindGroupLayoutId], } impl ImplicitPipelineIds<'_> { - fn prepare(self, hub: &Hub) -> ImplicitPipelineContext { + fn prepare(self, hub: &Hub) -> ImplicitPipelineContext { + let backend = self.root_id.backend(); ImplicitPipelineContext { - root_id: hub.pipeline_layouts.prepare(self.root_id).into_id(), + root_id: hub + .pipeline_layouts + .prepare(backend, Some(self.root_id)) + .into_id(), group_ids: self .group_ids .iter() - .map(|id_in| hub.bind_group_layouts.prepare(*id_in).into_id()) + .map(|id_in| { + hub.bind_group_layouts + .prepare(backend, Some(*id_in)) + .into_id() + }) .collect(), } } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index c0ed1d8a72..c4c6335d7a 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -10,42 +10,54 @@ use crate::{ device::{DeviceError, WaitIdleError}, get_lowest_common_denom, global::Global, - hal_api::HalApi, hal_label, id::{self, QueueId}, init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, - lock::{rank, Mutex, RwLockWriteGuard}, + lock::RwLockWriteGuard, resource::{ Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedResourceError, - DestroyedTexture, Labeled, ParentDevice, ResourceErrorIdent, StagingBuffer, Texture, - TextureInner, Trackable, + DestroyedTexture, FlushedStagingBuffer, Labeled, ParentDevice, ResourceErrorIdent, + StagingBuffer, Texture, 
TextureInner, Trackable, }, resource_log, track::{self, Tracker, TrackerIndex}, FastHashMap, SubmissionIndex, }; -use hal::{CommandEncoder as _, Device as _, Queue as _}; use smallvec::SmallVec; -use crate::resource::{Blas, Tlas}; +use crate::resource::{Blas, ScratchBuffer, Tlas}; use std::{ - iter, mem, - ptr::{self, NonNull}, + iter, + mem::{self, ManuallyDrop}, + ptr::NonNull, sync::{atomic::Ordering, Arc}, }; use thiserror::Error; use super::Device; -pub struct Queue { - pub(crate) raw: Option, - pub(crate) device: Arc>, +pub struct Queue { + raw: ManuallyDrop>, + pub(crate) device: Arc, +} + +impl Queue { + pub(crate) fn new(device: Arc, raw: Box) -> Self { + Queue { + raw: ManuallyDrop::new(raw), + device, + } + } + + pub(crate) fn raw(&self) -> &dyn hal::DynQueue { + self.raw.as_ref() + } } crate::impl_resource_type!(Queue); // TODO: https://github.com/gfx-rs/wgpu/issues/4014 -impl Labeled for Queue { +impl Labeled for Queue { fn label(&self) -> &str { "" } @@ -53,10 +65,11 @@ impl Labeled for Queue { crate::impl_parent_device!(Queue); crate::impl_storage_item!(Queue); -impl Drop for Queue { +impl Drop for Queue { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); - let queue = self.raw.take().unwrap(); + // SAFETY: we never access `self.raw` beyond this point. + let queue = unsafe { ManuallyDrop::take(&mut self.raw) }; self.device.release_queue(queue); } } @@ -117,13 +130,6 @@ impl SubmittedWorkDoneClosure { } } -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct WrappedSubmissionIndex { - pub queue_id: QueueId, - pub index: SubmissionIndex, -} - /// A texture or buffer to be freed soon. /// /// This is just a tagged raw texture or buffer, generally about to be added to @@ -135,12 +141,13 @@ pub struct WrappedSubmissionIndex { /// - `ActiveSubmission::temp_resources`: temporary resources used by a queue /// submission, to be freed when it completes #[derive(Debug)] -pub enum TempResource { - StagingBuffer(StagingBuffer), - DestroyedBuffer(DestroyedBuffer), - DestroyedTexture(DestroyedTexture), - Blas(Arc>), - Tlas(Arc>), +pub enum TempResource { + StagingBuffer(FlushedStagingBuffer), + ScratchBuffer(ScratchBuffer), + DestroyedBuffer(DestroyedBuffer), + DestroyedTexture(DestroyedTexture), + Blas(Arc), + Tlas(Arc), } /// A series of raw [`CommandBuffer`]s that have been submitted to a @@ -148,24 +155,24 @@ pub enum TempResource { /// /// [`CommandBuffer`]: hal::Api::CommandBuffer /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder -pub(crate) struct EncoderInFlight { - raw: A::CommandEncoder, - cmd_buffers: Vec, - trackers: Tracker, +pub(crate) struct EncoderInFlight { + raw: Box, + cmd_buffers: Vec>, + pub(crate) trackers: Tracker, /// These are the buffers that have been tracked by `PendingWrites`. - pending_buffers: Vec>>, + pub(crate) pending_buffers: FastHashMap>, /// These are the textures that have been tracked by `PendingWrites`. - pending_textures: Vec>>, + pub(crate) pending_textures: FastHashMap>, } -impl EncoderInFlight { +impl EncoderInFlight { /// Free all of our command buffers. /// /// Return the command encoder, fully reset and ready to be /// reused. - pub(crate) unsafe fn land(mut self) -> A::CommandEncoder { - unsafe { self.raw.reset_all(self.cmd_buffers.into_iter()) }; + pub(crate) unsafe fn land(mut self) -> Box { + unsafe { self.raw.reset_all(self.cmd_buffers) }; { // This involves actually decrementing the ref count of all command buffer // resources, so can be _very_ expensive. 
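The `ManuallyDrop` change to `Queue::raw` above exists so that `Drop::drop`, which only receives `&mut self`, can still move the boxed queue out by value and hand it to `Device::release_queue`. A minimal sketch of the pattern with hypothetical types (not the wgpu ones):

```rust
use std::mem::ManuallyDrop;

trait Backend {
    fn name(&self) -> &'static str;
}

struct VulkanLike;
impl Backend for VulkanLike {
    fn name(&self) -> &'static str {
        "vulkan-like"
    }
}

struct Queue {
    // `ManuallyDrop` lets us move the box out in `drop`, which an ordinary
    // field would not allow through `&mut self`.
    raw: ManuallyDrop<Box<dyn Backend>>,
}

impl Queue {
    fn new(raw: Box<dyn Backend>) -> Self {
        Queue { raw: ManuallyDrop::new(raw) }
    }
}

fn release_queue(raw: Box<dyn Backend>) {
    // Stand-in for `Device::release_queue` in the diff above.
    println!("releasing {} queue", raw.name());
}

impl Drop for Queue {
    fn drop(&mut self) {
        // SAFETY: `self.raw` is never accessed again after this take.
        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
        release_queue(raw);
    }
}

fn main() {
    let _q = Queue::new(Box::new(VulkanLike));
}
```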
@@ -199,8 +206,8 @@ impl EncoderInFlight { /// /// All uses of [`StagingBuffer`]s end up here. #[derive(Debug)] -pub(crate) struct PendingWrites { - pub command_encoder: A::CommandEncoder, +pub(crate) struct PendingWrites { + pub command_encoder: Box, /// True if `command_encoder` is in the "recording" state, as /// described in the docs for the [`wgpu_hal::CommandEncoder`] @@ -209,13 +216,13 @@ pub(crate) struct PendingWrites { /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder pub is_recording: bool, - temp_resources: Vec>, - dst_buffers: FastHashMap>>, - dst_textures: FastHashMap>>, + temp_resources: Vec, + dst_buffers: FastHashMap>, + dst_textures: FastHashMap>, } -impl PendingWrites { - pub fn new(command_encoder: A::CommandEncoder) -> Self { +impl PendingWrites { + pub fn new(command_encoder: Box) -> Self { Self { command_encoder, is_recording: false, @@ -225,7 +232,7 @@ impl PendingWrites { } } - pub fn dispose(mut self, device: &A::Device) { + pub fn dispose(mut self, device: &dyn hal::DynDevice) { unsafe { if self.is_recording { self.command_encoder.discard_encoding(); @@ -236,42 +243,42 @@ impl PendingWrites { self.temp_resources.clear(); } - pub fn insert_buffer(&mut self, buffer: &Arc>) { + pub fn insert_buffer(&mut self, buffer: &Arc) { self.dst_buffers .insert(buffer.tracker_index(), buffer.clone()); } - pub fn insert_texture(&mut self, texture: &Arc>) { + pub fn insert_texture(&mut self, texture: &Arc) { self.dst_textures .insert(texture.tracker_index(), texture.clone()); } - pub fn contains_buffer(&self, buffer: &Arc>) -> bool { + pub fn contains_buffer(&self, buffer: &Arc) -> bool { self.dst_buffers.contains_key(&buffer.tracker_index()) } - pub fn contains_texture(&self, texture: &Arc>) -> bool { + pub fn contains_texture(&self, texture: &Arc) -> bool { self.dst_textures.contains_key(&texture.tracker_index()) } - pub fn consume_temp(&mut self, resource: TempResource) { + pub fn consume_temp(&mut self, resource: TempResource) { self.temp_resources.push(resource); } - fn consume(&mut self, buffer: StagingBuffer) { + pub fn consume(&mut self, buffer: FlushedStagingBuffer) { self.temp_resources .push(TempResource::StagingBuffer(buffer)); } fn pre_submit( &mut self, - command_allocator: &CommandAllocator, - device: &A::Device, - queue: &A::Queue, - ) -> Result>, DeviceError> { + command_allocator: &CommandAllocator, + device: &dyn hal::DynDevice, + queue: &dyn hal::DynQueue, + ) -> Result, DeviceError> { if self.is_recording { - let pending_buffers = self.dst_buffers.drain().map(|(_, b)| b).collect(); - let pending_textures = self.dst_textures.drain().map(|(_, t)| t).collect(); + let pending_buffers = mem::take(&mut self.dst_buffers); + let pending_textures = mem::take(&mut self.dst_textures); let cmd_buf = unsafe { self.command_encoder.end_encoding()? 
}; self.is_recording = false; @@ -293,7 +300,7 @@ impl PendingWrites { } } - pub fn activate(&mut self) -> &mut A::CommandEncoder { + pub fn activate(&mut self) -> &mut dyn hal::DynCommandEncoder { if !self.is_recording { unsafe { self.command_encoder @@ -302,7 +309,7 @@ impl PendingWrites { } self.is_recording = true; } - &mut self.command_encoder + self.command_encoder.as_mut() } pub fn deactivate(&mut self) { @@ -315,47 +322,6 @@ impl PendingWrites { } } -pub(crate) fn prepare_staging_buffer( - device: &Arc>, - size: wgt::BufferAddress, - instance_flags: wgt::InstanceFlags, -) -> Result<(StagingBuffer, NonNull), DeviceError> { - profiling::scope!("prepare_staging_buffer"); - let stage_desc = hal::BufferDescriptor { - label: hal_label(Some("(wgpu internal) Staging"), instance_flags), - size, - usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, - memory_flags: hal::MemoryFlags::TRANSIENT, - }; - - let buffer = unsafe { device.raw().create_buffer(&stage_desc)? }; - let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size) }?; - - let staging_buffer = StagingBuffer { - raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(buffer)), - device: device.clone(), - size, - is_coherent: mapping.is_coherent, - }; - - Ok((staging_buffer, mapping.ptr)) -} - -impl StagingBuffer { - unsafe fn flush(&self, device: &A::Device) -> Result<(), DeviceError> { - if !self.is_coherent { - unsafe { - device.flush_mapped_ranges( - self.raw.lock().as_ref().unwrap(), - iter::once(0..self.size), - ) - }; - } - unsafe { device.unmap_buffer(self.raw.lock().as_ref().unwrap())? }; - Ok(()) - } -} - #[derive(Clone, Debug, Error)] #[error("Queue is invalid")] pub struct InvalidQueue; @@ -403,7 +369,7 @@ pub enum QueueSubmitError { //TODO: move out common parts of write_xxx. impl Global { - pub fn queue_write_buffer( + pub fn queue_write_buffer( &self, queue_id: QueueId, buffer_id: id::BufferId, @@ -413,7 +379,7 @@ impl Global { profiling::scope!("Queue::write_buffer"); api_log!("Queue::write_buffer {buffer_id:?} {}bytes", data.len()); - let hub = A::hub(self); + let hub = &self.hub; let buffer = hub .buffers @@ -442,32 +408,29 @@ impl Global { buffer.same_device_as(queue.as_ref())?; - if data_size == 0 { + let data_size = if let Some(data_size) = wgt::BufferSize::new(data_size) { + data_size + } else { log::trace!("Ignoring write_buffer of size 0"); return Ok(()); - } + }; // Platform validation requires that the staging buffer always be // freed, even if an error occurs. All paths from here must call // `device.pending_writes.consume`. 
- let (staging_buffer, staging_buffer_ptr) = - prepare_staging_buffer(device, data_size, device.instance_flags)?; + let mut staging_buffer = StagingBuffer::new(device, data_size)?; let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); - if let Err(flush_error) = unsafe { + let staging_buffer = { profiling::scope!("copy"); - ptr::copy_nonoverlapping(data.as_ptr(), staging_buffer_ptr.as_ptr(), data.len()); - staging_buffer.flush(device.raw()) - } { - pending_writes.consume(staging_buffer); - return Err(flush_error.into()); - } + staging_buffer.write(data); + staging_buffer.flush() + }; let result = self.queue_write_staging_buffer_impl( &queue, device, - pending_writes, + &mut pending_writes, &staging_buffer, buffer_id, buffer_offset, @@ -477,14 +440,14 @@ impl Global { result } - pub fn queue_create_staging_buffer( + pub fn queue_create_staging_buffer( &self, queue_id: QueueId, buffer_size: wgt::BufferSize, id_in: Option, ) -> Result<(id::StagingBufferId, NonNull), QueueWriteError> { profiling::scope!("Queue::create_staging_buffer"); - let hub = A::hub(self); + let hub = &self.hub; let queue = hub .queues @@ -493,17 +456,17 @@ impl Global { let device = &queue.device; - let (staging_buffer, staging_buffer_ptr) = - prepare_staging_buffer(device, buffer_size.get(), device.instance_flags)?; + let staging_buffer = StagingBuffer::new(device, buffer_size)?; + let ptr = unsafe { staging_buffer.ptr() }; - let fid = hub.staging_buffers.prepare(id_in); + let fid = hub.staging_buffers.prepare(queue_id.backend(), id_in); let id = fid.assign(Arc::new(staging_buffer)); resource_log!("Queue::create_staging_buffer {id:?}"); - Ok((id, staging_buffer_ptr)) + Ok((id, ptr)) } - pub fn queue_write_staging_buffer( + pub fn queue_write_staging_buffer( &self, queue_id: QueueId, buffer_id: id::BufferId, @@ -511,7 +474,7 @@ impl Global { staging_buffer_id: id::StagingBufferId, ) -> Result<(), QueueWriteError> { profiling::scope!("Queue::write_staging_buffer"); - let hub = A::hub(self); + let hub = &self.hub; let queue = hub .queues @@ -527,21 +490,17 @@ impl Global { .ok_or_else(|| QueueWriteError::Transfer(TransferError::InvalidBufferId(buffer_id)))?; let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); // At this point, we have taken ownership of the staging_buffer from the // user. Platform validation requires that the staging buffer always // be freed, even if an error occurs. All paths from here must call // `device.pending_writes.consume`. 
- if let Err(flush_error) = unsafe { staging_buffer.flush(device.raw()) } { - pending_writes.consume(staging_buffer); - return Err(flush_error.into()); - } + let staging_buffer = staging_buffer.flush(); let result = self.queue_write_staging_buffer_impl( &queue, device, - pending_writes, + &mut pending_writes, &staging_buffer, buffer_id, buffer_offset, @@ -551,15 +510,15 @@ impl Global { result } - pub fn queue_validate_write_buffer( + pub fn queue_validate_write_buffer( &self, _queue_id: QueueId, buffer_id: id::BufferId, buffer_offset: u64, - buffer_size: u64, + buffer_size: wgt::BufferSize, ) -> Result<(), QueueWriteError> { profiling::scope!("Queue::validate_write_buffer"); - let hub = A::hub(self); + let hub = &self.hub; let buffer = hub .buffers @@ -571,23 +530,23 @@ impl Global { Ok(()) } - fn queue_validate_write_buffer_impl( + fn queue_validate_write_buffer_impl( &self, - buffer: &Buffer, + buffer: &Buffer, buffer_offset: u64, - buffer_size: u64, + buffer_size: wgt::BufferSize, ) -> Result<(), TransferError> { buffer.check_usage(wgt::BufferUsages::COPY_DST)?; - if buffer_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { - return Err(TransferError::UnalignedCopySize(buffer_size)); + if buffer_size.get() % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedCopySize(buffer_size.get())); } if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { return Err(TransferError::UnalignedBufferOffset(buffer_offset)); } - if buffer_offset + buffer_size > buffer.size { + if buffer_offset + buffer_size.get() > buffer.size { return Err(TransferError::BufferOverrun { start_offset: buffer_offset, - end_offset: buffer_offset + buffer_size, + end_offset: buffer_offset + buffer_size.get(), buffer_size: buffer.size, side: CopySide::Destination, }); @@ -596,16 +555,16 @@ impl Global { Ok(()) } - fn queue_write_staging_buffer_impl( + fn queue_write_staging_buffer_impl( &self, - queue: &Arc>, - device: &Arc>, - pending_writes: &mut PendingWrites, - staging_buffer: &StagingBuffer, + queue: &Arc, + device: &Arc, + pending_writes: &mut PendingWrites, + staging_buffer: &FlushedStagingBuffer, buffer_id: id::BufferId, buffer_offset: u64, ) -> Result<(), QueueWriteError> { - let hub = A::hub(self); + let hub = &self.hub; let dst = hub .buffers @@ -622,30 +581,23 @@ impl Global { dst.same_device_as(queue.as_ref())?; - let src_buffer_size = staging_buffer.size; - self.queue_validate_write_buffer_impl(&dst, buffer_offset, src_buffer_size)?; - - dst.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); + self.queue_validate_write_buffer_impl(&dst, buffer_offset, staging_buffer.size)?; - let region = wgt::BufferSize::new(src_buffer_size).map(|size| hal::BufferCopy { + let region = hal::BufferCopy { src_offset: 0, dst_offset: buffer_offset, - size, - }); - let inner_buffer = staging_buffer.raw.lock(); + size: staging_buffer.size, + }; let barriers = iter::once(hal::BufferBarrier { - buffer: inner_buffer.as_ref().unwrap(), + buffer: staging_buffer.raw(), usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }) - .chain(transition.map(|pending| pending.into_hal(&dst, &snatch_guard))); + .chain(transition.map(|pending| pending.into_hal(&dst, &snatch_guard))) + .collect::>(); let encoder = pending_writes.activate(); unsafe { - encoder.transition_buffers(barriers); - encoder.copy_buffer_to_buffer( - inner_buffer.as_ref().unwrap(), - dst_raw, - region.into_iter(), - ); + encoder.transition_buffers(&barriers); + encoder.copy_buffer_to_buffer(staging_buffer.raw(), dst_raw, &[region]); } 
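The staging-buffer paths above lean on a move-based state transition: a writable `StagingBuffer` is consumed by `flush()`, which returns a `FlushedStagingBuffer` that can only be submitted. A much-simplified sketch of the idea (hypothetical fields; the real types in `wgpu-core` track HAL buffers and memory coherence):

```rust
// A writable staging buffer; only this state exposes `write`.
struct StagingBuffer {
    contents: Vec<u8>,
}

// The flushed state: contents can no longer be written, only submitted.
struct FlushedStagingBuffer {
    contents: Vec<u8>,
}

impl StagingBuffer {
    fn new(size: usize) -> Self {
        StagingBuffer { contents: vec![0; size] }
    }

    fn write(&mut self, data: &[u8]) {
        self.contents[..data.len()].copy_from_slice(data);
    }

    // Consuming `self` makes "write after flush" a compile error instead of
    // a runtime bug, and removes the old fallible unsafe flush-and-consume
    // error path.
    fn flush(self) -> FlushedStagingBuffer {
        // A real implementation would flush non-coherent mapped ranges here.
        FlushedStagingBuffer { contents: self.contents }
    }
}

fn main() {
    let mut staging = StagingBuffer::new(4);
    staging.write(&[1, 2, 3, 4]);
    let flushed = staging.flush();
    // staging.write(&[5]); // would not compile: `staging` was moved
    println!("submitting {} bytes", flushed.contents.len());
}
```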
pending_writes.insert_buffer(&dst); @@ -655,13 +607,13 @@ impl Global { { dst.initialization_status .write() - .drain(buffer_offset..(buffer_offset + src_buffer_size)); + .drain(buffer_offset..(buffer_offset + staging_buffer.size.get())); } Ok(()) } - pub fn queue_write_texture( + pub fn queue_write_texture( &self, queue_id: QueueId, destination: &ImageCopyTexture, @@ -672,7 +624,7 @@ impl Global { profiling::scope!("Queue::write_texture"); api_log!("Queue::write_texture {:?} {size:?}", destination.texture); - let hub = A::hub(self); + let hub = &self.hub; let queue = hub .queues @@ -728,7 +680,7 @@ impl Global { // Note: `_source_bytes_per_array_layer` is ignored since we // have a staging copy, and it can have a different value. - let (_, _source_bytes_per_array_layer) = validate_linear_texture_data( + let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data( data_layout, dst.desc.format, destination.aspect, @@ -744,33 +696,7 @@ impl Global { .map_err(TransferError::from)?; } - let (block_width, block_height) = dst.desc.format.block_dimensions(); - let width_blocks = size.width / block_width; - let height_blocks = size.height / block_height; - - let block_rows_per_image = data_layout.rows_per_image.unwrap_or( - // doesn't really matter because we need this only if we copy - // more than one layer, and then we validate for this being not - // None - height_blocks, - ); - - let block_size = dst - .desc - .format - .block_copy_size(Some(destination.aspect)) - .unwrap(); - let bytes_per_row_alignment = - get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size); - let stage_bytes_per_row = - wgt::math::align_to(block_size * width_blocks, bytes_per_row_alignment); - - let block_rows_in_copy = - (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks; - let stage_size = stage_bytes_per_row as u64 * block_rows_in_copy as u64; - let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); let encoder = pending_writes.activate(); // If the copy does not fully cover the layers, we need to initialize to @@ -804,7 +730,7 @@ impl Global { encoder, &mut trackers.textures, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), &device.snatchable_lock.read(), ) .map_err(QueueWriteError::from)?; @@ -822,78 +748,88 @@ impl Global { // call above. Since we've held `texture_guard` the whole time, we know // the texture hasn't gone away in the mean time, so we can unwrap. 
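`validate_linear_texture_data` above now also returns `required_bytes_in_copy`, which the aligned fast path below uses as the exact staging size. The arithmetic is roughly the following sketch (simplified; the real function additionally validates offsets, alignment, and overflow):

```rust
fn required_bytes_in_copy(
    bytes_per_row: u64,
    rows_per_image: u64,
    height_in_blocks: u64,
    bytes_in_last_row: u64,
    depth_or_array_layers: u64,
) -> u64 {
    if depth_or_array_layers == 0 || height_in_blocks == 0 {
        return 0;
    }
    // Full rows are strided by `bytes_per_row`; the final row of the final
    // layer only needs its actual texel bytes.
    let bytes_per_image = bytes_per_row * rows_per_image;
    bytes_per_image * (depth_or_array_layers - 1)
        + bytes_per_row * (height_in_blocks - 1)
        + bytes_in_last_row
}

fn main() {
    // 100x2 blocks, 256-byte row pitch, 4 bytes per block, one layer:
    let n = required_bytes_in_copy(256, 2, 2, 400, 1);
    assert_eq!(n, 256 + 400);
    println!("{n}");
}
```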
let dst = hub.textures.get(destination.texture).unwrap(); - dst.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); let dst_raw = dst.try_raw(&snatch_guard)?; - let bytes_per_row = data_layout - .bytes_per_row - .unwrap_or(width_blocks * block_size); + let (block_width, block_height) = dst.desc.format.block_dimensions(); + let width_in_blocks = size.width / block_width; + let height_in_blocks = size.height / block_height; + + let block_size = dst + .desc + .format + .block_copy_size(Some(destination.aspect)) + .unwrap(); + let bytes_in_last_row = width_in_blocks * block_size; + + let bytes_per_row = data_layout.bytes_per_row.unwrap_or(bytes_in_last_row); + let rows_per_image = data_layout.rows_per_image.unwrap_or(height_in_blocks); + + let bytes_per_row_alignment = + get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size); + let stage_bytes_per_row = wgt::math::align_to(bytes_in_last_row, bytes_per_row_alignment); // Platform validation requires that the staging buffer always be // freed, even if an error occurs. All paths from here must call // `device.pending_writes.consume`. - let (staging_buffer, staging_buffer_ptr) = - prepare_staging_buffer(device, stage_size, device.instance_flags)?; - - if stage_bytes_per_row == bytes_per_row { + let staging_buffer = if stage_bytes_per_row == bytes_per_row { profiling::scope!("copy aligned"); // Fast path if the data is already being aligned optimally. - unsafe { - ptr::copy_nonoverlapping( - data.as_ptr().offset(data_layout.offset as isize), - staging_buffer_ptr.as_ptr(), - stage_size as usize, - ); - } + let stage_size = wgt::BufferSize::new(required_bytes_in_copy).unwrap(); + let mut staging_buffer = StagingBuffer::new(device, stage_size)?; + staging_buffer.write(&data[data_layout.offset as usize..]); + staging_buffer } else { profiling::scope!("copy chunked"); // Copy row by row into the optimal alignment. 
+                let block_rows_in_copy =
+                    (size.depth_or_array_layers - 1) * rows_per_image + height_in_blocks;
+                let stage_size =
+                    wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64)
+                        .unwrap();
+                let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
                 let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
                 for layer in 0..size.depth_or_array_layers {
-                    let rows_offset = layer * block_rows_per_image;
-                    for row in 0..height_blocks {
+                    let rows_offset = layer * rows_per_image;
+                    for row in rows_offset..rows_offset + height_in_blocks {
+                        let src_offset = data_layout.offset as u32 + row * bytes_per_row;
+                        let dst_offset = row * stage_bytes_per_row;
                         unsafe {
-                            ptr::copy_nonoverlapping(
-                                data.as_ptr().offset(
-                                    data_layout.offset as isize
-                                        + (rows_offset + row) as isize * bytes_per_row as isize,
-                                ),
-                                staging_buffer_ptr.as_ptr().offset(
-                                    (rows_offset + row) as isize * stage_bytes_per_row as isize,
-                                ),
+                            staging_buffer.write_with_offset(
+                                data,
+                                src_offset as isize,
+                                dst_offset as isize,
                                 copy_bytes_per_row,
-                            );
+                            )
                         }
                     }
                 }
-        }
-
-        if let Err(e) = unsafe { staging_buffer.flush(device.raw()) } {
-            pending_writes.consume(staging_buffer);
-            return Err(e.into());
-        }
+                staging_buffer
+            };

-        let regions = (0..array_layer_count).map(|rel_array_layer| {
-            let mut texture_base = dst_base.clone();
-            texture_base.array_layer += rel_array_layer;
-            hal::BufferTextureCopy {
-                buffer_layout: wgt::ImageDataLayout {
-                    offset: rel_array_layer as u64
-                        * block_rows_per_image as u64
-                        * stage_bytes_per_row as u64,
-                    bytes_per_row: Some(stage_bytes_per_row),
-                    rows_per_image: Some(block_rows_per_image),
-                },
-                texture_base,
-                size: hal_copy_size,
-            }
-        });
+        let staging_buffer = staging_buffer.flush();
+
+        let regions = (0..array_layer_count)
+            .map(|array_layer_offset| {
+                let mut texture_base = dst_base.clone();
+                texture_base.array_layer += array_layer_offset;
+                hal::BufferTextureCopy {
+                    buffer_layout: wgt::ImageDataLayout {
+                        offset: array_layer_offset as u64
+                            * rows_per_image as u64
+                            * stage_bytes_per_row as u64,
+                        bytes_per_row: Some(stage_bytes_per_row),
+                        rows_per_image: Some(rows_per_image),
+                    },
+                    texture_base,
+                    size: hal_copy_size,
+                }
+            })
+            .collect::<Vec<_>>();

         {
-            let inner_buffer = staging_buffer.raw.lock();
-            let barrier = hal::BufferBarrier {
-                buffer: inner_buffer.as_ref().unwrap(),
+            let buffer_barrier = hal::BufferBarrier {
+                buffer: staging_buffer.raw(),
                 usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
             };
@@ -902,10 +838,14 @@ impl Global {
             trackers
                 .textures
                 .set_single(&dst, selector, hal::TextureUses::COPY_DST);
+            let texture_barriers = transition
+                .map(|pending| pending.into_hal(dst_raw))
+                .collect::<Vec<_>>();
+
             unsafe {
-                encoder.transition_textures(transition.map(|pending| pending.into_hal(dst_raw)));
-                encoder.transition_buffers(iter::once(barrier));
-                encoder.copy_buffer_to_texture(inner_buffer.as_ref().unwrap(), dst_raw, regions);
+                encoder.transition_textures(&texture_barriers);
+                encoder.transition_buffers(&[buffer_barrier]);
+                encoder.copy_buffer_to_texture(staging_buffer.raw(), dst_raw, &regions);
             }
         }

@@ -916,7 +856,7 @@ impl Global {
     }

     #[cfg(webgl)]
-    pub fn queue_copy_external_image_to_texture<A: HalApi>(
+    pub fn queue_copy_external_image_to_texture(
         &self,
         queue_id: QueueId,
         source: &wgt::ImageCopyExternalImage,
@@ -925,7 +865,7 @@ impl Global {
     ) -> Result<(), QueueWriteError> {
         profiling::scope!("Queue::copy_external_image_to_texture");

-        let hub = A::hub(self);
+        let hub = &self.hub;

         let queue = hub
             .queues
@@ -1030,7 +970,7 @@ impl Global {
         extract_texture_selector(&destination.to_untagged(), &size, &dst)?;

         let mut pending_writes = device.pending_writes.lock();
-        let encoder = pending_writes.as_mut().unwrap().activate();
+        let encoder = pending_writes.activate();

         // If the copy does not fully cover the layers, we need to initialize to
         // zero *first* as we don't keep track of partial texture layer inits.
@@ -1063,7 +1003,7 @@ impl Global {
                     encoder,
                     &mut trackers.textures,
                     &device.alignments,
-                    device.zero_buffer.as_ref().unwrap(),
+                    device.zero_buffer.as_ref(),
                     &device.snatchable_lock.read(),
                 )
                 .map_err(QueueWriteError::from)?;
@@ -1073,7 +1013,6 @@ impl Global {
                     .drain(init_layer_range);
             }
         }
-        dst.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);

         let snatch_guard = device.snatchable_lock.read();
         let dst_raw = dst.try_raw(&snatch_guard)?;
@@ -1089,16 +1028,36 @@ impl Global {
             size: hal_copy_size,
         };

+        let mut trackers = device.trackers.lock();
+        let transitions = trackers
+            .textures
+            .set_single(&dst, selector, hal::TextureUses::COPY_DST);
+
+        // `copy_external_image_to_texture` is exclusive to the WebGL backend.
+        // Don't go through the `DynCommandEncoder` abstraction; downcast and
+        // talk to the WebGL backend directly.
+        let encoder_webgl = encoder
+            .as_any_mut()
+            .downcast_mut::<hal::gles::CommandEncoder>()
+            .unwrap();
+        let dst_raw_webgl = dst_raw
+            .as_any()
+            .downcast_ref::<hal::gles::Texture>()
+            .unwrap();
+        let transitions_webgl = transitions.map(|pending| {
+            let dyn_transition = pending.into_hal(dst_raw);
+            hal::TextureBarrier {
+                texture: dst_raw_webgl,
+                range: dyn_transition.range,
+                usage: dyn_transition.usage,
+            }
+        });
+
+        use hal::CommandEncoder as _;
         unsafe {
-            let mut trackers = device.trackers.lock();
-            let transitions =
-                trackers
-                    .textures
-                    .set_single(&dst, selector, hal::TextureUses::COPY_DST);
-            encoder.transition_textures(transitions.map(|pending| pending.into_hal(dst_raw)));
-            encoder.copy_external_image_to_texture(
+            encoder_webgl.transition_textures(transitions_webgl);
+            encoder_webgl.copy_external_image_to_texture(
                 source,
-                dst_raw,
+                dst_raw_webgl,
                 destination.premultiplied_alpha,
                 iter::once(regions),
             );
@@ -1107,16 +1066,16 @@ impl Global {
         Ok(())
     }

-    pub fn queue_submit<A: HalApi>(
+    pub fn queue_submit(
         &self,
         queue_id: QueueId,
         command_buffer_ids: &[id::CommandBufferId],
-    ) -> Result<WrappedSubmissionIndex, QueueSubmitError> {
+    ) -> Result<SubmissionIndex, QueueSubmitError> {
         profiling::scope!("Queue::submit");
         api_log!("Queue::submit {queue_id:?}");

         let (submit_index, callbacks) = {
-            let hub = A::hub(self);
+            let hub = &self.hub;

             let queue = hub
                 .queues
@@ -1128,11 +1087,10 @@ impl Global {
             let snatch_guard = device.snatchable_lock.read();

             // Fence lock must be acquired after the snatch lock everywhere to avoid deadlocks.
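The lock-ordering comment above is load-bearing: with two locks and two threads, inconsistent acquisition order can deadlock. A toy illustration of the discipline, using plain `std` types rather than wgpu-core's instrumented locks:

```rust
use std::sync::RwLock;

struct DeviceLocks {
    snatchable: RwLock<()>, // stands in for the snatchable lock
    fence: RwLock<()>,      // stands in for the submission fence lock
}

fn submit_path(locks: &DeviceLocks) {
    // Always snatch lock first, fence lock second. If another code path
    // took them in the opposite order, each thread could hold one lock
    // while blocking on the other, a classic lock-order inversion.
    let _snatch = locks.snatchable.read().unwrap();
    let _fence = locks.fence.write().unwrap();
    // ... validate and submit work here ...
}
```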
- let mut fence_guard = device.fence.write(); - let fence = fence_guard.as_mut().unwrap(); + let mut fence = device.fence.write(); let submit_index = device .active_submission_index - .fetch_add(1, Ordering::Relaxed) + .fetch_add(1, Ordering::SeqCst) + 1; let mut active_executions = Vec::new(); @@ -1192,7 +1150,7 @@ impl Global { } { - profiling::scope!("update submission ids"); + profiling::scope!("check resource state"); let cmd_buf_data = cmdbuf.data.lock(); let cmd_buf_trackers = &cmd_buf_data.as_ref().unwrap().trackers; @@ -1202,7 +1160,6 @@ impl Global { profiling::scope!("buffers"); for buffer in cmd_buf_trackers.buffers.used_resources() { buffer.check_destroyed(&snatch_guard)?; - buffer.use_at(submit_index); match *buffer.map_state.lock() { BufferMapState::Idle => (), @@ -1219,17 +1176,14 @@ impl Global { for texture in cmd_buf_trackers.textures.used_resources() { let should_extend = match texture.try_inner(&snatch_guard)? { TextureInner::Native { .. } => false, - TextureInner::Surface { ref raw, .. } => { - if raw.is_some() { - // Compare the Arcs by pointer as Textures don't implement Eq. - submit_surface_textures_owned - .insert(Arc::as_ptr(&texture), texture.clone()); - } + TextureInner::Surface { .. } => { + // Compare the Arcs by pointer as Textures don't implement Eq. + submit_surface_textures_owned + .insert(Arc::as_ptr(&texture), texture.clone()); true } }; - texture.use_at(submit_index); if should_extend { unsafe { used_surface_textures @@ -1243,81 +1197,6 @@ impl Global { } } } - { - profiling::scope!("views"); - for texture_view in cmd_buf_trackers.views.used_resources() { - texture_view.use_at(submit_index); - } - } - { - profiling::scope!("bind groups (+ referenced views/samplers)"); - for bg in cmd_buf_trackers.bind_groups.used_resources() { - bg.use_at(submit_index); - // We need to update the submission indices for the contained - // state-less (!) resources as well, so that they don't get - // deleted too early if the parent bind group goes out of scope. - for view in bg.used.views.used_resources() { - view.use_at(submit_index); - } - for sampler in bg.used.samplers.used_resources() { - sampler.use_at(submit_index); - } - } - } - { - profiling::scope!("compute pipelines"); - for compute_pipeline in - cmd_buf_trackers.compute_pipelines.used_resources() - { - compute_pipeline.use_at(submit_index); - } - } - { - profiling::scope!("render pipelines"); - for render_pipeline in - cmd_buf_trackers.render_pipelines.used_resources() - { - render_pipeline.use_at(submit_index); - } - } - { - profiling::scope!("query sets"); - for query_set in cmd_buf_trackers.query_sets.used_resources() { - query_set.use_at(submit_index); - } - } - { - profiling::scope!( - "render bundles (+ referenced pipelines/query sets)" - ); - for bundle in cmd_buf_trackers.bundles.used_resources() { - bundle.use_at(submit_index); - // We need to update the submission indices for the contained - // state-less (!) resources as well, excluding the bind groups. - // They don't get deleted too early if the bundle goes out of scope. 
-                        for render_pipeline in
-                            bundle.used.render_pipelines.read().used_resources()
-                        {
-                            render_pipeline.use_at(submit_index);
-                        }
-                        for query_set in bundle.used.query_sets.read().used_resources()
-                        {
-                            query_set.use_at(submit_index);
-                        }
-                    }
-                }
-                {
-                    profiling::scope!("blas");
-                    for blas in cmd_buf_trackers.blas_s.used_resources() {
-                        blas.use_at(submit_index);
-                    }
-                }
-                {
-                    profiling::scope!("tlas");
-                    for tlas in cmd_buf_trackers.tlas_s.used_resources() {
-                        tlas.use_at(submit_index);
-                    }
-                }
             }

             let mut baked = cmdbuf.from_arc_into_baked();
@@ -1332,19 +1211,18 @@ impl Global {
                 ))
                 .map_err(DeviceError::from)?
             };
-            log::trace!("Stitching command buffer {:?} before submission", cmb_id);

             //Note: locking the trackers has to be done after the storages
             let mut trackers = device.trackers.lock();
-            baked.initialize_buffer_memory(&mut *trackers, &snatch_guard)?;
-            baked.initialize_texture_memory(&mut *trackers, device, &snatch_guard)?;
+            baked.initialize_buffer_memory(&mut trackers, &snatch_guard)?;
+            baked.initialize_texture_memory(&mut trackers, device, &snatch_guard)?;
             baked.validate_blas_actions()?;
             baked.validate_tlas_actions()?;

             //Note: stateless trackers are not merged:
             // device already knows these resources exist.
             CommandBuffer::insert_barriers_from_device_tracker(
-                &mut baked.encoder,
-                &mut *trackers,
+                baked.encoder.as_mut(),
+                &mut trackers,
                 &baked.trackers,
                 &snatch_guard,
             );
@@ -1370,9 +1248,10 @@ impl Global {
                     .set_from_usage_scope_and_drain_transitions(
                         &used_surface_textures,
                         &snatch_guard,
-                    );
+                    )
+                    .collect::<Vec<_>>();
                 let present = unsafe {
-                    baked.encoder.transition_textures(texture_barriers);
+                    baked.encoder.transition_textures(&texture_barriers);
                     baked.encoder.end_encoding().unwrap()
                 };
                 baked.list.push(present);
@@ -1384,29 +1263,24 @@ impl Global {
                     raw: baked.encoder,
                     cmd_buffers: baked.list,
                     trackers: baked.trackers,
-                    pending_buffers: Vec::new(),
-                    pending_textures: Vec::new(),
+                    pending_buffers: FastHashMap::default(),
+                    pending_textures: FastHashMap::default(),
                 });
             }
-
-            log::trace!("Device after submission {}", submit_index);
         }
    }

-    let mut pending_writes_guard = device.pending_writes.lock();
-    let pending_writes = pending_writes_guard.as_mut().unwrap();
+    let mut pending_writes = device.pending_writes.lock();

    {
        used_surface_textures.set_size(hub.textures.read().len());
        for texture in pending_writes.dst_textures.values() {
            match texture.try_inner(&snatch_guard)? {
                TextureInner::Native { .. } => {}
-                TextureInner::Surface { ref raw, .. } => {
-                    if raw.is_some() {
-                        // Compare the Arcs by pointer as Textures don't implement Eq
-                        submit_surface_textures_owned
-                            .insert(Arc::as_ptr(texture), texture.clone());
-                    }
+                TextureInner::Surface { .. } => {
+                    // Compare the Arcs by pointer as Textures don't implement Eq
+                    submit_surface_textures_owned
+                        .insert(Arc::as_ptr(texture), texture.clone());

                    unsafe {
                        used_surface_textures
@@ -1425,71 +1299,71 @@ impl Global {
                .set_from_usage_scope_and_drain_transitions(
                    &used_surface_textures,
                    &snatch_guard,
-                );
+                )
+                .collect::<Vec<_>>();
            unsafe {
                pending_writes
                    .command_encoder
-                    .transition_textures(texture_barriers);
+                    .transition_textures(&texture_barriers);
            };
        }
    }

-    if let Some(pending_execution) = pending_writes.pre_submit(
-        &device.command_allocator,
-        device.raw(),
-        queue.raw.as_ref().unwrap(),
-    )? {
+    if let Some(pending_execution) =
+        pending_writes.pre_submit(&device.command_allocator, device.raw(), queue.raw())?
+    {
        active_executions.insert(0, pending_execution);
    }

    let hal_command_buffers = active_executions
        .iter()
-        .flat_map(|e| e.cmd_buffers.iter())
+        .flat_map(|e| e.cmd_buffers.iter().map(|b| b.as_ref()))
        .collect::<Vec<_>>();

    {
        let mut submit_surface_textures =
-            SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len());
+            SmallVec::<[&dyn hal::DynSurfaceTexture; 2]>::with_capacity(
+                submit_surface_textures_owned.len(),
+            );

        for texture in submit_surface_textures_owned.values() {
-            submit_surface_textures.extend(match texture.inner.get(&snatch_guard) {
+            let raw = match texture.inner.get(&snatch_guard) {
                Some(TextureInner::Surface { raw, .. }) => raw.as_ref(),
-                _ => None,
-            });
+                _ => unreachable!(),
+            };
+            submit_surface_textures.push(raw);
        }

        unsafe {
            queue
-                .raw
-                .as_ref()
-                .unwrap()
+                .raw()
                .submit(
                    &hal_command_buffers,
                    &submit_surface_textures,
-                    (fence, submit_index),
+                    (fence.as_mut(), submit_index),
                )
                .map_err(DeviceError::from)?;
        }
+
+        // Advance the successful submission index.
+        device
+            .last_successful_submission_index
+            .fetch_max(submit_index, Ordering::SeqCst);
    }

    profiling::scope!("cleanup");

    // this will register the new submission to the life time tracker
-    let mut pending_write_resources = mem::take(&mut pending_writes.temp_resources);
    device.lock_life().track_submission(
        submit_index,
-        pending_write_resources.drain(..),
+        pending_writes.temp_resources.drain(..),
        active_executions,
    );
-
-    // pending_write_resources has been drained, so it's empty, but we
-    // want to retain its heap allocation.
-    pending_writes.temp_resources = pending_write_resources;
-    drop(pending_writes_guard);
+    drop(pending_writes);

    // This will schedule destruction of all resources that are no longer needed
    // by the user but used in the command stream, among other things.
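`fetch_max`, rather than a plain store, is what keeps `last_successful_submission_index` monotonic if two submissions race past each other. A reduced model of just that update:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

static LAST_SUCCESSFUL: AtomicU64 = AtomicU64::new(0);

fn record_successful_submission(submit_index: u64) {
    // A plain `store` could move the counter backwards if an older
    // submission reported success after a newer one; `fetch_max` cannot.
    LAST_SUCCESSFUL.fetch_max(submit_index, Ordering::SeqCst);
}
```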
-            let fence_guard = RwLockWriteGuard::downgrade(fence_guard);
+            let fence_guard = RwLockWriteGuard::downgrade(fence);
             let (closures, _) =
                 match device.maintain(fence_guard, wgt::Maintain::Poll, snatch_guard) {
                     Ok(closures) => closures,
@@ -1506,24 +1380,18 @@ impl Global {

         api_log!("Queue::submit to {queue_id:?} returned submit index {submit_index}");

-        Ok(WrappedSubmissionIndex {
-            queue_id,
-            index: submit_index,
-        })
+        Ok(submit_index)
     }

-    pub fn queue_get_timestamp_period<A: HalApi>(
-        &self,
-        queue_id: QueueId,
-    ) -> Result<f32, InvalidQueue> {
-        let hub = A::hub(self);
+    pub fn queue_get_timestamp_period(&self, queue_id: QueueId) -> Result<f32, InvalidQueue> {
+        let hub = &self.hub;
         match hub.queues.get(queue_id) {
-            Ok(queue) => Ok(unsafe { queue.raw.as_ref().unwrap().get_timestamp_period() }),
+            Ok(queue) => Ok(unsafe { queue.raw().get_timestamp_period() }),
             Err(_) => Err(InvalidQueue),
         }
     }

-    pub fn queue_on_submitted_work_done<A: HalApi>(
+    pub fn queue_on_submitted_work_done(
         &self,
         queue_id: QueueId,
         closure: SubmittedWorkDoneClosure,
@@ -1531,7 +1399,7 @@ impl Global {
         api_log!("Queue::on_submitted_work_done {queue_id:?}");

         //TODO: flush pending writes
-        let hub = A::hub(self);
+        let hub = &self.hub;
         match hub.queues.get(queue_id) {
             Ok(queue) => queue.device.lock_life().add_work_done_closure(closure),
             Err(_) => return Err(InvalidQueue),
diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs
index 5e80ce52f8..3d1e102b6d 100644
--- a/wgpu-core/src/device/ray_tracing.rs
+++ b/wgpu-core/src/device/ray_tracing.rs
@@ -1,46 +1,48 @@
+use std::mem::ManuallyDrop;
+use std::sync::Arc;
+
+use hal::AccelerationStructureTriangleIndices;
+
 #[cfg(feature = "trace")]
 use crate::device::trace;
+use crate::lock::rank;
+use crate::resource::TrackingData;
 use crate::{
     device::{queue::TempResource, Device, DeviceError},
     global::Global,
-    hal_api::HalApi,
     id::{self, BlasId, TlasId},
     lock::RwLock,
     ray_tracing::{get_raw_tlas_instance_size, CreateBlasError, CreateTlasError},
     resource, LabelHelpers,
 };
-use std::sync::Arc;

-use crate::lock::rank;
-use crate::resource::{Trackable, TrackingData};
-use hal::{AccelerationStructureTriangleIndices, Device as _};
-
-impl<A: HalApi> Device<A> {
+impl Device {
     fn create_blas(
         self: &Arc<Self>,
-        self_id: id::DeviceId,
         blas_desc: &resource::BlasDescriptor,
         sizes: wgt::BlasGeometrySizeDescriptors,
-    ) -> Result<Arc<resource::Blas<A>>, CreateBlasError> {
-        debug_assert_eq!(self_id.backend(), A::VARIANT);
-
+    ) -> Result<Arc<resource::Blas>, CreateBlasError> {
         let size_info = match &sizes {
             wgt::BlasGeometrySizeDescriptors::Triangles { desc } => {
                 let mut entries =
-                    Vec::<hal::AccelerationStructureTriangles<A>>::with_capacity(desc.len());
+                    Vec::<hal::AccelerationStructureTriangles<dyn hal::DynBuffer>>::with_capacity(
+                        desc.len(),
+                    );
                 for x in desc {
                     if x.index_count.is_some() != x.index_format.is_some() {
                         return Err(CreateBlasError::MissingIndexData);
                     }
                     let indices =
                         x.index_count
-                            .map(|count| AccelerationStructureTriangleIndices::<A> {
+                            .map(|count| AccelerationStructureTriangleIndices::<
+                                dyn hal::DynBuffer,
+                            > {
                                 format: x.index_format.unwrap(),
                                 buffer: None,
                                 offset: 0,
                                 count,
                             });
-                    entries.push(hal::AccelerationStructureTriangles::<A> {
+                    entries.push(hal::AccelerationStructureTriangles::<dyn hal::DynBuffer> {
                         vertex_buffer: None,
                         vertex_format: x.vertex_format,
                         first_vertex: 0,
@@ -72,10 +74,13 @@ impl<A: HalApi> Device<A> {
         }
         .map_err(DeviceError::from)?;

-        let handle = unsafe { self.raw().get_acceleration_structure_device_address(&raw) };
+        let handle = unsafe {
+            self.raw()
+                .get_acceleration_structure_device_address(raw.as_ref())
+        };

         Ok(Arc::new(resource::Blas {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             size_info,
             sizes,
@@ -90,11 +95,8 @@ impl<A: HalApi> Device<A> {

     fn create_tlas(
         self: &Arc<Self>,
-        self_id: id::DeviceId,
         desc: &resource::TlasDescriptor,
-    ) -> Result<Arc<resource::Tlas<A>>, CreateTlasError> {
-        debug_assert_eq!(self_id.backend(), A::VARIANT);
-
+    ) -> Result<Arc<resource::Tlas>, CreateTlasError> {
         let size_info = unsafe {
             self.raw().get_acceleration_structure_build_sizes(
                 &hal::GetAccelerationStructureBuildSizesDescriptor {
@@ -120,8 +122,8 @@ impl<A: HalApi> Device<A> {
         }
         .map_err(DeviceError::from)?;

-        let instance_buffer_size =
-            get_raw_tlas_instance_size::<A>() * std::cmp::max(desc.max_instances, 1) as usize;
+        let instance_buffer_size = get_raw_tlas_instance_size(self.backend())
+            * std::cmp::max(desc.max_instances, 1) as usize;
         let instance_buffer = unsafe {
             self.raw().create_buffer(&hal::BufferDescriptor {
                 label: Some("(wgpu-core) instances_buffer"),
@@ -134,14 +136,14 @@ impl<A: HalApi> Device<A> {
         .map_err(DeviceError::from)?;

         Ok(Arc::new(resource::Tlas {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             size_info,
             flags: desc.flags,
             update_mode: desc.update_mode,
             built_index: RwLock::new(rank::TLAS_BUILT_INDEX, None),
             dependencies: RwLock::new(rank::TLAS_DEPENDENCIES, Vec::new()),
-            instance_buffer: RwLock::new(rank::TLAS_INSTANCE_BUFFER, Some(instance_buffer)),
+            instance_buffer: ManuallyDrop::new(instance_buffer),
             label: desc.label.to_string(),
             max_instance_count: desc.max_instances,
             tracking_data: TrackingData::new(self.tracker_indices.tlas_s.clone()),
@@ -150,7 +152,7 @@ impl<A: HalApi> Device<A> {
 }

 impl Global {
-    pub fn device_create_blas<A: HalApi>(
+    pub fn device_create_blas(
         &self,
         device_id: id::DeviceId,
         desc: &resource::BlasDescriptor,
@@ -159,8 +161,8 @@ impl Global {
     ) -> (BlasId, Option<u64>, Option<CreateBlasError>) {
         profiling::scope!("Device::create_blas");

-        let hub = A::hub(self);
-        let fid = hub.blas_s.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.blas_s.prepare(device_id.backend(), id_in);

         let device_guard = hub.devices.read();
         let error = 'error: {
@@ -181,7 +183,7 @@ impl Global {
                 });
             }

-            let blas = match device.create_blas(device_id, desc, sizes) {
+            let blas = match device.create_blas(desc, sizes) {
                 Ok(blas) => blas,
                 Err(e) => break 'error e,
             };
@@ -197,7 +199,7 @@ impl Global {
         (id, None, Some(error))
     }

-    pub fn device_create_tlas<A: HalApi>(
+    pub fn device_create_tlas(
         &self,
         device_id: id::DeviceId,
         desc: &resource::TlasDescriptor,
@@ -205,8 +207,8 @@ impl Global {
     ) -> (TlasId, Option<CreateTlasError>) {
         profiling::scope!("Device::create_tlas");

-        let hub = A::hub(self);
-        let fid = hub.tlas_s.prepare(id_in);
+        let hub = &self.hub;
+        let fid = hub.tlas_s.prepare(device_id.backend(), id_in);

         let device_guard = hub.devices.read();
         let error = 'error: {
@@ -222,7 +224,7 @@ impl Global {
                 });
             }

-            let tlas = match device.create_tlas(device_id, desc) {
+            let tlas = match device.create_tlas(desc) {
                 Ok(tlas) => tlas,
                 Err(e) => break 'error e,
             };
@@ -237,10 +239,10 @@ impl Global {
         (id, Some(error))
     }

-    pub fn blas_destroy<A: HalApi>(&self, blas_id: BlasId) -> Result<(), resource::DestroyError> {
+    pub fn blas_destroy(&self, blas_id: BlasId) -> Result<(), resource::DestroyError> {
         profiling::scope!("Blas::destroy");

-        let hub = A::hub(self);
+        let hub = &self.hub;

         log::info!("Blas {:?} is destroyed", blas_id);
         let blas_guard = hub.blas_s.write();
@@ -248,7 +250,7 @@ impl Global {
             .get(blas_id)
             .map_err(|_| resource::DestroyError::Invalid)?
.clone(); - + drop(blas_guard); let device = &blas.device; #[cfg(feature = "trace")] @@ -258,49 +260,51 @@ impl Global { let temp = TempResource::Blas(blas.clone()); { - let last_submit_index = blas.submission_index(); - drop(blas_guard); - device - .lock_life() - .schedule_resource_destruction(temp, last_submit_index); + let mut device_lock = device.lock_life(); + let last_submit_index = device_lock.get_blas_latest_submission_index(blas.as_ref()); + if let Some(last_submit_index) = last_submit_index { + device_lock.schedule_resource_destruction(temp, last_submit_index); + } } Ok(()) } - pub fn blas_drop(&self, blas_id: BlasId, wait: bool) { + pub fn blas_drop(&self, blas_id: BlasId) { profiling::scope!("Blas::drop"); log::debug!("blas {:?} is dropped", blas_id); - let hub = A::hub(self); - - if let Some(blas) = hub.blas_s.unregister(blas_id) { - let last_submit_index = blas.submission_index(); + let hub = &self.hub; - #[cfg(feature = "trace")] - if let Some(t) = blas.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyBlas(blas_id)); + let _blas = match hub.blas_s.unregister(blas_id) { + Some(blas) => blas, + None => { + return; } + }; - if wait { - match blas.device.wait_for_submit(last_submit_index) { - Ok(()) => (), - Err(e) => log::error!("Failed to wait for blas {:?}: {:?}", blas_id, e), - } + #[cfg(feature = "trace")] + { + let mut lock = _blas.device.trace.lock(); + + if let Some(t) = lock.as_mut() { + t.add(trace::Action::DestroyBlas(blas_id)); } } } - pub fn tlas_destroy(&self, tlas_id: TlasId) -> Result<(), resource::DestroyError> { + pub fn tlas_destroy(&self, tlas_id: TlasId) -> Result<(), resource::DestroyError> { profiling::scope!("Tlas::destroy"); - let hub = A::hub(self); + let hub = &self.hub; log::info!("Tlas {:?} is destroyed", tlas_id); let tlas_guard = hub.tlas_s.write(); let tlas = tlas_guard .get(tlas_id) - .map_err(|_| resource::DestroyError::Invalid)?; + .map_err(|_| resource::DestroyError::Invalid)? 
+ .clone(); + drop(tlas_guard); let device = &mut tlas.device.clone(); @@ -311,35 +315,35 @@ impl Global { let temp = TempResource::Tlas(tlas.clone()); { - let last_submit_index = tlas.submission_index(); - drop(tlas_guard); - let guard = &mut device.lock_life(); - - guard.schedule_resource_destruction(temp, last_submit_index); + let mut device_lock = device.lock_life(); + let last_submit_index = device_lock.get_tlas_latest_submission_index(tlas.as_ref()); + if let Some(last_submit_index) = last_submit_index { + device_lock.schedule_resource_destruction(temp, last_submit_index); + } } Ok(()) } - pub fn tlas_drop(&self, tlas_id: TlasId, wait: bool) { + pub fn tlas_drop(&self, tlas_id: TlasId) { profiling::scope!("Tlas::drop"); log::debug!("tlas {:?} is dropped", tlas_id); - let hub = A::hub(self); - - if let Some(tlas) = hub.tlas_s.unregister(tlas_id) { - let last_submit_index = tlas.submission_index(); + let hub = &self.hub; - #[cfg(feature = "trace")] - if let Some(t) = tlas.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyTlas(tlas_id)); + let _tlas = match hub.tlas_s.unregister(tlas_id) { + Some(tlas) => tlas, + None => { + return; } + }; - if wait { - match tlas.device.wait_for_submit(last_submit_index) { - Ok(()) => (), - Err(e) => log::error!("Failed to wait for blas {:?}: {:?}", tlas_id, e), - } + #[cfg(feature = "trace")] + { + let mut lock = _tlas.device.trace.lock(); + + if let Some(t) = lock.as_mut() { + t.add(trace::Action::DestroyTlas(tlas_id)); } } } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index e0058ee3ea..49dc487b82 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -11,7 +11,6 @@ use crate::{ AttachmentData, DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures, RenderPassContext, CLEANUP_WAIT_MS, }, - hal_api::HalApi, hal_label, init_tracker::{ BufferInitTracker, BufferInitTrackerAction, MemoryInitKind, TextureInitRange, @@ -22,8 +21,8 @@ use crate::{ pipeline, pool::ResourcePool, resource::{ - self, Buffer, Labeled, ParentDevice, QuerySet, Sampler, Texture, TextureView, - TextureViewNotRenderableReason, TrackingData, + self, Buffer, Labeled, ParentDevice, QuerySet, Sampler, StagingBuffer, Texture, + TextureView, TextureViewNotRenderableReason, TrackingData, }, resource_log, snatch::{SnatchGuard, SnatchLock, Snatchable}, @@ -32,25 +31,20 @@ use crate::{ UsageScopePool, }, validation::{self, validate_color_attachment_bytes_per_sample}, - FastHashMap, LabelHelpers as _, SubmissionIndex, + FastHashMap, LabelHelpers as _, PreHashedKey, PreHashedMap, }; use arrayvec::ArrayVec; -use hal::{CommandEncoder as _, Device as _}; use once_cell::sync::OnceCell; use smallvec::SmallVec; use thiserror::Error; use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension}; -use super::{ - queue::{self, Queue}, - DeviceDescriptor, DeviceError, UserClosures, ENTRYPOINT_FAILURE_ERROR, ZERO_BUFFER_SIZE, -}; -use crate::resource::Tlas; +use crate::resource::{AccelerationStructure, Tlas}; use std::{ borrow::Cow, - iter, + mem::ManuallyDrop, num::NonZeroU32, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, @@ -58,6 +52,11 @@ use std::{ }, }; +use super::{ + queue::Queue, DeviceDescriptor, DeviceError, UserClosures, ENTRYPOINT_FAILURE_ERROR, + ZERO_BUFFER_SIZE, +}; + /// Structure describing a logical device. Some members are internally mutable, /// stored behind mutexes. 
 ///
@@ -78,21 +77,40 @@ use std::{
 /// Important:
 /// When locking pending_writes please check that trackers is not locked
 /// trackers should be locked only when needed for the shortest time possible
-pub struct Device<A: HalApi> {
-    raw: Option<A::Device>,
-    pub(crate) adapter: Arc<Adapter<A>>,
-    pub(crate) queue: OnceCell<Weak<Queue<A>>>,
-    queue_to_drop: OnceCell<A::Queue>,
-    pub(crate) zero_buffer: Option<A::Buffer>,
+pub struct Device {
+    raw: ManuallyDrop<Box<dyn hal::DynDevice>>,
+    pub(crate) adapter: Arc<Adapter>,
+    pub(crate) queue: OnceCell<Weak<Queue>>,
+    queue_to_drop: OnceCell<Box<dyn hal::DynQueue>>,
+    pub(crate) zero_buffer: ManuallyDrop<Box<dyn hal::DynBuffer>>,
     /// The `label` from the descriptor used to create the resource.
     label: String,
-    pub(crate) command_allocator: command::CommandAllocator<A>,
-    //Note: The submission index here corresponds to the last submission that is done.
-    pub(crate) active_submission_index: AtomicU64, //SubmissionIndex,
+    pub(crate) command_allocator: command::CommandAllocator,
+
+    /// The index of the last command submission that was attempted.
+    ///
+    /// Note that `fence` may never be signalled with this value, if the command
+    /// submission failed. If you need to wait for everything running on a
+    /// `Queue` to complete, wait for [`last_successful_submission_index`].
+    ///
+    /// [`last_successful_submission_index`]: Device::last_successful_submission_index
+    pub(crate) active_submission_index: hal::AtomicFenceValue,
+
+    /// The index of the last successful submission to this device's
+    /// [`hal::Queue`].
+    ///
+    /// Unlike [`active_submission_index`], which is incremented each time
+    /// submission is attempted, this is updated only when submission succeeds,
+    /// so waiting for this value won't hang waiting for work that was never
+    /// submitted.
+    ///
+    /// [`active_submission_index`]: Device::active_submission_index
+    pub(crate) last_successful_submission_index: hal::AtomicFenceValue,
+
     // NOTE: if both are needed, the `snatchable_lock` must be consistently acquired before the
     // `fence` lock to avoid deadlocks.
-    pub(crate) fence: RwLock<Option<A::Fence>>,
+    pub(crate) fence: RwLock<ManuallyDrop<Box<dyn hal::DynFence>>>,
     pub(crate) snatchable_lock: SnatchLock,
     /// Is this device valid? Valid is closely associated with "lose the device",
@@ -112,31 +130,31 @@ pub struct Device<A: HalApi> {
     ///
     /// Has to be locked temporarily only (locked last)
     /// and never before pending_writes
-    pub(crate) trackers: Mutex<DeviceTracker<A>>,
+    pub(crate) trackers: Mutex<DeviceTracker>,
     pub(crate) tracker_indices: TrackerIndexAllocators,
     // Life tracker should be locked right after the device and before anything else.
-    life_tracker: Mutex<LifetimeTracker<A>>,
+    life_tracker: Mutex<LifetimeTracker>,
     /// Pool of bind group layouts, allowing deduplication.
-    pub(crate) bgl_pool: ResourcePool<bgl::EntryMap, BindGroupLayout<A>>,
+    pub(crate) bgl_pool: ResourcePool<bgl::EntryMap, BindGroupLayout>,
     pub(crate) alignments: hal::Alignments,
     pub(crate) limits: wgt::Limits,
     pub(crate) features: wgt::Features,
     pub(crate) downlevel: wgt::DownlevelCapabilities,
     pub(crate) instance_flags: wgt::InstanceFlags,
-    pub(crate) pending_writes: Mutex<Option<PendingWrites<A>>>,
-    pub(crate) deferred_destroy: Mutex<Vec<DeferredDestroy<A>>>,
+    pub(crate) pending_writes: Mutex<ManuallyDrop<PendingWrites>>,
+    pub(crate) deferred_destroy: Mutex<Vec<DeferredDestroy>>,
     #[cfg(feature = "trace")]
     pub(crate) trace: Mutex<Option<trace::Trace>>,
-    pub(crate) usage_scopes: UsageScopePool<A>,
+    pub(crate) usage_scopes: UsageScopePool,
     pub(crate) last_acceleration_structure_build_command_index: AtomicU64,
 }

-pub(crate) enum DeferredDestroy<A: HalApi> {
-    TextureView(Weak<TextureView<A>>),
-    BindGroup(Weak<BindGroup<A>>),
+pub(crate) enum DeferredDestroy {
+    TextureView(Weak<TextureView>),
+    BindGroup(Weak<BindGroup>),
 }

-impl<A: HalApi> std::fmt::Debug for Device<A> {
+impl std::fmt::Debug for Device {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("Device")
             .field("label", &self.label())
@@ -147,16 +165,22 @@ impl<A: HalApi> std::fmt::Debug for Device<A> {
     }
 }

-impl<A: HalApi> Drop for Device<A> {
+impl Drop for Device {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
-        let raw = self.raw.take().unwrap();
-        let pending_writes = self.pending_writes.lock().take().unwrap();
-        pending_writes.dispose(&raw);
-        self.command_allocator.dispose(&raw);
+        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
+        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+        // SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point.
+        let zero_buffer = unsafe { ManuallyDrop::take(&mut self.zero_buffer) };
+        // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point.
+        let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
+        // SAFETY: We are in the Drop impl and we don't use self.fence anymore after this point.
+        let fence = unsafe { ManuallyDrop::take(&mut self.fence.write()) };
+        pending_writes.dispose(raw.as_ref());
+        self.command_allocator.dispose(raw.as_ref());
         unsafe {
-            raw.destroy_buffer(self.zero_buffer.take().unwrap());
-            raw.destroy_fence(self.fence.write().take().unwrap());
+            raw.destroy_buffer(zero_buffer);
+            raw.destroy_fence(fence);
             let queue = self.queue_to_drop.take().unwrap();
             raw.exit(queue);
         }
@@ -171,9 +195,9 @@ pub enum CreateDeviceError {
     FailedToCreateZeroBuffer(#[from] DeviceError),
 }

-impl<A: HalApi> Device<A> {
-    pub(crate) fn raw(&self) -> &A::Device {
-        self.raw.as_ref().unwrap()
+impl Device {
+    pub(crate) fn raw(&self) -> &dyn hal::DynDevice {
+        self.raw.as_ref()
     }
     pub(crate) fn require_features(&self, feature: wgt::Features) -> Result<(), MissingFeatures> {
         if self.features.contains(feature) {
@@ -195,11 +219,11 @@ impl<A: HalApi> Device<A> {
     }
 }

-impl<A: HalApi> Device<A> {
+impl Device {
     pub(crate) fn new(
-        raw_device: A::Device,
-        raw_queue: &A::Queue,
-        adapter: &Arc<Adapter<A>>,
+        raw_device: Box<dyn hal::DynDevice>,
+        raw_queue: &dyn hal::DynQueue,
+        adapter: &Arc<Adapter>,
         desc: &DeviceDescriptor,
         trace_path: Option<&std::path::Path>,
         instance_flags: wgt::InstanceFlags,
@@ -213,9 +237,9 @@ impl<A: HalApi> Device<A> {
         let command_allocator = command::CommandAllocator::new();
         let pending_encoder = command_allocator
-            .acquire_encoder(&raw_device, raw_queue)
+            .acquire_encoder(raw_device.as_ref(), raw_queue)
             .map_err(|_| CreateDeviceError::OutOfMemory)?;
-        let mut pending_writes = PendingWrites::<A>::new(pending_encoder);
+        let mut pending_writes = PendingWrites::new(pending_encoder);

         // Create zeroed buffer used for texture clears.
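The `ManuallyDrop` fields replacing the old `Option`s are what make the explicit teardown order in `Drop` possible without an `unwrap` at every use site. A toy version of the pattern (the field type is a stand-in, not wgpu-core's):

```rust
use std::mem::ManuallyDrop;

struct Owner {
    raw: ManuallyDrop<Vec<u8>>, // stands in for Box<dyn hal::DynDevice>
}

impl Drop for Owner {
    fn drop(&mut self) {
        // SAFETY: `self.raw` is never touched again after this point, so
        // moving it out exactly once is sound. This keeps by-value, ordered
        // teardown while avoiding Option's runtime checks everywhere else.
        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
        drop(raw);
    }
}
```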
         let zero_buffer = unsafe {
@@ -232,34 +256,35 @@ impl<A: HalApi> Device<A> {
         unsafe {
             pending_writes
                 .command_encoder
-                .transition_buffers(iter::once(hal::BufferBarrier {
-                    buffer: &zero_buffer,
+                .transition_buffers(&[hal::BufferBarrier {
+                    buffer: zero_buffer.as_ref(),
                     usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
-                }));
+                }]);
             pending_writes
                 .command_encoder
-                .clear_buffer(&zero_buffer, 0..ZERO_BUFFER_SIZE);
+                .clear_buffer(zero_buffer.as_ref(), 0..ZERO_BUFFER_SIZE);
             pending_writes
                 .command_encoder
-                .transition_buffers(iter::once(hal::BufferBarrier {
-                    buffer: &zero_buffer,
+                .transition_buffers(&[hal::BufferBarrier {
+                    buffer: zero_buffer.as_ref(),
                     usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC,
-                }));
+                }]);
         }

         let alignments = adapter.raw.capabilities.alignments.clone();
         let downlevel = adapter.raw.capabilities.downlevel.clone();

         Ok(Self {
-            raw: Some(raw_device),
+            raw: ManuallyDrop::new(raw_device),
             adapter: adapter.clone(),
             queue: OnceCell::new(),
             queue_to_drop: OnceCell::new(),
-            zero_buffer: Some(zero_buffer),
+            zero_buffer: ManuallyDrop::new(zero_buffer),
             label: desc.label.to_string(),
             command_allocator,
             active_submission_index: AtomicU64::new(0),
-            fence: RwLock::new(rank::DEVICE_FENCE, Some(fence)),
+            last_successful_submission_index: AtomicU64::new(0),
+            fence: RwLock::new(rank::DEVICE_FENCE, ManuallyDrop::new(fence)),
             snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) },
             valid: AtomicBool::new(true),
             trackers: Mutex::new(rank::DEVICE_TRACKERS, DeviceTracker::new()),
@@ -273,7 +298,7 @@ impl<A: HalApi> Device<A> {
                 Ok(mut trace) => {
                     trace.add(trace::Action::Init {
                         desc: desc.clone(),
-                        backend: A::VARIANT,
+                        backend: adapter.raw.backend(),
                     });
                     Some(trace)
                 }
@@ -288,13 +313,21 @@ impl<A: HalApi> Device<A> {
             features: desc.required_features,
             downlevel,
             instance_flags,
-            pending_writes: Mutex::new(rank::DEVICE_PENDING_WRITES, Some(pending_writes)),
+            pending_writes: Mutex::new(
+                rank::DEVICE_PENDING_WRITES,
+                ManuallyDrop::new(pending_writes),
+            ),
             deferred_destroy: Mutex::new(rank::DEVICE_DEFERRED_DESTROY, Vec::new()),
             usage_scopes: Mutex::new(rank::DEVICE_USAGE_SCOPES, Default::default()),
             last_acceleration_structure_build_command_index: AtomicU64::new(0),
         })
     }

+    /// Returns the backend this device is using.
+    pub fn backend(&self) -> wgt::Backend {
+        self.adapter.raw.backend()
+    }
+
     pub fn is_valid(&self) -> bool {
         self.valid.load(Ordering::Acquire)
     }
@@ -307,11 +340,11 @@ impl<A: HalApi> Device<A> {
         }
     }

-    pub(crate) fn release_queue(&self, queue: A::Queue) {
+    pub(crate) fn release_queue(&self, queue: Box<dyn hal::DynQueue>) {
         assert!(self.queue_to_drop.set(queue).is_ok());
     }

-    pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker<A>> {
+    pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> {
         self.life_tracker.lock()
     }

@@ -336,7 +369,6 @@ impl<A: HalApi> Device<A> {
             resource_log!("Destroy raw {}", view.error_ident());

             unsafe {
-                use hal::Device;
                 self.raw().destroy_texture_view(raw_view);
             }
         }
@@ -352,7 +384,6 @@ impl<A: HalApi> Device<A> {
             resource_log!("Destroy raw {}", bind_group.error_ident());

             unsafe {
-                use hal::Device;
                 self.raw().destroy_bind_group(raw_bind_group);
             }
         }
@@ -360,11 +391,11 @@ impl<A: HalApi> Device<A> {
         }
     }

-    pub fn get_queue(&self) -> Option<Arc<Queue<A>>> {
+    pub fn get_queue(&self) -> Option<Arc<Queue>> {
         self.queue.get().as_ref()?.upgrade()
     }

-    pub fn set_queue(&self, queue: &Arc<Queue<A>>) {
+    pub fn set_queue(&self, queue: &Arc<Queue>) {
         assert!(self.queue.set(Arc::downgrade(queue)).is_ok());
     }

@@ -383,44 +414,53 @@ impl<A: HalApi> Device<A> {
     /// return it to our callers.)
     pub(crate) fn maintain<'this>(
         &'this self,
-        fence_guard: crate::lock::RwLockReadGuard<Option<A::Fence>>,
-        maintain: wgt::Maintain<queue::WrappedSubmissionIndex>,
+        fence: crate::lock::RwLockReadGuard<ManuallyDrop<Box<dyn hal::DynFence>>>,
+        maintain: wgt::Maintain<crate::SubmissionIndex>,
         snatch_guard: SnatchGuard,
     ) -> Result<(UserClosures, bool), WaitIdleError> {
         profiling::scope!("Device::maintain");
-        let fence = fence_guard.as_ref().unwrap();
-        let last_done_index = if maintain.is_wait() {
-            let index_to_wait_for = match maintain {
-                wgt::Maintain::WaitForSubmissionIndex(submission_index) => {
-                    // We don't need to check to see if the queue id matches
-                    // as we already checked this from inside the poll call.
-                    submission_index.index
+        // Determine which submission index `maintain` represents.
+        let submission_index = match maintain {
+            wgt::Maintain::WaitForSubmissionIndex(submission_index) => {
+                let last_successful_submission_index = self
+                    .last_successful_submission_index
+                    .load(Ordering::Acquire);
+
+                if submission_index > last_successful_submission_index {
+                    return Err(WaitIdleError::WrongSubmissionIndex(
+                        submission_index,
+                        last_successful_submission_index,
+                    ));
                 }
-                _ => self.active_submission_index.load(Ordering::Relaxed),
-            };
-            unsafe {
-                self.raw
-                    .as_ref()
-                    .unwrap()
-                    .wait(fence, index_to_wait_for, CLEANUP_WAIT_MS)
+
+                submission_index
+            }
+            wgt::Maintain::Wait => self
+                .last_successful_submission_index
+                .load(Ordering::Acquire),
+            wgt::Maintain::Poll => unsafe {
+                self.raw()
+                    .get_fence_value(fence.as_ref())
                     .map_err(DeviceError::from)?
-            };
-            index_to_wait_for
-        } else {
+            },
+        };
+
+        // If necessary, wait for that submission to complete.
+        if maintain.is_wait() {
             unsafe {
-                self.raw
-                    .as_ref()
-                    .unwrap()
-                    .get_fence_value(fence)
+                self.raw()
+                    .wait(fence.as_ref(), submission_index, CLEANUP_WAIT_MS)
                     .map_err(DeviceError::from)?
-            }
-        };
-        log::info!("Device::maintain: last done index {last_done_index}");
+            };
+        }
+        log::trace!("Device::maintain: waiting for submission index {submission_index}");

         let mut life_tracker = self.lock_life();
         let submission_closures =
-            life_tracker.triage_submissions(last_done_index, &self.command_allocator);
+            life_tracker.triage_submissions(submission_index, &self.command_allocator);

         life_tracker.triage_mapped();

@@ -453,7 +493,7 @@ impl<A: HalApi> Device<A> {
         // Don't hold the locks while calling release_gpu_resources.
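Condensed, the dispatch above picks a wait target as follows: an explicit index is rejected if it was never successfully submitted, `Wait` targets the last successful submission (never an attempted-but-failed one), and `Poll` just reads the fence. A hedged model with simplified types:

```rust
enum Maintain {
    Poll,
    Wait,
    WaitForSubmissionIndex(u64),
}

fn wait_target(
    maintain: &Maintain,
    last_successful: u64, // models Device::last_successful_submission_index
    fence_value: u64,     // models the device fence's current value
) -> Result<u64, String> {
    match maintain {
        Maintain::WaitForSubmissionIndex(i) if *i > last_successful => Err(format!(
            "submission {i} was never successfully submitted; waiting for it would hang"
        )),
        Maintain::WaitForSubmissionIndex(i) => Ok(*i),
        Maintain::Wait => Ok(last_successful),
        Maintain::Poll => Ok(fence_value),
    }
}
```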
         drop(life_tracker);
-        drop(fence_guard);
+        drop(fence);
         drop(snatch_guard);

         if should_release_gpu_resource {
@@ -471,7 +511,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_buffer(
         self: &Arc<Self>,
         desc: &resource::BufferDescriptor,
-    ) -> Result<Arc<Buffer<A>>, resource::CreateBufferError> {
+    ) -> Result<Arc<Buffer>, resource::CreateBufferError> {
         self.check_is_valid()?;

         if desc.size > self.limits.max_buffer_size {
@@ -559,7 +599,6 @@ impl<A: HalApi> Device<A> {
                 rank::BUFFER_INITIALIZATION_STATUS,
                 BufferInitTracker::new(aligned_size),
             ),
-            sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None),
             map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle),
             label: desc.label.to_string(),
             tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()),
@@ -573,8 +612,11 @@ impl<A: HalApi> Device<A> {
         } else if desc.usage.contains(wgt::BufferUsages::MAP_WRITE) {
             // buffer is mappable, so we are just doing that at start
             let map_size = buffer.size;
-            let ptr = if map_size == 0 {
-                std::ptr::NonNull::dangling()
+            let mapping = if map_size == 0 {
+                hal::BufferMapping {
+                    ptr: std::ptr::NonNull::dangling(),
+                    is_coherent: true,
+                }
             } else {
                 let snatch_guard: SnatchGuard = self.snatchable_lock.read();
                 map_buffer(
@@ -587,24 +629,21 @@ impl<A: HalApi> Device<A> {
                 )?
             };
             *buffer.map_state.lock() = resource::BufferMapState::Active {
-                ptr,
+                mapping,
                 range: 0..map_size,
                 host: HostMap::Write,
             };
             hal::BufferUses::MAP_WRITE
         } else {
-            let (staging_buffer, staging_buffer_ptr) =
-                queue::prepare_staging_buffer(self, desc.size, self.instance_flags)?;
+            let mut staging_buffer =
+                StagingBuffer::new(self, wgt::BufferSize::new(aligned_size).unwrap())?;

             // Zero initialize memory and then mark the buffer as initialized
             // (it's guaranteed that this is the case by the time the buffer is usable)
-            unsafe { std::ptr::write_bytes(staging_buffer_ptr.as_ptr(), 0, buffer.size as usize) };
-            buffer.initialization_status.write().drain(0..buffer.size);
+            staging_buffer.write_zeros();
+            buffer.initialization_status.write().drain(0..aligned_size);

-            *buffer.map_state.lock() = resource::BufferMapState::Init {
-                staging_buffer,
-                ptr: staging_buffer_ptr,
-            };
+            *buffer.map_state.lock() = resource::BufferMapState::Init { staging_buffer };
             hal::BufferUses::COPY_DST
         };

@@ -618,9 +657,9 @@ impl<A: HalApi> Device<A> {

     pub(crate) fn create_texture_from_hal(
         self: &Arc<Self>,
-        hal_texture: A::Texture,
+        hal_texture: Box<dyn hal::DynTexture>,
         desc: &resource::TextureDescriptor,
-    ) -> Result<Arc<Texture<A>>, resource::CreateTextureError> {
+    ) -> Result<Arc<Texture>, resource::CreateTextureError> {
         let format_features = self
             .describe_format_features(desc.format)
             .map_err(|error| resource::CreateTextureError::MissingFeatures(desc.format, error))?;
@@ -647,9 +686,9 @@ impl<A: HalApi> Device<A> {

     pub fn create_buffer_from_hal(
         self: &Arc<Self>,
-        hal_buffer: A::Buffer,
+        hal_buffer: Box<dyn hal::DynBuffer>,
         desc: &resource::BufferDescriptor,
-    ) -> Arc<Buffer<A>> {
+    ) -> Arc<Buffer> {
         let buffer = Buffer {
             raw: Snatchable::new(hal_buffer),
             device: self.clone(),
@@ -659,7 +698,6 @@ impl<A: HalApi> Device<A> {
                 rank::BUFFER_INITIALIZATION_STATUS,
                 BufferInitTracker::new(0),
             ),
-            sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None),
             map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle),
             label: desc.label.to_string(),
             tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()),
@@ -679,7 +717,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_texture(
         self: &Arc<Self>,
         desc: &resource::TextureDescriptor,
-    ) -> Result<Arc<Texture<A>>, resource::CreateTextureError> {
+    ) -> Result<Arc<Texture>, resource::CreateTextureError> {
         use resource::{CreateTextureError, TextureDimensionError};

         self.check_is_valid()?;

@@ -710,8 +748,12 @@ impl<A: HalApi> Device<A> {
                     desc.dimension,
                 ));
             }
+        }

-            // Compressed textures can only be 2D
+        if desc.dimension != wgt::TextureDimension::D2
+            && desc.dimension != wgt::TextureDimension::D3
+        {
+            // Compressed textures can only be 2D or 3D
             if desc.format.is_compressed() {
                 return Err(CreateTextureError::InvalidCompressedDimension(
                     desc.dimension,
@@ -742,6 +784,19 @@ impl<A: HalApi> Device<A> {
                     },
                 ));
             }
+
+            if desc.dimension == wgt::TextureDimension::D3 {
+                // Only BCn formats with Sliced 3D feature can be used for 3D textures
+                if desc.format.is_bcn() {
+                    self.require_features(wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D)
+                        .map_err(|error| CreateTextureError::MissingFeatures(desc.format, error))?;
+                } else {
+                    return Err(CreateTextureError::InvalidCompressedDimension(
+                        desc.dimension,
+                        desc.format,
+                    ));
+                }
+            }
         }

         {
@@ -877,9 +932,7 @@ impl<A: HalApi> Device<A> {
         };

         let raw_texture = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_texture(&hal_desc)
                 .map_err(DeviceError::from)?
         };
@@ -921,9 +974,11 @@ impl<A: HalApi> Device<A> {
                             array_layer_count: Some(1),
                         },
                     };
-                    clear_views.push(Some(
-                        unsafe { self.raw().create_texture_view(&raw_texture, &desc) }
-                            .map_err(DeviceError::from)?,
+                    clear_views.push(ManuallyDrop::new(
+                        unsafe {
+                            self.raw().create_texture_view(raw_texture.as_ref(), &desc)
+                        }
+                        .map_err(DeviceError::from)?,
                     ));
                 };
             }
@@ -969,9 +1024,11 @@ impl<A: HalApi> Device<A> {

     pub(crate) fn create_texture_view(
         self: &Arc<Self>,
-        texture: &Arc<Texture<A>>,
+        texture: &Arc<Texture>,
         desc: &resource::TextureViewDescriptor,
-    ) -> Result<Arc<TextureView<A>>, resource::CreateTextureViewError> {
+    ) -> Result<Arc<TextureView>, resource::CreateTextureViewError> {
+        self.check_is_valid()?;
+
         let snatch_guard = texture.device.snatchable_lock.read();

         let texture_raw = texture.try_raw(&snatch_guard)?;
@@ -1204,12 +1261,6 @@ impl<A: HalApi> Device<A> {
             texture.hal_usage & mask_copy & mask_dimension & mask_mip_level
         };

-        log::debug!(
-            "Create view for {} filters usages to {:?}",
-            texture.error_ident(),
-            usage
-        );
-
         // use the combined depth-stencil format for the view
         let format = if resolved_format.is_depth_stencil_component(texture.desc.format) {
             texture.desc.format
@@ -1234,9 +1285,7 @@ impl<A: HalApi> Device<A> {
         };

         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_texture_view(texture_raw, &hal_desc)
                 .map_err(|_| resource::CreateTextureViewError::OutOfMemory)?
         };
@@ -1281,7 +1330,7 @@ impl<A: HalApi> Device<A> {

     pub(crate) fn create_sampler(
         self: &Arc<Self>,
         desc: &resource::SamplerDescriptor,
-    ) -> Result<Arc<Sampler<A>>, resource::CreateSamplerError> {
+    ) -> Result<Arc<Sampler>, resource::CreateSamplerError> {
         self.check_is_valid()?;

         if desc
@@ -1371,21 +1420,20 @@ impl<A: HalApi> Device<A> {
         };

         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_sampler(&hal_desc)
                 .map_err(DeviceError::from)?
         };

         let sampler = Sampler {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             label: desc.label.to_string(),
             tracking_data: TrackingData::new(self.tracker_indices.samplers.clone()),
             comparison: desc.compare.is_some(),
             filtering: desc.min_filter == wgt::FilterMode::Linear
-                || desc.mag_filter == wgt::FilterMode::Linear,
+                || desc.mag_filter == wgt::FilterMode::Linear
+                || desc.mipmap_filter == wgt::FilterMode::Linear,
         };

         let sampler = Arc::new(sampler);
@@ -1397,7 +1445,7 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         desc: &pipeline::ShaderModuleDescriptor<'a>,
         source: pipeline::ShaderModuleSource<'a>,
-    ) -> Result<pipeline::ShaderModule<A>, pipeline::CreateShaderModuleError> {
+    ) -> Result<Arc<pipeline::ShaderModule>, pipeline::CreateShaderModuleError> {
         self.check_is_valid()?;

         let (module, source) = match source {
@@ -1494,12 +1542,7 @@ impl<A: HalApi> Device<A> {
             label: desc.label.to_hal(self.instance_flags),
             runtime_checks: desc.shader_bound_checks.runtime_checks(),
         };
-        let raw = match unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .create_shader_module(&hal_desc, hal_shader)
-        } {
+        let raw = match unsafe { self.raw().create_shader_module(&hal_desc, hal_shader) } {
             Ok(raw) => raw,
             Err(error) => {
                 return Err(match error {
@@ -1514,12 +1557,16 @@ impl<A: HalApi> Device<A> {
             }
         };

-        Ok(pipeline::ShaderModule {
-            raw: Some(raw),
+        let module = pipeline::ShaderModule {
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             interface: Some(interface),
             label: desc.label.to_string(),
-        })
+        };
+
+        let module = Arc::new(module);
+
+        Ok(module)
     }

     #[allow(unused_unsafe)]
@@ -1527,7 +1574,7 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         desc: &pipeline::ShaderModuleDescriptor<'a>,
         source: &'a [u32],
-    ) -> Result<pipeline::ShaderModule<A>, pipeline::CreateShaderModuleError> {
+    ) -> Result<Arc<pipeline::ShaderModule>, pipeline::CreateShaderModuleError> {
         self.check_is_valid()?;

         self.require_features(wgt::Features::SPIRV_SHADER_PASSTHROUGH)?;
@@ -1536,12 +1583,7 @@ impl<A: HalApi> Device<A> {
             runtime_checks: desc.shader_bound_checks.runtime_checks(),
         };
         let hal_shader = hal::ShaderInput::SpirV(source);
-        let raw = match unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .create_shader_module(&hal_desc, hal_shader)
-        } {
+        let raw = match unsafe { self.raw().create_shader_module(&hal_desc, hal_shader) } {
             Ok(raw) => raw,
             Err(error) => {
                 return Err(match error {
@@ -1556,40 +1598,42 @@ impl<A: HalApi> Device<A> {
             }
         };

-        Ok(pipeline::ShaderModule {
-            raw: Some(raw),
+        let module = pipeline::ShaderModule {
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             interface: None,
             label: desc.label.to_string(),
-        })
+        };
+
+        let module = Arc::new(module);
+
+        Ok(module)
     }

     pub(crate) fn create_command_encoder(
         self: &Arc<Self>,
         label: &crate::Label,
-    ) -> Result<command::CommandBuffer<A>, DeviceError> {
+    ) -> Result<Arc<command::CommandBuffer>, DeviceError> {
         self.check_is_valid()?;

         let queue = self.get_queue().unwrap();

         let encoder = self
             .command_allocator
-            .acquire_encoder(self.raw(), queue.raw.as_ref().unwrap())?;
+            .acquire_encoder(self.raw(), queue.raw())?;

-        Ok(command::CommandBuffer::new(
-            encoder,
-            self,
-            #[cfg(feature = "trace")]
-            self.trace.lock().is_some(),
-            label,
-        ))
+        let command_buffer = command::CommandBuffer::new(encoder, self, label);
+
+        let command_buffer = Arc::new(command_buffer);
+
+        Ok(command_buffer)
     }

     /// Generate information about late-validated buffer bindings for pipelines.
     //TODO: should this be combined with `get_introspection_bind_group_layouts` in some way?
     pub(crate) fn make_late_sized_buffer_groups(
         shader_binding_sizes: &FastHashMap<naga::ResourceBinding, wgt::BufferSize>,
-        layout: &binding_model::PipelineLayout<A>,
+        layout: &binding_model::PipelineLayout,
     ) -> ArrayVec<pipeline::LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }> {
         // Given the shader-required binding sizes and the pipeline layout,
         // return the filtered list of them in the layout order,
@@ -1627,7 +1671,7 @@ impl<A: HalApi> Device<A> {
         label: &crate::Label,
         entry_map: bgl::EntryMap,
         origin: bgl::Origin,
-    ) -> Result<BindGroupLayout<A>, binding_model::CreateBindGroupLayoutError> {
+    ) -> Result<Arc<BindGroupLayout>, binding_model::CreateBindGroupLayoutError> {
         #[derive(PartialEq)]
         enum WritableStorage {
             Yes,
@@ -1809,9 +1853,7 @@ impl<A: HalApi> Device<A> {
             entries: &hal_bindings,
         };
         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_bind_group_layout(&hal_desc)
                 .map_err(DeviceError::from)?
         };
@@ -1826,30 +1868,34 @@ impl<A: HalApi> Device<A> {
             .validate(&self.limits)
             .map_err(binding_model::CreateBindGroupLayoutError::TooManyBindings)?;

-        Ok(BindGroupLayout {
-            raw: Some(raw),
+        let bgl = BindGroupLayout {
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             entries: entry_map,
             origin,
             exclusive_pipeline: OnceCell::new(),
             binding_count_validator: count_validator,
             label: label.to_string(),
-            tracking_data: TrackingData::new(self.tracker_indices.bind_group_layouts.clone()),
-        })
+        };
+
+        let bgl = Arc::new(bgl);
+
+        Ok(bgl)
     }

     pub(crate) fn create_buffer_binding<'a>(
         self: &Arc<Self>,
-        bb: &'a binding_model::ResolvedBufferBinding<A>,
+        bb: &'a binding_model::ResolvedBufferBinding,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        used_buffer_ranges: &mut Vec<BufferInitTrackerAction<A>>,
+        used_buffer_ranges: &mut Vec<BufferInitTrackerAction>,
         dynamic_binding_info: &mut Vec<binding_model::BindGroupDynamicBindingData>,
         late_buffer_binding_sizes: &mut FastHashMap<u32, wgt::BufferSize>,
-        used: &mut BindGroupStates<A>,
+        used: &mut BindGroupStates,
         limits: &wgt::Limits,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> Result<hal::BufferBinding<'a, A>, binding_model::CreateBindGroupError> {
+    ) -> Result<hal::BufferBinding<'a, dyn hal::DynBuffer>, binding_model::CreateBindGroupError>
+    {
         use crate::binding_model::CreateBindGroupError as Error;

         let (binding_ty, dynamic, min_size) = match decl.ty {
@@ -1896,7 +1942,7 @@ impl<A: HalApi> Device<A> {

         let buffer = &bb.buffer;

-        used.buffers.add_single(buffer, internal_use);
+        used.buffers.insert_single(buffer.clone(), internal_use);

         buffer.same_device(self)?;

@@ -1977,14 +2023,14 @@ impl<A: HalApi> Device<A> {

     fn create_sampler_binding<'a>(
         self: &Arc<Self>,
-        used: &BindGroupStates<A>,
+        used: &mut BindGroupStates,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        sampler: &'a Arc<Sampler<A>>,
-    ) -> Result<&'a A::Sampler, binding_model::CreateBindGroupError> {
+        sampler: &'a Arc<Sampler>,
+    ) -> Result<&'a dyn hal::DynSampler, binding_model::CreateBindGroupError> {
         use crate::binding_model::CreateBindGroupError as Error;

-        used.samplers.add_single(sampler);
+        used.samplers.insert_single(sampler.clone());

         sampler.same_device(self)?;

@@ -2028,13 +2074,12 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        view: &'a Arc<TextureView<A>>,
-        used: &mut BindGroupStates<A>,
-        used_texture_ranges: &mut Vec<TextureInitTrackerAction<A>>,
+        view: &'a Arc<TextureView>,
+        used: &mut BindGroupStates,
+        used_texture_ranges: &mut Vec<TextureInitTrackerAction>,
         snatch_guard: &'a SnatchGuard<'a>,
-    ) -> Result<hal::TextureBinding<'a, A>, binding_model::CreateBindGroupError> {
-        used.views.add_single(view);
-
+    ) -> Result<hal::TextureBinding<'a, dyn hal::DynTextureView>, binding_model::CreateBindGroupError>
+    {
         view.same_device(self)?;

         let (pub_usage, internal_use) = self.texture_use_parameters(
             binding,
             decl,
             view,
             "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture",
         )?;
-        let texture = &view.parent;
-        // Careful here: the texture may no longer have its own ref count,
-        // if it was deleted by the user.
-        used.textures
-            .add_single(texture, Some(view.selector.clone()), internal_use);
-        texture.same_device_as(view.as_ref())?;
+        used.views.insert_single(view.clone(), internal_use);
+        let texture = &view.parent;
         texture.check_usage(pub_usage)?;

         used_texture_ranges.push(TextureInitTrackerAction {
@@ -2073,14 +2114,14 @@ impl<A: HalApi> Device<A> {

     fn create_tlas_binding<'a>(
         self: &Arc<Self>,
-        used: &BindGroupStates<A>,
+        used: &mut BindGroupStates,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        tlas: &'a Arc<Tlas<A>>,
-    ) -> Result<&'a A::AccelerationStructure, binding_model::CreateBindGroupError> {
+        tlas: &'a Arc<Tlas>,
+    ) -> Result<&'a dyn hal::DynAccelerationStructure, binding_model::CreateBindGroupError> {
         use crate::binding_model::CreateBindGroupError as Error;

-        used.acceleration_structures.add_single(tlas);
+        used.acceleration_structures.insert_single(tlas.clone());

         tlas.same_device(self)?;

@@ -2102,8 +2143,8 @@ impl<A: HalApi> Device<A> {
     // (not passing a duplicate) beforehand.
     pub(crate) fn create_bind_group(
         self: &Arc<Self>,
-        desc: binding_model::ResolvedBindGroupDescriptor<A>,
-    ) -> Result<Arc<BindGroup<A>>, binding_model::CreateBindGroupError> {
+        desc: binding_model::ResolvedBindGroupDescriptor,
+    ) -> Result<Arc<BindGroup>, binding_model::CreateBindGroupError> {
         use crate::binding_model::{CreateBindGroupError as Error, ResolvedBindingResource as Br};

         let layout = desc.layout;
@@ -2186,7 +2227,7 @@ impl<A: HalApi> Device<A> {
                     (res_index, num_bindings)
                 }
                 Br::Sampler(ref sampler) => {
-                    let sampler = self.create_sampler_binding(&used, binding, decl, sampler)?;
+                    let sampler = self.create_sampler_binding(&mut used, binding, decl, sampler)?;

                     let res_index = hal_samplers.len();
                     hal_samplers.push(sampler);
@@ -2198,7 +2239,8 @@ impl<A: HalApi> Device<A> {
                     let res_index = hal_samplers.len();
                     for sampler in samplers.iter() {
-                        let sampler = self.create_sampler_binding(&used, binding, decl, sampler)?;
+                        let sampler =
+                            self.create_sampler_binding(&mut used, binding, decl, sampler)?;

                         hal_samplers.push(sampler);
                     }
@@ -2239,7 +2281,7 @@ impl<A: HalApi> Device<A> {
                     (res_index, num_bindings)
                 }
                 Br::AccelerationStructure(ref tlas) => {
-                    let tlas = self.create_tlas_binding(&used, binding, decl, tlas)?;
+                    let tlas = self.create_tlas_binding(&mut used, binding, decl, tlas)?;
                     let res_index = hal_tlas_s.len();
                     hal_tlas_s.push(tlas);
                     (res_index, 1)
@@ -2271,9 +2313,7 @@ impl<A: HalApi> Device<A> {
             acceleration_structures: &hal_tlas_s,
         };
         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_bind_group(&hal_desc)
                 .map_err(DeviceError::from)?
         };
@@ -2358,7 +2398,7 @@ impl<A: HalApi> Device<A> {
         self: &Arc<Self>,
         binding: u32,
         decl: &wgt::BindGroupLayoutEntry,
-        view: &TextureView<A>,
+        view: &TextureView,
         expected: &'static str,
     ) -> Result<(wgt::TextureUsages, hal::TextureUses), binding_model::CreateBindGroupError> {
         use crate::binding_model::CreateBindGroupError as Error;
@@ -2390,14 +2430,14 @@ impl<A: HalApi> Device<A> {
                     .unwrap();
                 match (sample_type, compat_sample_type) {
                     (Tst::Uint, Tst::Uint) |
-                        (Tst::Sint, Tst::Sint) |
-                        (Tst::Depth, Tst::Depth) |
-                        // if we expect non-filterable, accept anything float
-                        (Tst::Float { filterable: false }, Tst::Float { .. }) |
-                        // if we expect filterable, require it
-                        (Tst::Float { filterable: true }, Tst::Float { filterable: true }) |
-                        // if we expect non-filterable, also accept depth
-                        (Tst::Float { filterable: false }, Tst::Depth) => {}
+                    (Tst::Sint, Tst::Sint) |
+                    (Tst::Depth, Tst::Depth) |
+                    // if we expect non-filterable, accept anything float
+                    (Tst::Float { filterable: false }, Tst::Float { .. }) |
+                    // if we expect filterable, require it
+                    (Tst::Float { filterable: true }, Tst::Float { filterable: true }) |
+                    // if we expect non-filterable, also accept depth
+                    (Tst::Float { filterable: false }, Tst::Depth) => {}
                     // if we expect filterable, also accept Float that is defined as
                     // unfilterable if filterable feature is explicitly enabled (only hit
                     // if wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES is
@@ -2487,8 +2527,8 @@ impl<A: HalApi> Device<A> {

     pub(crate) fn create_pipeline_layout(
         self: &Arc<Self>,
-        desc: &binding_model::ResolvedPipelineLayoutDescriptor<A>,
-    ) -> Result<binding_model::PipelineLayout<A>, binding_model::CreatePipelineLayoutError> {
+        desc: &binding_model::ResolvedPipelineLayoutDescriptor,
+    ) -> Result<Arc<binding_model::PipelineLayout>, binding_model::CreatePipelineLayoutError> {
         use crate::binding_model::CreatePipelineLayoutError as Error;

         self.check_is_valid()?;
@@ -2571,29 +2611,30 @@ impl<A: HalApi> Device<A> {
         };

         let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
+            self.raw()
                 .create_pipeline_layout(&hal_desc)
                 .map_err(DeviceError::from)?
         };

         drop(raw_bind_group_layouts);

-        Ok(binding_model::PipelineLayout {
-            raw: Some(raw),
+        let layout = binding_model::PipelineLayout {
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             label: desc.label.to_string(),
-            tracking_data: TrackingData::new(self.tracker_indices.pipeline_layouts.clone()),
             bind_group_layouts,
             push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(),
-        })
+        };
+
+        let layout = Arc::new(layout);
+
+        Ok(layout)
     }

     pub(crate) fn derive_pipeline_layout(
         self: &Arc<Self>,
         mut derived_group_layouts: ArrayVec<bgl::EntryMap, { hal::MAX_BIND_GROUPS }>,
-    ) -> Result<Arc<binding_model::PipelineLayout<A>>, pipeline::ImplicitLayoutError> {
+    ) -> Result<Arc<binding_model::PipelineLayout>, pipeline::ImplicitLayoutError> {
         while derived_group_layouts
             .last()
             .map_or(false, |map| map.is_empty())
@@ -2601,11 +2642,28 @@ impl<A: HalApi> Device<A> {
             derived_group_layouts.pop();
         }

+        let mut unique_bind_group_layouts = PreHashedMap::default();
+
         let bind_group_layouts = derived_group_layouts
             .into_iter()
-            .map(|bgl_entry_map| {
-                self.create_bind_group_layout(&None, bgl_entry_map, bgl::Origin::Derived)
-                    .map(Arc::new)
+            .map(|mut bgl_entry_map| {
+                bgl_entry_map.sort();
+                match unique_bind_group_layouts.entry(PreHashedKey::from_key(&bgl_entry_map)) {
+                    std::collections::hash_map::Entry::Occupied(v) => Ok(Arc::clone(v.get())),
+                    std::collections::hash_map::Entry::Vacant(e) => {
+                        match self.create_bind_group_layout(
+                            &None,
+                            bgl_entry_map,
+                            bgl::Origin::Derived,
+                        ) {
+                            Ok(bgl) => {
+                                e.insert(bgl.clone());
+                                Ok(bgl)
+                            }
+                            Err(e) => Err(e),
+                        }
+                    }
+                }
             })
             .collect::<Result<ArrayVec<_, { hal::MAX_BIND_GROUPS }>, _>>()?;

@@ -2616,14 +2674,13 @@ impl<A: HalApi> Device<A> {
         };

         let layout = self.create_pipeline_layout(&layout_desc)?;
-        let layout = Arc::new(layout);
         Ok(layout)
     }

     pub(crate) fn create_compute_pipeline(
         self: &Arc<Self>,
-        desc: pipeline::ResolvedComputePipelineDescriptor<A>,
-    ) -> Result<Arc<pipeline::ComputePipeline<A>>, pipeline::CreateComputePipelineError> {
+        desc: pipeline::ResolvedComputePipelineDescriptor,
+    ) -> Result<Arc<pipeline::ComputePipeline>, pipeline::CreateComputePipelineError> {
         self.check_is_valid()?;

         self.require_downlevel_flags(wgt::DownlevelFlags::COMPUTE_SHADERS)?;
@@ -2703,31 +2760,32 @@ impl<A: HalApi> Device<A> {
                 entry_point: final_entry_point_name.as_ref(),
                 constants: desc.stage.constants.as_ref(),
                 zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory,
-                vertex_pulling_transform: false,
             },
-            cache: cache.as_ref().and_then(|it| it.raw.as_ref()),
+            cache: cache.as_ref().map(|it| it.raw()),
         };

-        let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .create_compute_pipeline(&pipeline_desc)
-        }
-        .map_err(|err| match err {
-            hal::PipelineError::Device(error) => {
-                pipeline::CreateComputePipelineError::Device(error.into())
-            }
-            hal::PipelineError::Linkage(_stages, msg) => {
-                pipeline::CreateComputePipelineError::Internal(msg)
-            }
-            hal::PipelineError::EntryPoint(_stage) => {
-                pipeline::CreateComputePipelineError::Internal(ENTRYPOINT_FAILURE_ERROR.to_string())
-            }
-        })?;
+        let raw =
+            unsafe { self.raw().create_compute_pipeline(&pipeline_desc) }.map_err(
+                |err| match err {
+                    hal::PipelineError::Device(error) => {
+                        pipeline::CreateComputePipelineError::Device(error.into())
+                    }
+                    hal::PipelineError::Linkage(_stages, msg) => {
+                        pipeline::CreateComputePipelineError::Internal(msg)
+                    }
+                    hal::PipelineError::EntryPoint(_stage) => {
+                        pipeline::CreateComputePipelineError::Internal(
+                            ENTRYPOINT_FAILURE_ERROR.to_string(),
+                        )
+                    }
+                    hal::PipelineError::PipelineConstants(_stages, msg) => {
+                        pipeline::CreateComputePipelineError::PipelineConstants(msg)
+                    }
+                },
+            )?;

         let pipeline = pipeline::ComputePipeline {
-            raw: Some(raw),
+            raw: ManuallyDrop::new(raw),
             layout: pipeline_layout,
             device: self.clone(),
             _shader_module: shader_module,
@@ -2740,11 +2798,12 @@ impl<A: HalApi> Device<A> {

         if is_auto_layout {
             for bgl in pipeline.layout.bind_group_layouts.iter() {
-                bgl.exclusive_pipeline
+                // `bind_group_layouts` might contain duplicate entries, so we need to ignore the result.
+                let _ = bgl
+                    .exclusive_pipeline
                     .set(binding_model::ExclusivePipeline::Compute(Arc::downgrade(
                         &pipeline,
-                    )))
-                    .unwrap();
+                    )));
             }
         }

@@ -2753,8 +2812,8 @@ impl<A: HalApi> Device<A> {

     pub(crate) fn create_render_pipeline(
         self: &Arc<Self>,
-        desc: pipeline::ResolvedRenderPipelineDescriptor<A>,
-    ) -> Result<Arc<pipeline::RenderPipeline<A>>, pipeline::CreateRenderPipelineError> {
+        desc: pipeline::ResolvedRenderPipelineDescriptor,
+    ) -> Result<Arc<pipeline::RenderPipeline>, pipeline::CreateRenderPipelineError> {
         use wgt::TextureFormatFeatureFlags as Tfff;

         self.check_is_valid()?;
@@ -2787,7 +2846,6 @@ impl<A: HalApi> Device<A> {
                 .iter()
                 .any(|ct| ct.write_mask != first.write_mask || ct.blend != first.blend)
         } {
-            log::debug!("Color targets: {:?}", color_targets);
             self.require_downlevel_flags(wgt::DownlevelFlags::INDEPENDENT_BLEND)?;
         }
     }
@@ -2981,7 +3039,7 @@ impl<A: HalApi> Device<A> {
                         break;
                     } else {
                         return Err(pipeline::CreateRenderPipelineError
-                            ::BlendFactorOnUnsupportedTarget { factor, target: i as u32 });
+                        ::BlendFactorOnUnsupportedTarget { factor, target: i as u32 });
                     }
                 }
             }
@@ -3123,7 +3181,6 @@ impl<A: HalApi> Device<A> {
                 entry_point: &vertex_entry_point_name,
                 constants: stage_desc.constants.as_ref(),
                 zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory,
-                vertex_pulling_transform: stage_desc.vertex_pulling_transform,
             }
         };

@@ -3133,6 +3190,7 @@ impl<A: HalApi> Device<A> {
             let stage = wgt::ShaderStages::FRAGMENT;

             let shader_module = &fragment_state.stage.module;
+            shader_module.same_device(self)?;

             let stage_err = |error| pipeline::CreateRenderPipelineError::Stage { stage, error };

@@ -3179,7 +3237,6 @@ impl<A: HalApi> Device<A> {
                     zero_initialize_workgroup_memory: fragment_state
                         .stage
                         .zero_initialize_workgroup_memory,
-                    vertex_pulling_transform: false,
                 })
             }
             None => None,
@@ -3285,28 +3342,28 @@ impl<A: HalApi> Device<A> {
             fragment_stage,
             color_targets,
             multiview: desc.multiview,
-            cache: cache.as_ref().and_then(|it| it.raw.as_ref()),
+            cache: cache.as_ref().map(|it| it.raw()),
         };

-        let raw = unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .create_render_pipeline(&pipeline_desc)
-        }
-        .map_err(|err| match err {
-            hal::PipelineError::Device(error) => {
-                pipeline::CreateRenderPipelineError::Device(error.into())
-            }
-            hal::PipelineError::Linkage(stage, msg) => {
-                pipeline::CreateRenderPipelineError::Internal { stage, error: msg }
-
} - hal::PipelineError::EntryPoint(stage) => { - pipeline::CreateRenderPipelineError::Internal { - stage: hal::auxil::map_naga_stage(stage), - error: ENTRYPOINT_FAILURE_ERROR.to_string(), - } - } - })?; + let raw = + unsafe { self.raw().create_render_pipeline(&pipeline_desc) }.map_err( + |err| match err { + hal::PipelineError::Device(error) => { + pipeline::CreateRenderPipelineError::Device(error.into()) + } + hal::PipelineError::Linkage(stage, msg) => { + pipeline::CreateRenderPipelineError::Internal { stage, error: msg } + } + hal::PipelineError::EntryPoint(stage) => { + pipeline::CreateRenderPipelineError::Internal { + stage: hal::auxil::map_naga_stage(stage), + error: ENTRYPOINT_FAILURE_ERROR.to_string(), + } + } + hal::PipelineError::PipelineConstants(stage, error) => { + pipeline::CreateRenderPipelineError::PipelineConstants { stage, error } + } + }, + )?; let pass_context = RenderPassContext { attachments: AttachmentData { @@ -3349,7 +3406,7 @@ impl Device { }; let pipeline = pipeline::RenderPipeline { - raw: Some(raw), + raw: ManuallyDrop::new(raw), layout: pipeline_layout, device: self.clone(), pass_context, @@ -3366,11 +3423,12 @@ impl Device { if is_auto_layout { for bgl in pipeline.layout.bind_group_layouts.iter() { - bgl.exclusive_pipeline + // `bind_group_layouts` might contain duplicate entries, so we need to ignore the result. + let _ = bgl + .exclusive_pipeline .set(binding_model::ExclusivePipeline::Render(Arc::downgrade( &pipeline, - ))) - .unwrap(); + ))); } } @@ -3382,7 +3440,7 @@ impl Device { pub unsafe fn create_pipeline_cache( self: &Arc, desc: &pipeline::PipelineCacheDescriptor, - ) -> Result, pipeline::CreatePipelineCacheError> { + ) -> Result, pipeline::CreatePipelineCacheError> { use crate::pipeline_cache; self.check_is_valid()?; @@ -3418,10 +3476,12 @@ impl Device { let cache = pipeline::PipelineCache { device: self.clone(), label: desc.label.to_string(), - tracking_data: TrackingData::new(self.tracker_indices.pipeline_caches.clone()), // This would be none in the error condition, which we don't implement yet - raw: Some(raw), + raw: ManuallyDrop::new(raw), }; + + let cache = Arc::new(cache); + Ok(cache) } @@ -3465,29 +3525,16 @@ impl Device { } } + #[cfg(feature = "replay")] pub(crate) fn wait_for_submit( &self, - submission_index: SubmissionIndex, - ) -> Result<(), WaitIdleError> { - let guard = self.fence.read(); - let fence = guard.as_ref().unwrap(); - let last_done_index = unsafe { - self.raw - .as_ref() - .unwrap() - .get_fence_value(fence) - .map_err(DeviceError::from)? - }; + submission_index: crate::SubmissionIndex, + ) -> Result<(), DeviceError> { + let fence = self.fence.read(); + let last_done_index = unsafe { self.raw().get_fence_value(fence.as_ref())? }; if last_done_index < submission_index { - log::info!("Waiting for submission {:?}", submission_index); - unsafe { - self.raw - .as_ref() - .unwrap() - .wait(fence, submission_index, !0) - .map_err(DeviceError::from)? - }; - drop(guard); + unsafe { self.raw().wait(fence.as_ref(), submission_index, !0)? 
};
+            drop(fence);
             let closures = self
                 .lock_life()
                 .triage_submissions(submission_index, &self.command_allocator);
@@ -3502,7 +3549,7 @@ impl<A: HalApi> Device<A> {
     pub(crate) fn create_query_set(
         self: &Arc<Self>,
         desc: &resource::QuerySetDescriptor,
-    ) -> Result<Arc<QuerySet<A>>, resource::CreateQuerySetError> {
+    ) -> Result<Arc<QuerySet>, resource::CreateQuerySetError> {
         use resource::CreateQuerySetError as Error;
 
         self.check_is_valid()?;
@@ -3530,8 +3577,10 @@ impl<A: HalApi> Device<A> {
 
         let hal_desc = desc.map_label(|label| label.to_hal(self.instance_flags));
 
+        let raw = unsafe { self.raw().create_query_set(&hal_desc).unwrap() };
+
         let query_set = QuerySet {
-            raw: Some(unsafe { self.raw().create_query_set(&hal_desc).unwrap() }),
+            raw: ManuallyDrop::new(raw),
             device: self.clone(),
             label: desc.label.to_string(),
             tracking_data: TrackingData::new(self.tracker_indices.query_sets.clone()),
@@ -3596,43 +3645,40 @@ impl<A: HalApi> Device<A> {
         }
     }
 
-    pub(crate) fn new_usage_scope(&self) -> UsageScope<'_, A> {
+    pub(crate) fn new_usage_scope(&self) -> UsageScope<'_> {
         UsageScope::new_pooled(&self.usage_scopes, &self.tracker_indices)
     }
 
     pub fn get_hal_counters(&self) -> wgt::HalCounters {
-        self.raw
-            .as_ref()
-            .map(|raw| raw.get_internal_counters())
-            .unwrap_or_default()
+        self.raw().get_internal_counters()
+    }
+
+    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        self.raw().generate_allocator_report()
     }
 }
 
-impl<A: HalApi> Device<A> {
-    pub(crate) fn destroy_command_buffer(&self, mut cmd_buf: command::CommandBuffer<A>) {
+impl Device {
+    pub(crate) fn destroy_command_buffer(&self, mut cmd_buf: command::CommandBuffer) {
         let mut baked = cmd_buf.extract_baked_commands();
         unsafe {
-            baked.encoder.reset_all(baked.list.into_iter());
+            baked.encoder.reset_all(baked.list);
         }
         unsafe {
-            self.raw
-                .as_ref()
-                .unwrap()
-                .destroy_command_encoder(baked.encoder);
+            self.raw().destroy_command_encoder(baked.encoder);
         }
     }
 
     /// Wait for idle and remove resources that we can, before we die.
     pub(crate) fn prepare_to_die(&self) {
-        self.pending_writes.lock().as_mut().unwrap().deactivate();
-        let current_index = self.active_submission_index.load(Ordering::Relaxed);
+        self.pending_writes.lock().deactivate();
+        let current_index = self
+            .last_successful_submission_index
+            .load(Ordering::Acquire);
         if let Err(error) = unsafe {
             let fence = self.fence.read();
-            let fence = fence.as_ref().unwrap();
-            self.raw
-                .as_ref()
-                .unwrap()
-                .wait(fence, current_index, CLEANUP_WAIT_MS)
+            self.raw()
+                .wait(fence.as_ref(), current_index, CLEANUP_WAIT_MS)
         } {
             log::error!("failed to wait for the device: {error}");
         }
diff --git a/wgpu-core/src/global.rs b/wgpu-core/src/global.rs
index 6f6756a88c..4d79a81e3b 100644
--- a/wgpu-core/src/global.rs
+++ b/wgpu-core/src/global.rs
@@ -1,52 +1,30 @@
-use std::sync::Arc;
-
-use wgt::Backend;
-
 use crate::{
     hal_api::HalApi,
-    hub::{HubReport, Hubs},
+    hub::{Hub, HubReport},
     instance::{Instance, Surface},
     registry::{Registry, RegistryReport},
     resource_log,
-    storage::Element,
 };
 
 #[derive(Debug, PartialEq, Eq)]
 pub struct GlobalReport {
     pub surfaces: RegistryReport,
-    #[cfg(vulkan)]
-    pub vulkan: Option<HubReport>,
-    #[cfg(metal)]
-    pub metal: Option<HubReport>,
-    #[cfg(dx12)]
-    pub dx12: Option<HubReport>,
-    #[cfg(gles)]
-    pub gl: Option<HubReport>,
+    pub hub: HubReport,
 }
 
 impl GlobalReport {
     pub fn surfaces(&self) -> &RegistryReport {
         &self.surfaces
     }
-    pub fn hub_report(&self, backend: Backend) -> &HubReport {
-        match backend {
-            #[cfg(vulkan)]
-            Backend::Vulkan => self.vulkan.as_ref().unwrap(),
-            #[cfg(metal)]
-            Backend::Metal => self.metal.as_ref().unwrap(),
-            #[cfg(dx12)]
-            Backend::Dx12 => self.dx12.as_ref().unwrap(),
-            #[cfg(gles)]
-            Backend::Gl => self.gl.as_ref().unwrap(),
-            _ => panic!("HubReport is not supported on this backend"),
-        }
+    pub fn hub_report(&self) -> &HubReport {
+        &self.hub
     }
 }
 
 pub struct Global {
     pub instance: Instance,
     pub(crate) surfaces: Registry<Surface>,
-    pub(crate) hubs: Hubs,
+    pub(crate) hub: Hub,
 }
 
 impl Global {
@@ -54,8 +32,8 @@ impl Global {
         profiling::scope!("Global::new");
         Self {
             instance: Instance::new(name, instance_desc),
-            surfaces: Registry::without_backend(),
-            hubs: Hubs::new(),
+            surfaces: Registry::new(),
+            hub: Hub::new(),
         }
     }
 
@@ -64,10 +42,16 @@ impl Global {
     /// Refer to the creation of wgpu-hal Instance for every backend.
     pub unsafe fn from_hal_instance<A: HalApi>(name: &str, hal_instance: A::Instance) -> Self {
         profiling::scope!("Global::new");
+
+        let dyn_instance: Box<dyn hal::DynInstance> = Box::new(hal_instance);
         Self {
-            instance: A::create_instance_from_hal(name, hal_instance),
-            surfaces: Registry::without_backend(),
-            hubs: Hubs::new(),
+            instance: Instance {
+                name: name.to_owned(),
+                instance_per_backend: std::iter::once((A::VARIANT, dyn_instance)).collect(),
+                ..Default::default()
+            },
+            surfaces: Registry::new(),
+            hub: Hub::new(),
         }
     }
 
     /// # Safety
     ///
     /// - The raw instance handle returned must not be manually destroyed.
     pub unsafe fn instance_as_hal<A: HalApi>(&self) -> Option<&A::Instance> {
-        A::instance_as_hal(&self.instance)
+        self.instance.raw(A::VARIANT).map(|instance| {
+            instance
+                .as_any()
+                .downcast_ref()
+                // This should be impossible. It would mean that the backend instance and enum type are mismatched.
+ .expect("Stored instance is not of the correct type") + }) } /// # Safety @@ -85,45 +75,15 @@ impl Global { profiling::scope!("Global::new"); Self { instance, - surfaces: Registry::without_backend(), - hubs: Hubs::new(), + surfaces: Registry::new(), + hub: Hub::new(), } } - pub fn clear_backend(&self, _dummy: ()) { - let hub = A::hub(self); - let surfaces_locked = self.surfaces.read(); - // this is used for tests, which keep the adapter - hub.clear(&surfaces_locked, false); - } - pub fn generate_report(&self) -> GlobalReport { GlobalReport { surfaces: self.surfaces.generate_report(), - #[cfg(vulkan)] - vulkan: if self.instance.vulkan.is_some() { - Some(self.hubs.vulkan.generate_report()) - } else { - None - }, - #[cfg(metal)] - metal: if self.instance.metal.is_some() { - Some(self.hubs.metal.generate_report()) - } else { - None - }, - #[cfg(dx12)] - dx12: if self.instance.dx12.is_some() { - Some(self.hubs.dx12.generate_report()) - } else { - None - }, - #[cfg(gles)] - gl: if self.instance.gl.is_some() { - Some(self.hubs.gl.generate_report()) - } else { - None - }, + hub: self.hub.generate_report(), } } } @@ -134,32 +94,10 @@ impl Drop for Global { resource_log!("Global::drop"); let mut surfaces_locked = self.surfaces.write(); - // destroy hubs before the instance gets dropped - #[cfg(vulkan)] - { - self.hubs.vulkan.clear(&surfaces_locked, true); - } - #[cfg(metal)] - { - self.hubs.metal.clear(&surfaces_locked, true); - } - #[cfg(dx12)] - { - self.hubs.dx12.clear(&surfaces_locked, true); - } - #[cfg(gles)] - { - self.hubs.gl.clear(&surfaces_locked, true); - } + // destroy hub before the instance gets dropped + self.hub.clear(&surfaces_locked); - // destroy surfaces - for element in surfaces_locked.map.drain(..) { - if let Element::Occupied(arc_surface, _) = element { - let surface = Arc::into_inner(arc_surface) - .expect("Surface cannot be destroyed because is still in use"); - self.instance.destroy_surface(surface); - } - } + surfaces_locked.map.clear(); } } diff --git a/wgpu-core/src/hal_api.rs b/wgpu-core/src/hal_api.rs index f1a40b1cff..b41847b8d5 100644 --- a/wgpu-core/src/hal_api.rs +++ b/wgpu-core/src/hal_api.rs @@ -1,116 +1,29 @@ use wgt::{Backend, WasmNotSendSync}; -use crate::{ - global::Global, - hub::Hub, - instance::{Instance, Surface}, -}; - pub trait HalApi: hal::Api + 'static + WasmNotSendSync { const VARIANT: Backend; - fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance; - fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance>; - fn hub(global: &Global) -> &Hub; - fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface>; } impl HalApi for hal::api::Empty { const VARIANT: Backend = Backend::Empty; - fn create_instance_from_hal(_: &str, _: Self::Instance) -> Instance { - unimplemented!("called empty api") - } - fn instance_as_hal(_: &Instance) -> Option<&Self::Instance> { - unimplemented!("called empty api") - } - fn hub(_: &Global) -> &Hub { - unimplemented!("called empty api") - } - fn surface_as_hal(_: &Surface) -> Option<&Self::Surface> { - unimplemented!("called empty api") - } } #[cfg(vulkan)] impl HalApi for hal::api::Vulkan { const VARIANT: Backend = Backend::Vulkan; - fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance { - Instance { - name: name.to_owned(), - vulkan: Some(hal_instance), - ..Default::default() - } - } - fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> { - instance.vulkan.as_ref() - } - fn hub(global: &Global) -> &Hub { - &global.hubs.vulkan - } - fn 
surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { - surface.vulkan.as_ref() - } } #[cfg(metal)] impl HalApi for hal::api::Metal { const VARIANT: Backend = Backend::Metal; - fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance { - Instance { - name: name.to_owned(), - metal: Some(hal_instance), - ..Default::default() - } - } - fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> { - instance.metal.as_ref() - } - fn hub(global: &Global) -> &Hub { - &global.hubs.metal - } - fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { - surface.metal.as_ref() - } } #[cfg(dx12)] impl HalApi for hal::api::Dx12 { const VARIANT: Backend = Backend::Dx12; - fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance { - Instance { - name: name.to_owned(), - dx12: Some(hal_instance), - ..Default::default() - } - } - fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> { - instance.dx12.as_ref() - } - fn hub(global: &Global) -> &Hub { - &global.hubs.dx12 - } - fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { - surface.dx12.as_ref() - } } #[cfg(gles)] impl HalApi for hal::api::Gles { const VARIANT: Backend = Backend::Gl; - fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance { - #[allow(clippy::needless_update)] - Instance { - name: name.to_owned(), - gl: Some(hal_instance), - ..Default::default() - } - } - fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> { - instance.gl.as_ref() - } - fn hub(global: &Global) -> &Hub { - &global.hubs.gl - } - fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { - surface.gl.as_ref() - } } diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index 6e77618fad..82d45c964b 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -10,10 +10,7 @@ of course `Debug`. [`id::BufferId`]: crate::id::BufferId Each `Id` contains not only an index for the resource it denotes but -also a Backend indicating which `wgpu` backend it belongs to. You -can use the [`gfx_select`] macro to dynamically dispatch on an id's -backend to a function specialized at compile time for a specific -backend. See that macro's documentation for details. +also a Backend indicating which `wgpu` backend it belongs to. `Id`s also incorporate a generation number, for additional validation. @@ -96,7 +93,6 @@ creation fails, the id supplied for that resource is marked to indicate as much, allowing subsequent operations using that id to be properly flagged as errors as well. -[`gfx_select`]: crate::gfx_select [`process`]: crate::identity::IdentityManager::process [`Id`]: crate::id::Id [wrapped in a mutex]: trait.IdentityHandler.html#impl-IdentityHandler%3CI%3E-for-Mutex%3CIdentityManager%3E @@ -108,7 +104,6 @@ use crate::{ binding_model::{BindGroup, BindGroupLayout, PipelineLayout}, command::{CommandBuffer, RenderBundle}, device::{queue::Queue, Device}, - hal_api::HalApi, instance::{Adapter, Surface}, pipeline::{ComputePipeline, PipelineCache, RenderPipeline, ShaderModule}, registry::{Registry, RegistryReport}, @@ -145,10 +140,7 @@ impl HubReport { } #[allow(rustdoc::private_intra_doc_links)] -/// All the resources for a particular backend in a [`crate::global::Global`]. -/// -/// To obtain `global`'s `Hub` for some [`HalApi`] backend type `A`, -/// call [`A::hub(global)`]. +/// All the resources tracked by a [`crate::global::Global`]. 
/// /// ## Locking /// @@ -169,61 +161,56 @@ impl HubReport { /// /// /// [`A::hub(global)`]: HalApi::hub -pub struct Hub { - pub(crate) adapters: Registry>, - pub(crate) devices: Registry>, - pub(crate) queues: Registry>, - pub(crate) pipeline_layouts: Registry>, - pub(crate) shader_modules: Registry>, - pub(crate) bind_group_layouts: Registry>, - pub(crate) bind_groups: Registry>, - pub(crate) command_buffers: Registry>, - pub(crate) render_bundles: Registry>, - pub(crate) render_pipelines: Registry>, - pub(crate) compute_pipelines: Registry>, - pub(crate) pipeline_caches: Registry>, - pub(crate) query_sets: Registry>, - pub(crate) buffers: Registry>, - pub(crate) staging_buffers: Registry>, - pub(crate) textures: Registry>, - pub(crate) texture_views: Registry>, - pub(crate) samplers: Registry>, - pub(crate) blas_s: Registry>, - pub(crate) tlas_s: Registry>, +pub struct Hub { + pub(crate) adapters: Registry, + pub(crate) devices: Registry, + pub(crate) queues: Registry, + pub(crate) pipeline_layouts: Registry, + pub(crate) shader_modules: Registry, + pub(crate) bind_group_layouts: Registry, + pub(crate) bind_groups: Registry, + pub(crate) command_buffers: Registry, + pub(crate) render_bundles: Registry, + pub(crate) render_pipelines: Registry, + pub(crate) compute_pipelines: Registry, + pub(crate) pipeline_caches: Registry, + pub(crate) query_sets: Registry, + pub(crate) buffers: Registry, + pub(crate) staging_buffers: Registry, + pub(crate) textures: Registry, + pub(crate) texture_views: Registry, + pub(crate) samplers: Registry, + pub(crate) blas_s: Registry, + pub(crate) tlas_s: Registry, } -impl Hub { - fn new() -> Self { +impl Hub { + pub(crate) fn new() -> Self { Self { - adapters: Registry::new(A::VARIANT), - devices: Registry::new(A::VARIANT), - queues: Registry::new(A::VARIANT), - pipeline_layouts: Registry::new(A::VARIANT), - shader_modules: Registry::new(A::VARIANT), - bind_group_layouts: Registry::new(A::VARIANT), - bind_groups: Registry::new(A::VARIANT), - command_buffers: Registry::new(A::VARIANT), - render_bundles: Registry::new(A::VARIANT), - render_pipelines: Registry::new(A::VARIANT), - compute_pipelines: Registry::new(A::VARIANT), - pipeline_caches: Registry::new(A::VARIANT), - query_sets: Registry::new(A::VARIANT), - buffers: Registry::new(A::VARIANT), - staging_buffers: Registry::new(A::VARIANT), - textures: Registry::new(A::VARIANT), - texture_views: Registry::new(A::VARIANT), - samplers: Registry::new(A::VARIANT), - blas_s: Registry::new(A::VARIANT), - tlas_s: Registry::new(A::VARIANT), + adapters: Registry::new(), + devices: Registry::new(), + queues: Registry::new(), + pipeline_layouts: Registry::new(), + shader_modules: Registry::new(), + bind_group_layouts: Registry::new(), + bind_groups: Registry::new(), + command_buffers: Registry::new(), + render_bundles: Registry::new(), + render_pipelines: Registry::new(), + compute_pipelines: Registry::new(), + pipeline_caches: Registry::new(), + query_sets: Registry::new(), + buffers: Registry::new(), + staging_buffers: Registry::new(), + textures: Registry::new(), + texture_views: Registry::new(), + samplers: Registry::new(), + blas_s: Registry::new(), + tlas_s: Registry::new(), } } - //TODO: instead of having a hacky `with_adapters` parameter, - // we should have `clear_device(device_id)` that specifically destroys - // everything related to a logical device. 
-    pub(crate) fn clear(&self, surface_guard: &Storage<Surface>, with_adapters: bool) {
-        use hal::Surface;
-
+    pub(crate) fn clear(&self, surface_guard: &Storage<Surface>) {
         let mut devices = self.devices.write();
         for element in devices.map.iter() {
             if let Element::Occupied(ref device, _) = *element {
@@ -248,12 +235,9 @@ impl<A: HalApi> Hub<A> {
         for element in surface_guard.map.iter() {
             if let Element::Occupied(ref surface, _epoch) = *element {
                 if let Some(ref mut present) = surface.presentation.lock().take() {
-                    if let Some(device) = present.device.downcast_ref::<A>() {
-                        let suf = A::surface_as_hal(surface);
-                        unsafe {
-                            suf.unwrap().unconfigure(device.raw());
-                            //TODO: we could destroy the surface here
-                        }
+                    let suf = surface.raw(present.device.backend());
+                    unsafe {
+                        suf.unwrap().unconfigure(present.device.raw());
                     }
                 }
             }
@@ -262,17 +246,8 @@ impl<A: HalApi> Hub<A> {
         self.queues.write().map.clear();
         devices.map.clear();
 
-        if with_adapters {
-            drop(devices);
-            self.adapters.write().map.clear();
-        }
-    }
-
-    pub(crate) fn surface_unconfigure(&self, device: &Device<A>, surface: &A::Surface) {
-        unsafe {
-            use hal::Surface;
-            surface.unconfigure(device.raw());
-        }
+        drop(devices);
+        self.adapters.write().map.clear();
     }
 
     pub fn generate_report(&self) -> HubReport {
@@ -297,33 +272,3 @@ impl<A: HalApi> Hub<A> {
         }
     }
 }
-
-pub struct Hubs {
-    #[cfg(vulkan)]
-    pub(crate) vulkan: Hub<hal::api::Vulkan>,
-    #[cfg(metal)]
-    pub(crate) metal: Hub<hal::api::Metal>,
-    #[cfg(dx12)]
-    pub(crate) dx12: Hub<hal::api::Dx12>,
-    #[cfg(gles)]
-    pub(crate) gl: Hub<hal::api::Gles>,
-    #[cfg(all(not(vulkan), not(metal), not(dx12), not(gles)))]
-    pub(crate) empty: Hub<hal::api::Empty>,
-}
-
-impl Hubs {
-    pub(crate) fn new() -> Self {
-        Self {
-            #[cfg(vulkan)]
-            vulkan: Hub::new(),
-            #[cfg(metal)]
-            metal: Hub::new(),
-            #[cfg(dx12)]
-            dx12: Hub::new(),
-            #[cfg(gles)]
-            gl: Hub::new(),
-            #[cfg(all(not(vulkan), not(metal), not(dx12), not(gles)))]
-            empty: Hub::new(),
-        }
-    }
-}
diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs
index fc8268c202..3c00f755df 100644
--- a/wgpu-core/src/id.rs
+++ b/wgpu-core/src/id.rs
@@ -11,7 +11,7 @@ type IdType = u64;
 type ZippedIndex = Index;
 type NonZeroId = std::num::NonZeroU64;
 
-const INDEX_BITS: usize = std::mem::size_of::<ZippedIndex>() * 8;
+const INDEX_BITS: usize = ZippedIndex::BITS as usize;
 const EPOCH_BITS: usize = INDEX_BITS - BACKEND_BITS;
 const BACKEND_BITS: usize = 3;
 const BACKEND_SHIFT: usize = INDEX_BITS * 2 - BACKEND_BITS;
@@ -77,18 +77,6 @@ impl RawId {
     }
 }
 
-/// Coerce a slice of identifiers into a slice of optional raw identifiers.
-///
-/// There are two reasons why we know this is correct:
-/// * `Option<T>` is guaranteed to be niche-filled to 0's.
-/// * The `T` in `Option<T>` can inhabit any representation except 0's, since
-///   its underlying representation is `NonZero*`.
-pub fn as_option_slice<T: Marker>(ids: &[Id<T>]) -> &[Option<Id<T>>] {
-    // SAFETY: Any `Id<T>` is repr(transparent) over `Option<RawId>`, since both
-    // are backed by non-zero types.
-    unsafe { std::slice::from_raw_parts(ids.as_ptr().cast(), ids.len()) }
-}
-
 /// An identifier for a wgpu object.
 ///
 /// An `Id` value identifies a value stored in a [`Global`]'s [`Hub`].
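To make the id layout above concrete: the constants split the 64-bit value into a 32-bit storage index (`Index` is `u32`), a 29-bit epoch (generation counter), and a 3-bit backend discriminant. The following is a minimal sketch of that bit-packing; `pack`/`unpack` are hypothetical helpers for illustration only, not wgpu-core's actual API (the real `Id` goes through `RawId` and stores the result in a `NonZeroU64`, which works because epochs start at 1 and so a live id is never all zeroes):

type Index = u32;
type Epoch = u32;

const INDEX_BITS: u64 = Index::BITS as u64; // 32
const BACKEND_BITS: u64 = 3;
const EPOCH_BITS: u64 = INDEX_BITS - BACKEND_BITS; // 29
const BACKEND_SHIFT: u64 = INDEX_BITS * 2 - BACKEND_BITS; // 61

// Hypothetical helper: zip index, epoch, and backend into one u64,
// with the index in the low bits and the backend in the top 3 bits.
fn pack(index: Index, epoch: Epoch, backend: u8) -> u64 {
    assert!((epoch as u64) < (1u64 << EPOCH_BITS));
    assert!((backend as u64) < (1u64 << BACKEND_BITS));
    ((backend as u64) << BACKEND_SHIFT) | ((epoch as u64) << INDEX_BITS) | index as u64
}

// Hypothetical inverse: recover the three fields from a packed id.
fn unpack(id: u64) -> (Index, Epoch, u8) {
    let index = id as Index; // truncates to the low 32 bits
    let epoch = ((id >> INDEX_BITS) & ((1u64 << EPOCH_BITS) - 1)) as Epoch;
    let backend = (id >> BACKEND_SHIFT) as u8;
    (index, epoch, backend)
}

fn main() {
    let id = pack(7, 1, 2);
    assert_eq!(unpack(id), (7, 1, 2));
}

The epoch is what lets a registry detect stale ids after a slot is reused; the backend bits remain in the id even though, after this patch, all backends share a single `Hub`.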
@@ -340,12 +328,6 @@ impl CommandBufferId {
     }
 }
 
-impl DeviceId {
-    pub fn into_queue_id(self) -> QueueId {
-        Id(self.0, PhantomData)
-    }
-}
-
 #[test]
 fn test_id_backend() {
     for &b in &[
diff --git a/wgpu-core/src/init_tracker/buffer.rs b/wgpu-core/src/init_tracker/buffer.rs
index 2c0fa8d372..ee8e99aa22 100644
--- a/wgpu-core/src/init_tracker/buffer.rs
+++ b/wgpu-core/src/init_tracker/buffer.rs
@@ -1,10 +1,10 @@
 use super::{InitTracker, MemoryInitKind};
-use crate::{hal_api::HalApi, resource::Buffer};
+use crate::resource::Buffer;
 use std::{ops::Range, sync::Arc};
 
 #[derive(Debug, Clone)]
-pub(crate) struct BufferInitTrackerAction<A: HalApi> {
-    pub buffer: Arc<Buffer<A>>,
+pub(crate) struct BufferInitTrackerAction {
+    pub buffer: Arc<Buffer>,
     pub range: Range<wgt::BufferAddress>,
     pub kind: MemoryInitKind,
 }
@@ -14,21 +14,21 @@ pub(crate) type BufferInitTracker = InitTracker<wgt::BufferAddress>;
 impl BufferInitTracker {
     /// Checks if an action has/requires any effect on the initialization status
     /// and shrinks its range if possible.
-    pub(crate) fn check_action<A: HalApi>(
+    pub(crate) fn check_action(
         &self,
-        action: &BufferInitTrackerAction<A>,
-    ) -> Option<BufferInitTrackerAction<A>> {
+        action: &BufferInitTrackerAction,
+    ) -> Option<BufferInitTrackerAction> {
         self.create_action(&action.buffer, action.range.clone(), action.kind)
     }
 
     /// Creates an action if it would have any effect on the initialization
     /// status and shrinks the range if possible.
-    pub(crate) fn create_action<A: HalApi>(
+    pub(crate) fn create_action(
         &self,
-        buffer: &Arc<Buffer<A>>,
+        buffer: &Arc<Buffer>,
         query_range: Range<wgt::BufferAddress>,
         kind: MemoryInitKind,
-    ) -> Option<BufferInitTrackerAction<A>> {
+    ) -> Option<BufferInitTrackerAction> {
         self.check(query_range)
             .map(|range| BufferInitTrackerAction {
                 buffer: buffer.clone(),
diff --git a/wgpu-core/src/init_tracker/texture.rs b/wgpu-core/src/init_tracker/texture.rs
index 4785b52229..4bf7278f21 100644
--- a/wgpu-core/src/init_tracker/texture.rs
+++ b/wgpu-core/src/init_tracker/texture.rs
@@ -1,5 +1,5 @@
 use super::{InitTracker, MemoryInitKind};
-use crate::{hal_api::HalApi, resource::Texture, track::TextureSelector};
+use crate::{resource::Texture, track::TextureSelector};
 use arrayvec::ArrayVec;
 use std::{ops::Range, sync::Arc};
 
@@ -35,8 +35,8 @@ impl From<TextureSelector> for TextureInitRange {
     }
 }
 
 #[derive(Debug, Clone)]
-pub(crate) struct TextureInitTrackerAction<A: HalApi> {
-    pub(crate) texture: Arc<Texture<A>>,
+pub(crate) struct TextureInitTrackerAction {
+    pub(crate) texture: Arc<Texture>,
     pub(crate) range: TextureInitRange,
     pub(crate) kind: MemoryInitKind,
 }
@@ -57,10 +57,10 @@ impl TextureInitTracker {
         }
     }
 
-    pub(crate) fn check_action<A: HalApi>(
+    pub(crate) fn check_action(
         &self,
-        action: &TextureInitTrackerAction<A>,
-    ) -> Option<TextureInitTrackerAction<A>> {
+        action: &TextureInitTrackerAction,
+    ) -> Option<TextureInitTrackerAction> {
         let mut mip_range_start = usize::MAX;
         let mut mip_range_end = usize::MIN;
         let mut layer_range_start = u32::MAX;
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 8c580588ff..a71117cfe1 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -1,6 +1,7 @@
-use std::collections::HashMap;
 use std::sync::Arc;
+use std::{borrow::Cow, collections::HashMap};
 
+use crate::hub::Hub;
 use crate::{
     api_log,
     device::{queue::Queue, resource::Device, DeviceDescriptor},
@@ -15,17 +16,15 @@ use crate::{
 
 use wgt::{Backend, Backends, PowerPreference};
 
-use hal::{Adapter as _, Instance as _, OpenDevice};
 use thiserror::Error;
 
 pub type RequestAdapterOptions = wgt::RequestAdapterOptions<SurfaceId>;
-type HalInstance<A> = <A as hal::Api>::Instance;
-type HalSurface<A> = <A as hal::Api>::Surface;
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[error("Limit '{name}' value {requested} is better than allowed {allowed}")]
 pub
struct FailedLimit { - name: &'static str, + name: Cow<'static, str>, requested: u64, allowed: u64, } @@ -35,7 +34,7 @@ fn check_limits(requested: &wgt::Limits, allowed: &wgt::Limits) -> Vec>, - #[cfg(metal)] - pub metal: Option>, - #[cfg(dx12)] - pub dx12: Option>, - #[cfg(gles)] - pub gl: Option>, + /// List of instances per backend. + /// + /// The ordering in this list implies prioritization and needs to be preserved. + pub instance_per_backend: Vec<(Backend, Box)>, pub flags: wgt::InstanceFlags, } impl Instance { pub fn new(name: &str, instance_desc: wgt::InstanceDescriptor) -> Self { - fn init(_: A, instance_desc: &wgt::InstanceDescriptor) -> Option { + fn init( + _: A, + instance_desc: &wgt::InstanceDescriptor, + instance_per_backend: &mut Vec<(Backend, Box)>, + ) { if instance_desc.backends.contains(A::VARIANT.into()) { let hal_desc = hal::InstanceDescriptor { name: "wgpu", @@ -78,10 +77,12 @@ impl Instance { dx12_shader_compiler: instance_desc.dx12_shader_compiler.clone(), gles_minor_version: instance_desc.gles_minor_version, }; - match unsafe { hal::Instance::init(&hal_desc) } { + + use hal::Instance as _; + match unsafe { A::Instance::init(&hal_desc) } { Ok(instance) => { log::debug!("Instance::new: created {:?} backend", A::VARIANT); - Some(instance) + instance_per_backend.push((A::VARIANT, Box::new(instance))); } Err(err) => { log::debug!( @@ -89,59 +90,43 @@ impl Instance { A::VARIANT, err ); - None } } } else { log::trace!("Instance::new: backend {:?} not requested", A::VARIANT); - None } } + let mut instance_per_backend = Vec::new(); + + #[cfg(vulkan)] + init(hal::api::Vulkan, &instance_desc, &mut instance_per_backend); + #[cfg(metal)] + init(hal::api::Metal, &instance_desc, &mut instance_per_backend); + #[cfg(dx12)] + init(hal::api::Dx12, &instance_desc, &mut instance_per_backend); + #[cfg(gles)] + init(hal::api::Gles, &instance_desc, &mut instance_per_backend); + Self { name: name.to_string(), - #[cfg(vulkan)] - vulkan: init(hal::api::Vulkan, &instance_desc), - #[cfg(metal)] - metal: init(hal::api::Metal, &instance_desc), - #[cfg(dx12)] - dx12: init(hal::api::Dx12, &instance_desc), - #[cfg(gles)] - gl: init(hal::api::Gles, &instance_desc), + instance_per_backend, flags: instance_desc.flags, } } - pub(crate) fn destroy_surface(&self, surface: Surface) { - fn destroy(instance: &Option, mut surface: Option>) { - if let Some(surface) = surface.take() { - unsafe { - instance.as_ref().unwrap().destroy_surface(surface); - } - } - } - #[cfg(vulkan)] - destroy::(&self.vulkan, surface.vulkan); - #[cfg(metal)] - destroy::(&self.metal, surface.metal); - #[cfg(dx12)] - destroy::(&self.dx12, surface.dx12); - #[cfg(gles)] - destroy::(&self.gl, surface.gl); + pub fn raw(&self, backend: Backend) -> Option<&dyn hal::DynInstance> { + self.instance_per_backend + .iter() + .find_map(|(instance_backend, instance)| { + (*instance_backend == backend).then(|| instance.as_ref()) + }) } } pub struct Surface { pub(crate) presentation: Mutex>, - - #[cfg(vulkan)] - pub vulkan: Option>, - #[cfg(metal)] - pub metal: Option>, - #[cfg(dx12)] - pub dx12: Option>, - #[cfg(gles)] - pub gl: Option>, + pub surface_per_backend: HashMap>, } impl ResourceType for Surface { @@ -152,36 +137,40 @@ impl crate::storage::StorageItem for Surface { } impl Surface { - pub fn get_capabilities( + pub fn get_capabilities( &self, - adapter: &Adapter, + adapter: &Adapter, ) -> Result { self.get_capabilities_with_raw(&adapter.raw) } - pub fn get_capabilities_with_raw( + pub fn get_capabilities_with_raw( &self, - adapter: 
&hal::ExposedAdapter, + adapter: &hal::DynExposedAdapter, ) -> Result { - let suf = A::surface_as_hal(self).ok_or(GetSurfaceSupportError::Unsupported)?; + let suf = self + .raw(adapter.backend()) + .ok_or(GetSurfaceSupportError::Unsupported)?; profiling::scope!("surface_capabilities"); - let caps = unsafe { - adapter - .adapter - .surface_capabilities(suf) - .ok_or(GetSurfaceSupportError::Unsupported)? - }; + let caps = unsafe { adapter.adapter.surface_capabilities(suf) } + .ok_or(GetSurfaceSupportError::Unsupported)?; Ok(caps) } + + pub fn raw(&self, backend: Backend) -> Option<&dyn hal::DynSurface> { + self.surface_per_backend + .get(&backend) + .map(|surface| surface.as_ref()) + } } -pub struct Adapter { - pub(crate) raw: hal::ExposedAdapter, +pub struct Adapter { + pub(crate) raw: hal::DynExposedAdapter, } -impl Adapter { - fn new(mut raw: hal::ExposedAdapter) -> Self { +impl Adapter { + fn new(mut raw: hal::DynExposedAdapter) -> Self { // WebGPU requires this offset alignment as lower bound on all adapters. const MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND: u32 = 32; @@ -276,27 +265,23 @@ impl Adapter { #[allow(clippy::type_complexity)] fn create_device_and_queue_from_hal( self: &Arc, - hal_device: OpenDevice, + hal_device: hal::DynOpenDevice, desc: &DeviceDescriptor, instance_flags: wgt::InstanceFlags, trace_path: Option<&std::path::Path>, - ) -> Result<(Arc>, Arc>), RequestDeviceError> { + ) -> Result<(Arc, Arc), RequestDeviceError> { api_log!("Adapter::create_device"); if let Ok(device) = Device::new( hal_device.device, - &hal_device.queue, + hal_device.queue.as_ref(), self, desc, trace_path, instance_flags, ) { let device = Arc::new(device); - let queue = Queue { - device: device.clone(), - raw: Some(hal_device.queue), - }; - let queue = Arc::new(queue); + let queue = Arc::new(Queue::new(device.clone(), hal_device.queue)); device.set_queue(&queue); return Ok((device, queue)); } @@ -309,7 +294,7 @@ impl Adapter { desc: &DeviceDescriptor, instance_flags: wgt::InstanceFlags, trace_path: Option<&std::path::Path>, - ) -> Result<(Arc>, Arc>), RequestDeviceError> { + ) -> Result<(Arc, Arc), RequestDeviceError> { // Verify all features were exposed by the adapter if !self.raw.features.contains(desc.required_features) { return Err(RequestDeviceError::UnsupportedFeature( @@ -318,7 +303,7 @@ impl Adapter { } let caps = &self.raw.capabilities; - if Backends::PRIMARY.contains(Backends::from(A::VARIANT)) + if Backends::PRIMARY.contains(Backends::from(self.raw.backend())) && !caps.downlevel.is_webgpu_compliant() { let missing_flags = wgt::DownlevelFlags::compliant() - caps.downlevel.flags; @@ -342,10 +327,6 @@ impl Adapter { ); } - if let Some(_) = desc.label { - //TODO - } - if let Some(failed) = check_limits(&desc.required_limits, &caps.limits).pop() { return Err(RequestDeviceError::LimitsExceeded(failed)); } @@ -391,6 +372,7 @@ pub enum GetSurfaceSupportError { } #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] /// Error when requesting a device from the adaptor #[non_exhaustive] pub enum RequestDeviceError { @@ -435,6 +417,7 @@ impl AdapterInputs<'_, M> { pub struct InvalidAdapter; #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[non_exhaustive] pub enum RequestAdapterError { #[error("No suitable adapter found")] @@ -479,85 +462,44 @@ impl Global { ) -> Result { profiling::scope!("Instance::create_surface"); - fn init( - errors: &mut HashMap, - any_created: &mut bool, - 
backend: Backend, - inst: &Option, - display_handle: raw_window_handle::RawDisplayHandle, - window_handle: raw_window_handle::RawWindowHandle, - ) -> Option> { - inst.as_ref().and_then(|inst| { - match unsafe { inst.create_surface(display_handle, window_handle) } { - Ok(raw) => { - *any_created = true; - Some(raw) - } - Err(err) => { - log::debug!( - "Instance::create_surface: failed to create surface for {:?}: {:?}", - backend, - err - ); - errors.insert(backend, err); - None - } - } - }) - } - let mut errors = HashMap::default(); - let mut any_created = false; + let mut surface_per_backend = HashMap::default(); - let surface = Surface { - presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), - - #[cfg(vulkan)] - vulkan: init::( - &mut errors, - &mut any_created, - Backend::Vulkan, - &self.instance.vulkan, - display_handle, - window_handle, - ), - #[cfg(metal)] - metal: init::( - &mut errors, - &mut any_created, - Backend::Metal, - &self.instance.metal, - display_handle, - window_handle, - ), - #[cfg(dx12)] - dx12: init::( - &mut errors, - &mut any_created, - Backend::Dx12, - &self.instance.dx12, - display_handle, - window_handle, - ), - #[cfg(gles)] - gl: init::( - &mut errors, - &mut any_created, - Backend::Gl, - &self.instance.gl, - display_handle, - window_handle, - ), - }; + for (backend, instance) in &self.instance.instance_per_backend { + match unsafe { + instance + .as_ref() + .create_surface(display_handle, window_handle) + } { + Ok(raw) => { + surface_per_backend.insert(*backend, raw); + } + Err(err) => { + log::debug!( + "Instance::create_surface: failed to create surface for {:?}: {:?}", + backend, + err + ); + errors.insert(*backend, err); + } + } + } - if any_created { - #[allow(clippy::arc_with_non_send_sync)] - let id = self.surfaces.prepare(id_in).assign(Arc::new(surface)); - Ok(id) - } else { + if surface_per_backend.is_empty() { Err(CreateSurfaceError::FailedToCreateSurfaceForAnyBackend( errors, )) + } else { + let surface = Surface { + presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), + surface_per_backend, + }; + + let id = self + .surfaces + .prepare(wgt::Backend::Empty, id_in) // No specific backend for Surface, since it's not specific. + .assign(Arc::new(surface)); + Ok(id) } } @@ -572,25 +514,37 @@ impl Global { ) -> Result { profiling::scope!("Instance::create_surface_metal"); + let instance = self + .instance + .raw(Backend::Metal) + .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Metal))?; + let instance_metal: &hal::metal::Instance = instance.as_any().downcast_ref().unwrap(); + + let layer = layer.cast(); + // SAFETY: We do this cast and deref. (rather than using `metal` to get the + // object we want) to avoid direct coupling on the `metal` crate. + // + // To wit, this pointer… + // + // - …is properly aligned. + // - …is dereferenceable to a `MetalLayerRef` as an invariant of the `metal` + // field. + // - …points to an _initialized_ `MetalLayerRef`. + // - …is only ever aliased via an immutable reference that lives within this + // lexical scope. 
+ let layer = unsafe { &*layer }; + let raw_surface: Box = + Box::new(instance_metal.create_surface_from_layer(layer)); + let surface = Surface { presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), - metal: Some(self.instance.metal.as_ref().map_or( - Err(CreateSurfaceError::BackendNotEnabled(Backend::Metal)), - |inst| { - // we don't want to link to metal-rs for this - #[allow(clippy::transmute_ptr_to_ref)] - Ok(inst.create_surface_from_layer(unsafe { std::mem::transmute(layer) })) - }, - )?), - #[cfg(dx12)] - dx12: None, - #[cfg(vulkan)] - vulkan: None, - #[cfg(gles)] - gl: None, + surface_per_backend: std::iter::once((Backend::Metal, raw_surface)).collect(), }; - let id = self.surfaces.prepare(id_in).assign(Arc::new(surface)); + let id = self + .surfaces + .prepare(Backend::Metal, id_in) + .assign(Arc::new(surface)); Ok(id) } @@ -598,25 +552,24 @@ impl Global { fn instance_create_surface_dx12( &self, id_in: Option, - create_surface_func: impl FnOnce(&HalInstance) -> HalSurface, + create_surface_func: impl FnOnce(&hal::dx12::Instance) -> hal::dx12::Surface, ) -> Result { + let instance = self + .instance + .raw(Backend::Dx12) + .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Dx12))?; + let instance_dx12 = instance.as_any().downcast_ref().unwrap(); + let surface: Box = Box::new(create_surface_func(instance_dx12)); + let surface = Surface { presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), - dx12: Some(create_surface_func( - self.instance - .dx12 - .as_ref() - .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Dx12))?, - )), - #[cfg(metal)] - metal: None, - #[cfg(vulkan)] - vulkan: None, - #[cfg(gles)] - gl: None, + surface_per_backend: std::iter::once((Backend::Dx12, surface)).collect(), }; - let id = self.surfaces.prepare(id_in).assign(Arc::new(surface)); + let id = self + .surfaces + .prepare(Backend::Dx12, id_in) + .assign(Arc::new(surface)); Ok(id) } @@ -631,7 +584,7 @@ impl Global { ) -> Result { profiling::scope!("Instance::instance_create_surface_from_visual"); self.instance_create_surface_dx12(id_in, |inst| unsafe { - inst.create_surface_from_visual(visual as _) + inst.create_surface_from_visual(visual.cast()) }) } @@ -661,7 +614,7 @@ impl Global { ) -> Result { profiling::scope!("Instance::instance_create_surface_from_swap_chain_panel"); self.instance_create_surface_dx12(id_in, |inst| unsafe { - inst.create_surface_from_swap_chain_panel(swap_chain_panel as _) + inst.create_surface_from_swap_chain_panel(swap_chain_panel.cast()) }) } @@ -670,97 +623,68 @@ impl Global { api_log!("Surface::drop {id:?}"); - fn unconfigure( - global: &Global, - surface: &Option>, - present: &Presentation, - ) { - if let Some(surface) = surface { - let hub = HalApi::hub(global); - if let Some(device) = present.device.downcast_ref::() { - hub.surface_unconfigure(device, surface); - } - } - } - let surface = self.surfaces.unregister(id); let surface = Arc::into_inner(surface.unwrap()) .expect("Surface cannot be destroyed because is still in use"); if let Some(present) = surface.presentation.lock().take() { - #[cfg(vulkan)] - unconfigure::(self, &surface.vulkan, &present); - #[cfg(metal)] - unconfigure::(self, &surface.metal, &present); - #[cfg(dx12)] - unconfigure::(self, &surface.dx12, &present); - #[cfg(gles)] - unconfigure::(self, &surface.gl, &present); - } - self.instance.destroy_surface(surface); - } - - fn enumerate( - &self, - _: A, - instance: &Option, - inputs: &AdapterInputs, - list: &mut Vec, - ) { - let inst = match *instance { - Some(ref inst) => inst, - None => 
return, - }; - let id_backend = match inputs.find(A::VARIANT) { - Some(id) => id, - None => return, - }; - - profiling::scope!("enumerating", &*format!("{:?}", A::VARIANT)); - let hub = HalApi::hub(self); - - let hal_adapters = unsafe { inst.enumerate_adapters(None) }; - for raw in hal_adapters { - let adapter = Adapter::new(raw); - log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info); - let id = hub.adapters.prepare(id_backend).assign(Arc::new(adapter)); - list.push(id); + for (&backend, surface) in &surface.surface_per_backend { + if backend == present.device.backend() { + unsafe { surface.unconfigure(present.device.raw()) }; + } + } } + drop(surface) } pub fn enumerate_adapters(&self, inputs: AdapterInputs) -> Vec { profiling::scope!("Instance::enumerate_adapters"); api_log!("Instance::enumerate_adapters"); - let mut adapters = Vec::new(); + fn enumerate( + hub: &Hub, + backend: Backend, + instance: &dyn hal::DynInstance, + inputs: &AdapterInputs, + list: &mut Vec, + ) { + let Some(id_backend) = inputs.find(backend) else { + return; + }; - #[cfg(vulkan)] - self.enumerate( - hal::api::Vulkan, - &self.instance.vulkan, - &inputs, - &mut adapters, - ); - #[cfg(metal)] - self.enumerate( - hal::api::Metal, - &self.instance.metal, - &inputs, - &mut adapters, - ); - #[cfg(dx12)] - self.enumerate(hal::api::Dx12, &self.instance.dx12, &inputs, &mut adapters); - #[cfg(gles)] - self.enumerate(hal::api::Gles, &self.instance.gl, &inputs, &mut adapters); + profiling::scope!("enumerating", &*format!("{:?}", backend)); + let hal_adapters = unsafe { instance.enumerate_adapters(None) }; + for raw in hal_adapters { + let adapter = Adapter::new(raw); + log::info!("Adapter {:?}", adapter.raw.info); + let id = hub + .adapters + .prepare(backend, id_backend) + .assign(Arc::new(adapter)); + list.push(id); + } + } + + let mut adapters = Vec::new(); + for (backend, instance) in &self.instance.instance_per_backend { + enumerate( + &self.hub, + *backend, + instance.as_ref(), + &inputs, + &mut adapters, + ); + } adapters } - fn select( + fn select( &self, + backend: Backend, selected: &mut usize, new_id: Option, - mut list: Vec>, + mut list: Vec, ) -> Option { match selected.checked_sub(list.len()) { Some(left) => { @@ -769,10 +693,11 @@ impl Global { } None => { let adapter = Adapter::new(list.swap_remove(*selected)); - log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info); - let id = HalApi::hub(self) + log::info!("Adapter {:?}", adapter.raw.info); + let id = self + .hub .adapters - .prepare(new_id) + .prepare(backend, new_id) .assign(Arc::new(adapter)); Some(id) } @@ -787,19 +712,19 @@ impl Global { profiling::scope!("Instance::request_adapter"); api_log!("Instance::request_adapter"); - fn gather( - _: A, - instance: Option<&A::Instance>, + fn gather( + backend: Backend, + instance: &Instance, inputs: &AdapterInputs, compatible_surface: Option<&Surface>, force_software: bool, device_types: &mut Vec, - ) -> (Option>, Vec>) { - let id = inputs.find(A::VARIANT); - match (id, instance) { + ) -> (Option>, Vec) { + let id = inputs.find(backend); + match (id, instance.raw(backend)) { (Some(id), Some(inst)) => { let compatible_hal_surface = - compatible_surface.and_then(|surface| A::surface_as_hal(surface)); + compatible_surface.and_then(|surface| surface.raw(backend)); let mut adapters = unsafe { inst.enumerate_adapters(compatible_hal_surface) }; if force_software { adapters.retain(|exposed| exposed.info.device_type == wgt::DeviceType::Cpu); @@ -828,8 +753,8 @@ impl Global { #[cfg(vulkan)] let 
(id_vulkan, adapters_vk) = gather( - hal::api::Vulkan, - self.instance.vulkan.as_ref(), + Backend::Vulkan, + &self.instance, &inputs, compatible_surface, desc.force_fallback_adapter, @@ -837,8 +762,8 @@ impl Global { ); #[cfg(metal)] let (id_metal, adapters_metal) = gather( - hal::api::Metal, - self.instance.metal.as_ref(), + Backend::Metal, + &self.instance, &inputs, compatible_surface, desc.force_fallback_adapter, @@ -846,8 +771,8 @@ impl Global { ); #[cfg(dx12)] let (id_dx12, adapters_dx12) = gather( - hal::api::Dx12, - self.instance.dx12.as_ref(), + Backend::Dx12, + &self.instance, &inputs, compatible_surface, desc.force_fallback_adapter, @@ -855,8 +780,8 @@ impl Global { ); #[cfg(gles)] let (id_gl, adapters_gl) = gather( - hal::api::Gles, - self.instance.gl.as_ref(), + Backend::Gl, + &self.instance, &inputs, compatible_surface, desc.force_fallback_adapter, @@ -915,19 +840,19 @@ impl Global { let mut selected = preferred_gpu.unwrap_or(0); #[cfg(vulkan)] - if let Some(id) = self.select(&mut selected, id_vulkan, adapters_vk) { + if let Some(id) = self.select(Backend::Vulkan, &mut selected, id_vulkan, adapters_vk) { return Ok(id); } #[cfg(metal)] - if let Some(id) = self.select(&mut selected, id_metal, adapters_metal) { + if let Some(id) = self.select(Backend::Metal, &mut selected, id_metal, adapters_metal) { return Ok(id); } #[cfg(dx12)] - if let Some(id) = self.select(&mut selected, id_dx12, adapters_dx12) { + if let Some(id) = self.select(Backend::Dx12, &mut selected, id_dx12, adapters_dx12) { return Ok(id); } #[cfg(gles)] - if let Some(id) = self.select(&mut selected, id_gl, adapters_gl) { + if let Some(id) = self.select(Backend::Gl, &mut selected, id_gl, adapters_gl) { return Ok(id); } let _ = selected; @@ -939,113 +864,92 @@ impl Global { /// # Safety /// /// `hal_adapter` must be created from this global internal instance handle. 
- pub unsafe fn create_adapter_from_hal( + pub unsafe fn create_adapter_from_hal( &self, - hal_adapter: hal::ExposedAdapter, + hal_adapter: hal::DynExposedAdapter, input: Option, ) -> AdapterId { profiling::scope!("Instance::create_adapter_from_hal"); - let fid = A::hub(self).adapters.prepare(input); - - let id = match A::VARIANT { - #[cfg(vulkan)] - Backend::Vulkan => fid.assign(Arc::new(Adapter::new(hal_adapter))), - #[cfg(metal)] - Backend::Metal => fid.assign(Arc::new(Adapter::new(hal_adapter))), - #[cfg(dx12)] - Backend::Dx12 => fid.assign(Arc::new(Adapter::new(hal_adapter))), - #[cfg(gles)] - Backend::Gl => fid.assign(Arc::new(Adapter::new(hal_adapter))), - _ => unreachable!(), - }; + let fid = self.hub.adapters.prepare(hal_adapter.backend(), input); + let id = fid.assign(Arc::new(Adapter::new(hal_adapter))); + resource_log!("Created Adapter {:?}", id); id } - pub fn adapter_get_info( + pub fn adapter_get_info( &self, adapter_id: AdapterId, ) -> Result { - let hub = A::hub(self); - - hub.adapters + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.raw.info.clone()) .map_err(|_| InvalidAdapter) } - pub fn adapter_get_texture_format_features( + pub fn adapter_get_texture_format_features( &self, adapter_id: AdapterId, format: wgt::TextureFormat, ) -> Result { - let hub = A::hub(self); - - hub.adapters + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.get_texture_format_features(format)) .map_err(|_| InvalidAdapter) } - pub fn adapter_features( - &self, - adapter_id: AdapterId, - ) -> Result { - let hub = A::hub(self); - - hub.adapters + pub fn adapter_features(&self, adapter_id: AdapterId) -> Result { + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.raw.features) .map_err(|_| InvalidAdapter) } - pub fn adapter_limits( - &self, - adapter_id: AdapterId, - ) -> Result { - let hub = A::hub(self); - - hub.adapters + pub fn adapter_limits(&self, adapter_id: AdapterId) -> Result { + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.raw.capabilities.limits.clone()) .map_err(|_| InvalidAdapter) } - pub fn adapter_downlevel_capabilities( + pub fn adapter_downlevel_capabilities( &self, adapter_id: AdapterId, ) -> Result { - let hub = A::hub(self); - - hub.adapters + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.raw.capabilities.downlevel.clone()) .map_err(|_| InvalidAdapter) } - pub fn adapter_get_presentation_timestamp( + pub fn adapter_get_presentation_timestamp( &self, adapter_id: AdapterId, ) -> Result { - let hub = A::hub(self); + let hub = &self.hub; let adapter = hub.adapters.get(adapter_id).map_err(|_| InvalidAdapter)?; Ok(unsafe { adapter.raw.adapter.get_presentation_timestamp() }) } - pub fn adapter_drop(&self, adapter_id: AdapterId) { + pub fn adapter_drop(&self, adapter_id: AdapterId) { profiling::scope!("Adapter::drop"); api_log!("Adapter::drop {adapter_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; hub.adapters.unregister(adapter_id); } } impl Global { - pub fn adapter_request_device( + pub fn adapter_request_device( &self, adapter_id: AdapterId, desc: &DeviceDescriptor, @@ -1056,12 +960,12 @@ impl Global { profiling::scope!("Adapter::request_device"); api_log!("Adapter::request_device"); - let hub = A::hub(self); - let device_fid = hub.devices.prepare(device_id_in); - let queue_fid = hub.queues.prepare(queue_id_in); + let backend = adapter_id.backend(); + let device_fid = self.hub.devices.prepare(backend, device_id_in); + let queue_fid = self.hub.queues.prepare(backend, queue_id_in); let error = 'error: { - 
let adapter = match hub.adapters.get(adapter_id) { + let adapter = match self.hub.adapters.get(adapter_id) { Ok(adapter) => adapter, Err(_) => break 'error RequestDeviceError::InvalidAdapter, }; @@ -1089,10 +993,10 @@ impl Global { /// /// - `hal_device` must be created from `adapter_id` or its internal handle. /// - `desc` must be a subset of `hal_device` features and limits. - pub unsafe fn create_device_from_hal( + pub unsafe fn create_device_from_hal( &self, adapter_id: AdapterId, - hal_device: OpenDevice, + hal_device: hal::DynOpenDevice, desc: &DeviceDescriptor, trace_path: Option<&std::path::Path>, device_id_in: Option, @@ -1100,12 +1004,12 @@ impl Global { ) -> (DeviceId, QueueId, Option) { profiling::scope!("Global::create_device_from_hal"); - let hub = A::hub(self); - let devices_fid = hub.devices.prepare(device_id_in); - let queues_fid = hub.queues.prepare(queue_id_in); + let backend = adapter_id.backend(); + let devices_fid = self.hub.devices.prepare(backend, device_id_in); + let queues_fid = self.hub.queues.prepare(backend, queue_id_in); let error = 'error: { - let adapter = match hub.adapters.get(adapter_id) { + let adapter = match self.hub.adapters.get(adapter_id) { Ok(adapter) => adapter, Err(_) => break 'error RequestDeviceError::InvalidAdapter, }; diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index 8f6d944f4e..1a83b68590 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -41,12 +41,20 @@ rustdoc::private_intra_doc_links )] #![warn( + clippy::ptr_as_ptr, trivial_casts, trivial_numeric_casts, unsafe_op_in_unsafe_fn, unused_extern_crates, unused_qualifications )] +// We use `Arc` in wgpu-core, but on wasm (unless opted out via `fragile-send-sync-non-atomic-wasm`) +// wgpu-hal resources are not Send/Sync, causing a clippy warning for unnecessary `Arc`s. +// We could use `Rc`s in this case as recommended, but unless atomics are enabled +// this doesn't make a difference. +// Therefore, this is only really a concern for users targeting WebGL +// (the only reason to use wgpu-core on the web in the first place) that have atomics enabled. +#![cfg_attr(not(send_sync), allow(clippy::arc_with_non_send_sync))] pub mod binding_model; pub mod command; @@ -88,7 +96,7 @@ pub(crate) use hash_utils::*; /// The index of a queue submission. /// /// These are the values stored in `Device::fence`. -type SubmissionIndex = hal::FenceValue; +pub type SubmissionIndex = hal::FenceValue; type Index = u32; type Epoch = u32; @@ -132,174 +140,6 @@ If you are running this program on native and not in a browser and wish to work Adapter::downlevel_properties or Device::downlevel_properties to get a listing of the features the current \ platform supports."; -// #[cfg] attributes in exported macros are interesting! -// -// The #[cfg] conditions in a macro's expansion are evaluated using the -// configuration options (features, target architecture and os, etc.) in force -// where the macro is *used*, not where it is *defined*. That is, if crate A -// defines a macro like this: -// -// #[macro_export] -// macro_rules! if_bleep { -// { } => { -// #[cfg(feature = "bleep")] -// bleep(); -// } -// } -// -// and then crate B uses it like this: -// -// fn f() { -// if_bleep! { } -// } -// -// then it is crate B's `"bleep"` feature, not crate A's, that determines -// whether the macro expands to a function call or an empty statement. The -// entire configuration predicate is evaluated in the use's context, not the -// definition's. 
-// -// Since `wgpu-core` selects back ends using features, we need to make sure the -// arms of the `gfx_select!` macro are pruned according to `wgpu-core`'s -// features, not those of whatever crate happens to be using `gfx_select!`. This -// means we can't use `#[cfg]` attributes in `gfx_select!`s definition itself. -// Instead, for each backend, `gfx_select!` must use a macro whose definition is -// selected by `#[cfg]` in `wgpu-core`. The configuration predicate is still -// evaluated when the macro is used; we've just moved the `#[cfg]` into a macro -// used by `wgpu-core` itself. - -/// Define an exported macro named `$public` that expands to an expression if -/// the feature `$feature` is enabled, or to a panic otherwise. -/// -/// This is used in the definition of `gfx_select!`, to dispatch the -/// call to the appropriate backend, but panic if that backend was not -/// compiled in. -/// -/// For a call like this: -/// -/// ```ignore -/// define_backend_caller! { name, private, "feature" if cfg_condition } -/// ``` -/// -/// define a macro `name`, used like this: -/// -/// ```ignore -/// name!(expr) -/// ``` -/// -/// that expands to `expr` if `#[cfg(cfg_condition)]` is enabled, or a -/// panic otherwise. The panic message complains that `"feature"` is -/// not enabled. -/// -/// Because of odd technical limitations on exporting macros expanded -/// by other macros, you must supply both a public-facing name for the -/// macro and a private name, `$private`, which is never used -/// outside this macro. For details: -/// -macro_rules! define_backend_caller { - { $public:ident, $private:ident, $feature:literal if $cfg:meta } => { - #[cfg($cfg)] - #[macro_export] - macro_rules! $private { - ( $call:expr ) => ( $call ) - } - - #[cfg(not($cfg))] - #[macro_export] - macro_rules! $private { - ( $call:expr ) => ( - panic!("Identifier refers to disabled backend feature {:?}", $feature) - ) - } - - // See note about rust-lang#52234 above. - #[doc(hidden)] pub use $private as $public; - } -} - -// Define a macro for each `gfx_select!` match arm. For example, -// -// gfx_if_vulkan!(expr) -// -// expands to `expr` if the `"vulkan"` feature is enabled, or to a panic -// otherwise. -define_backend_caller! { gfx_if_vulkan, gfx_if_vulkan_hidden, "vulkan" if all(feature = "vulkan", not(target_arch = "wasm32")) } -define_backend_caller! { gfx_if_metal, gfx_if_metal_hidden, "metal" if all(feature = "metal", any(target_os = "macos", target_os = "ios")) } -define_backend_caller! { gfx_if_dx12, gfx_if_dx12_hidden, "dx12" if all(feature = "dx12", windows) } -define_backend_caller! { gfx_if_gles, gfx_if_gles_hidden, "gles" if feature = "gles" } -define_backend_caller! { gfx_if_empty, gfx_if_empty_hidden, "empty" if all( - not(any(feature = "metal", feature = "vulkan", feature = "gles")), - any(target_os = "macos", target_os = "ios"), -) } - -/// Dispatch on an [`Id`]'s backend to a backend-generic method. -/// -/// Uses of this macro have the form: -/// -/// ```ignore -/// -/// gfx_select!(id => value.method(args...)) -/// -/// ``` -/// -/// This expands to an expression that calls `value.method::(args...)` for -/// the backend `A` selected by `id`. The expansion matches on `id.backend()`, -/// with an arm for each backend type in [`wgpu_types::Backend`] which calls the -/// specialization of `method` for the given backend. This allows resource -/// identifiers to select backends dynamically, even though many `wgpu_core` -/// methods are compiled and optimized for a specific back end. 
-/// -/// This macro is typically used to call methods on [`wgpu_core::global::Global`], -/// many of which take a single `hal::Api` type parameter. For example, to -/// create a new buffer on the device indicated by `device_id`, one would say: -/// -/// ```ignore -/// gfx_select!(device_id => global.device_create_buffer(device_id, ...)) -/// ``` -/// -/// where the `device_create_buffer` method is defined like this: -/// -/// ```ignore -/// impl Global { -/// pub fn device_create_buffer(&self, ...) -> ... -/// { ... } -/// } -/// ``` -/// -/// That `gfx_select!` call uses `device_id`'s backend to select the right -/// backend type `A` for a call to `Global::device_create_buffer`. -/// -/// However, there's nothing about this macro that is specific to `hub::Global`. -/// For example, Firefox's embedding of `wgpu_core` defines its own types with -/// methods that take `hal::Api` type parameters. Firefox uses `gfx_select!` to -/// dynamically dispatch to the right specialization based on the resource's id. -/// -/// [`wgpu_types::Backend`]: wgt::Backend -/// [`wgpu_core::global::Global`]: crate::global::Global -/// [`Id`]: id::Id -#[macro_export] -macro_rules! gfx_select { - // Simple two-component expression, like `self.0.method(..)`. - ($id:expr => $c0:ident.$c1:tt.$method:ident $params:tt) => { - $crate::gfx_select!($id => {$c0.$c1}, $method $params) - }; - - // Simple identifier-only expression, like `global.method(..)`. - ($id:expr => $c0:ident.$method:ident $params:tt) => { - $crate::gfx_select!($id => {$c0}, $method $params) - }; - - ($id:expr => {$($c:tt)*}, $method:ident $params:tt) => { - match $id.backend() { - wgt::Backend::Vulkan => $crate::gfx_if_vulkan!($($c)*.$method::<$crate::api::Vulkan> $params), - wgt::Backend::Metal => $crate::gfx_if_metal!($($c)*.$method::<$crate::api::Metal> $params), - wgt::Backend::Dx12 => $crate::gfx_if_dx12!($($c)*.$method::<$crate::api::Dx12> $params), - wgt::Backend::Gl => $crate::gfx_if_gles!($($c)*.$method::<$crate::api::Gles> $params), - wgt::Backend::Empty => $crate::gfx_if_empty!($($c)*.$method::<$crate::api::Empty> $params), - other => panic!("Unexpected backend {:?}", other), - } - }; -} - #[cfg(feature = "api_log_info")] macro_rules! api_log { ($($arg:tt)+) => (log::info!($($arg)+)) diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs index b62c1a521f..8b55b27d8f 100644 --- a/wgpu-core/src/lock/rank.rs +++ b/wgpu-core/src/lock/rank.rs @@ -91,18 +91,12 @@ define_lock_ranks! { DEVICE_SNATCHABLE_LOCK, DEVICE_USAGE_SCOPES, SHARED_TRACKER_INDEX_ALLOCATOR_INNER, - BUFFER_BIND_GROUP_STATE_BUFFERS, - TEXTURE_BIND_GROUP_STATE_TEXTURES, BUFFER_MAP_STATE, - STATELESS_BIND_GROUP_STATE_RESOURCES, } rank DEVICE_SNATCHABLE_LOCK "Device::snatchable_lock" followed by { SHARED_TRACKER_INDEX_ALLOCATOR_INNER, DEVICE_TRACE, BUFFER_MAP_STATE, - BUFFER_BIND_GROUP_STATE_BUFFERS, - TEXTURE_BIND_GROUP_STATE_TEXTURES, - STATELESS_BIND_GROUP_STATE_RESOURCES, // Uncomment this to see an interesting cycle. // COMMAND_BUFFER_DATA, } @@ -125,9 +119,7 @@ define_lock_ranks! 
{ } rank BUFFER_BIND_GROUPS "Buffer::bind_groups" followed by { } - rank BUFFER_BIND_GROUP_STATE_BUFFERS "BufferBindGroupState::buffers" followed by { } rank BUFFER_INITIALIZATION_STATUS "Buffer::initialization_status" followed by { } - rank BUFFER_SYNC_MAPPED_WRITES "Buffer::sync_mapped_writes" followed by { } rank DEVICE_DEFERRED_DESTROY "Device::deferred_destroy" followed by { } rank DEVICE_FENCE "Device::fence" followed by { } #[allow(dead_code)] @@ -136,26 +128,15 @@ define_lock_ranks! { rank DEVICE_USAGE_SCOPES "Device::usage_scopes" followed by { } rank IDENTITY_MANAGER_VALUES "IdentityManager::values" followed by { } rank REGISTRY_STORAGE "Registry::storage" followed by { } - rank RENDER_BUNDLE_SCOPE_BUFFERS "RenderBundleScope::buffers" followed by { } - rank RENDER_BUNDLE_SCOPE_TEXTURES "RenderBundleScope::textures" followed by { } - rank RENDER_BUNDLE_SCOPE_BIND_GROUPS "RenderBundleScope::bind_groups" followed by { } - rank RENDER_BUNDLE_SCOPE_RENDER_PIPELINES "RenderBundleScope::render_pipelines" followed by { } - rank RENDER_BUNDLE_SCOPE_QUERY_SETS "RenderBundleScope::query_sets" followed by { } rank RESOURCE_POOL_INNER "ResourcePool::inner" followed by { } rank SHARED_TRACKER_INDEX_ALLOCATOR_INNER "SharedTrackerIndexAllocator::inner" followed by { } - rank STAGING_BUFFER_RAW "StagingBuffer::raw" followed by { } - rank STATELESS_BIND_GROUP_STATE_RESOURCES "StatelessBindGroupState::resources" followed by { } rank SURFACE_PRESENTATION "Surface::presentation" followed by { } rank TEXTURE_BIND_GROUPS "Texture::bind_groups" followed by { } - rank TEXTURE_BIND_GROUP_STATE_TEXTURES "TextureBindGroupState::textures" followed by { } rank TEXTURE_INITIALIZATION_STATUS "Texture::initialization_status" followed by { } - rank TEXTURE_CLEAR_MODE "Texture::clear_mode" followed by { } rank TEXTURE_VIEWS "Texture::views" followed by { } - rank BLAS "Blas::raw" followed by { } rank BLAS_BUILT_INDEX "Blas::built_index" followed by { } rank TLAS_BUILT_INDEX "Tlas::built_index" followed by { } rank TLAS_DEPENDENCIES "Tlas::dependencies" followed by { } - rank TLAS_INSTANCE_BUFFER "Tlas::instance_buffer" followed by { } #[cfg(test)] rank PAWN "pawn" followed by { ROOK, BISHOP } diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index b422ced5eb..db1c1ba76a 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -3,14 +3,13 @@ use crate::{ binding_model::{CreateBindGroupLayoutError, CreatePipelineLayoutError, PipelineLayout}, command::ColorAttachmentError, device::{Device, DeviceError, MissingDownlevelFlags, MissingFeatures, RenderPassContext}, - hal_api::HalApi, id::{PipelineCacheId, PipelineLayoutId, ShaderModuleId}, resource::{Labeled, TrackingData}, resource_log, validation, Label, }; use arrayvec::ArrayVec; use naga::error::ShaderError; -use std::{borrow::Cow, marker::PhantomData, num::NonZeroU32, sync::Arc}; +use std::{borrow::Cow, marker::PhantomData, mem::ManuallyDrop, num::NonZeroU32, sync::Arc}; use thiserror::Error; /// Information about buffer bindings, which @@ -46,22 +45,21 @@ pub struct ShaderModuleDescriptor<'a> { } #[derive(Debug)] -pub struct ShaderModule { - pub(crate) raw: Option, - pub(crate) device: Arc>, +pub struct ShaderModule { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, pub(crate) interface: Option, /// The `label` from the descriptor used to create the resource. 
pub(crate) label: String, } -impl Drop for ShaderModule { +impl Drop for ShaderModule { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw {}", self.error_ident()); - unsafe { - use hal::Device; - self.device.raw().destroy_shader_module(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_shader_module(raw); } } } @@ -71,9 +69,9 @@ crate::impl_labeled!(ShaderModule); crate::impl_parent_device!(ShaderModule); crate::impl_storage_item!(ShaderModule); -impl ShaderModule { - pub(crate) fn raw(&self) -> &A::ShaderModule { - self.raw.as_ref().unwrap() +impl ShaderModule { + pub(crate) fn raw(&self) -> &dyn hal::DynShaderModule { + self.raw.as_ref() } pub(crate) fn finalize_entry_point_name( @@ -147,15 +145,13 @@ pub struct ProgrammableStageDescriptor<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// Should the pipeline attempt to transform vertex shaders to use vertex pulling. - pub vertex_pulling_transform: bool, } /// Describes a programmable pipeline stage. #[derive(Clone, Debug)] -pub struct ResolvedProgrammableStageDescriptor<'a, A: HalApi> { +pub struct ResolvedProgrammableStageDescriptor<'a> { /// The compiled shader module for this stage. - pub module: Arc>, + pub module: Arc, /// The name of the entry point in the compiled shader. The name is selected using the /// following logic: /// @@ -176,8 +172,6 @@ pub struct ResolvedProgrammableStageDescriptor<'a, A: HalApi> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// Should the pipeline attempt to transform vertex shaders to use vertex pulling. - pub vertex_pulling_transform: bool, } /// Number of implicit bind groups derived at pipeline creation. @@ -186,6 +180,8 @@ pub type ImplicitBindGroupCount = u8; #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum ImplicitLayoutError { + #[error("The implicit_pipeline_ids arg is required")] + MissingImplicitPipelineIds, #[error("Missing IDs for deriving {0} bind groups")] MissingIds(ImplicitBindGroupCount), #[error("Unable to reflect the shader {0:?} interface")] @@ -211,14 +207,14 @@ pub struct ComputePipelineDescriptor<'a> { /// Describes a compute pipeline. #[derive(Clone, Debug)] -pub struct ResolvedComputePipelineDescriptor<'a, A: HalApi> { +pub struct ResolvedComputePipelineDescriptor<'a> { pub label: Label<'a>, /// The layout of bind groups for this pipeline. - pub layout: Option>>, + pub layout: Option>, /// The compiled compute stage and its entry point. - pub stage: ResolvedProgrammableStageDescriptor<'a, A>, + pub stage: ResolvedProgrammableStageDescriptor<'a>, /// The pipeline cache to use when creating this pipeline. 
- pub cache: Option>>, + pub cache: Option>, } #[derive(Clone, Debug, Error)] @@ -236,30 +232,31 @@ pub enum CreateComputePipelineError { Stage(#[from] validation::StageError), #[error("Internal error: {0}")] Internal(String), + #[error("Pipeline constant error: {0}")] + PipelineConstants(String), #[error(transparent)] MissingDownlevelFlags(#[from] MissingDownlevelFlags), } #[derive(Debug)] -pub struct ComputePipeline { - pub(crate) raw: Option, - pub(crate) layout: Arc>, - pub(crate) device: Arc>, - pub(crate) _shader_module: Arc>, +pub struct ComputePipeline { + pub(crate) raw: ManuallyDrop>, + pub(crate) layout: Arc, + pub(crate) device: Arc, + pub(crate) _shader_module: Arc, pub(crate) late_sized_buffer_groups: ArrayVec, /// The `label` from the descriptor used to create the resource. pub(crate) label: String, pub(crate) tracking_data: TrackingData, } -impl Drop for ComputePipeline { +impl Drop for ComputePipeline { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw {}", self.error_ident()); - unsafe { - use hal::Device; - self.device.raw().destroy_compute_pipeline(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_compute_pipeline(raw); } } } @@ -270,9 +267,9 @@ crate::impl_parent_device!(ComputePipeline); crate::impl_storage_item!(ComputePipeline); crate::impl_trackable!(ComputePipeline); -impl ComputePipeline { - pub(crate) fn raw(&self) -> &A::ComputePipeline { - self.raw.as_ref().unwrap() +impl ComputePipeline { + pub(crate) fn raw(&self) -> &dyn hal::DynComputePipeline { + self.raw.as_ref() } } @@ -300,22 +297,20 @@ impl From for CreatePipelineCacheError { } #[derive(Debug)] -pub struct PipelineCache { - pub(crate) raw: Option, - pub(crate) device: Arc>, +pub struct PipelineCache { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, /// The `label` from the descriptor used to create the resource. pub(crate) label: String, - pub(crate) tracking_data: TrackingData, } -impl Drop for PipelineCache { +impl Drop for PipelineCache { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw {}", self.error_ident()); - unsafe { - use hal::Device; - self.device.raw().destroy_pipeline_cache(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_pipeline_cache(raw); } } } @@ -324,7 +319,12 @@ crate::impl_resource_type!(PipelineCache); crate::impl_labeled!(PipelineCache); crate::impl_parent_device!(PipelineCache); crate::impl_storage_item!(PipelineCache); -crate::impl_trackable!(PipelineCache); + +impl PipelineCache { + pub(crate) fn raw(&self) -> &dyn hal::DynPipelineCache { + self.raw.as_ref() + } +} /// Describes how the vertex buffer is interpreted. #[derive(Clone, Debug)] @@ -351,9 +351,9 @@ pub struct VertexState<'a> { /// Describes the vertex process in a render pipeline. #[derive(Clone, Debug)] -pub struct ResolvedVertexState<'a, A: HalApi> { +pub struct ResolvedVertexState<'a> { /// The compiled vertex stage and its entry point. - pub stage: ResolvedProgrammableStageDescriptor<'a, A>, + pub stage: ResolvedProgrammableStageDescriptor<'a>, /// The format of any vertex buffers used with this pipeline. 
pub buffers: Cow<'a, [VertexBufferLayout<'a>]>, } @@ -370,9 +370,9 @@ pub struct FragmentState<'a> { /// Describes fragment processing in a render pipeline. #[derive(Clone, Debug)] -pub struct ResolvedFragmentState<'a, A: HalApi> { +pub struct ResolvedFragmentState<'a> { /// The compiled fragment stage and its entry point. - pub stage: ResolvedProgrammableStageDescriptor<'a, A>, + pub stage: ResolvedProgrammableStageDescriptor<'a>, /// The effect of draw calls on the color aspect of the output target. pub targets: Cow<'a, [Option]>, } @@ -406,12 +406,12 @@ pub struct RenderPipelineDescriptor<'a> { /// Describes a render (graphics) pipeline. #[derive(Clone, Debug)] -pub struct ResolvedRenderPipelineDescriptor<'a, A: HalApi> { +pub struct ResolvedRenderPipelineDescriptor<'a> { pub label: Label<'a>, /// The layout of bind groups for this pipeline. - pub layout: Option>>, + pub layout: Option>, /// The vertex processing state for this pipeline. - pub vertex: ResolvedVertexState<'a, A>, + pub vertex: ResolvedVertexState<'a>, /// The properties of the pipeline at the primitive assembly and rasterization level. pub primitive: wgt::PrimitiveState, /// The effect of draw calls on the depth and stencil aspects of the output target, if any. @@ -419,12 +419,12 @@ pub struct ResolvedRenderPipelineDescriptor<'a, A: HalApi> { /// The multi-sampling properties of the pipeline. pub multisample: wgt::MultisampleState, /// The fragment processing state for this pipeline. - pub fragment: Option>, + pub fragment: Option>, /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, /// The pipeline cache to use when creating this pipeline. - pub cache: Option>>, + pub cache: Option>, } #[derive(Clone, Debug)] @@ -529,6 +529,11 @@ pub enum CreateRenderPipelineError { stage: wgt::ShaderStages, error: String, }, + #[error("Pipeline constant error in {stage:?} shader: {error}")] + PipelineConstants { + stage: wgt::ShaderStages, + error: String, + }, #[error("In the provided shader, the type given for group {group} binding {binding} has a size of {size}. As the device does not support `DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED`, the type must have a size that is a multiple of 16 bytes.")] UnalignedShader { group: u32, binding: u32, size: u64 }, #[error("Using the blend factor {factor:?} for render target {target} is not possible. 
Only the first render target may be used when dual-source blending.")] @@ -582,12 +587,11 @@ impl Default for VertexStep { } #[derive(Debug)] -pub struct RenderPipeline { - pub(crate) raw: Option, - pub(crate) device: Arc>, - pub(crate) layout: Arc>, - pub(crate) _shader_modules: - ArrayVec>, { hal::MAX_CONCURRENT_SHADER_STAGES }>, +pub struct RenderPipeline { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, + pub(crate) layout: Arc, + pub(crate) _shader_modules: ArrayVec, { hal::MAX_CONCURRENT_SHADER_STAGES }>, pub(crate) pass_context: RenderPassContext, pub(crate) flags: PipelineFlags, pub(crate) strip_index_format: Option, @@ -598,14 +602,13 @@ pub struct RenderPipeline { pub(crate) tracking_data: TrackingData, } -impl Drop for RenderPipeline { +impl Drop for RenderPipeline { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw {}", self.error_ident()); - unsafe { - use hal::Device; - self.device.raw().destroy_render_pipeline(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_render_pipeline(raw); } } } @@ -616,8 +619,8 @@ crate::impl_parent_device!(RenderPipeline); crate::impl_storage_item!(RenderPipeline); crate::impl_trackable!(RenderPipeline); -impl RenderPipeline { - pub(crate) fn raw(&self) -> &A::RenderPipeline { - self.raw.as_ref().unwrap() +impl RenderPipeline { + pub(crate) fn raw(&self) -> &dyn hal::DynRenderPipeline { + self.raw.as_ref() } } diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index fa03387cb7..697156b35f 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -9,21 +9,18 @@ When this texture is presented, we remove it from the device tracker as well as extract it from the hub. 
!*/ -use std::{borrow::Borrow, sync::Arc}; +use std::{mem::ManuallyDrop, sync::Arc}; #[cfg(feature = "trace")] use crate::device::trace::Action; use crate::{ conv, - device::any_device::AnyDevice, - device::{DeviceError, MissingDownlevelFlags, WaitIdleError}, + device::{Device, DeviceError, MissingDownlevelFlags, WaitIdleError}, global::Global, - hal_api::HalApi, hal_label, id, resource::{self, Trackable}, }; -use hal::{Queue as _, Surface as _}; use thiserror::Error; use wgt::SurfaceStatus as Status; @@ -31,7 +28,7 @@ const FRAME_TIMEOUT_MS: u32 = 1000; #[derive(Debug)] pub(crate) struct Presentation { - pub(crate) device: AnyDevice, + pub(crate) device: Arc, pub(crate) config: wgt::SurfaceConfiguration>, pub(crate) acquired_texture: Option, } @@ -89,8 +86,11 @@ pub enum ConfigureSurfaceError { requested: wgt::CompositeAlphaMode, available: Vec, }, - #[error("Requested usage is not supported")] - UnsupportedUsage, + #[error("Requested usage {requested:?} is not in the list of supported usages: {available:?}")] + UnsupportedUsage { + requested: hal::TextureUses, + available: hal::TextureUses, + }, #[error("Gpu got stuck :(")] StuckGpu, } @@ -113,16 +113,14 @@ pub struct SurfaceOutput { } impl Global { - pub fn surface_get_current_texture( + pub fn surface_get_current_texture( &self, surface_id: id::SurfaceId, texture_id_in: Option, ) -> Result { profiling::scope!("SwapChain::get_next_texture"); - let hub = A::hub(self); - - let fid = hub.textures.prepare(texture_id_in); + let hub = &self.hub; let surface = self .surfaces @@ -130,17 +128,14 @@ impl Global { .map_err(|_| SurfaceError::Invalid)?; let (device, config) = if let Some(ref present) = *surface.presentation.lock() { - match present.device.downcast_clone::() { - Some(device) => { - device.check_is_valid()?; - (device, present.config.clone()) - } - None => return Err(SurfaceError::NotConfigured), - } + present.device.check_is_valid()?; + (present.device.clone(), present.config.clone()) } else { return Err(SurfaceError::NotConfigured); }; + let fid = hub.textures.prepare(device.backend(), texture_id_in); + #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(Action::GetSurfaceTexture { @@ -149,18 +144,17 @@ impl Global { }); } - let fence_guard = device.fence.read(); - let fence = fence_guard.as_ref().unwrap(); + let fence = device.fence.read(); - let suf = A::surface_as_hal(surface.as_ref()); + let suf = surface.raw(device.backend()).unwrap(); let (texture_id, status) = match unsafe { - suf.unwrap().acquire_texture( + suf.acquire_texture( Some(std::time::Duration::from_millis(FRAME_TIMEOUT_MS as u64)), - fence, + fence.as_ref(), ) } { Ok(Some(ast)) => { - drop(fence_guard); + drop(fence); let texture_desc = wgt::TextureDescriptor { label: Some(std::borrow::Cow::Borrowed("")), @@ -193,11 +187,9 @@ impl Global { range: wgt::ImageSubresourceRange::default(), }; let clear_view = unsafe { - hal::Device::create_texture_view( - device.raw(), - ast.texture.borrow(), - &clear_view_desc, - ) + device + .raw() + .create_texture_view(ast.texture.as_ref().borrow(), &clear_view_desc) } .map_err(DeviceError::from)?; @@ -206,14 +198,14 @@ impl Global { let texture = resource::Texture::new( &device, resource::TextureInner::Surface { - raw: Some(ast.texture), + raw: ast.texture, parent_id: surface_id, }, hal_usage, &texture_desc, format_features, resource::TextureClearMode::Surface { - clear_view: Some(clear_view), + clear_view: ManuallyDrop::new(clear_view), }, true, ); @@ -227,7 +219,6 @@ impl Global { 
.insert_single(&texture, hal::TextureUses::UNINITIALIZED); let id = fid.assign(texture); - log::debug!("Created CURRENT Surface Texture {:?}", id); if present.acquired_texture.is_some() { return Err(SurfaceError::AlreadyAcquired); @@ -261,13 +252,10 @@ impl Global { Ok(SurfaceOutput { status, texture_id }) } - pub fn surface_present( - &self, - surface_id: id::SurfaceId, - ) -> Result { + pub fn surface_present(&self, surface_id: id::SurfaceId) -> Result { profiling::scope!("SwapChain::present"); - let hub = A::hub(self); + let hub = &self.hub; let surface = self .surfaces @@ -280,7 +268,7 @@ impl Global { None => return Err(SurfaceError::NotConfigured), }; - let device = present.device.downcast_ref::().unwrap(); + let device = &present.device; #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { @@ -298,10 +286,6 @@ impl Global { // The texture ID got added to the device tracker by `submit()`, // and now we are moving it away. - log::debug!( - "Removing swapchain texture {:?} from the device tracker", - texture_id - ); let texture = hub.textures.unregister(texture_id); if let Some(texture) = texture { device @@ -309,27 +293,15 @@ impl Global { .lock() .textures .remove(texture.tracker_index()); - let mut exclusive_snatch_guard = device.snatchable_lock.write(); - let suf = A::surface_as_hal(&surface); - let mut inner = texture.inner_mut(&mut exclusive_snatch_guard); - let inner = inner.as_mut().unwrap(); - - match *inner { - resource::TextureInner::Surface { - ref mut raw, - ref parent_id, - } => { - if surface_id != *parent_id { + let suf = surface.raw(device.backend()).unwrap(); + let exclusive_snatch_guard = device.snatchable_lock.write(); + match texture.inner.snatch(exclusive_snatch_guard).unwrap() { + resource::TextureInner::Surface { raw, parent_id } => { + if surface_id != parent_id { log::error!("Presented frame is from a different surface"); Err(hal::SurfaceError::Lost) } else { - unsafe { - queue - .raw - .as_ref() - .unwrap() - .present(suf.unwrap(), raw.take().unwrap()) - } + unsafe { queue.raw().present(suf, raw) } } } _ => unreachable!(), @@ -339,8 +311,6 @@ impl Global { } }; - log::debug!("Presented. End of Frame"); - match result { Ok(()) => Ok(Status::Good), Err(err) => match err { @@ -355,13 +325,10 @@ impl Global { } } - pub fn surface_texture_discard( - &self, - surface_id: id::SurfaceId, - ) -> Result<(), SurfaceError> { + pub fn surface_texture_discard(&self, surface_id: id::SurfaceId) -> Result<(), SurfaceError> { profiling::scope!("SwapChain::discard"); - let hub = A::hub(self); + let hub = &self.hub; let surface = self .surfaces @@ -373,7 +340,7 @@ impl Global { None => return Err(SurfaceError::NotConfigured), }; - let device = present.device.downcast_ref::().unwrap(); + let device = &present.device; #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { @@ -390,11 +357,6 @@ impl Global { // The texture ID got added to the device tracker by `submit()`, // and now we are moving it away. 
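// Editor's note: `surface_present` above and `surface_texture_discard` here
// share the same shape from this point on: `unregister` pulls the texture
// back out of the hub, its tracker entry is removed, and the snatchable
// `TextureInner::Surface` is taken under the exclusive snatch guard so the
// raw HAL texture can be handed back to the surface (presented or
// discarded) exactly once.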
- log::debug!( - "Removing swapchain texture {:?} from the device tracker", - texture_id - ); - let texture = hub.textures.unregister(texture_id); if let Some(texture) = texture { @@ -403,12 +365,12 @@ impl Global { .lock() .textures .remove(texture.tracker_index()); - let suf = A::surface_as_hal(&surface); + let suf = surface.raw(device.backend()); let exclusive_snatch_guard = device.snatchable_lock.write(); match texture.inner.snatch(exclusive_snatch_guard).unwrap() { - resource::TextureInner::Surface { mut raw, parent_id } => { + resource::TextureInner::Surface { raw, parent_id } => { if surface_id == parent_id { - unsafe { suf.unwrap().discard_texture(raw.take().unwrap()) }; + unsafe { suf.unwrap().discard_texture(raw) }; } else { log::warn!("Surface texture is outdated"); } diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs index ae8b24df74..678d66e0cd 100644 --- a/wgpu-core/src/ray_tracing.rs +++ b/wgpu-core/src/ray_tracing.rs @@ -1,7 +1,6 @@ use crate::{ command::CommandEncoderError, device::DeviceError, - hal_api::HalApi, id::{BlasId, BufferId, TlasId}, resource::CreateBufferError, }; @@ -194,24 +193,24 @@ pub(crate) enum BlasActionKind { } #[derive(Debug, Clone)] -pub(crate) enum TlasActionKind { +pub(crate) enum TlasActionKind { Build { build_index: NonZeroU64, - dependencies: Vec>>, + dependencies: Vec>, }, Use, } #[derive(Debug, Clone)] -pub(crate) struct BlasAction { - pub blas: Arc>, +pub(crate) struct BlasAction { + pub blas: Arc, pub kind: BlasActionKind, } #[derive(Debug, Clone)] -pub(crate) struct TlasAction { - pub tlas: Arc>, - pub kind: TlasActionKind, +pub(crate) struct TlasAction { + pub tlas: Arc, + pub kind: TlasActionKind, } #[derive(Debug, Clone)] @@ -257,8 +256,9 @@ pub struct TraceTlasPackage { pub lowest_unmodified: u32, } -pub(crate) fn get_raw_tlas_instance_size() -> usize { - match A::VARIANT { +pub(crate) fn get_raw_tlas_instance_size(backend: wgt::Backend) -> usize { + // TODO: this should be provided by the backend + match backend { wgt::Backend::Empty => 0, wgt::Backend::Vulkan => 64, _ => unimplemented!(), @@ -274,11 +274,13 @@ struct RawTlasInstance { acceleration_structure_reference: u64, } -pub(crate) fn tlas_instance_into_bytes( +pub(crate) fn tlas_instance_into_bytes( instance: &TlasInstance, blas_address: u64, + backend: wgt::Backend, ) -> Vec { - match A::VARIANT { + // TODO: get the device to do this + match backend { wgt::Backend::Empty => vec![], wgt::Backend::Vulkan => { const MAX_U24: u32 = (1u32 << 24u32) - 1u32; @@ -292,7 +294,7 @@ pub(crate) fn tlas_instance_into_bytes( let temp: *const _ = &temp; unsafe { slice::from_raw_parts::( - temp as *const u8, + temp.cast::(), std::mem::size_of::(), ) .to_vec() diff --git a/wgpu-core/src/registry.rs b/wgpu-core/src/registry.rs index 9183cc83bb..fa7e0def6c 100644 --- a/wgpu-core/src/registry.rs +++ b/wgpu-core/src/registry.rs @@ -1,7 +1,5 @@ use std::sync::Arc; -use wgt::Backend; - use crate::{ id::Id, identity::IdentityManager, @@ -40,21 +38,15 @@ pub(crate) struct Registry { // Must only contain an id which has either never been used or has been released from `storage` identity: Arc>, storage: RwLock>, - backend: Backend, } impl Registry { - pub(crate) fn new(backend: Backend) -> Self { + pub(crate) fn new() -> Self { Self { identity: Arc::new(IdentityManager::new()), storage: RwLock::new(rank::REGISTRY_STORAGE, Storage::new()), - backend, } } - - pub(crate) fn without_backend() -> Self { - Self::new(Backend::Empty) - } } #[must_use] @@ -89,14 +81,18 @@ impl FutureId<'_, T> 
{ } impl Registry { - pub(crate) fn prepare(&self, id_in: Option>) -> FutureId { + pub(crate) fn prepare( + &self, + backend: wgt::Backend, + id_in: Option>, + ) -> FutureId { FutureId { id: match id_in { Some(id_in) => { self.identity.mark_as_used(id_in); id_in } - None => self.identity.process(self.backend), + None => self.identity.process(backend), }, data: &self.storage, } @@ -164,13 +160,13 @@ mod tests { #[test] fn simultaneous_registration() { - let registry = Registry::without_backend(); + let registry = Registry::new(); std::thread::scope(|s| { for _ in 0..5 { s.spawn(|| { for _ in 0..1000 { let value = Arc::new(TestData); - let new_id = registry.prepare(None); + let new_id = registry.prepare(wgt::Backend::Empty, None); let id = new_id.assign(value); registry.unregister(id); } diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 896d1342c8..83e45c7f2c 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -12,29 +12,25 @@ use crate::{ init_tracker::{BufferInitTracker, TextureInitTracker}, lock::{rank, Mutex, RwLock}, resource_log, - snatch::{ExclusiveSnatchGuard, SnatchGuard, Snatchable}, + snatch::{SnatchGuard, Snatchable}, track::{SharedTrackerIndexAllocator, TextureSelector, TrackerIndex}, - Label, LabelHelpers, SubmissionIndex, + Label, LabelHelpers, }; -use hal::CommandEncoder; use smallvec::SmallVec; use thiserror::Error; +use hal::BufferUses; +use std::num::NonZeroU64; use std::{ - borrow::Borrow, + borrow::{Borrow, Cow}, fmt::Debug, - iter, mem, + mem::{self, ManuallyDrop}, ops::Range, ptr::NonNull, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, Weak, - }, + sync::{Arc, Weak}, }; -use std::num::NonZeroU64; - /// Information about the wgpu-core resource. /// /// Each type representing a `wgpu-core` resource, like [`Device`], @@ -58,14 +54,6 @@ use std::num::NonZeroU64; pub(crate) struct TrackingData { tracker_index: TrackerIndex, tracker_indices: Arc, - /// The index of the last queue submission in which the resource - /// was used. - /// - /// Each queue submission is fenced and assigned an index number - /// sequentially. Thus, when a queue submission completes, we know any - /// resources used in that submission and any lower-numbered submissions are - /// no longer in use by the GPU. - submission_index: AtomicUsize, } impl Drop for TrackingData { @@ -79,29 +67,18 @@ impl TrackingData { Self { tracker_index: tracker_indices.alloc(), tracker_indices, - submission_index: AtomicUsize::new(0), } } pub(crate) fn tracker_index(&self) -> TrackerIndex { self.tracker_index } - - /// Record that this resource will be used by the queue submission with the - /// given index. 
- pub(crate) fn use_at(&self, submit_index: SubmissionIndex) { - self.submission_index - .store(submit_index as _, Ordering::Release); - } - - pub(crate) fn submission_index(&self) -> SubmissionIndex { - self.submission_index.load(Ordering::Acquire) as _ - } } #[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct ResourceErrorIdent { - r#type: &'static str, + r#type: Cow<'static, str>, label: String, } @@ -111,14 +88,14 @@ impl std::fmt::Display for ResourceErrorIdent { } } -pub(crate) trait ParentDevice: Labeled { - fn device(&self) -> &Arc>; +pub(crate) trait ParentDevice: Labeled { + fn device(&self) -> &Arc; fn is_equal(self: &Arc, other: &Arc) -> bool { Arc::ptr_eq(self, other) } - fn same_device_as>(&self, other: &O) -> Result<(), DeviceError> { + fn same_device_as(&self, other: &O) -> Result<(), DeviceError> { if Arc::ptr_eq(self.device(), other.device()) { Ok(()) } else { @@ -131,7 +108,7 @@ pub(crate) trait ParentDevice: Labeled { } } - fn same_device(&self, device: &Arc>) -> Result<(), DeviceError> { + fn same_device(&self, device: &Arc) -> Result<(), DeviceError> { if Arc::ptr_eq(self.device(), device) { Ok(()) } else { @@ -148,8 +125,8 @@ pub(crate) trait ParentDevice: Labeled { #[macro_export] macro_rules! impl_parent_device { ($ty:ident) => { - impl $crate::resource::ParentDevice for $ty { - fn device(&self) -> &Arc> { + impl $crate::resource::ParentDevice for $ty { + fn device(&self) -> &Arc { &self.device } } @@ -163,7 +140,7 @@ pub(crate) trait ResourceType { #[macro_export] macro_rules! impl_resource_type { ($ty:ident) => { - impl $crate::resource::ResourceType for $ty { + impl $crate::resource::ResourceType for $ty { const TYPE: &'static str = stringify!($ty); } }; @@ -179,7 +156,7 @@ pub(crate) trait Labeled: ResourceType { fn error_ident(&self) -> ResourceErrorIdent { ResourceErrorIdent { - r#type: Self::TYPE, + r#type: Cow::Borrowed(Self::TYPE), label: self.label().to_owned(), } } @@ -188,7 +165,7 @@ pub(crate) trait Labeled: ResourceType { #[macro_export] macro_rules! impl_labeled { ($ty:ident) => { - impl $crate::resource::Labeled for $ty { + impl $crate::resource::Labeled for $ty { fn label(&self) -> &str { &self.label } @@ -196,27 +173,17 @@ macro_rules! impl_labeled { }; } -pub(crate) trait Trackable: Labeled { +pub(crate) trait Trackable { fn tracker_index(&self) -> TrackerIndex; - /// Record that this resource will be used by the queue submission with the - /// given index. - fn use_at(&self, submit_index: SubmissionIndex); - fn submission_index(&self) -> SubmissionIndex; } #[macro_export] macro_rules! impl_trackable { ($ty:ident) => { - impl $crate::resource::Trackable for $ty { + impl $crate::resource::Trackable for $ty { fn tracker_index(&self) -> $crate::track::TrackerIndex { self.tracking_data.tracker_index() } - fn use_at(&self, submit_index: $crate::SubmissionIndex) { - self.tracking_data.use_at(submit_index) - } - fn submission_index(&self) -> $crate::SubmissionIndex { - self.tracking_data.submission_index() - } } }; } @@ -255,17 +222,14 @@ pub enum BufferMapAsyncStatus { } #[derive(Debug)] -pub(crate) enum BufferMapState { +pub(crate) enum BufferMapState { /// Mapped at creation. 
- Init { - staging_buffer: StagingBuffer, - ptr: NonNull, - }, + Init { staging_buffer: StagingBuffer }, /// Waiting for GPU to be done before mapping - Waiting(BufferPendingMapping), + Waiting(BufferPendingMapping), /// Mapped Active { - ptr: NonNull, + mapping: hal::BufferMapping, range: hal::MemoryRange, host: HostMap, }, @@ -274,9 +238,9 @@ pub(crate) enum BufferMapState { } #[cfg(send_sync)] -unsafe impl Send for BufferMapState {} +unsafe impl Send for BufferMapState {} #[cfg(send_sync)] -unsafe impl Sync for BufferMapState {} +unsafe impl Sync for BufferMapState {} #[repr(C)] pub struct BufferMapCallbackC { @@ -378,6 +342,7 @@ pub struct BufferMapOperation { } #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[non_exhaustive] pub enum BufferAccessError { #[error(transparent)] @@ -426,6 +391,7 @@ pub enum BufferAccessError { } #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[error("Usage flags {actual:?} of {res} do not contain required usage flags {expected:?}")] pub struct MissingBufferUsageError { pub(crate) res: ResourceErrorIdent, @@ -442,59 +408,59 @@ pub struct MissingTextureUsageError { } #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[error("{0} has been destroyed")] pub struct DestroyedResourceError(pub ResourceErrorIdent); pub type BufferAccessResult = Result<(), BufferAccessError>; #[derive(Debug)] -pub(crate) struct BufferPendingMapping { +pub(crate) struct BufferPendingMapping { pub(crate) range: Range, pub(crate) op: BufferMapOperation, // hold the parent alive while the mapping is active - pub(crate) _parent_buffer: Arc>, + pub(crate) _parent_buffer: Arc, } pub type BufferDescriptor<'a> = wgt::BufferDescriptor>; #[derive(Debug)] -pub struct Buffer { - pub(crate) raw: Snatchable, - pub(crate) device: Arc>, +pub struct Buffer { + pub(crate) raw: Snatchable>, + pub(crate) device: Arc, pub(crate) usage: wgt::BufferUsages, pub(crate) size: wgt::BufferAddress, pub(crate) initialization_status: RwLock, - pub(crate) sync_mapped_writes: Mutex>, /// The `label` from the descriptor used to create the resource. 
pub(crate) label: String, pub(crate) tracking_data: TrackingData, - pub(crate) map_state: Mutex>, - pub(crate) bind_groups: Mutex>>>, + pub(crate) map_state: Mutex, + pub(crate) bind_groups: Mutex>>, } -impl Drop for Buffer { +impl Drop for Buffer { fn drop(&mut self) { if let Some(raw) = self.raw.take() { resource_log!("Destroy raw {}", self.error_ident()); unsafe { - use hal::Device; self.device.raw().destroy_buffer(raw); } } } } -impl Buffer { - pub(crate) fn raw<'a>(&'a self, guard: &'a SnatchGuard) -> Option<&'a A::Buffer> { - self.raw.get(guard) +impl Buffer { + pub(crate) fn raw<'a>(&'a self, guard: &'a SnatchGuard) -> Option<&'a dyn hal::DynBuffer> { + self.raw.get(guard).map(|b| b.as_ref()) } pub(crate) fn try_raw<'a>( &'a self, guard: &'a SnatchGuard, - ) -> Result<&A::Buffer, DestroyedResourceError> { + ) -> Result<&dyn hal::DynBuffer, DestroyedResourceError> { self.raw .get(guard) + .map(|raw| raw.as_ref()) .ok_or_else(|| DestroyedResourceError(self.error_ident())) } @@ -645,22 +611,14 @@ impl Buffer { self: &Arc, #[cfg(feature = "trace")] buffer_id: BufferId, ) -> Result, BufferAccessError> { - use hal::Device; - let device = &self.device; let snatch_guard = device.snatchable_lock.read(); let raw_buf = self.try_raw(&snatch_guard)?; - log::debug!("{} map state -> Idle", self.error_ident()); match mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle) { - BufferMapState::Init { - staging_buffer, - ptr, - } => { + BufferMapState::Init { staging_buffer } => { #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { - let data = trace.make_binary("bin", unsafe { - std::slice::from_raw_parts(ptr.as_ptr(), self.size as usize) - }); + let data = trace.make_binary("bin", staging_buffer.get_data()); trace.add(trace::Action::WriteBuffer { id: buffer_id, data, @@ -668,49 +626,36 @@ impl Buffer { queued: true, }); } - let _ = ptr; - let raw_staging_buffer_guard = staging_buffer.raw.lock(); - let raw_staging_buffer = raw_staging_buffer_guard.as_ref().unwrap(); - if !staging_buffer.is_coherent { - unsafe { - device - .raw() - .flush_mapped_ranges(raw_staging_buffer, iter::once(0..self.size)); - } - } + let mut pending_writes = device.pending_writes.lock(); + + let staging_buffer = staging_buffer.flush(); - self.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy { src_offset: 0, dst_offset: 0, size, }); let transition_src = hal::BufferBarrier { - buffer: raw_staging_buffer, + buffer: staging_buffer.raw(), usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }; - let transition_dst = hal::BufferBarrier { + let transition_dst = hal::BufferBarrier:: { buffer: raw_buf, usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST, }; - let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); let encoder = pending_writes.activate(); unsafe { - encoder.transition_buffers( - iter::once(transition_src).chain(iter::once(transition_dst)), - ); + encoder.transition_buffers(&[transition_src, transition_dst]); if self.size > 0 { encoder.copy_buffer_to_buffer( - raw_staging_buffer, + staging_buffer.raw(), raw_buf, - region.into_iter(), + region.as_slice(), ); } } - drop(raw_staging_buffer_guard); - pending_writes.consume_temp(queue::TempResource::StagingBuffer(staging_buffer)); + pending_writes.consume(staging_buffer); pending_writes.insert_buffer(self); } BufferMapState::Idle => { @@ -719,13 +664,18 @@ impl Buffer { 
BufferMapState::Waiting(pending) => { return Ok(Some((pending.op, Err(BufferAccessError::MapAborted)))); } - BufferMapState::Active { ptr, range, host } => { + BufferMapState::Active { + mapping, + range, + host, + } => { + #[allow(clippy::collapsible_if)] if host == HostMap::Write { #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { let size = range.end - range.start; let data = trace.make_binary("bin", unsafe { - std::slice::from_raw_parts(ptr.as_ptr(), size as usize) + std::slice::from_raw_parts(mapping.ptr.as_ptr(), size as usize) }); trace.add(trace::Action::WriteBuffer { id: buffer_id, @@ -734,14 +684,11 @@ impl Buffer { queued: false, }); } - let _ = (ptr, range); + if !mapping.is_coherent { + unsafe { device.raw().flush_mapped_ranges(raw_buf, &[range]) }; + } } - unsafe { - device - .raw() - .unmap_buffer(raw_buf) - .map_err(DeviceError::from)? - }; + unsafe { device.raw().unmap_buffer(raw_buf) }; } } Ok(None) @@ -765,7 +712,7 @@ impl Buffer { }; queue::TempResource::DestroyedBuffer(DestroyedBuffer { - raw: Some(raw), + raw: ManuallyDrop::new(raw), device: Arc::clone(&self.device), label: self.label().to_owned(), bind_groups, @@ -773,14 +720,14 @@ impl Buffer { }; let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); if pending_writes.contains_buffer(self) { pending_writes.consume_temp(temp); } else { - let last_submit_index = self.submission_index(); - device - .lock_life() - .schedule_resource_destruction(temp, last_submit_index); + let mut life_lock = device.lock_life(); + let last_submit_index = life_lock.get_buffer_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } } Ok(()) @@ -814,20 +761,20 @@ crate::impl_trackable!(Buffer); /// A buffer that has been marked as destroyed and is staged for actual deletion soon. #[derive(Debug)] -pub struct DestroyedBuffer { - raw: Option, - device: Arc>, +pub struct DestroyedBuffer { + raw: ManuallyDrop>, + device: Arc, label: String, - bind_groups: Vec>>, + bind_groups: Vec>, } -impl DestroyedBuffer { +impl DestroyedBuffer { pub fn label(&self) -> &dyn Debug { &self.label } } -impl Drop for DestroyedBuffer { +impl Drop for DestroyedBuffer { fn drop(&mut self) { let mut deferred = self.device.deferred_destroy.lock(); for bind_group in self.bind_groups.drain(..) { @@ -835,17 +782,20 @@ impl Drop for DestroyedBuffer { } drop(deferred); - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw Buffer (destroyed) {:?}", self.label()); - - unsafe { - use hal::Device; - self.device.raw().destroy_buffer(raw); - } + resource_log!("Destroy raw Buffer (destroyed) {:?}", self.label()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + hal::DynDevice::destroy_buffer(self.device.raw(), raw); } } } +#[cfg(send_sync)] +unsafe impl Send for StagingBuffer {} +#[cfg(send_sync)] +unsafe impl Sync for StagingBuffer {} + /// A temporary buffer, consumed by the command that uses it. /// /// A [`StagingBuffer`] is designed for one-shot uploads of data to the GPU. 
It @@ -866,68 +816,200 @@ impl Drop for DestroyedBuffer { /// [`queue_write_texture`]: Global::queue_write_texture /// [`Device::pending_writes`]: crate::device::Device #[derive(Debug)] -pub struct StagingBuffer { - pub(crate) raw: Mutex>, - pub(crate) device: Arc>, - pub(crate) size: wgt::BufferAddress, - pub(crate) is_coherent: bool, -} +pub struct StagingBuffer { + raw: Box, + device: Arc, + pub(crate) size: wgt::BufferSize, + is_coherent: bool, + ptr: NonNull, +} + +impl StagingBuffer { + pub(crate) fn new(device: &Arc, size: wgt::BufferSize) -> Result { + profiling::scope!("StagingBuffer::new"); + let stage_desc = hal::BufferDescriptor { + label: crate::hal_label(Some("(wgpu internal) Staging"), device.instance_flags), + size: size.get(), + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + memory_flags: hal::MemoryFlags::TRANSIENT, + }; -impl Drop for StagingBuffer { - fn drop(&mut self) { - if let Some(raw) = self.raw.lock().take() { - resource_log!("Destroy raw {}", self.error_ident()); + let raw = unsafe { device.raw().create_buffer(&stage_desc)? }; + let mapping = unsafe { device.raw().map_buffer(raw.as_ref(), 0..size.get()) }?; + + let staging_buffer = StagingBuffer { + raw, + device: device.clone(), + size, + is_coherent: mapping.is_coherent, + ptr: mapping.ptr, + }; + + Ok(staging_buffer) + } + + /// SAFETY: You must not call any functions of `self` + /// until you stopped using the returned pointer. + pub(crate) unsafe fn ptr(&self) -> NonNull { + self.ptr + } + + #[cfg(feature = "trace")] + pub(crate) fn get_data(&self) -> &[u8] { + unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size.get() as usize) } + } + + pub(crate) fn write_zeros(&mut self) { + unsafe { core::ptr::write_bytes(self.ptr.as_ptr(), 0, self.size.get() as usize) }; + } + + pub(crate) fn write(&mut self, data: &[u8]) { + assert!(data.len() >= self.size.get() as usize); + // SAFETY: With the assert above, all of `copy_nonoverlapping`'s + // requirements are satisfied. + unsafe { + core::ptr::copy_nonoverlapping( + data.as_ptr(), + self.ptr.as_ptr(), + self.size.get() as usize, + ); + } + } + + /// SAFETY: The offsets and size must be in-bounds. + pub(crate) unsafe fn write_with_offset( + &mut self, + data: &[u8], + src_offset: isize, + dst_offset: isize, + size: usize, + ) { + unsafe { + core::ptr::copy_nonoverlapping( + data.as_ptr().offset(src_offset), + self.ptr.as_ptr().offset(dst_offset), + size, + ); + } + } + + pub(crate) fn flush(self) -> FlushedStagingBuffer { + let device = self.device.raw(); + if !self.is_coherent { + #[allow(clippy::single_range_in_vec_init)] unsafe { - use hal::Device; - self.device.raw().destroy_buffer(raw); - } + device.flush_mapped_ranges(self.raw.as_ref(), &[0..self.size.get()]) + }; + } + unsafe { device.unmap_buffer(self.raw.as_ref()) }; + + let StagingBuffer { + raw, device, size, .. 
+ } = self; + + FlushedStagingBuffer { + raw: ManuallyDrop::new(raw), + device, + size, } } } crate::impl_resource_type!(StagingBuffer); -// TODO: add label -impl Labeled for StagingBuffer { - fn label(&self) -> &str { - "" +crate::impl_storage_item!(StagingBuffer); + +#[derive(Debug)] +pub struct FlushedStagingBuffer { + raw: ManuallyDrop>, + device: Arc, + pub(crate) size: wgt::BufferSize, +} + +impl FlushedStagingBuffer { + pub(crate) fn raw(&self) -> &dyn hal::DynBuffer { + self.raw.as_ref() + } +} + +impl Drop for FlushedStagingBuffer { + fn drop(&mut self) { + resource_log!("Destroy raw StagingBuffer"); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { self.device.raw().destroy_buffer(raw) }; + } +} + +#[derive(Debug)] +pub struct ScratchBuffer { + raw: ManuallyDrop>, + device: Arc, +} + +impl ScratchBuffer { + pub(crate) fn new(device: &Arc, size: wgt::BufferSize) -> Result { + let raw = unsafe { + device + .raw() + .create_buffer(&hal::BufferDescriptor { + label: Some("(wgpu) scratch buffer"), + size: size.get(), + usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH | BufferUses::MAP_WRITE, + memory_flags: hal::MemoryFlags::empty(), + }) + .map_err(crate::device::DeviceError::from)? + }; + Ok(Self { + raw: ManuallyDrop::new(raw), + device: device.clone(), + }) + } + pub(crate) fn raw(&self) -> &dyn hal::DynBuffer { + self.raw.as_ref() + } +} + +impl Drop for ScratchBuffer { + fn drop(&mut self) { + resource_log!("Destroy raw StagingBuffer"); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { self.device.raw().destroy_buffer(raw) }; } } -crate::impl_parent_device!(StagingBuffer); -crate::impl_storage_item!(StagingBuffer); pub type TextureDescriptor<'a> = wgt::TextureDescriptor, Vec>; #[derive(Debug)] -pub(crate) enum TextureInner { +pub(crate) enum TextureInner { Native { - raw: A::Texture, + raw: Box, }, Surface { - raw: Option, + raw: Box, parent_id: SurfaceId, }, } -impl TextureInner { - pub(crate) fn raw(&self) -> Option<&A::Texture> { +impl TextureInner { + pub(crate) fn raw(&self) -> &dyn hal::DynTexture { match self { - Self::Native { raw } => Some(raw), - Self::Surface { raw: Some(tex), .. } => Some(tex.borrow()), - _ => None, + Self::Native { raw } => raw.as_ref(), + Self::Surface { raw, .. } => raw.as_ref().borrow(), } } } #[derive(Debug)] -pub enum TextureClearMode { +pub enum TextureClearMode { BufferCopy, // View for clear via RenderPass for every subsurface (mip/layer/slice) RenderPass { - clear_views: SmallVec<[Option; 1]>, + clear_views: SmallVec<[ManuallyDrop>; 1]>, is_color: bool, }, Surface { - clear_view: Option, + clear_view: ManuallyDrop>, }, // Texture can't be cleared, attempting to do so will cause panic. // (either because it is impossible for the type of texture or it is being destroyed) @@ -935,9 +1017,9 @@ pub enum TextureClearMode { } #[derive(Debug)] -pub struct Texture { - pub(crate) inner: Snatchable>, - pub(crate) device: Arc>, +pub struct Texture { + pub(crate) inner: Snatchable, + pub(crate) device: Arc, pub(crate) desc: wgt::TextureDescriptor<(), Vec>, pub(crate) hal_usage: hal::TextureUses, pub(crate) format_features: wgt::TextureFormatFeatures, @@ -946,19 +1028,19 @@ pub struct Texture { /// The `label` from the descriptor used to create the resource. 
pub(crate) label: String, pub(crate) tracking_data: TrackingData, - pub(crate) clear_mode: RwLock>, - pub(crate) views: Mutex>>>, - pub(crate) bind_groups: Mutex>>>, + pub(crate) clear_mode: TextureClearMode, + pub(crate) views: Mutex>>, + pub(crate) bind_groups: Mutex>>, } -impl Texture { +impl Texture { pub(crate) fn new( - device: &Arc>, - inner: TextureInner, + device: &Arc, + inner: TextureInner, hal_usage: hal::TextureUses, desc: &TextureDescriptor, format_features: wgt::TextureFormatFeatures, - clear_mode: TextureClearMode, + clear_mode: TextureClearMode, init: bool, ) -> Self { Texture { @@ -981,7 +1063,7 @@ impl Texture { }, label: desc.label.to_string(), tracking_data: TrackingData::new(device.tracker_indices.textures.clone()), - clear_mode: RwLock::new(rank::TEXTURE_CLEAR_MODE, clear_mode), + clear_mode, views: Mutex::new(rank::TEXTURE_VIEWS, Vec::new()), bind_groups: Mutex::new(rank::TEXTURE_BIND_GROUPS, Vec::new()), } @@ -1004,19 +1086,16 @@ impl Texture { } } -impl Drop for Texture { +impl Drop for Texture { fn drop(&mut self) { - use hal::Device; - let mut clear_mode = self.clear_mode.write(); - let clear_mode = &mut *clear_mode; - match *clear_mode { + match self.clear_mode { TextureClearMode::Surface { ref mut clear_view, .. } => { - if let Some(view) = clear_view.take() { - unsafe { - self.device.raw().destroy_texture_view(view); - } + // SAFETY: We are in the Drop impl and we don't use clear_view anymore after this point. + let raw = unsafe { ManuallyDrop::take(clear_view) }; + unsafe { + self.device.raw().destroy_texture_view(raw); } } TextureClearMode::RenderPass { @@ -1024,10 +1103,10 @@ impl Drop for Texture { .. } => { clear_views.iter_mut().for_each(|clear_view| { - if let Some(view) = clear_view.take() { - unsafe { - self.device.raw().destroy_texture_view(view); - } + // SAFETY: We are in the Drop impl and we don't use clear_view anymore after this point. 
+ let raw = unsafe { ManuallyDrop::take(clear_view) }; + unsafe { + self.device.raw().destroy_texture_view(raw); } }); } @@ -1043,42 +1122,39 @@ impl Drop for Texture { } } -impl Texture { +impl Texture { pub(crate) fn try_inner<'a>( &'a self, guard: &'a SnatchGuard, - ) -> Result<&'a TextureInner, DestroyedResourceError> { + ) -> Result<&'a TextureInner, DestroyedResourceError> { self.inner .get(guard) .ok_or_else(|| DestroyedResourceError(self.error_ident())) } - pub(crate) fn raw<'a>(&'a self, snatch_guard: &'a SnatchGuard) -> Option<&'a A::Texture> { - self.inner.get(snatch_guard)?.raw() + pub(crate) fn raw<'a>( + &'a self, + snatch_guard: &'a SnatchGuard, + ) -> Option<&'a dyn hal::DynTexture> { + Some(self.inner.get(snatch_guard)?.raw()) } pub(crate) fn try_raw<'a>( &'a self, guard: &'a SnatchGuard, - ) -> Result<&'a A::Texture, DestroyedResourceError> { + ) -> Result<&'a dyn hal::DynTexture, DestroyedResourceError> { self.inner .get(guard) - .and_then(|t| t.raw()) + .map(|t| t.raw()) .ok_or_else(|| DestroyedResourceError(self.error_ident())) } - pub(crate) fn inner_mut<'a>( - &'a self, - guard: &'a mut ExclusiveSnatchGuard, - ) -> Option<&'a mut TextureInner> { - self.inner.get_mut(guard) - } pub(crate) fn get_clear_view<'a>( - clear_mode: &'a TextureClearMode, + clear_mode: &'a TextureClearMode, desc: &'a wgt::TextureDescriptor<(), Vec>, mip_level: u32, depth_or_layer: u32, - ) -> &'a A::TextureView { + ) -> &'a dyn hal::DynTextureView { match *clear_mode { TextureClearMode::BufferCopy => { panic!("Given texture is cleared with buffer copies, not render passes") @@ -1086,7 +1162,7 @@ impl Texture { TextureClearMode::None => { panic!("Given texture can't be cleared") } - TextureClearMode::Surface { ref clear_view, .. } => clear_view.as_ref().unwrap(), + TextureClearMode::Surface { ref clear_view, .. } => clear_view.as_ref(), TextureClearMode::RenderPass { ref clear_views, .. 
} => { @@ -1097,7 +1173,7 @@ impl Texture { } else { mip_level * desc.size.depth_or_array_layers } + depth_or_layer; - clear_views[index as usize].as_ref().unwrap() + clear_views[index as usize].as_ref() } } } @@ -1128,7 +1204,7 @@ impl Texture { }; queue::TempResource::DestroyedTexture(DestroyedTexture { - raw: Some(raw), + raw: ManuallyDrop::new(raw), views, bind_groups, device: Arc::clone(&self.device), @@ -1137,14 +1213,14 @@ impl Texture { }; let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); if pending_writes.contains_texture(self) { pending_writes.consume_temp(temp); } else { - let last_submit_index = self.submission_index(); - device - .lock_life() - .schedule_resource_destruction(temp, last_submit_index); + let mut life_lock = device.lock_life(); + let last_submit_index = life_lock.get_texture_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } } Ok(()) @@ -1162,11 +1238,13 @@ impl Global { ) -> R { profiling::scope!("Buffer::as_hal"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(buffer) = hub.buffers.get(id) { let snatch_guard = buffer.device.snatchable_lock.read(); - let hal_buffer = buffer.raw(&snatch_guard); + let hal_buffer = buffer + .raw(&snatch_guard) + .and_then(|b| b.as_any().downcast_ref()); hal_buffer_callback(hal_buffer) } else { hal_buffer_callback(None) @@ -1183,11 +1261,14 @@ impl Global { ) -> R { profiling::scope!("Texture::as_hal"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(texture) = hub.textures.get(id) { let snatch_guard = texture.device.snatchable_lock.read(); let hal_texture = texture.raw(&snatch_guard); + let hal_texture = hal_texture + .as_ref() + .and_then(|it| it.as_any().downcast_ref()); hal_texture_callback(hal_texture) } else { hal_texture_callback(None) @@ -1204,11 +1285,14 @@ impl Global { ) -> R { profiling::scope!("TextureView::as_hal"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(texture_view) = hub.texture_views.get(id) { let snatch_guard = texture_view.device.snatchable_lock.read(); let hal_texture_view = texture_view.raw(&snatch_guard); + let hal_texture_view = hal_texture_view + .as_ref() + .and_then(|it| it.as_any().downcast_ref()); hal_texture_view_callback(hal_texture_view) } else { hal_texture_view_callback(None) @@ -1225,9 +1309,12 @@ impl Global { ) -> R { profiling::scope!("Adapter::as_hal"); - let hub = A::hub(self); + let hub = &self.hub; let adapter = hub.adapters.get(id).ok(); - let hal_adapter = adapter.as_ref().map(|adapter| &adapter.raw.adapter); + let hal_adapter = adapter + .as_ref() + .map(|adapter| &adapter.raw.adapter) + .and_then(|adapter| adapter.as_any().downcast_ref()); hal_adapter_callback(hal_adapter) } @@ -1242,9 +1329,12 @@ impl Global { ) -> R { profiling::scope!("Device::as_hal"); - let hub = A::hub(self); + let hub = &self.hub; let device = hub.devices.get(id).ok(); - let hal_device = device.as_ref().map(|device| device.raw()); + let hal_device = device + .as_ref() + .map(|device| device.raw()) + .and_then(|device| device.as_any().downcast_ref()); hal_device_callback(hal_device) } @@ -1259,12 +1349,11 @@ impl Global { ) -> R { profiling::scope!("Device::fence_as_hal"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(id) { - let hal_fence = device.fence.read(); - let hal_fence = hal_fence.as_ref(); - hal_fence_callback(hal_fence) + let fence = device.fence.read(); + 
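// Editor's note: the HAL objects behind `Global` are now type-erased, so
// every `as_hal` accessor recovers a concrete backend type through `Any`:
// `as_any()` exposes the erased object, and `downcast_ref()` (its target
// type inferred from the callback's parameter) succeeds only when the
// requested backend matches the resource's actual backend; otherwise the
// callback receives `None`.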
hal_fence_callback(fence.as_any().downcast_ref()) } else { hal_fence_callback(None) } @@ -1282,7 +1371,8 @@ impl Global { let surface = self.surfaces.get(id).ok(); let hal_surface = surface .as_ref() - .and_then(|surface| A::surface_as_hal(surface)); + .and_then(|surface| surface.raw(A::VARIANT)) + .and_then(|surface| surface.as_any().downcast_ref()); hal_surface_callback(hal_surface) } @@ -1301,12 +1391,16 @@ impl Global { ) -> R { profiling::scope!("CommandEncoder::as_hal"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(cmd_buf) = hub.command_buffers.get(id.into_command_buffer_id()) { let mut cmd_buf_data = cmd_buf.data.lock(); let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); - let cmd_buf_raw = cmd_buf_data.encoder.open().ok(); + let cmd_buf_raw = cmd_buf_data + .encoder + .open() + .ok() + .and_then(|encoder| encoder.as_any_mut().downcast_mut()); hal_command_encoder_callback(cmd_buf_raw) } else { hal_command_encoder_callback(None) @@ -1316,21 +1410,21 @@ impl Global { /// A texture that has been marked as destroyed and is staged for actual deletion soon. #[derive(Debug)] -pub struct DestroyedTexture { - raw: Option, - views: Vec>>, - bind_groups: Vec>>, - device: Arc>, +pub struct DestroyedTexture { + raw: ManuallyDrop>, + views: Vec>, + bind_groups: Vec>, + device: Arc, label: String, } -impl DestroyedTexture { +impl DestroyedTexture { pub fn label(&self) -> &dyn Debug { &self.label } } -impl Drop for DestroyedTexture { +impl Drop for DestroyedTexture { fn drop(&mut self) { let device = &self.device; @@ -1343,13 +1437,11 @@ impl Drop for DestroyedTexture { } drop(deferred); - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw Texture (destroyed) {:?}", self.label()); - - unsafe { - use hal::Device; - self.device.raw().destroy_texture(raw); - } + resource_log!("Destroy raw Texture (destroyed) {:?}", self.label()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. 
+ let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_texture(raw); } } } @@ -1452,7 +1544,7 @@ crate::impl_parent_device!(Texture); crate::impl_storage_item!(Texture); crate::impl_trackable!(Texture); -impl Borrow for Texture { +impl Borrow for Texture { fn borrow(&self) -> &TextureSelector { &self.full_range } @@ -1513,11 +1605,11 @@ pub enum TextureViewNotRenderableReason { } #[derive(Debug)] -pub struct TextureView { - pub(crate) raw: Snatchable, +pub struct TextureView { + pub(crate) raw: Snatchable>, // if it's a surface texture - it's none - pub(crate) parent: Arc>, - pub(crate) device: Arc>, + pub(crate) parent: Arc, + pub(crate) device: Arc, pub(crate) desc: HalTextureViewDescriptor, pub(crate) format_features: wgt::TextureFormatFeatures, /// This is `Err` only if the texture view is not renderable @@ -1529,29 +1621,32 @@ pub struct TextureView { pub(crate) tracking_data: TrackingData, } -impl Drop for TextureView { +impl Drop for TextureView { fn drop(&mut self) { if let Some(raw) = self.raw.take() { resource_log!("Destroy raw {}", self.error_ident()); unsafe { - use hal::Device; self.device.raw().destroy_texture_view(raw); } } } } -impl TextureView { - pub(crate) fn raw<'a>(&'a self, snatch_guard: &'a SnatchGuard) -> Option<&'a A::TextureView> { - self.raw.get(snatch_guard) +impl TextureView { + pub(crate) fn raw<'a>( + &'a self, + snatch_guard: &'a SnatchGuard, + ) -> Option<&'a dyn hal::DynTextureView> { + self.raw.get(snatch_guard).map(|it| it.as_ref()) } pub(crate) fn try_raw<'a>( &'a self, guard: &'a SnatchGuard, - ) -> Result<&A::TextureView, DestroyedResourceError> { + ) -> Result<&'a dyn hal::DynTextureView, DestroyedResourceError> { self.raw .get(guard) + .map(|it| it.as_ref()) .ok_or_else(|| DestroyedResourceError(self.error_ident())) } } @@ -1559,6 +1654,8 @@ impl TextureView { #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum CreateTextureViewError { + #[error(transparent)] + Device(#[from] DeviceError), #[error("TextureId {0:?} is invalid")] InvalidTextureId(TextureId), #[error(transparent)] @@ -1645,9 +1742,9 @@ pub struct SamplerDescriptor<'a> { } #[derive(Debug)] -pub struct Sampler { - pub(crate) raw: Option, - pub(crate) device: Arc>, +pub struct Sampler { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, /// The `label` from the descriptor used to create the resource. pub(crate) label: String, pub(crate) tracking_data: TrackingData, @@ -1657,21 +1754,20 @@ pub struct Sampler { pub(crate) filtering: bool, } -impl Drop for Sampler { +impl Drop for Sampler { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw {}", self.error_ident()); - unsafe { - use hal::Device; - self.device.raw().destroy_sampler(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. 
+ let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_sampler(raw); } } } -impl Sampler { - pub(crate) fn raw(&self) -> &A::Sampler { - self.raw.as_ref().unwrap() +impl Sampler { + pub(crate) fn raw(&self) -> &dyn hal::DynSampler { + self.raw.as_ref() } } @@ -1741,23 +1837,22 @@ pub enum CreateQuerySetError { pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor>; #[derive(Debug)] -pub struct QuerySet { - pub(crate) raw: Option, - pub(crate) device: Arc>, +pub struct QuerySet { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, /// The `label` from the descriptor used to create the resource. pub(crate) label: String, pub(crate) tracking_data: TrackingData, pub(crate) desc: wgt::QuerySetDescriptor<()>, } -impl Drop for QuerySet { +impl Drop for QuerySet { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw {}", self.error_ident()); - unsafe { - use hal::Device; - self.device.raw().destroy_query_set(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_query_set(raw); } } } @@ -1768,9 +1863,9 @@ crate::impl_parent_device!(QuerySet); crate::impl_storage_item!(QuerySet); crate::impl_trackable!(QuerySet); -impl QuerySet { - pub(crate) fn raw(&self) -> &A::QuerySet { - self.raw.as_ref().unwrap() +impl QuerySet { + pub(crate) fn raw(&self) -> &dyn hal::DynQuerySet { + self.raw.as_ref() } } @@ -1786,10 +1881,14 @@ pub enum DestroyError { pub type BlasDescriptor<'a> = wgt::CreateBlasDescriptor>; pub type TlasDescriptor<'a> = wgt::CreateTlasDescriptor>; +pub(crate) trait AccelerationStructure: Trackable { + fn raw(&self) -> &dyn hal::DynAccelerationStructure; +} + #[derive(Debug)] -pub struct Blas { - pub(crate) raw: Option, - pub(crate) device: Arc>, +pub struct Blas { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, pub(crate) size_info: hal::AccelerationStructureBuildSizes, pub(crate) sizes: wgt::BlasGeometrySizeDescriptors, pub(crate) flags: wgt::AccelerationStructureFlags, @@ -1801,18 +1900,23 @@ pub struct Blas { pub(crate) tracking_data: TrackingData, } -impl Drop for Blas { +impl Drop for Blas { fn drop(&mut self) { + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl, and we don't use self.raw anymore after this point. 
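(The `Option<T>` → `ManuallyDrop<T>` conversion repeated across these `Drop` impls — `DestroyedTexture`, `Sampler`, `QuerySet`, `Blas`, and `Tlas` below — is the same small pattern each time. A distilled sketch, with a placeholder `RawSampler` handle and a free function standing in for the device's destroy call:)

```rust
use std::mem::ManuallyDrop;

struct RawSampler; // placeholder for a backend handle

struct Sampler {
    raw: ManuallyDrop<RawSampler>,
}

impl Drop for Sampler {
    fn drop(&mut self) {
        // SAFETY: we are in `drop`, `self.raw` is never read again, and
        // `ManuallyDrop` means the compiler will not drop the field a second time.
        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
        destroy_raw(raw);
    }
}

// Stands in for `device.destroy_sampler(raw)` and friends.
fn destroy_raw(_raw: RawSampler) {}

fn main() {
    let _sampler = Sampler {
        raw: ManuallyDrop::new(RawSampler),
    };
} // dropped here; the raw handle is handed back exactly once
```

Compared with the old `Option` field, the impossible `None` branch is gone: `raw()` no longer needs an `unwrap`, and "the handle exists until drop" is encoded in the type.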
+ let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; unsafe { - if let Some(structure) = self.raw.take() { - resource_log!("Destroy raw {}", self.error_ident()); - use hal::Device; - self.device.raw().destroy_acceleration_structure(structure); - } + self.device.raw().destroy_acceleration_structure(raw); } } } +impl AccelerationStructure for Blas { + fn raw(&self) -> &dyn hal::DynAccelerationStructure { + self.raw.as_ref() + } +} + crate::impl_resource_type!(Blas); crate::impl_labeled!(Blas); crate::impl_parent_device!(Blas); @@ -1820,39 +1924,36 @@ crate::impl_storage_item!(Blas); crate::impl_trackable!(Blas); #[derive(Debug)] -pub struct Tlas { - pub(crate) raw: Option, - pub(crate) device: Arc>, +pub struct Tlas { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, pub(crate) size_info: hal::AccelerationStructureBuildSizes, pub(crate) max_instance_count: u32, pub(crate) flags: wgt::AccelerationStructureFlags, pub(crate) update_mode: wgt::AccelerationStructureUpdateMode, pub(crate) built_index: RwLock>, - pub(crate) dependencies: RwLock>>>, - pub(crate) instance_buffer: RwLock>, + pub(crate) dependencies: RwLock>>, + pub(crate) instance_buffer: ManuallyDrop>, /// The `label` from the descriptor used to create the resource. pub(crate) label: String, pub(crate) tracking_data: TrackingData, } -impl Drop for Tlas { +impl Drop for Tlas { fn drop(&mut self) { unsafe { - use hal::Device; - if let Some(structure) = self.raw.take() { - resource_log!("Destroy raw {}", self.error_ident()); - self.device.raw().destroy_acceleration_structure(structure); - } - if let Some(buffer) = self.instance_buffer.write().take() { - self.device.raw().destroy_buffer(buffer) - } + let structure = ManuallyDrop::take(&mut self.raw); + let buffer = ManuallyDrop::take(&mut self.instance_buffer); + resource_log!("Destroy raw {}", self.error_ident()); + self.device.raw().destroy_acceleration_structure(structure); + self.device.raw().destroy_buffer(buffer); } } } -impl Tlas { - pub(crate) fn raw(&self) -> &A::AccelerationStructure { - self.raw.as_ref().unwrap() +impl AccelerationStructure for Tlas { + fn raw(&self) -> &dyn hal::DynAccelerationStructure { + self.raw.as_ref() + } } diff --git a/wgpu-core/src/snatch.rs b/wgpu-core/src/snatch.rs index 6f60f45d85..9866b77723 100644 --- a/wgpu-core/src/snatch.rs +++ b/wgpu-core/src/snatch.rs @@ -37,11 +37,6 @@ impl Snatchable { unsafe { (*self.value.get()).as_ref() } } - /// Get write access to the value. Requires the snatchable lock's write guard. - pub fn get_mut<'a>(&'a self, _guard: &'a mut ExclusiveSnatchGuard) -> Option<&'a mut T> { - unsafe { (*self.value.get()).as_mut() } - } - /// Take the value. Requires the snatchable lock's write guard. pub fn snatch(&self, _guard: ExclusiveSnatchGuard) -> Option { unsafe { (*self.value.get()).take() } diff --git a/wgpu-core/src/storage.rs b/wgpu-core/src/storage.rs index f2875b3542..c5e91eedd4 100644 --- a/wgpu-core/src/storage.rs +++ b/wgpu-core/src/storage.rs @@ -31,7 +31,7 @@ pub(crate) trait StorageItem: ResourceType { #[macro_export] macro_rules!
impl_storage_item { ($ty:ident) => { - impl $crate::storage::StorageItem for $ty { + impl $crate::storage::StorageItem for $ty { type Marker = $crate::id::markers::$ty; } }; @@ -119,13 +119,11 @@ where } pub(crate) fn insert(&mut self, id: Id, value: Arc) { - log::trace!("User is inserting {}{:?}", T::TYPE, id); let (index, epoch, _backend) = id.unzip(); self.insert_impl(index as usize, epoch, Element::Occupied(value, epoch)) } pub(crate) fn insert_error(&mut self, id: Id) { - log::trace!("User is inserting as error {}{:?}", T::TYPE, id); let (index, epoch, _) = id.unzip(); self.insert_impl(index as usize, epoch, Element::Error(epoch)) } @@ -143,7 +141,6 @@ where } pub(crate) fn remove(&mut self, id: Id) -> Option> { - log::trace!("User is removing {}{:?}", T::TYPE, id); let (index, epoch, _) = id.unzip(); match std::mem::replace(&mut self.map[index as usize], Element::Vacant) { Element::Occupied(value, storage_epoch) => { diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs index dbc761687e..13629dfbc9 100644 --- a/wgpu-core/src/track/buffer.rs +++ b/wgpu-core/src/track/buffer.rs @@ -1,16 +1,13 @@ -/*! Buffer Trackers - * - * Buffers are represented by a single state for the whole resource, - * a 16 bit bitflag of buffer usages. Because there is only ever - * one subresource, they have no selector. -!*/ +//! Buffer Trackers +//! +//! Buffers are represented by a single state for the whole resource, +//! a 16 bit bitflag of buffer usages. Because there is only ever +//! one subresource, they have no selector. use std::sync::{Arc, Weak}; use super::{PendingTransition, TrackerIndex}; use crate::{ - hal_api::HalApi, - lock::{rank, Mutex}, resource::{Buffer, Trackable}, snatch::SnatchGuard, track::{ @@ -39,15 +36,15 @@ impl ResourceUses for BufferUses { } } -/// Stores all the buffers that a bind group stores. +/// Stores a bind group's buffers + their usages (within the bind group). #[derive(Debug)] -pub(crate) struct BufferBindGroupState { - buffers: Mutex>, BufferUses)>>, +pub(crate) struct BufferBindGroupState { + buffers: Vec<(Arc, BufferUses)>, } -impl BufferBindGroupState { +impl BufferBindGroupState { pub fn new() -> Self { Self { - buffers: Mutex::new(rank::BUFFER_BIND_GROUP_STATE_BUFFERS, Vec::new()), + buffers: Vec::new(), } } @@ -55,38 +52,34 @@ impl BufferBindGroupState { /// /// When this list of states is merged into a tracker, the memory /// accesses will be in a constant ascending order. - #[allow(clippy::pattern_type_mismatch)] - pub(crate) fn optimize(&self) { - let mut buffers = self.buffers.lock(); - buffers.sort_unstable_by_key(|(b, _)| b.tracker_index()); + pub(crate) fn optimize(&mut self) { + self.buffers + .sort_unstable_by_key(|(b, _)| b.tracker_index()); } /// Returns a list of all buffers tracked. May contain duplicates. - #[allow(clippy::pattern_type_mismatch)] pub fn used_tracker_indices(&self) -> impl Iterator + '_ { - let buffers = self.buffers.lock(); - buffers + self.buffers .iter() - .map(|(ref b, _)| b.tracker_index()) + .map(|(b, _)| b.tracker_index()) .collect::>() .into_iter() } /// Adds the given resource with the given state. - pub fn add_single(&self, buffer: &Arc>, state: BufferUses) { - let mut buffers = self.buffers.lock(); - buffers.push((buffer.clone(), state)); + pub fn insert_single(&mut self, buffer: Arc, state: BufferUses) { + self.buffers.push((buffer, state)); } } /// Stores all buffer state within a single usage scope. 
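(Why the bind-group state sorts at all: merging into a usage scope indexes the scope's state vectors by tracker index, so visiting resources in ascending order keeps those accesses sequential. A toy version of `optimize` with simplified stand-in types — a bare `Buffer` struct and plain `u16` usage bits:)

```rust
use std::sync::Arc;

struct Buffer {
    tracker_index: usize, // stand-in for wgpu-core's TrackerIndex
}

struct BufferBindGroupState {
    buffers: Vec<(Arc<Buffer>, u16)>, // (resource, usage bits)
}

impl BufferBindGroupState {
    // Sort by tracker index so a later merge walks the usage scope's
    // state vectors in ascending, cache-friendly order.
    fn optimize(&mut self) {
        self.buffers.sort_unstable_by_key(|(b, _)| b.tracker_index);
    }
}

fn main() {
    let mut state = BufferBindGroupState {
        buffers: vec![
            (Arc::new(Buffer { tracker_index: 7 }), 0b01),
            (Arc::new(Buffer { tracker_index: 2 }), 0b10),
        ],
    };
    state.optimize();
    assert_eq!(state.buffers[0].0.tracker_index, 2);
}
```

Dropping the interior `Mutex`, as the diff above does, is what lets `optimize` take `&mut self` and operate on a plain `Vec`.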
#[derive(Debug)] -pub(crate) struct BufferUsageScope { +pub(crate) struct BufferUsageScope { state: Vec, - metadata: ResourceMetadata>>, + metadata: ResourceMetadata>, } -impl Default for BufferUsageScope { +impl Default for BufferUsageScope { fn default() -> Self { Self { state: Vec::new(), @@ -95,7 +88,7 @@ impl Default for BufferUsageScope { } } -impl BufferUsageScope { +impl BufferUsageScope { fn tracker_assert_in_bounds(&self, index: usize) { strict_assert!(index < self.state.len()); self.metadata.tracker_assert_in_bounds(index); @@ -135,10 +128,9 @@ impl BufferUsageScope { /// method is called. pub unsafe fn merge_bind_group( &mut self, - bind_group: &BufferBindGroupState, + bind_group: &BufferBindGroupState, ) -> Result<(), ResourceUsageCompatibilityError> { - let buffers = bind_group.buffers.lock(); - for &(ref resource, state) in &*buffers { + for &(ref resource, state) in bind_group.buffers.iter() { let index = resource.tracker_index().as_usize(); unsafe { @@ -206,7 +198,7 @@ impl BufferUsageScope { /// the vectors will be extended. A call to set_size is not needed. pub fn merge_single( &mut self, - buffer: &Arc>, + buffer: &Arc, new_state: BufferUses, ) -> Result<(), ResourceUsageCompatibilityError> { let index = buffer.tracker_index().as_usize(); @@ -232,16 +224,16 @@ impl BufferUsageScope { } /// Stores all buffer state within a command buffer. -pub(crate) struct BufferTracker { +pub(crate) struct BufferTracker { start: Vec, end: Vec, - metadata: ResourceMetadata>>, + metadata: ResourceMetadata>, temp: Vec>, } -impl BufferTracker { +impl BufferTracker { pub fn new() -> Self { Self { start: Vec::new(), @@ -277,8 +269,13 @@ impl BufferTracker { } } + /// Returns true if the given buffer is tracked. + pub fn contains(&self, buffer: &Buffer) -> bool { + self.metadata.contains(buffer.tracker_index().as_usize()) + } + /// Returns a list of all buffers tracked. - pub fn used_resources(&self) -> impl Iterator>> + '_ { + pub fn used_resources(&self) -> impl Iterator> + '_ { self.metadata.owned_resources() } @@ -286,7 +283,7 @@ impl BufferTracker { pub fn drain_transitions<'a, 'b: 'a>( &'b mut self, snatch_guard: &'a SnatchGuard<'a>, - ) -> impl Iterator> { + ) -> impl Iterator> { let buffer_barriers = self.temp.drain(..).map(|pending| { let buf = unsafe { self.metadata.get_resource_unchecked(pending.id as _) }; pending.into_hal(buf, snatch_guard) @@ -303,7 +300,7 @@ impl BufferTracker { /// the vectors will be extended. A call to set_size is not needed. pub fn set_single( &mut self, - buffer: &Arc>, + buffer: &Arc, state: BufferUses, ) -> Option> { let index: usize = buffer.tracker_index().as_usize(); @@ -376,7 +373,7 @@ impl BufferTracker { /// /// If the ID is higher than the length of internal vectors, /// the vectors will be extended. A call to set_size is not needed. - pub fn set_from_usage_scope(&mut self, scope: &BufferUsageScope) { + pub fn set_from_usage_scope(&mut self, scope: &BufferUsageScope) { let incoming_size = scope.state.len(); if incoming_size > self.start.len() { self.set_size(incoming_size); @@ -424,7 +421,7 @@ impl BufferTracker { /// method is called. pub unsafe fn set_and_remove_from_usage_scope_sparse( &mut self, - scope: &mut BufferUsageScope, + scope: &mut BufferUsageScope, index_source: impl IntoIterator, ) { let incoming_size = scope.state.len(); @@ -463,13 +460,13 @@ impl BufferTracker { } /// Stores all buffer state within a device. 
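(The double-sided tracker above reduces to: remember the last known end state per buffer, and emit a usage range when it changes. A reduced sketch of that step, with `BufferUses` flattened to a bare `u16` bitflag and bounds growth omitted:)

```rust
type BufferUses = u16; // stand-in for the 16-bit usage bitflag

struct PendingTransition {
    id: usize,
    usage: std::ops::Range<BufferUses>, // old state .. new state
}

// If the tracked end state differs from the requested one, advance it
// and record a barrier describing the old -> new usage change.
fn set_single(
    end_states: &mut [BufferUses],
    id: usize,
    new: BufferUses,
) -> Option<PendingTransition> {
    let old = std::mem::replace(&mut end_states[id], new);
    (old != new).then(|| PendingTransition { id, usage: old..new })
}

fn main() {
    let mut end_states = vec![0b01; 4];
    let t = set_single(&mut end_states, 2, 0b10).expect("state changed");
    assert_eq!(t.usage, 0b01..0b10);
    assert!(set_single(&mut end_states, 2, 0b10).is_none()); // no-op, no barrier
}
```

The real tracker additionally records start states, which is what lets finished command buffers be stitched into the device tracker later.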
-pub(crate) struct DeviceBufferTracker { +pub(crate) struct DeviceBufferTracker { current_states: Vec, - metadata: ResourceMetadata>>, + metadata: ResourceMetadata>, temp: Vec>, } -impl DeviceBufferTracker { +impl DeviceBufferTracker { pub fn new() -> Self { Self { current_states: Vec::new(), @@ -492,14 +489,14 @@ impl DeviceBufferTracker { } /// Returns a list of all buffers tracked. - pub fn used_resources(&self) -> impl Iterator>> + '_ { + pub fn used_resources(&self) -> impl Iterator> + '_ { self.metadata.owned_resources() } /// Inserts a single buffer and its state into the resource tracker. /// /// If the resource already exists in the tracker, it will be overwritten. - pub fn insert_single(&mut self, buffer: &Arc>, state: BufferUses) { + pub fn insert_single(&mut self, buffer: &Arc, state: BufferUses) { let index = buffer.tracker_index().as_usize(); self.allow_index(index); @@ -527,7 +524,7 @@ impl DeviceBufferTracker { /// is returned. No more than one transition is needed. pub fn set_single( &mut self, - buffer: &Arc>, + buffer: &Arc, state: BufferUses, ) -> Option> { let index: usize = buffer.tracker_index().as_usize(); @@ -557,9 +554,9 @@ impl DeviceBufferTracker { /// those transitions are returned. pub fn set_from_tracker_and_drain_transitions<'a, 'b: 'a>( &'a mut self, - tracker: &'a BufferTracker, + tracker: &'a BufferTracker, snatch_guard: &'b SnatchGuard<'b>, - ) -> impl Iterator> { + ) -> impl Iterator> { for index in tracker.metadata.owned_indices() { self.tracker_assert_in_bounds(index); @@ -623,14 +620,14 @@ impl BufferStateProvider<'_> { /// Indexes must be valid indexes into all arrays passed in /// to this function, either directly or via metadata or provider structs. #[inline(always)] -unsafe fn insert_or_merge( +unsafe fn insert_or_merge( start_states: Option<&mut [BufferUses]>, current_states: &mut [BufferUses], - resource_metadata: &mut ResourceMetadata>>, + resource_metadata: &mut ResourceMetadata>, index32: u32, index: usize, state_provider: BufferStateProvider<'_>, - metadata_provider: ResourceMetadataProvider<'_, Arc>>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, ) -> Result<(), ResourceUsageCompatibilityError> { let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; @@ -665,6 +662,7 @@ unsafe fn insert_or_merge( /// - Uses the `start_state_provider` to populate `start_states` /// - Uses either `end_state_provider` or `start_state_provider` /// to populate `current_states`. +/// /// If the resource is tracked /// - Inserts barriers from the state in `current_states` /// to the state provided by `start_state_provider`. @@ -678,14 +676,14 @@ unsafe fn insert_or_merge( /// Indexes must be valid indexes into all arrays passed in /// to this function, either directly or via metadata or provider structs. 
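(`insert_or_merge` above ORs the incoming usage into the tracked state and fails only when the merged bits are mutually incompatible — roughly, a write-capable usage combined with anything else. A sketch of that rule under those simplified assumptions; `EXCLUSIVE` is a hypothetical bit, not the real constant:)

```rust
type BufferUses = u16;
const EXCLUSIVE: BufferUses = 0b0100; // hypothetical write-capable bit

// A merged state is invalid if an exclusive (write) bit coexists
// with any other usage bit.
fn invalid_resource_state(state: BufferUses) -> bool {
    state & EXCLUSIVE != 0 && state.count_ones() > 1
}

fn merge(current: &mut BufferUses, new: BufferUses) -> Result<(), String> {
    let merged = *current | new;
    if invalid_resource_state(merged) {
        return Err(format!("incompatible usages: {current:#06b} + {new:#06b}"));
    }
    *current = merged;
    Ok(())
}

fn main() {
    let mut state = 0b0001; // a read usage
    merge(&mut state, 0b0010).unwrap(); // read + read merges fine
    assert!(merge(&mut state, EXCLUSIVE).is_err()); // write + reads conflicts
}
```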
#[inline(always)] -unsafe fn insert_or_barrier_update( +unsafe fn insert_or_barrier_update( start_states: Option<&mut [BufferUses]>, current_states: &mut [BufferUses], - resource_metadata: &mut ResourceMetadata>>, + resource_metadata: &mut ResourceMetadata>, index: usize, start_state_provider: BufferStateProvider<'_>, end_state_provider: Option>, - metadata_provider: ResourceMetadataProvider<'_, Arc>>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, barriers: &mut Vec>, ) { let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; @@ -730,8 +728,6 @@ unsafe fn insert( strict_assert_eq!(invalid_resource_state(new_start_state), false); strict_assert_eq!(invalid_resource_state(new_end_state), false); - log::trace!("\tbuf {index}: insert {new_start_state:?}..{new_end_state:?}"); - unsafe { if let Some(&mut ref mut start_state) = start_states { *start_state.get_unchecked_mut(index) = new_start_state; @@ -744,12 +740,12 @@ } #[inline(always)] -unsafe fn merge( +unsafe fn merge( current_states: &mut [BufferUses], - index32: u32, + _index32: u32, index: usize, state_provider: BufferStateProvider<'_>, - metadata_provider: ResourceMetadataProvider<'_, Arc>>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, ) -> Result<(), ResourceUsageCompatibilityError> { let current_state = unsafe { current_states.get_unchecked_mut(index) }; let new_state = unsafe { state_provider.get_state(index) }; @@ -764,8 +760,6 @@ )); } - log::trace!("\tbuf {index32}: merge {current_state:?} + {new_state:?}"); - *current_state = merged_state; Ok(()) @@ -790,8 +784,6 @@ unsafe fn barrier( selector: (), usage: current_state..new_state, }); - - log::trace!("\tbuf {index}: transition {current_state:?} -> {new_state:?}"); } #[inline(always)] diff --git a/wgpu-core/src/track/metadata.rs b/wgpu-core/src/track/metadata.rs index d7d63f04fa..22576207ae 100644 --- a/wgpu-core/src/track/metadata.rs +++ b/wgpu-core/src/track/metadata.rs @@ -1,7 +1,6 @@ //! The `ResourceMetadata` type. use bit_vec::BitVec; -use std::mem; use wgt::strict_assert; /// A set of resources, holding a `Arc` and epoch for each member. @@ -67,7 +66,7 @@ impl ResourceMetadata { /// Returns true if the set contains the resource with the given index. pub(super) fn contains(&self, index: usize) -> bool { - self.owned[index] + self.owned.get(index).unwrap_or(false) } /// Returns true if the set contains the resource with the given index. @@ -191,7 +190,7 @@ fn resize_bitvec(vec: &mut BitVec, size: usize) { /// /// Will skip entire usize's worth of bits if they are all false. fn iterate_bitvec_indices(ownership: &BitVec) -> impl Iterator + '_ { - const BITS_PER_BLOCK: usize = mem::size_of::() * 8; + const BITS_PER_BLOCK: usize = usize::BITS as usize; let size = ownership.len(); diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index 497436a813..9a66b5f903 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -1,7 +1,7 @@ /*! Resource State and Lifetime Trackers These structures are responsible for keeping track of resource state, generating barriers where needed, and making sure resources are kept alive until the trackers die. ## General Architecture @@ -35,7 +35,7 @@ Stateless trackers only store metadata and own the given resource. ## Use Case Within each type of tracker, the trackers are further split into 3 different use cases, Bind Group, Usage Scope, and a full Tracker.
Bind Group trackers are just a list of different resources, their refcount, and how they are used. Textures are used via a selector and a usage type. @@ -60,7 +60,7 @@ not always contain every resource. Some resources (or even most resources) go unused in any given command buffer. So to help speed up the process of iterating through possibly thousands of resources, we use a bit vector to represent if a resource is in the buffer or not. This allows us extremely efficient memory utilization, as well as being able to bail out of whole blocks of 32-64 resources with a single usize comparison with zero. In practice this means that merging partially resident buffers is extremely quick. @@ -98,13 +98,13 @@ Device <- CommandBuffer = insert(device.start, device.end, buffer.start, buffer. mod buffer; mod metadata; mod range; +mod ray_tracing; mod stateless; mod texture; use crate::{ binding_model, command, - hal_api::HalApi, - lock::{rank, Mutex, RwLock}, + lock::{rank, Mutex}, pipeline, resource::{self, Labeled, ResourceErrorIdent}, snatch::SnatchGuard, @@ -113,14 +113,15 @@ use std::{fmt, ops, sync::Arc}; use thiserror::Error; +use crate::track::ray_tracing::AccelerationStructureTracker; pub(crate) use buffer::{ BufferBindGroupState, BufferTracker, BufferUsageScope, DeviceBufferTracker, }; use metadata::{ResourceMetadata, ResourceMetadataProvider}; -pub(crate) use stateless::{StatelessBindGroupState, StatelessTracker}; +pub(crate) use stateless::StatelessTracker; pub(crate) use texture::{ - DeviceTextureTracker, TextureBindGroupState, TextureSelector, TextureTracker, - TextureTrackerSetSingle, TextureUsageScope, + DeviceTextureTracker, TextureSelector, TextureTracker, TextureTrackerSetSingle, + TextureUsageScope, TextureViewBindGroupState, }; use wgt::strict_assert_ne; @@ -141,6 +142,7 @@ impl TrackerIndex { /// - IDs of dead handles can be recycled while resources are internally held alive (and tracked). /// - The plan is to remove IDs in the long run /// ([#5121](https://github.com/gfx-rs/wgpu/issues/5121)). +/// In order to produce these tracker indices, there is a shared TrackerIndexAllocator /// per resource type.
Indices have the same lifetime as the internal resource they /// are associated to (alloc happens when creating the resource and free is called when @@ -220,15 +222,12 @@ pub(crate) struct TrackerIndexAllocators { pub texture_views: Arc, pub samplers: Arc, pub bind_groups: Arc, - pub bind_group_layouts: Arc, pub compute_pipelines: Arc, pub render_pipelines: Arc, - pub pipeline_layouts: Arc, pub bundles: Arc, pub query_sets: Arc, pub blas_s: Arc, pub tlas_s: Arc, - pub pipeline_caches: Arc, } impl TrackerIndexAllocators { @@ -239,13 +238,10 @@ impl TrackerIndexAllocators { texture_views: Arc::new(SharedTrackerIndexAllocator::new()), samplers: Arc::new(SharedTrackerIndexAllocator::new()), bind_groups: Arc::new(SharedTrackerIndexAllocator::new()), - bind_group_layouts: Arc::new(SharedTrackerIndexAllocator::new()), compute_pipelines: Arc::new(SharedTrackerIndexAllocator::new()), render_pipelines: Arc::new(SharedTrackerIndexAllocator::new()), - pipeline_layouts: Arc::new(SharedTrackerIndexAllocator::new()), bundles: Arc::new(SharedTrackerIndexAllocator::new()), query_sets: Arc::new(SharedTrackerIndexAllocator::new()), - pipeline_caches: Arc::new(SharedTrackerIndexAllocator::new()), blas_s: Arc::new(SharedTrackerIndexAllocator::new()), tlas_s: Arc::new(SharedTrackerIndexAllocator::new()), } @@ -266,11 +262,11 @@ pub(crate) type PendingTransitionList = Vec> impl PendingTransition { /// Produce the hal barrier corresponding to the transition. - pub fn into_hal<'a, A: HalApi>( + pub fn into_hal<'a>( self, - buf: &'a resource::Buffer, + buf: &'a resource::Buffer, snatch_guard: &'a SnatchGuard<'a>, - ) -> hal::BufferBarrier<'a, A> { + ) -> hal::BufferBarrier<'a, dyn hal::DynBuffer> { let buffer = buf.raw(snatch_guard).expect("Buffer is destroyed"); hal::BufferBarrier { buffer, @@ -281,7 +277,10 @@ impl PendingTransition { impl PendingTransition { /// Produce the hal barrier corresponding to the transition. - pub fn into_hal<'a, A: HalApi>(self, texture: &'a A::Texture) -> hal::TextureBarrier<'a, A> { + pub fn into_hal( + self, + texture: &dyn hal::DynTexture, + ) -> hal::TextureBarrier<'_, dyn hal::DynTexture> { // These showing up in a barrier is always a bug strict_assert_ne!(self.usage.start, hal::TextureUses::UNKNOWN); strict_assert_ne!(self.usage.end, hal::TextureUses::UNKNOWN); @@ -358,8 +357,8 @@ pub enum ResourceUsageCompatibilityError { } impl ResourceUsageCompatibilityError { - fn from_buffer( - buffer: &resource::Buffer, + fn from_buffer( + buffer: &resource::Buffer, current_state: hal::BufferUses, new_state: hal::BufferUses, ) -> Self { @@ -372,8 +371,8 @@ impl ResourceUsageCompatibilityError { } } - fn from_texture( - texture: &resource::Texture, + fn from_texture( + texture: &resource::Texture, selector: TextureSelector, current_state: hal::TextureUses, new_state: hal::TextureUses, @@ -423,22 +422,20 @@ impl fmt::Display for InvalidUse { /// All bind group states are sorted by their ID so that when adding to a tracker, /// they are added in the most efficient order possible (ascending order). 
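(Given the documented contract for the `TrackerIndexAllocator` above — dense indices, allocated at resource creation, freed on drop, recycled thereafter — a plausible shape for a per-resource-type allocator is sketched below. This is an illustration, not the actual wgpu-core implementation:)

```rust
use std::sync::Mutex;

struct AllocatorState {
    next: usize,      // next never-used index
    free: Vec<usize>, // indices returned by dropped resources
}

struct SharedTrackerIndexAllocator {
    inner: Mutex<AllocatorState>,
}

impl SharedTrackerIndexAllocator {
    fn new() -> Self {
        SharedTrackerIndexAllocator {
            inner: Mutex::new(AllocatorState { next: 0, free: Vec::new() }),
        }
    }

    // Reuse a freed index when possible so tracker vectors stay dense.
    fn alloc(&self) -> usize {
        let mut state = self.inner.lock().unwrap();
        if let Some(index) = state.free.pop() {
            index
        } else {
            let index = state.next;
            state.next += 1;
            index
        }
    }

    fn free(&self, index: usize) {
        self.inner.lock().unwrap().free.push(index);
    }
}

fn main() {
    let alloc = SharedTrackerIndexAllocator::new();
    let a = alloc.alloc();
    let b = alloc.alloc();
    assert_eq!((a, b), (0, 1));
    alloc.free(a);
    assert_eq!(alloc.alloc(), 0); // recycled
}
```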
#[derive(Debug)] -pub(crate) struct BindGroupStates { - pub buffers: BufferBindGroupState, - pub textures: TextureBindGroupState, - pub views: StatelessBindGroupState>, - pub samplers: StatelessBindGroupState>, - pub acceleration_structures: StatelessBindGroupState>, +pub(crate) struct BindGroupStates { + pub buffers: BufferBindGroupState, + pub views: TextureViewBindGroupState, + pub samplers: StatelessTracker, + pub acceleration_structures: StatelessTracker, } -impl BindGroupStates { +impl BindGroupStates { pub fn new() -> Self { Self { buffers: BufferBindGroupState::new(), - textures: TextureBindGroupState::new(), - views: StatelessBindGroupState::new(), - samplers: StatelessBindGroupState::new(), - acceleration_structures: StatelessBindGroupState::new(), + views: TextureViewBindGroupState::new(), + samplers: StatelessTracker::new(), + acceleration_structures: StatelessTracker::new(), } } @@ -448,10 +445,11 @@ impl BindGroupStates { /// accesses will be in a constant ascending order. pub fn optimize(&mut self) { self.buffers.optimize(); - self.textures.optimize(); + // Views are stateless, however, `TextureViewBindGroupState` + // is special as it will be merged with other texture trackers. self.views.optimize(); - self.samplers.optimize(); - self.acceleration_structures.optimize(); + // Samplers and Tlas's are stateless and don't need to be optimized + // since the tracker is never merged with any other tracker. } } @@ -459,45 +457,28 @@ impl BindGroupStates { /// that are not normally included in a usage scope, but are used by render bundles /// and need to be owned by the render bundles. #[derive(Debug)] -pub(crate) struct RenderBundleScope { - pub buffers: RwLock>, - pub textures: RwLock>, +pub(crate) struct RenderBundleScope { + pub buffers: BufferUsageScope, + pub textures: TextureUsageScope, // Don't need to track views and samplers, they are never used directly, only by bind groups. - pub bind_groups: RwLock>>, - pub render_pipelines: RwLock>>, - pub query_sets: RwLock>>, + pub bind_groups: StatelessTracker, + pub render_pipelines: StatelessTracker, } -impl RenderBundleScope { +impl RenderBundleScope { /// Create the render bundle scope and pull the maximum IDs from the hubs. pub fn new() -> Self { Self { - buffers: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_BUFFERS, - BufferUsageScope::default(), - ), - textures: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_TEXTURES, - TextureUsageScope::default(), - ), - bind_groups: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_BIND_GROUPS, - StatelessTracker::new(), - ), - render_pipelines: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_RENDER_PIPELINES, - StatelessTracker::new(), - ), - query_sets: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_QUERY_SETS, - StatelessTracker::new(), - ), + buffers: BufferUsageScope::default(), + textures: TextureUsageScope::default(), + bind_groups: StatelessTracker::new(), + render_pipelines: StatelessTracker::new(), } } /// Merge the inner contents of a bind group into the render bundle tracker. /// /// Only stateful things are merged in here, all other resources are owned /// indirectly by the bind group. /// /// # Safety /// /// The maximum ID given by each bind group resource must be less than the @@ -506,14 +487,10 @@ impl RenderBundleScope { /// length of the storage given at the call to `new`. pub unsafe fn merge_bind_group( &mut self, - bind_group: &BindGroupStates, + bind_group: &BindGroupStates, ) -> Result<(), ResourceUsageCompatibilityError> { - unsafe { self.buffers.write().merge_bind_group(&bind_group.buffers)?
}; - unsafe { - self.textures - .write() - .merge_bind_group(&bind_group.textures)? - }; + unsafe { self.buffers.merge_bind_group(&bind_group.buffers)? }; + unsafe { self.textures.merge_bind_group(&bind_group.views)? }; Ok(()) } @@ -522,18 +499,18 @@ /// A pool for storing the memory used by [`UsageScope`]s. We take and store this memory when the /// scope is dropped to avoid reallocating. The memory required only grows and allocation cost is /// significant when a large number of resources have been used. -pub(crate) type UsageScopePool = Mutex, TextureUsageScope)>>; +pub(crate) type UsageScopePool = Mutex>; /// A usage scope tracker. Only needs to store stateful resources as stateless /// resources cannot possibly have a usage conflict. #[derive(Debug)] -pub(crate) struct UsageScope<'a, A: HalApi> { - pub pool: &'a UsageScopePool, - pub buffers: BufferUsageScope, - pub textures: TextureUsageScope, +pub(crate) struct UsageScope<'a> { + pub pool: &'a UsageScopePool, + pub buffers: BufferUsageScope, + pub textures: TextureUsageScope, } -impl<'a, A: HalApi> Drop for UsageScope<'a, A> { +impl<'a> Drop for UsageScope<'a> { fn drop(&mut self) { // clear vecs and push into pool self.buffers.clear(); @@ -545,14 +522,14 @@ } } -impl UsageScope<'static, A> { +impl UsageScope<'static> { pub fn new_pooled<'d>( - pool: &'d UsageScopePool, + pool: &'d UsageScopePool, tracker_indices: &TrackerIndexAllocators, - ) -> UsageScope<'d, A> { + ) -> UsageScope<'d> { let pooled = pool.lock().pop().unwrap_or_default(); - let mut scope = UsageScope::<'d, A> { + let mut scope = UsageScope::<'d> { pool, buffers: pooled.0, textures: pooled.1, @@ -564,10 +541,10 @@ } } -impl<'a, A: HalApi> UsageScope<'a, A> { +impl<'a> UsageScope<'a> { /// Merge the inner contents of a bind group into the usage scope. /// /// Only stateful things are merged in here, all other resources are owned /// indirectly by the bind group. /// /// # Safety /// /// The maximum ID given by each bind group resource must be less than the @@ -576,11 +553,11 @@ /// length of the storage given at the call to `new`. pub unsafe fn merge_bind_group( &mut self, - bind_group: &BindGroupStates, + bind_group: &BindGroupStates, ) -> Result<(), ResourceUsageCompatibilityError> { unsafe { self.buffers.merge_bind_group(&bind_group.buffers)?; - self.textures.merge_bind_group(&bind_group.textures)?; + self.textures.merge_bind_group(&bind_group.views)?; } Ok(()) } /// Merge the inner contents of a bind group into the usage scope. /// /// Only stateful things are merged in here, all other resources are owned /// indirectly by a bind group or are merged directly into the command buffer tracker. /// /// # Safety /// /// The maximum ID given by each bind group resource must be less than the @@ -597,24 +574,22 @@ /// length of the storage given at the call to `new`. pub unsafe fn merge_render_bundle( &mut self, - render_bundle: &RenderBundleScope, + render_bundle: &RenderBundleScope, ) -> Result<(), ResourceUsageCompatibilityError> { - self.buffers - .merge_usage_scope(&*render_bundle.buffers.read())?; - self.textures - .merge_usage_scope(&*render_bundle.textures.read())?; + self.buffers.merge_usage_scope(&render_bundle.buffers)?; + self.textures.merge_usage_scope(&render_bundle.textures)?; Ok(()) } } /// A tracker used by Device.
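(Returning to the `UsageScopePool` above: it exists because the state vectors only ever grow, so recycling them amortizes the allocation cost. A sketch of the take/put cycle, with the pooled scope flattened to a pair of `Vec`s:)

```rust
use std::sync::Mutex;

// Stand-in for the pooled (BufferUsageScope, TextureUsageScope) pair.
type PooledScope = (Vec<u16>, Vec<u16>);

struct UsageScopePool(Mutex<Vec<PooledScope>>);

impl UsageScopePool {
    // Reuse a previously grown scope when one is available.
    fn take(&self) -> PooledScope {
        self.0.lock().unwrap().pop().unwrap_or_default()
    }

    // `clear` drops the entries but keeps the allocations alive.
    fn put(&self, mut scope: PooledScope) {
        scope.0.clear();
        scope.1.clear();
        self.0.lock().unwrap().push(scope);
    }
}

fn main() {
    let pool = UsageScopePool(Mutex::new(Vec::new()));
    let mut scope = pool.take();
    scope.0.resize(1024, 0); // grows once...
    pool.put(scope);
    assert!(pool.take().0.capacity() >= 1024); // ...and the capacity is reused
}
```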
-pub(crate) struct DeviceTracker { - pub buffers: DeviceBufferTracker, - pub textures: DeviceTextureTracker, +pub(crate) struct DeviceTracker { + pub buffers: DeviceBufferTracker, + pub textures: DeviceTextureTracker, } -impl DeviceTracker { +impl DeviceTracker { pub fn new() -> Self { Self { buffers: DeviceBufferTracker::new(), @@ -624,32 +599,32 @@ } /// A full double sided tracker used by CommandBuffers. -pub(crate) struct Tracker { - pub buffers: BufferTracker, - pub textures: TextureTracker, - pub views: StatelessTracker>, - pub bind_groups: StatelessTracker>, - pub compute_pipelines: StatelessTracker>, - pub render_pipelines: StatelessTracker>, - pub bundles: StatelessTracker>, - pub query_sets: StatelessTracker>, - pub blas_s: StatelessTracker>, - pub tlas_s: StatelessTracker>, +pub(crate) struct Tracker { + pub buffers: BufferTracker, + pub textures: TextureTracker, + pub blas_s: AccelerationStructureTracker, + pub tlas_s: AccelerationStructureTracker, + pub views: StatelessTracker, + pub bind_groups: StatelessTracker, + pub compute_pipelines: StatelessTracker, + pub render_pipelines: StatelessTracker, + pub bundles: StatelessTracker, + pub query_sets: StatelessTracker, } -impl Tracker { +impl Tracker { pub fn new() -> Self { Self { buffers: BufferTracker::new(), textures: TextureTracker::new(), + blas_s: AccelerationStructureTracker::new(), + tlas_s: AccelerationStructureTracker::new(), views: StatelessTracker::new(), bind_groups: StatelessTracker::new(), compute_pipelines: StatelessTracker::new(), render_pipelines: StatelessTracker::new(), bundles: StatelessTracker::new(), query_sets: StatelessTracker::new(), - blas_s: StatelessTracker::new(), - tlas_s: StatelessTracker::new(), } } @@ -668,7 +643,7 @@ impl Tracker { /// bind group as a source of which IDs to look at. The bind groups /// must have first been added to the usage scope. /// /// Only stateful things are merged in here, all other resources are owned /// indirectly by the bind group. /// /// # Safety /// /// The maximum ID given by each bind group resource must be less than the @@ -677,8 +652,8 @@ /// value given to `set_size` pub unsafe fn set_and_remove_from_usage_scope_sparse( &mut self, - scope: &mut UsageScope, - bind_group: &BindGroupStates, + scope: &mut UsageScope, + bind_group: &BindGroupStates, ) { unsafe { self.buffers.set_and_remove_from_usage_scope_sparse( @@ -688,28 +663,7 @@ }; unsafe { self.textures - .set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.textures) + .set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.views) }; } - - /// Tracks the stateless resources from the given renderbundle. It is expected - /// that the stateful resources will get merged into a usage scope first.
- /// - /// # Safety - /// - /// The maximum ID given by each bind group resource must be less than the - /// value given to `set_size` - pub unsafe fn add_from_render_bundle( - &mut self, - render_bundle: &RenderBundleScope, - ) -> Result<(), ResourceUsageCompatibilityError> { - self.bind_groups - .add_from_tracker(&*render_bundle.bind_groups.read()); - self.render_pipelines - .add_from_tracker(&*render_bundle.render_pipelines.read()); - self.query_sets - .add_from_tracker(&*render_bundle.query_sets.read()); - - Ok(()) - } } diff --git a/wgpu-core/src/track/ray_tracing.rs b/wgpu-core/src/track/ray_tracing.rs new file mode 100644 index 0000000000..c344526dfb --- /dev/null +++ b/wgpu-core/src/track/ray_tracing.rs @@ -0,0 +1,81 @@ +use crate::resource::AccelerationStructure; +use crate::track::metadata::ResourceMetadata; +use crate::track::ResourceUses; +use hal::AccelerationStructureUses; +use std::sync::Arc; +use wgt::strict_assert; + +pub(crate) struct AccelerationStructureTracker { + start: Vec, + end: Vec, + + metadata: ResourceMetadata>, +} + +impl AccelerationStructureTracker { + pub fn new() -> Self { + Self { + start: Vec::new(), + end: Vec::new(), + + metadata: ResourceMetadata::new(), + } + } + + fn tracker_assert_in_bounds(&self, index: usize) { + strict_assert!(index < self.start.len()); + strict_assert!(index < self.end.len()); + self.metadata.tracker_assert_in_bounds(index); + } + + /// Sets the size of all the vectors inside the tracker. + /// + /// Must be called with the highest possible Buffer ID before + /// all unsafe functions are called. + pub fn set_size(&mut self, size: usize) { + self.start.resize(size, AccelerationStructureUses::empty()); + self.end.resize(size, AccelerationStructureUses::empty()); + + self.metadata.set_size(size); + } + + /// Extend the vectors to let the given index be valid. + fn allow_index(&mut self, index: usize) { + if index >= self.start.len() { + self.set_size(index + 1); + } + } + + /// Returns true if the given buffer is tracked. + pub fn contains(&self, acceleration_structure: &T) -> bool { + self.metadata + .contains(acceleration_structure.tracker_index().as_usize()) + } + + /// Inserts a single resource into the resource tracker. + pub fn set_single(&mut self, resource: Arc) { + let index: usize = resource.tracker_index().as_usize(); + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + } +} + +impl ResourceUses for AccelerationStructureUses { + const EXCLUSIVE: Self = Self::empty(); + + type Selector = (); + + fn bits(self) -> u16 { + Self::bits(&self) as u16 + } + + fn all_ordered(self) -> bool { + true + } + + fn any_exclusive(self) -> bool { + self.intersects(Self::EXCLUSIVE) + } +} diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs index 06779540d7..975a850f36 100644 --- a/wgpu-core/src/track/stateless.rs +++ b/wgpu-core/src/track/stateless.rs @@ -1,129 +1,36 @@ -/*! Stateless Trackers - * - * Stateless trackers don't have any state, so make no - * distinction between a usage scope and a full tracker. -!*/ - +use std::slice::Iter; use std::sync::Arc; -use crate::{ - lock::{rank, Mutex}, - resource::Trackable, - track::ResourceMetadata, -}; - -/// Stores all the resources that a bind group stores. +/// A tracker that holds strong references to resources. +/// +/// This is only used to keep resources alive. 
#[derive(Debug)] -pub(crate) struct StatelessBindGroupState { - resources: Mutex>>, +pub(crate) struct StatelessTracker { + resources: Vec>, } -impl StatelessBindGroupState { +impl StatelessTracker { pub fn new() -> Self { Self { - resources: Mutex::new(rank::STATELESS_BIND_GROUP_STATE_RESOURCES, Vec::new()), - } - } - - /// Optimize the buffer bind group state by sorting it by ID. - /// - /// When this list of states is merged into a tracker, the memory - /// accesses will be in a constant ascending order. - pub(crate) fn optimize(&self) { - let mut resources = self.resources.lock(); - resources.sort_unstable_by_key(|resource| resource.tracker_index()); - } - - /// Returns a list of all resources tracked. May contain duplicates. - pub fn used_resources(&self) -> impl Iterator> + '_ { - let resources = self.resources.lock(); - resources.iter().cloned().collect::>().into_iter() - } - - /// Adds the given resource. - pub fn add_single(&self, resource: &Arc) { - let mut resources = self.resources.lock(); - resources.push(resource.clone()); - } -} - -/// Stores all resource state within a command buffer or device. -#[derive(Debug)] -pub(crate) struct StatelessTracker { - metadata: ResourceMetadata>, -} - -impl StatelessTracker { - pub fn new() -> Self { - Self { - metadata: ResourceMetadata::new(), - } - } - - fn tracker_assert_in_bounds(&self, index: usize) { - self.metadata.tracker_assert_in_bounds(index); - } - - /// Sets the size of all the vectors inside the tracker. - /// - /// Must be called with the highest possible Resource ID of this type - /// before all unsafe functions are called. - pub fn set_size(&mut self, size: usize) { - self.metadata.set_size(size); - } - - /// Extend the vectors to let the given index be valid. - fn allow_index(&mut self, index: usize) { - if index >= self.metadata.size() { - self.set_size(index + 1); + resources: Vec::new(), } } - /// Returns a list of all resources tracked. - pub fn used_resources(&self) -> impl Iterator> + '_ { - self.metadata.owned_resources() - } - /// Inserts a single resource into the resource tracker. /// - /// If the resource already exists in the tracker, it will be overwritten. - /// - /// If the ID is higher than the length of internal vectors, - /// the vectors will be extended. A call to set_size is not needed. - /// /// Returns a reference to the newly inserted resource. /// (This allows avoiding a clone/reference count increase in many cases.) pub fn insert_single(&mut self, resource: Arc) -> &Arc { - let index = resource.tracker_index().as_usize(); - - self.allow_index(index); - - self.tracker_assert_in_bounds(index); - - unsafe { self.metadata.insert(index, resource) } + self.resources.push(resource); + unsafe { self.resources.last().unwrap_unchecked() } } +} - /// Adds the given resources from the given tracker. - /// - /// If the ID is higher than the length of internal vectors, - /// the vectors will be extended. A call to set_size is not needed. 
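(The Vec-based `StatelessTracker` above returns a borrow of the just-pushed `Arc` through `unwrap_unchecked`. A safe equivalent makes it clear why that `unsafe` is sound: the element is pushed on the preceding line, so `last()` can never be `None`.)

```rust
use std::sync::Arc;

struct StatelessTracker<T> {
    resources: Vec<Arc<T>>,
}

impl<T> StatelessTracker<T> {
    fn new() -> Self {
        StatelessTracker { resources: Vec::new() }
    }

    // Returning `&Arc<T>` lets the caller keep using the resource
    // without paying for an extra refcount increment.
    fn insert_single(&mut self, resource: Arc<T>) -> &Arc<T> {
        self.resources.push(resource);
        self.resources.last().expect("pushed on the previous line")
    }
}

fn main() {
    let mut tracker = StatelessTracker::new();
    let sampler = tracker.insert_single(Arc::new(String::from("sampler")));
    assert_eq!(sampler.as_str(), "sampler");
}
```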
- pub fn add_from_tracker(&mut self, other: &Self) { - let incoming_size = other.metadata.size(); - if incoming_size > self.metadata.size() { - self.set_size(incoming_size); - } - - for index in other.metadata.owned_indices() { - self.tracker_assert_in_bounds(index); - other.tracker_assert_in_bounds(index); - unsafe { - let previously_owned = self.metadata.contains_unchecked(index); +impl<'a, T> IntoIterator for &'a StatelessTracker { + type Item = &'a Arc; + type IntoIter = Iter<'a, Arc>; - if !previously_owned { - let other_resource = other.metadata.get_resource_unchecked(index); - self.metadata.insert(index, other_resource.clone()); - } - } - } + fn into_iter(self) -> Self::IntoIter { + self.resources.as_slice().iter() } } diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs index c3a2468633..1c74bffd97 100644 --- a/wgpu-core/src/track/texture.rs +++ b/wgpu-core/src/track/texture.rs @@ -1,29 +1,26 @@ -/*! Texture Trackers - * - * Texture trackers are significantly more complicated than - * the buffer trackers because textures can be in a "complex" - * state where each individual subresource can potentially be - * in a different state from every other subtresource. These - * complex states are stored separately from the simple states - * because they are signifignatly more difficult to track and - * most resources spend the vast majority of their lives in - * simple states. - * - * There are two special texture usages: `UNKNOWN` and `UNINITIALIZED`. - * - `UNKNOWN` is only used in complex states and is used to signify - * that the complex state does not know anything about those subresources. - * It cannot leak into transitions, it is invalid to transition into UNKNOWN - * state. - * - `UNINITIALIZED` is used in both simple and complex states to mean the texture - * is known to be in some undefined state. Any transition away from UNINITIALIZED - * will treat the contents as junk. -!*/ +//! Texture Trackers +//! +//! Texture trackers are significantly more complicated than +//! the buffer trackers because textures can be in a "complex" +//! state where each individual subresource can potentially be +//! in a different state from every other subresource. These +//! complex states are stored separately from the simple states +//! because they are significantly more difficult to track and +//! most resources spend the vast majority of their lives in +//! simple states. +//! +//! There are two special texture usages: `UNKNOWN` and `UNINITIALIZED`. +//! - `UNKNOWN` is only used in complex states and is used to signify +//! that the complex state does not know anything about those subresources. +//! It cannot leak into transitions, it is invalid to transition into UNKNOWN +//! state. +//! - `UNINITIALIZED` is used in both simple and complex states to mean the texture +//! is known to be in some undefined state. Any transition away from UNINITIALIZED +//! will treat the contents as junk. use super::{range::RangedStates, PendingTransition, PendingTransitionList, TrackerIndex}; use crate::{ - hal_api::HalApi, - lock::{rank, Mutex}, - resource::{Texture, TextureInner, Trackable}, + resource::{Texture, TextureInner, TextureView, Trackable}, snatch::SnatchGuard, track::{ invalid_resource_state, skip_barrier, ResourceMetadata, ResourceMetadataProvider, @@ -152,47 +149,28 @@ impl ComplexTextureState { } } +/// Stores a bind group's texture views + their usages (within the bind group).
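(A sketch of the simple-to-complex promotion described in the comment above: a texture keeps one shared state until some subresource range diverges, at which point a per-(mip, layer) table is materialized. The types and row-major layout here are simplifying assumptions, not the real `ComplexTextureState`:)

```rust
use std::ops::Range;

type TextureUses = u16;

// Which mips/layers an operation touches (as in wgpu-core's TextureSelector).
struct TextureSelector {
    mips: Range<u32>,
    layers: Range<u32>,
}

enum TextureState {
    Simple(TextureUses),       // every subresource shares one state
    Complex(Vec<TextureUses>), // one state per (mip, layer), row-major
}

fn set_partial(
    state: &mut TextureState,
    mips: u32,
    layers: u32,
    sel: &TextureSelector,
    new: TextureUses,
) {
    // Promote on first partial use, seeding the table with the simple state.
    if let TextureState::Simple(s) = *state {
        *state = TextureState::Complex(vec![s; (mips * layers) as usize]);
    }
    let TextureState::Complex(table) = state else { unreachable!() };
    for mip in sel.mips.clone() {
        for layer in sel.layers.clone() {
            table[(mip * layers + layer) as usize] = new;
        }
    }
}

fn main() {
    let mut state = TextureState::Simple(0b01);
    let sel = TextureSelector { mips: 0..1, layers: 1..2 };
    set_partial(&mut state, 2, 2, &sel, 0b10);
    if let TextureState::Complex(table) = &state {
        assert_eq!(*table, vec![0b01, 0b10, 0b01, 0b01]);
    }
}
```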
#[derive(Debug)] -struct TextureBindGroupStateData { - selector: Option, - texture: Arc>, - usage: TextureUses, +pub(crate) struct TextureViewBindGroupState { + views: Vec<(Arc, TextureUses)>, } - -/// Stores all the textures that a bind group stores. -#[derive(Debug)] -pub(crate) struct TextureBindGroupState { - textures: Mutex>>, -} -impl TextureBindGroupState { +impl TextureViewBindGroupState { pub fn new() -> Self { - Self { - textures: Mutex::new(rank::TEXTURE_BIND_GROUP_STATE_TEXTURES, Vec::new()), - } + Self { views: Vec::new() } } /// Optimize the texture bind group state by sorting it by ID. /// /// When this list of states is merged into a tracker, the memory /// accesses will be in a constant ascending order. - pub(crate) fn optimize(&self) { - let mut textures = self.textures.lock(); - textures.sort_unstable_by_key(|v| v.texture.tracker_index()); + pub(crate) fn optimize(&mut self) { + self.views + .sort_unstable_by_key(|(view, _)| view.parent.tracker_index()); } /// Adds the given resource with the given state. - pub fn add_single( - &self, - texture: &Arc>, - selector: Option, - state: TextureUses, - ) { - let mut textures = self.textures.lock(); - textures.push(TextureBindGroupStateData { - selector, - texture: texture.clone(), - usage: state, - }); + pub fn insert_single(&mut self, view: Arc, usage: TextureUses) { + self.views.push((view, usage)); } } @@ -223,12 +201,12 @@ impl TextureStateSet { /// Stores all texture state within a single usage scope. #[derive(Debug)] -pub(crate) struct TextureUsageScope { +pub(crate) struct TextureUsageScope { set: TextureStateSet, - metadata: ResourceMetadata>>, + metadata: ResourceMetadata>, } -impl Default for TextureUsageScope { +impl Default for TextureUsageScope { fn default() -> Self { Self { set: TextureStateSet::new(), @@ -237,7 +215,7 @@ impl Default for TextureUsageScope { } } -impl TextureUsageScope { +impl TextureUsageScope { fn tracker_assert_in_bounds(&self, index: usize) { self.metadata.tracker_assert_in_bounds(index); @@ -326,11 +304,10 @@ impl TextureUsageScope { /// method is called. pub unsafe fn merge_bind_group( &mut self, - bind_group: &TextureBindGroupState, + bind_group: &TextureViewBindGroupState, ) -> Result<(), ResourceUsageCompatibilityError> { - let textures = bind_group.textures.lock(); - for t in &*textures { - unsafe { self.merge_single(&t.texture, t.selector.clone(), t.usage)? }; + for (view, usage) in bind_group.views.iter() { + unsafe { self.merge_single(&view.parent, Some(view.selector.clone()), *usage)? }; } Ok(()) @@ -351,7 +328,7 @@ impl TextureUsageScope { /// method is called. pub unsafe fn merge_single( &mut self, - texture: &Arc>, + texture: &Arc, selector: Option, new_state: TextureUses, ) -> Result<(), ResourceUsageCompatibilityError> { @@ -375,26 +352,26 @@ impl TextureUsageScope { } } -pub(crate) trait TextureTrackerSetSingle { +pub(crate) trait TextureTrackerSetSingle { fn set_single( &mut self, - texture: &Arc>, + texture: &Arc, selector: TextureSelector, new_state: TextureUses, ) -> Drain<'_, PendingTransition>; } /// Stores all texture state within a command buffer. -pub(crate) struct TextureTracker { +pub(crate) struct TextureTracker { start_set: TextureStateSet, end_set: TextureStateSet, - metadata: ResourceMetadata>>, + metadata: ResourceMetadata>, temp: Vec>, } -impl TextureTracker { +impl TextureTracker { pub fn new() -> Self { Self { start_set: TextureStateSet::new(), @@ -446,8 +423,13 @@ impl TextureTracker { } } + /// Returns true if the tracker owns the given texture. 
+ pub fn contains(&self, texture: &Texture) -> bool { + self.metadata.contains(texture.tracker_index().as_usize()) + } + /// Returns a list of all textures tracked. - pub fn used_resources(&self) -> impl Iterator>> + '_ { + pub fn used_resources(&self) -> impl Iterator> + '_ { self.metadata.owned_resources() } @@ -455,7 +437,7 @@ impl TextureTracker { pub fn drain_transitions<'a>( &'a mut self, snatch_guard: &'a SnatchGuard<'a>, - ) -> (PendingTransitionList, Vec>>) { + ) -> (PendingTransitionList, Vec>) { let mut textures = Vec::new(); let transitions = self .temp @@ -478,7 +460,7 @@ impl TextureTracker { /// the vectors will be extended. A call to set_size is not needed. pub fn set_single( &mut self, - texture: &Arc>, + texture: &Arc, selector: TextureSelector, new_state: TextureUses, ) -> Drain<'_, PendingTransition> { @@ -556,7 +538,7 @@ impl TextureTracker { /// /// If the ID is higher than the length of internal vectors, /// the vectors will be extended. A call to set_size is not needed. - pub fn set_from_usage_scope(&mut self, scope: &TextureUsageScope) { + pub fn set_from_usage_scope(&mut self, scope: &TextureUsageScope) { let incoming_size = scope.set.simple.len(); if incoming_size > self.start_set.simple.len() { self.set_size(incoming_size); @@ -604,23 +586,22 @@ impl TextureTracker { /// method is called. pub unsafe fn set_and_remove_from_usage_scope_sparse( &mut self, - scope: &mut TextureUsageScope, - bind_group_state: &TextureBindGroupState, + scope: &mut TextureUsageScope, + bind_group_state: &TextureViewBindGroupState, ) { let incoming_size = scope.set.simple.len(); if incoming_size > self.start_set.simple.len() { self.set_size(incoming_size); } - let textures = bind_group_state.textures.lock(); - for t in textures.iter() { - let index = t.texture.tracker_index().as_usize(); + for (view, _) in bind_group_state.views.iter() { + let index = view.parent.tracker_index().as_usize(); scope.tracker_assert_in_bounds(index); if unsafe { !scope.metadata.contains_unchecked(index) } { continue; } - let texture_selector = &t.texture.full_range; + let texture_selector = &view.parent.full_range; unsafe { insert_or_barrier_update( texture_selector, @@ -642,10 +623,10 @@ impl TextureTracker { } } -impl TextureTrackerSetSingle for TextureTracker { +impl TextureTrackerSetSingle for TextureTracker { fn set_single( &mut self, - texture: &Arc>, + texture: &Arc, selector: TextureSelector, new_state: TextureUses, ) -> Drain<'_, PendingTransition> { @@ -654,13 +635,13 @@ impl TextureTrackerSetSingle for TextureTracker { } /// Stores all texture state within a device. -pub(crate) struct DeviceTextureTracker { +pub(crate) struct DeviceTextureTracker { current_state_set: TextureStateSet, - metadata: ResourceMetadata>>, + metadata: ResourceMetadata>, temp: Vec>, } -impl DeviceTextureTracker { +impl DeviceTextureTracker { pub fn new() -> Self { Self { current_state_set: TextureStateSet::new(), @@ -692,14 +673,14 @@ impl DeviceTextureTracker { } /// Returns a list of all textures tracked. - pub fn used_resources(&self) -> impl Iterator>> + '_ { + pub fn used_resources(&self) -> impl Iterator> + '_ { self.metadata.owned_resources() } /// Inserts a single texture and a state into the resource tracker. /// /// If the resource already exists in the tracker, it will be overwritten. 
- pub fn insert_single(&mut self, texture: &Arc>, usage: TextureUses) { + pub fn insert_single(&mut self, texture: &Arc, usage: TextureUses) { let index = texture.tracker_index().as_usize(); self.allow_index(index); @@ -728,7 +709,7 @@ impl DeviceTextureTracker { /// is returned. pub fn set_single( &mut self, - texture: &Arc>, + texture: &Arc, selector: TextureSelector, new_state: TextureUses, ) -> Drain<'_, PendingTransition> { @@ -770,9 +751,9 @@ impl DeviceTextureTracker { /// those transitions are returned. pub fn set_from_tracker_and_drain_transitions<'a, 'b: 'a>( &'a mut self, - tracker: &'a TextureTracker, + tracker: &'a TextureTracker, snatch_guard: &'b SnatchGuard<'b>, - ) -> impl Iterator> { + ) -> impl Iterator> { for index in tracker.metadata.owned_indices() { self.tracker_assert_in_bounds(index); @@ -814,9 +795,9 @@ impl DeviceTextureTracker { /// those transitions are returned. pub fn set_from_usage_scope_and_drain_transitions<'a, 'b: 'a>( &'a mut self, - scope: &'a TextureUsageScope, + scope: &'a TextureUsageScope, snatch_guard: &'b SnatchGuard<'b>, - ) -> impl Iterator> { + ) -> impl Iterator> { for index in scope.metadata.owned_indices() { self.tracker_assert_in_bounds(index); @@ -874,10 +855,10 @@ impl DeviceTextureTracker { } } -impl TextureTrackerSetSingle for DeviceTextureTracker { +impl TextureTrackerSetSingle for DeviceTextureTracker { fn set_single( &mut self, - texture: &Arc>, + texture: &Arc, selector: TextureSelector, new_state: TextureUses, ) -> Drain<'_, PendingTransition> { @@ -996,13 +977,13 @@ impl<'a> TextureStateProvider<'a> { /// Indexes must be valid indexes into all arrays passed in /// to this function, either directly or via metadata or provider structs. #[inline(always)] -unsafe fn insert_or_merge( +unsafe fn insert_or_merge( texture_selector: &TextureSelector, current_state_set: &mut TextureStateSet, - resource_metadata: &mut ResourceMetadata>>, + resource_metadata: &mut ResourceMetadata>, index: usize, state_provider: TextureStateProvider<'_>, - metadata_provider: ResourceMetadataProvider<'_, Arc>>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, ) -> Result<(), ResourceUsageCompatibilityError> { let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; @@ -1038,6 +1019,7 @@ unsafe fn insert_or_merge( /// - Uses the `start_state_provider` to populate `start_states` /// - Uses either `end_state_provider` or `start_state_provider` /// to populate `current_states`. +/// /// If the resource is tracked /// - Inserts barriers from the state in `current_states` /// to the state provided by `start_state_provider`. @@ -1051,15 +1033,15 @@ unsafe fn insert_or_merge( /// Indexes must be valid indexes into all arrays passed in /// to this function, either directly or via metadata or provider structs. #[inline(always)] -unsafe fn insert_or_barrier_update( +unsafe fn insert_or_barrier_update( texture_selector: &TextureSelector, start_state: Option<&mut TextureStateSet>, current_state_set: &mut TextureStateSet, - resource_metadata: &mut ResourceMetadata>>, + resource_metadata: &mut ResourceMetadata>, index: usize, start_state_provider: TextureStateProvider<'_>, end_state_provider: Option>, - metadata_provider: ResourceMetadataProvider<'_, Arc>>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, barriers: &mut Vec>, ) { let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; @@ -1119,8 +1101,6 @@ unsafe fn insert( // check that resource states don't have any conflicts. 
strict_assert_eq!(invalid_resource_state(state), false); - log::trace!("\ttex {index}: insert start {state:?}"); - if let Some(start_state) = start_state { unsafe { *start_state.simple.get_unchecked_mut(index) = state }; } @@ -1136,8 +1116,6 @@ unsafe fn insert( let complex = unsafe { ComplexTextureState::from_selector_state_iter(full_range, state_iter) }; - log::trace!("\ttex {index}: insert start {complex:?}"); - if let Some(start_state) = start_state { unsafe { *start_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; start_state.complex.insert(index, complex.clone()); @@ -1158,8 +1136,6 @@ unsafe fn insert( // check that resource states don't have any conflicts. strict_assert_eq!(invalid_resource_state(state), false); - log::trace!("\ttex {index}: insert end {state:?}"); - // We only need to insert into the end, as there is guaranteed to be // a start state provider. unsafe { *end_state.simple.get_unchecked_mut(index) = state }; @@ -1171,8 +1147,6 @@ unsafe fn insert( ComplexTextureState::from_selector_state_iter(full_range, state_iter) }; - log::trace!("\ttex {index}: insert end {complex:?}"); - // We only need to insert into the end, as there is guaranteed to be // a start state provider. unsafe { *end_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; @@ -1188,12 +1162,12 @@ unsafe fn insert( } #[inline(always)] -unsafe fn merge( +unsafe fn merge( texture_selector: &TextureSelector, current_state_set: &mut TextureStateSet, index: usize, state_provider: TextureStateProvider<'_>, - metadata_provider: ResourceMetadataProvider<'_, Arc>>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, ) -> Result<(), ResourceUsageCompatibilityError> { let current_simple = unsafe { current_state_set.simple.get_unchecked_mut(index) }; let current_state = if *current_simple == TextureUses::COMPLEX { @@ -1210,8 +1184,6 @@ unsafe fn merge( (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Single(new_simple)) => { let merged_state = *current_simple | new_simple; - log::trace!("\ttex {index}: merge simple {current_simple:?} + {new_simple:?}"); - if invalid_resource_state(merged_state) { return Err(ResourceUsageCompatibilityError::from_texture( unsafe { metadata_provider.get(index) }, @@ -1237,8 +1209,6 @@ unsafe fn merge( for (selector, new_state) in new_many { let merged_state = *current_simple | new_state; - log::trace!("\ttex {index}: merge {selector:?} {current_simple:?} + {new_state:?}"); - if invalid_resource_state(merged_state) { return Err(ResourceUsageCompatibilityError::from_texture( unsafe { metadata_provider.get(index) }, @@ -1275,11 +1245,6 @@ unsafe fn merge( // simple states are never unknown. 
let merged_state = merged_state - TextureUses::UNKNOWN; - log::trace!( - "\ttex {index}: merge mip {mip_id} layers {layers:?} \ - {current_layer_state:?} + {new_simple:?}" - ); - if invalid_resource_state(merged_state) { return Err(ResourceUsageCompatibilityError::from_texture( unsafe { metadata_provider.get(index) }, @@ -1316,11 +1281,6 @@ unsafe fn merge( continue; } - log::trace!( - "\ttex {index}: merge mip {mip_id} layers {layers:?} \ - {current_layer_state:?} + {new_state:?}" - ); - if invalid_resource_state(merged_state) { return Err(ResourceUsageCompatibilityError::from_texture( unsafe { metadata_provider.get(index) }, @@ -1368,8 +1328,6 @@ unsafe fn barrier( return; } - log::trace!("\ttex {index}: transition simple {current_simple:?} -> {new_simple:?}"); - barriers.push(PendingTransition { id: index as _, selector: texture_selector.clone(), @@ -1386,10 +1344,6 @@ unsafe fn barrier( continue; } - log::trace!( - "\ttex {index}: transition {selector:?} {current_simple:?} -> {new_state:?}" - ); - barriers.push(PendingTransition { id: index as _, selector, @@ -1410,11 +1364,6 @@ unsafe fn barrier( continue; } - log::trace!( - "\ttex {index}: transition mip {mip_id} layers {layers:?} \ - {current_layer_state:?} -> {new_simple:?}" - ); - barriers.push(PendingTransition { id: index as _, selector: TextureSelector { @@ -1444,11 +1393,6 @@ unsafe fn barrier( continue; } - log::trace!( - "\ttex {index}: transition mip {mip_id} layers {layers:?} \ - {current_layer_state:?} -> {new_state:?}" - ); - barriers.push(PendingTransition { id: index as _, selector: TextureSelector { diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index d12d98246e..3488e981b5 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -276,7 +276,7 @@ fn map_storage_format_to_naga(format: wgt::TextureFormat) -> Option Sf::Rgb10a2Uint, Tf::Rgb10a2Unorm => Sf::Rgb10a2Unorm, - Tf::Rg11b10Float => Sf::Rg11b10Float, + Tf::Rg11b10UFloat => Sf::Rg11b10UFloat, Tf::Rg32Uint => Sf::Rg32Uint, Tf::Rg32Sint => Sf::Rg32Sint, @@ -332,7 +332,7 @@ fn map_storage_format_from_naga(format: naga::StorageFormat) -> wgt::TextureForm Sf::Rgb10a2Uint => Tf::Rgb10a2Uint, Sf::Rgb10a2Unorm => Tf::Rgb10a2Unorm, - Sf::Rg11b10Float => Tf::Rg11b10Float, + Sf::Rg11b10UFloat => Tf::Rg11b10UFloat, Sf::Rg32Uint => Tf::Rg32Uint, Sf::Rg32Sint => Tf::Rg32Sint, @@ -661,7 +661,7 @@ impl NumericType { Tf::Rgba8Sint | Tf::Rgba16Sint | Tf::Rgba32Sint => { (NumericDimension::Vector(Vs::Quad), Scalar::I32) } - Tf::Rg11b10Float => (NumericDimension::Vector(Vs::Tri), Scalar::F32), + Tf::Rg11b10UFloat => (NumericDimension::Vector(Vs::Tri), Scalar::F32), Tf::Stencil8 | Tf::Depth16Unorm | Tf::Depth32Float diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 5b1fcb7261..eedd027bfe 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-hal" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU hardware abstraction layer" @@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0" # copy the crates it actually uses out of the workspace, so it's meaningful for # them to have less restrictive MSRVs individually than the workspace as a # whole, if their code permits. See `../README.md` for details. -rust-version = "1.74" +rust-version = "1.76" [package.metadata.docs.rs] # Ideally we would enable all the features. 
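The texture-tracker hunks above all drop their `A: HalApi` parameter: the tracker now holds type-erased handles, and backend dispatch happens behind the new `Dyn*` traits added later in this patch. A minimal sketch of the type-erasure mechanism (names here are hypothetical stand-ins, not the real wgpu definitions):

use std::sync::Arc;

// Stand-in for the real `Dyn*` resource traits in `wgpu-hal/src/dynamic/`.
trait DynTexture: std::fmt::Debug {}

#[derive(Debug)]
struct VkTexture; // hypothetical concrete backend type
impl DynTexture for VkTexture {}

// Before: `struct TextureTracker<A: HalApi> { owned: Vec<Arc<Texture<A>>> }`
// forced everything touching the tracker to be generic over the backend too.
// After: the tracker is a plain struct holding type-erased handles.
#[derive(Debug, Default)]
struct TextureTracker {
    owned: Vec<Arc<dyn DynTexture>>,
}

fn main() {
    let mut tracker = TextureTracker::default();
    tracker.owned.push(Arc::new(VkTexture)); // unsized coercion erases the backend type
    println!("{tracker:?}");
}

The same coercion is what the per-backend `Dyn*` impls (see `impl_dyn_resource!` in `wgpu-hal/src/dx12/mod.rs` below) enable for each concrete resource type.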
@@ -61,7 +61,10 @@ gles = [ "dep:khronos-egl", "dep:libloading", "dep:ndk-sys", - "winapi/libloaderapi", + "windows/Win32_Graphics_OpenGL", + "windows/Win32_Graphics_Gdi", + "windows/Win32_System_LibraryLoader", + "windows/Win32_UI_WindowsAndMessaging", ] ## Enables the DX12 backend when targeting Windows. ## @@ -107,53 +110,53 @@ name = "raw-gles" required-features = ["gles"] [dependencies] -bitflags = "2" -parking_lot = ">=0.11, <0.13" -profiling = { version = "1", default-features = false } -raw-window-handle = "0.6" -thiserror = "1" -once_cell = "1.19.0" +bitflags.workspace = true +parking_lot.workspace = true +profiling = { workspace = true, default-features = false } +raw-window-handle.workspace = true +thiserror.workspace = true +once_cell.workspace = true # backends common -arrayvec = "0.7" -rustc-hash = "1.1" -log = "0.4" +arrayvec.workspace = true +rustc-hash.workspace = true +log.workspace = true # backend: Gles -glow = { version = "0.13.1", optional = true } +glow = { workspace = true, optional = true } [dependencies.wgt] package = "wgpu-types" path = "../wgpu-types" -version = "0.20.0" +version = "22.0.0" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] # backend: Vulkan -ash = { version = "0.38.0", optional = true } -gpu-alloc = { version = "0.6", optional = true } -gpu-descriptor = { version = "0.3", optional = true } -smallvec = { version = "1", optional = true, features = ["union"] } +ash = { workspace = true, optional = true } +gpu-alloc = { workspace = true, optional = true } +gpu-descriptor = { workspace = true, optional = true } +smallvec = { workspace = true, optional = true, features = ["union"] } -khronos-egl = { version = "6", features = ["dynamic"], optional = true } -libloading = { version = ">=0.7, <0.9", optional = true } -renderdoc-sys = { version = "1.1.0", optional = true } +khronos-egl = { workspace = true, features = ["dynamic"], optional = true } +libloading = { workspace = true, optional = true } +renderdoc-sys = { workspace = true, optional = true } [target.'cfg(target_os = "emscripten")'.dependencies] -khronos-egl = { version = "6", features = ["static", "no-pkg-config"] } +khronos-egl = { workspace = true, features = ["static", "no-pkg-config"] } #Note: it's unused by emscripten, but we keep it to have single code base in egl.rs -libloading = { version = ">=0.7, <0.9", optional = true } +libloading = { workspace = true, optional = true } [target.'cfg(windows)'.dependencies] +# backend: Dx12 and Gles +windows = { workspace = true, optional = true } # backend: Dx12 -bit-set = { version = "0.6", optional = true } -range-alloc = { version = "0.1", optional = true } -gpu-allocator = { version = "0.26", default-features = false, features = [ - "d3d12", - "public-winapi", -], optional = true } -hassle-rs = { version = "0.11", optional = true } +bit-set = { workspace = true, optional = true } +range-alloc = { workspace = true, optional = true } +gpu-allocator = { workspace = true, optional = true } +hassle-rs = { workspace = true, optional = true } + # backend: Gles -glutin_wgl_sys = { version = "0.6", optional = true } +glutin_wgl_sys = { workspace = true, optional = true } winapi = { version = "0.3", features = [ "profileapi", @@ -161,38 +164,38 @@ winapi = { version = "0.3", features = [ "winuser", "dcomp", ] } -d3d12 = { path = "../d3d12/", version = "0.20.0", optional = true, features = [ +d3d12 = { path = "../d3d12/", version = "22.0.0", optional = true, features = [ "libloading", ] } [target.'cfg(any(target_os="macos", 
target_os="ios"))'.dependencies] # backend: Metal -block = { version = "0.1", optional = true } +block = { workspace = true, optional = true } -metal = { version = "0.28.0" } -objc = "0.2.5" -core-graphics-types = "0.1" +metal.workspace = true +objc.workspace = true +core-graphics-types.workspace = true [target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies] -wasm-bindgen = "0.2.87" -web-sys = { version = "0.3.69", features = [ +wasm-bindgen.workspace = true +web-sys = { workspace = true, features = [ "Window", "HtmlCanvasElement", "WebGl2RenderingContext", "OffscreenCanvas", ] } -js-sys = "0.3.69" +js-sys.workspace = true [target.'cfg(unix)'.dependencies] -libc = "0.2" +libc.workspace = true [target.'cfg(target_os = "android")'.dependencies] -android_system_properties = { version = "0.1.1", optional = true } -ndk-sys = { version = "0.5.0", optional = true } +android_system_properties = { workspace = true, optional = true } +ndk-sys = { workspace = true, optional = true } [dependencies.naga] path = "../naga" -version = "0.20.0" +version = "22.0.0" [build-dependencies] cfg_aliases.workspace = true @@ -200,16 +203,14 @@ cfg_aliases.workspace = true # DEV dependencies [dev-dependencies.naga] path = "../naga" -version = "0.20.0" +version = "22.0.0" features = ["wgsl-in"] [dev-dependencies] -cfg-if = "1" -env_logger = "0.11" -glam = "0.27.0" # for ray-traced-triangle example -winit = { version = "0.29", features = [ - "android-native-activity", -] } # for "halmark" example +cfg-if.workspace = true +env_logger.workspace = true +glam.workspace = true # for ray-traced-triangle example +winit.workspace = true # for "halmark" example [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] -glutin = "0.29.1" # for "gles" example +glutin.workspace = true # for "gles" example diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index d61cec7380..dabcea418a 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -257,7 +257,6 @@ impl Example { entry_point: "vs_main", constants: &constants, zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, vertex_buffers: &[], fragment_stage: Some(hal::ProgrammableStage { @@ -265,7 +264,6 @@ impl Example { entry_point: "fs_main", constants: &constants, zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }), primitive: wgt::PrimitiveState { topology: wgt::PrimitiveTopology::TriangleStrip, @@ -301,7 +299,7 @@ impl Example { mapping.ptr.as_ptr(), texture_data.len(), ); - device.unmap_buffer(&staging_buffer).unwrap(); + device.unmap_buffer(&staging_buffer); assert!(mapping.is_coherent); } @@ -410,7 +408,7 @@ impl Example { mapping.ptr.as_ptr(), mem::size_of::(), ); - device.unmap_buffer(&buffer).unwrap(); + device.unmap_buffer(&buffer); assert!(mapping.is_coherent); buffer }; @@ -580,7 +578,7 @@ impl Example { self.surface.unconfigure(&self.device); self.device.exit(self.queue); - self.instance.destroy_surface(self.surface); + drop(self.surface); drop(self.adapter); } } @@ -647,7 +645,7 @@ impl Example { size, ); assert!(mapping.is_coherent); - self.device.unmap_buffer(&self.local_buffer).unwrap(); + self.device.unmap_buffer(&self.local_buffer); } } @@ -814,6 +812,8 @@ fn main() { let example_result = Example::::init(&window); let mut example = Some(example_result.expect("Selected backend is not supported")); + println!("Press space to spawn bunnies."); + let mut last_frame_inst = Instant::now(); let (mut frame_count, mut 
accum_time) = (0, 0.0); diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs index ceab5b065b..06df610658 100644 --- a/wgpu-hal/examples/raw-gles.rs +++ b/wgpu-hal/examples/raw-gles.rs @@ -49,18 +49,19 @@ fn main() { match event { Event::LoopDestroyed => (), - Event::WindowEvent { event, .. } => match event { - WindowEvent::CloseRequested - | WindowEvent::KeyboardInput { - input: - KeyboardInput { - virtual_keycode: Some(VirtualKeyCode::Escape), - .. - }, - .. - } => *control_flow = ControlFlow::Exit, - _ => (), - }, + Event::WindowEvent { + event: + WindowEvent::CloseRequested + | WindowEvent::KeyboardInput { + input: + KeyboardInput { + virtual_keycode: Some(VirtualKeyCode::Escape), + .. + }, + .. + }, + .. + } => *control_flow = ControlFlow::Exit, _ => (), } }); diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index e6481aae64..b1aceeb101 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -379,7 +379,6 @@ impl Example { entry_point: "main", constants: &Default::default(), zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, cache: None, }) @@ -413,7 +412,7 @@ impl Example { mapping.ptr.as_ptr(), vertices_size_in_bytes, ); - device.unmap_buffer(&vertices_buffer).unwrap(); + device.unmap_buffer(&vertices_buffer); assert!(mapping.is_coherent); vertices_buffer @@ -438,7 +437,7 @@ impl Example { mapping.ptr.as_ptr(), indices_size_in_bytes, ); - device.unmap_buffer(&indices_buffer).unwrap(); + device.unmap_buffer(&indices_buffer); assert!(mapping.is_coherent); indices_buffer @@ -537,7 +536,7 @@ impl Example { mapping.ptr.as_ptr(), uniforms_size, ); - device.unmap_buffer(&uniform_buffer).unwrap(); + device.unmap_buffer(&uniform_buffer); assert!(mapping.is_coherent); uniform_buffer }; @@ -680,7 +679,7 @@ impl Example { mapping.ptr.as_ptr(), instances_buffer_size, ); - device.unmap_buffer(&instances_buffer).unwrap(); + device.unmap_buffer(&instances_buffer); assert!(mapping.is_coherent); instances_buffer @@ -848,7 +847,7 @@ impl Example { mapping.ptr.as_ptr(), instances_buffer_size, ); - self.device.unmap_buffer(&self.instances_buffer).unwrap(); + self.device.unmap_buffer(&self.instances_buffer); assert!(mapping.is_coherent); } @@ -1040,7 +1039,7 @@ impl Example { self.surface.unconfigure(&self.device); self.device.exit(self.queue); - self.instance.destroy_surface(self.surface); + drop(self.surface); drop(self.adapter); } } diff --git a/wgpu-hal/src/auxil/dxgi/conv.rs b/wgpu-hal/src/auxil/dxgi/conv.rs index e5162362f7..d84e082df1 100644 --- a/wgpu-hal/src/auxil/dxgi/conv.rs +++ b/wgpu-hal/src/auxil/dxgi/conv.rs @@ -44,7 +44,7 @@ pub fn map_texture_format_failable(format: wgt::TextureFormat) -> Option DXGI_FORMAT_R9G9B9E5_SHAREDEXP, Tf::Rgb10a2Uint => DXGI_FORMAT_R10G10B10A2_UINT, Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, - Tf::Rg11b10Float => DXGI_FORMAT_R11G11B10_FLOAT, + Tf::Rg11b10UFloat => DXGI_FORMAT_R11G11B10_FLOAT, Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT, Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT, Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT, diff --git a/wgpu-hal/src/auxil/renderdoc.rs b/wgpu-hal/src/auxil/renderdoc.rs index 15b2c1039a..240d9dda29 100644 --- a/wgpu-hal/src/auxil/renderdoc.rs +++ b/wgpu-hal/src/auxil/renderdoc.rs @@ -83,7 +83,7 @@ impl RenderDoc { match unsafe { get_api(10401, &mut obj) } { 1 => RenderDoc::Available { api: RenderDocApi { - api: unsafe { *(obj as *mut renderdoc_sys::RENDERDOC_API_1_4_1) 
}, + api: unsafe { *obj.cast::() }, lib: renderdoc_lib, }, }, diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 6c8ed1ccad..72b9d04b71 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -8,7 +8,8 @@ use winapi::{ shared::{ dxgi, dxgi1_2, dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, minwindef::DWORD, windef, winerror, }, - um::{d3d12 as d3d12_ty, d3d12sdklayers, winuser}, + um::{d3d12 as d3d12_ty, d3d12sdklayers, winnt, winuser}, + Interface, }; impl Drop for super::Adapter { @@ -87,7 +88,7 @@ impl super::Adapter { unsafe { device.CheckFeatureSupport( d3d12_ty::D3D12_FEATURE_FEATURE_LEVELS, - &mut device_levels as *mut _ as *mut _, + ptr::from_mut(&mut device_levels).cast(), mem::size_of::() as _, ) }; @@ -110,7 +111,7 @@ impl super::Adapter { assert_eq!(0, unsafe { device.CheckFeatureSupport( d3d12_ty::D3D12_FEATURE_ARCHITECTURE, - &mut features_architecture as *mut _ as *mut _, + ptr::from_mut(&mut features_architecture).cast(), mem::size_of::() as _, ) }); @@ -130,7 +131,24 @@ impl super::Adapter { } else { wgt::DeviceType::DiscreteGpu }, - driver: String::new(), + driver: { + let mut i: winnt::LARGE_INTEGER = unsafe { mem::zeroed() }; + if 0 == unsafe { + adapter.CheckInterfaceSupport(&dxgi::IDXGIDevice::uuidof(), &mut i) + } { + let quad_part = unsafe { *i.QuadPart() }; + const MASK: i64 = 0xFFFF; + format!( + "{}.{}.{}.{}", + quad_part >> 48, + (quad_part >> 32) & MASK, + (quad_part >> 16) & MASK, + quad_part & MASK + ) + } else { + String::new() + } + }, driver_info: String::new(), }; @@ -138,7 +156,7 @@ impl super::Adapter { assert_eq!(0, unsafe { device.CheckFeatureSupport( d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS, - &mut options as *mut _ as *mut _, + ptr::from_mut(&mut options).cast(), mem::size_of::() as _, ) }); @@ -149,7 +167,7 @@ impl super::Adapter { let hr = unsafe { device.CheckFeatureSupport( d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS2, - &mut features2 as *mut _ as *mut _, + ptr::from_mut(&mut features2).cast(), mem::size_of::() as _, ) }; @@ -162,7 +180,7 @@ impl super::Adapter { let hr = unsafe { device.CheckFeatureSupport( 21, // D3D12_FEATURE_D3D12_OPTIONS3 - &mut features3 as *mut _ as *mut _, + ptr::from_mut(&mut features3).cast(), mem::size_of::() as _, ) }; @@ -192,7 +210,7 @@ impl super::Adapter { if 0 == unsafe { device.CheckFeatureSupport( 7, // D3D12_FEATURE_SHADER_MODEL - &mut sm as *mut _ as *mut _, + ptr::from_mut(&mut sm).cast(), mem::size_of::() as _, ) @@ -281,6 +299,7 @@ impl super::Adapter { | wgt::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS | wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES | wgt::Features::TEXTURE_COMPRESSION_BC + | wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D | wgt::Features::CLEAR_TEXTURE | wgt::Features::TEXTURE_FORMAT_16BIT_NORM | wgt::Features::PUSH_CONSTANTS @@ -319,7 +338,7 @@ impl super::Adapter { let hr = unsafe { device.CheckFeatureSupport( d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, - &mut bgra8unorm_info as *mut _ as *mut _, + ptr::from_mut(&mut bgra8unorm_info).cast(), mem::size_of::() as _, ) }; @@ -335,7 +354,7 @@ impl super::Adapter { let hr = unsafe { device.CheckFeatureSupport( d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1, - &mut features1 as *mut _ as *mut _, + ptr::from_mut(&mut features1).cast(), mem::size_of::() as _, ) }; @@ -360,7 +379,7 @@ impl super::Adapter { let hr = unsafe { device.CheckFeatureSupport( 37, // D3D12_FEATURE_D3D12_OPTIONS9 - &mut features9 as *mut _ as *mut _, + ptr::from_mut(&mut features9).cast(), mem::size_of::() as _, ) }; @@ -568,7 +587,7 @@ impl 
crate::Adapter for super::Adapter { assert_eq!(winerror::S_OK, unsafe { self.device.CheckFeatureSupport( d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, - &mut data as *mut _ as *mut _, + ptr::from_mut(&mut data).cast(), mem::size_of::() as _, ) }); diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 3c535b2234..0356b91978 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -69,7 +69,7 @@ impl super::CommandEncoder { self.pass.kind = kind; if let Some(label) = label { let (wide_label, size) = self.temp.prepare_marker(label); - unsafe { list.BeginEvent(0, wide_label.as_ptr() as *const _, size) }; + unsafe { list.BeginEvent(0, wide_label.as_ptr().cast(), size) }; self.pass.has_label = true; } self.pass.dirty_root_elements = 0; @@ -220,7 +220,6 @@ impl super::CommandEncoder { } fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) { - log::trace!("Reset signature {:?}", layout.signature); if let Some(root_index) = layout.special_constants_root_index { self.pass.root_elements[root_index as usize] = super::RootElement::SpecialConstantBuffer { @@ -311,21 +310,11 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { self.temp.barriers.clear(); - log::trace!( - "List {:p} buffer transitions", - self.list.as_ref().unwrap().as_ptr() - ); for barrier in barriers { - log::trace!( - "\t{:p}: usage {:?}..{:?}", - barrier.buffer.resource.as_ptr(), - barrier.usage.start, - barrier.usage.end - ); let s0 = conv::map_buffer_usage_to_state(barrier.usage.start); let s1 = conv::map_buffer_usage_to_state(barrier.usage.end); if s0 != s1 { @@ -370,22 +359,11 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn transition_textures<'a, T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { self.temp.barriers.clear(); - log::trace!( - "List {:p} texture transitions", - self.list.as_ref().unwrap().as_ptr() - ); for barrier in barriers { - log::trace!( - "\t{:p}: usage {:?}..{:?}, range {:?}", - barrier.texture.resource.as_ptr(), - barrier.usage.start, - barrier.usage.end, - barrier.range - ); let s0 = conv::map_texture_usage_to_state(barrier.usage.start); let s1 = conv::map_texture_usage_to_state(barrier.usage.end); if s0 != s1 { @@ -683,7 +661,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // render - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { + unsafe fn begin_render_pass( + &mut self, + desc: &crate::RenderPassDescriptor, + ) { unsafe { self.begin_pass(super::PassKind::Render, desc.label) }; // Start timestamp if any (before all other commands but after debug marker) @@ -879,13 +860,11 @@ impl crate::CommandEncoder for super::CommandEncoder { group: &super::BindGroup, dynamic_offsets: &[wgt::DynamicOffset], ) { - log::trace!("Set group[{}]", index); let info = &layout.bind_group_infos[index as usize]; let mut root_index = info.base_root_index as usize; // Bind CBV/SRC/UAV descriptor tables if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { - log::trace!("\tBind element[{}] = view", root_index); self.pass.root_elements[root_index] = super::RootElement::Table(group.handle_views.unwrap().gpu); root_index += 1; @@ -893,7 +872,6 @@ impl crate::CommandEncoder for super::CommandEncoder { // Bind Sampler descriptor tables. 
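// (D3D12 keeps samplers in their own descriptor heap type, so they can never
// share a table with the CBV/SRV/UAV descriptors bound above.)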
if info.tables.contains(super::TableTypes::SAMPLERS) { - log::trace!("\tBind element[{}] = sampler", root_index); self.pass.root_elements[root_index] = super::RootElement::Table(group.handle_samplers.unwrap().gpu); root_index += 1; @@ -906,7 +884,6 @@ impl crate::CommandEncoder for super::CommandEncoder { .zip(group.dynamic_buffers.iter()) .zip(dynamic_offsets) { - log::trace!("\tBind element[{}] = dynamic", root_index); self.pass.root_elements[root_index] = super::RootElement::DynamicOffsetBuffer { kind, address: gpu_base + offset as d3d12::GpuAddress, @@ -950,7 +927,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list .as_ref() .unwrap() - .SetMarker(0, wide_label.as_ptr() as *const _, size) + .SetMarker(0, wide_label.as_ptr().cast(), size) }; } unsafe fn begin_debug_marker(&mut self, group_label: &str) { @@ -959,7 +936,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list .as_ref() .unwrap() - .BeginEvent(0, wide_label.as_ptr() as *const _, size) + .BeginEvent(0, wide_label.as_ptr().cast(), size) }; } unsafe fn end_debug_marker(&mut self) { @@ -996,7 +973,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_index_buffer<'a>( &mut self, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, format: wgt::IndexFormat, ) { self.list.as_ref().unwrap().set_index_buffer( @@ -1008,7 +985,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_vertex_buffer<'a>( &mut self, index: u32, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, ) { let vb = &mut self.pass.vertex_buffers[index as usize]; vb.BufferLocation = binding.resolve_address(); @@ -1156,7 +1133,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_compute_pass<'a>( &mut self, - desc: &crate::ComputePassDescriptor<'a, super::Api>, + desc: &crate::ComputePassDescriptor<'a, super::QuerySet>, ) { unsafe { self.begin_pass(super::PassKind::Compute, desc.label) }; @@ -1213,7 +1190,13 @@ impl crate::CommandEncoder for super::CommandEncoder { _descriptors: T, ) where super::Api: 'a, - T: IntoIterator>, + T: IntoIterator< + Item = crate::BuildAccelerationStructureDescriptor< + 'a, + super::Buffer, + super::AccelerationStructure, + >, + >, { // Implement using `BuildRaytracingAccelerationStructure`: // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index eeb60acbf6..8cfd8deaee 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -196,7 +196,6 @@ impl super::Device { } let value = cur_value + 1; - log::debug!("Waiting for idle with value {}", value); self.present_queue.signal(&self.idler.fence, value); let hr = self .idler @@ -212,10 +211,10 @@ impl super::Device { /// allowed to be a subset of the vertex outputs. 
fn load_shader( &self, - stage: &crate::ProgrammableStage, + stage: &crate::ProgrammableStage, layout: &super::PipelineLayout, naga_stage: naga::ShaderStage, - fragment_stage: Option<&crate::ProgrammableStage>, + fragment_stage: Option<&crate::ProgrammableStage>, ) -> Result { use naga::back::hlsl; @@ -235,7 +234,7 @@ impl super::Device { &stage.module.naga.info, stage.constants, ) - .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))?; + .map_err(|e| crate::PipelineError::PipelineConstants(stage_bit, format!("HLSL: {e:?}")))?; let needs_temp_options = stage.zero_initialize_workgroup_memory != layout.naga_options.zero_initialize_workgroup_memory; @@ -404,6 +403,9 @@ impl crate::Device for super::Device { unsafe fn destroy_buffer(&self, mut buffer: super::Buffer) { // Only happens when it's using the windows_rs feature and there's an allocation if let Some(alloc) = buffer.allocation.take() { + // Resource should be dropped before free suballocation + drop(buffer); + super::suballocation::free_buffer_allocation( self, alloc, @@ -434,9 +436,8 @@ impl crate::Device for super::Device { }) } - unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), DeviceError> { + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) { unsafe { (*buffer.resource).Unmap(0, ptr::null()) }; - Ok(()) } unsafe fn flush_mapped_ranges(&self, _buffer: &super::Buffer, _ranges: I) {} @@ -494,6 +495,9 @@ impl crate::Device for super::Device { unsafe fn destroy_texture(&self, mut texture: super::Texture) { if let Some(alloc) = texture.allocation.take() { + // Resource should be dropped before free suballocation + drop(texture); + super::suballocation::free_texture_allocation( self, alloc, @@ -685,7 +689,7 @@ impl crate::Device for super::Device { unsafe fn create_command_encoder( &self, - desc: &crate::CommandEncoderDescriptor, + desc: &crate::CommandEncoderDescriptor, ) -> Result { let allocator = self .raw @@ -772,7 +776,7 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_layout( &self, - desc: &crate::PipelineLayoutDescriptor, + desc: &crate::PipelineLayoutDescriptor, ) -> Result { use naga::back::hlsl; // Pipeline layouts are implemented as RootSignature for D3D12. 
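The `destroy_buffer`/`destroy_texture` hunks above add an explicit `drop` so the D3D12 resource is released before its suballocation is returned to the allocator; freeing the memory first would leave the resource pointing at storage the allocator may reuse. A toy sketch of that ordering constraint (the types here are hypothetical, not the real `gpu-allocator` API):

struct Allocation(u32);

struct Allocator;

impl Allocator {
    fn free(&self, alloc: Allocation) {
        // After this returns, the memory backing `alloc` may be reused.
        println!("freed allocation {}", alloc.0);
    }
}

struct Resource {
    // In the real code this wraps a COM pointer placed inside the allocation.
    id: u32,
}

impl Drop for Resource {
    fn drop(&mut self) {
        println!("released resource {}", self.id);
    }
}

fn destroy(allocator: &Allocator, resource: Resource, allocation: Option<Allocation>) {
    if let Some(alloc) = allocation {
        // Release the resource first: it must not outlive the memory it lives in.
        drop(resource);
        allocator.free(alloc);
    }
}

fn main() {
    // Prints "released resource 7" before "freed allocation 7".
    destroy(&Allocator, Resource { id: 7 }, Some(Allocation(7)));
}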
@@ -812,11 +816,6 @@ impl crate::Device for super::Device { } } - log::debug!( - "Creating Root Signature '{}'", - desc.label.unwrap_or_default() - ); - let mut binding_map = hlsl::BindingMap::default(); let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = ( hlsl::BindTarget::default(), @@ -839,11 +838,6 @@ impl crate::Device for super::Device { if pc_start != u32::MAX && pc_end != u32::MIN { let parameter_index = parameters.len(); let size = (pc_end - pc_start) / 4; - log::debug!( - "\tParam[{}] = push constant (count = {})", - parameter_index, - size, - ); parameters.push(d3d12::RootParameter::constants( d3d12::ShaderVisibility::All, native_binding(&bind_cbv), @@ -937,12 +931,6 @@ impl crate::Device for super::Device { bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1); } if ranges.len() > range_base { - log::debug!( - "\tParam[{}] = views (vis = {:?}, count = {})", - parameters.len(), - visibility_view_static, - ranges.len() - range_base, - ); parameters.push(d3d12::RootParameter::descriptor_table( conv::map_visibility(visibility_view_static), &ranges[range_base..], @@ -976,12 +964,6 @@ impl crate::Device for super::Device { bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1); } if ranges.len() > range_base { - log::debug!( - "\tParam[{}] = samplers (vis = {:?}, count = {})", - parameters.len(), - visibility_sampler, - ranges.len() - range_base, - ); parameters.push(d3d12::RootParameter::descriptor_table( conv::map_visibility(visibility_sampler), &ranges[range_base..], @@ -1031,12 +1013,6 @@ impl crate::Device for super::Device { ); info.dynamic_buffers.push(kind); - log::debug!( - "\tParam[{}] = dynamic {:?} (vis = {:?})", - parameters.len(), - buffer_ty, - dynamic_buffers_visibility, - ); parameters.push(d3d12::RootParameter::descriptor( parameter_ty, dynamic_buffers_visibility, @@ -1057,7 +1033,6 @@ impl crate::Device for super::Device { | crate::PipelineLayoutFlags::NUM_WORK_GROUPS, ) { let parameter_index = parameters.len(); - log::debug!("\tParam[{}] = special", parameter_index); parameters.push(d3d12::RootParameter::constants( d3d12::ShaderVisibility::All, // really needed for VS and CS only native_binding(&bind_cbv), @@ -1070,9 +1045,6 @@ impl crate::Device for super::Device { (None, None) }; - log::trace!("{:#?}", parameters); - log::trace!("Bindings {:#?}", binding_map); - let (blob, error) = self .library .serialize_root_signature( @@ -1100,8 +1072,6 @@ impl crate::Device for super::Device { .create_root_signature(blob, 0) .into_device_result("Root signature creation")?; - log::debug!("\traw = {:?}", raw); - if let Some(label) = desc.label { let cwstr = conv::map_label(label); unsafe { raw.SetName(cwstr.as_ptr()) }; @@ -1134,7 +1104,13 @@ impl crate::Device for super::Device { unsafe fn create_bind_group( &self, - desc: &crate::BindGroupDescriptor, + desc: &crate::BindGroupDescriptor< + super::BindGroupLayout, + super::Buffer, + super::Sampler, + super::TextureView, + super::AccelerationStructure, + >, ) -> Result { let mut cpu_views = desc .layout @@ -1344,7 +1320,11 @@ impl crate::Device for super::Device { unsafe fn create_render_pipeline( &self, - desc: &crate::RenderPipelineDescriptor, + desc: &crate::RenderPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { let (topology_class, topology) = conv::map_topology(desc.primitive.topology); let mut shader_stages = wgt::ShaderStages::VERTEX; @@ -1381,7 +1361,7 @@ impl crate::Device for super::Device { }; for attribute in 
vbuf.attributes { input_element_descs.push(d3d12_ty::D3D12_INPUT_ELEMENT_DESC { - SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _, + SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr().cast(), SemanticIndex: attribute.shader_location, Format: auxil::dxgi::conv::map_vertex_format(attribute.format), InputSlot: i as u32, @@ -1539,7 +1519,11 @@ impl crate::Device for super::Device { unsafe fn create_compute_pipeline( &self, - desc: &crate::ComputePipelineDescriptor, + desc: &crate::ComputePipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute, None)?; @@ -1583,10 +1567,10 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_cache( &self, _desc: &crate::PipelineCacheDescriptor<'_>, - ) -> Result<(), crate::PipelineCacheError> { - Ok(()) + ) -> Result { + Ok(super::PipelineCache) } - unsafe fn destroy_pipeline_cache(&self, (): ()) {} + unsafe fn destroy_pipeline_cache(&self, _: super::PipelineCache) {} unsafe fn create_query_set( &self, @@ -1744,7 +1728,7 @@ impl crate::Device for super::Device { { unsafe { self.render_doc - .start_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + .start_frame_capture(self.raw.as_mut_ptr().cast(), ptr::null_mut()) } } #[cfg(not(feature = "renderdoc"))] @@ -1755,13 +1739,13 @@ impl crate::Device for super::Device { #[cfg(feature = "renderdoc")] unsafe { self.render_doc - .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + .end_frame_capture(self.raw.as_mut_ptr().cast(), ptr::null_mut()) } } unsafe fn get_acceleration_structure_build_sizes<'a>( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>, ) -> crate::AccelerationStructureBuildSizes { // Implement using `GetRaytracingAccelerationStructurePrebuildInfo`: // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#getraytracingaccelerationstructureprebuildinfo @@ -1796,4 +1780,41 @@ impl crate::Device for super::Device { fn get_internal_counters(&self) -> wgt::HalCounters { self.counters.clone() } + + #[cfg(feature = "windows_rs")] + fn generate_allocator_report(&self) -> Option { + let mut upstream = { + self.mem_allocator + .as_ref()? 
+ .lock() + .allocator + .generate_report() + }; + + let allocations = upstream + .allocations + .iter_mut() + .map(|alloc| wgt::AllocationReport { + name: mem::take(&mut alloc.name), + offset: alloc.offset, + size: alloc.size, + }) + .collect(); + + let blocks = upstream + .blocks + .iter() + .map(|block| wgt::MemoryBlockReport { + size: block.size, + allocations: block.allocations.clone(), + }) + .collect(); + + Some(wgt::AllocatorReport { + allocations, + blocks, + total_allocated_bytes: upstream.total_allocated_bytes, + total_reserved_bytes: upstream.total_reserved_bytes, + }) + } } diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs index 4a4c6c6ff9..c9557355fb 100644 --- a/wgpu-hal/src/dx12/instance.rs +++ b/wgpu-hal/src/dx12/instance.rs @@ -81,7 +81,7 @@ impl crate::Instance for super::Instance { let hr = unsafe { factory5.CheckFeatureSupport( dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, - &mut allow_tearing as *mut _ as *mut _, + std::ptr::from_mut(&mut allow_tearing).cast(), mem::size_of::() as _, ) }; @@ -143,9 +143,6 @@ impl crate::Instance for super::Instance { ))), } } - unsafe fn destroy_surface(&self, _surface: super::Surface) { - // just drop - } unsafe fn enumerate_adapters( &self, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 0bb7adc75e..8401bbe1eb 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -82,11 +82,35 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; - type PipelineCache = (); + type PipelineCache = PipelineCache; type AccelerationStructure = AccelerationStructure; } +crate::impl_dyn_resource!( + Adapter, + AccelerationStructure, + BindGroup, + BindGroupLayout, + Buffer, + CommandBuffer, + CommandEncoder, + ComputePipeline, + Device, + Fence, + Instance, + PipelineCache, + PipelineLayout, + QuerySet, + Queue, + RenderPipeline, + Sampler, + ShaderModule, + Surface, + Texture, + TextureView +); + // Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. 
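// (The 64 slots are DWORDs: a descriptor table or a single root constant costs
// one DWORD, while a root descriptor costs two.)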
const MAX_ROOT_ELEMENTS: usize = 64; const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10; @@ -394,6 +418,8 @@ pub struct CommandBuffer { raw: d3d12::GraphicsCommandList, } +impl crate::DynCommandBuffer for CommandBuffer {} + unsafe impl Send for CommandBuffer {} unsafe impl Sync for CommandBuffer {} @@ -407,7 +433,9 @@ pub struct Buffer { unsafe impl Send for Buffer {} unsafe impl Sync for Buffer {} -impl crate::BufferBinding<'_, Api> { +impl crate::DynBuffer for Buffer {} + +impl crate::BufferBinding<'_, Buffer> { fn resolve_size(&self) -> wgt::BufferAddress { match self.size { Some(size) => size.get(), @@ -431,6 +459,15 @@ pub struct Texture { allocation: Option, } +impl crate::DynTexture for Texture {} +impl crate::DynSurfaceTexture for Texture {} + +impl std::borrow::Borrow for Texture { + fn borrow(&self) -> &dyn crate::DynTexture { + self + } +} + unsafe impl Send for Texture {} unsafe impl Sync for Texture {} @@ -470,6 +507,8 @@ pub struct TextureView { handle_dsv_rw: Option, } +impl crate::DynTextureView for TextureView {} + unsafe impl Send for TextureView {} unsafe impl Sync for TextureView {} @@ -478,6 +517,8 @@ pub struct Sampler { handle: descriptor::Handle, } +impl crate::DynSampler for Sampler {} + unsafe impl Send for Sampler {} unsafe impl Sync for Sampler {} @@ -487,6 +528,8 @@ pub struct QuerySet { raw_ty: d3d12_ty::D3D12_QUERY_TYPE, } +impl crate::DynQuerySet for QuerySet {} + unsafe impl Send for QuerySet {} unsafe impl Sync for QuerySet {} @@ -495,6 +538,8 @@ pub struct Fence { raw: d3d12::Fence, } +impl crate::DynFence for Fence {} + unsafe impl Send for Fence {} unsafe impl Sync for Fence {} @@ -513,6 +558,8 @@ pub struct BindGroupLayout { copy_counts: Vec, // all 1's } +impl crate::DynBindGroupLayout for BindGroupLayout {} + #[derive(Debug, Clone, Copy)] enum BufferViewKind { Constant, @@ -527,6 +574,8 @@ pub struct BindGroup { dynamic_buffers: Vec, } +impl crate::DynBindGroup for BindGroup {} + bitflags::bitflags! { #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] struct TableTypes: u8 { @@ -571,12 +620,16 @@ pub struct PipelineLayout { naga_options: naga::back::hlsl::Options, } +impl crate::DynPipelineLayout for PipelineLayout {} + #[derive(Debug)] pub struct ShaderModule { naga: crate::NagaShader, raw_name: Option, } +impl crate::DynShaderModule for ShaderModule {} + pub(super) enum CompiledShader { #[allow(unused)] Dxc(Vec), @@ -602,6 +655,8 @@ pub struct RenderPipeline { vertex_strides: [Option; crate::MAX_VERTEX_BUFFERS], } +impl crate::DynRenderPipeline for RenderPipeline {} + unsafe impl Send for RenderPipeline {} unsafe impl Sync for RenderPipeline {} @@ -611,12 +666,21 @@ pub struct ComputePipeline { layout: PipelineLayoutShared, } +impl crate::DynComputePipeline for ComputePipeline {} + unsafe impl Send for ComputePipeline {} unsafe impl Sync for ComputePipeline {} +#[derive(Debug)] +pub struct PipelineCache; + +impl crate::DynPipelineCache for PipelineCache {} + #[derive(Debug)] pub struct AccelerationStructure {} +impl crate::DynAccelerationStructure for AccelerationStructure {} + impl SwapChain { unsafe fn release_resources(self) -> d3d12::ComPtr { self.raw @@ -720,7 +784,7 @@ impl crate::Surface for Surface { self.factory .unwrap_factory2() .create_swapchain_for_composition( - device.present_queue.as_mut_ptr() as *mut _, + device.present_queue.as_mut_ptr().cast(), &desc, ) .into_result() @@ -733,7 +797,7 @@ impl crate::Surface for Surface { .clone() .ok_or(crate::SurfaceError::Other("IDXGIFactoryMedia not found"))? 
.create_swapchain_for_composition_surface_handle( - device.present_queue.as_mut_ptr() as *mut _, + device.present_queue.as_mut_ptr().cast(), handle, &desc, ) @@ -745,7 +809,7 @@ impl crate::Surface for Surface { .as_factory2() .unwrap() .create_swapchain_for_hwnd( - device.present_queue.as_mut_ptr() as *mut _, + device.present_queue.as_mut_ptr().cast(), hwnd, &desc, ) diff --git a/wgpu-hal/src/dynamic/adapter.rs b/wgpu-hal/src/dynamic/adapter.rs new file mode 100644 index 0000000000..aebe8ec775 --- /dev/null +++ b/wgpu-hal/src/dynamic/adapter.rs @@ -0,0 +1,67 @@ +use crate::{ + Adapter, Api, DeviceError, OpenDevice, SurfaceCapabilities, TextureFormatCapabilities, +}; + +use super::{DynDevice, DynQueue, DynResource, DynResourceExt, DynSurface}; + +pub struct DynOpenDevice { + pub device: Box, + pub queue: Box, +} + +impl From> for DynOpenDevice { + fn from(open_device: OpenDevice) -> Self { + Self { + device: Box::new(open_device.device), + queue: Box::new(open_device.queue), + } + } +} + +pub trait DynAdapter: DynResource { + unsafe fn open( + &self, + features: wgt::Features, + limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, + ) -> Result; + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> TextureFormatCapabilities; + + unsafe fn surface_capabilities(&self, surface: &dyn DynSurface) -> Option; + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp; +} + +impl DynAdapter for A { + unsafe fn open( + &self, + features: wgt::Features, + limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, + ) -> Result { + unsafe { A::open(self, features, limits, memory_hints) }.map(|open_device| DynOpenDevice { + device: Box::new(open_device.device), + queue: Box::new(open_device.queue), + }) + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> TextureFormatCapabilities { + unsafe { A::texture_format_capabilities(self, format) } + } + + unsafe fn surface_capabilities(&self, surface: &dyn DynSurface) -> Option { + let surface = surface.expect_downcast_ref(); + unsafe { A::surface_capabilities(self, surface) } + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + unsafe { A::get_presentation_timestamp(self) } + } +} diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs new file mode 100644 index 0000000000..6c0f1cb02d --- /dev/null +++ b/wgpu-hal/src/dynamic/command.rs @@ -0,0 +1,649 @@ +use std::ops::Range; + +use crate::{ + AccelerationStructureBarrier, Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, + BufferTextureCopy, BuildAccelerationStructureDescriptor, ColorAttachment, CommandEncoder, + ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange, + PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier, TextureCopy, TextureUses, +}; + +use super::{ + DynAccelerationStructure, DynBindGroup, DynBuffer, DynCommandBuffer, DynComputePipeline, + DynPipelineLayout, DynQuerySet, DynRenderPipeline, DynResource, DynResourceExt as _, + DynTexture, DynTextureView, +}; + +pub trait DynCommandEncoder: DynResource + std::fmt::Debug { + unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>; + + unsafe fn discard_encoding(&mut self); + + unsafe fn end_encoding(&mut self) -> Result, DeviceError>; + + unsafe fn reset_all(&mut self, command_buffers: Vec>); + + unsafe fn transition_buffers(&mut self, barriers: &[BufferBarrier<'_, dyn DynBuffer>]); + unsafe fn transition_textures(&mut self, 
barriers: &[TextureBarrier<'_, dyn DynTexture>]); + + unsafe fn clear_buffer(&mut self, buffer: &dyn DynBuffer, range: MemoryRange); + + unsafe fn copy_buffer_to_buffer( + &mut self, + src: &dyn DynBuffer, + dst: &dyn DynBuffer, + regions: &[BufferCopy], + ); + + unsafe fn copy_texture_to_texture( + &mut self, + src: &dyn DynTexture, + src_usage: TextureUses, + dst: &dyn DynTexture, + regions: &[TextureCopy], + ); + + unsafe fn copy_buffer_to_texture( + &mut self, + src: &dyn DynBuffer, + dst: &dyn DynTexture, + regions: &[BufferTextureCopy], + ); + + unsafe fn copy_texture_to_buffer( + &mut self, + src: &dyn DynTexture, + src_usage: TextureUses, + dst: &dyn DynBuffer, + regions: &[BufferTextureCopy], + ); + + unsafe fn set_bind_group( + &mut self, + layout: &dyn DynPipelineLayout, + index: u32, + group: &dyn DynBindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ); + + unsafe fn set_push_constants( + &mut self, + layout: &dyn DynPipelineLayout, + stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ); + + unsafe fn insert_debug_marker(&mut self, label: &str); + unsafe fn begin_debug_marker(&mut self, group_label: &str); + unsafe fn end_debug_marker(&mut self); + + unsafe fn begin_query(&mut self, set: &dyn DynQuerySet, index: u32); + unsafe fn end_query(&mut self, set: &dyn DynQuerySet, index: u32); + unsafe fn write_timestamp(&mut self, set: &dyn DynQuerySet, index: u32); + unsafe fn reset_queries(&mut self, set: &dyn DynQuerySet, range: Range); + unsafe fn copy_query_results( + &mut self, + set: &dyn DynQuerySet, + range: Range, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ); + + unsafe fn begin_render_pass( + &mut self, + desc: &RenderPassDescriptor, + ); + unsafe fn end_render_pass(&mut self); + + unsafe fn set_render_pipeline(&mut self, pipeline: &dyn DynRenderPipeline); + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: BufferBinding<'a, dyn DynBuffer>, + format: wgt::IndexFormat, + ); + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: BufferBinding<'a, dyn DynBuffer>, + ); + unsafe fn set_viewport(&mut self, rect: &Rect, depth_range: Range); + unsafe fn set_scissor_rect(&mut self, rect: &Rect); + unsafe fn set_stencil_reference(&mut self, value: u32); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]); + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indirect( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indirect_count( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + count_buffer: &dyn DynBuffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + count_buffer: &dyn DynBuffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor); + unsafe fn end_compute_pass(&mut self); + + unsafe fn set_compute_pipeline(&mut self, pipeline: &dyn DynComputePipeline); + + unsafe fn dispatch(&mut self, count: [u32; 3]); + unsafe fn dispatch_indirect(&mut 
self, buffer: &dyn DynBuffer, offset: wgt::BufferAddress); + + unsafe fn build_acceleration_structures<'a>( + &mut self, + descriptors: &'a [BuildAccelerationStructureDescriptor< + 'a, + dyn DynBuffer, + dyn DynAccelerationStructure, + >], + ); + + unsafe fn place_acceleration_structure_barrier( + &mut self, + barrier: AccelerationStructureBarrier, + ); +} + +impl DynCommandEncoder for C { + unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError> { + unsafe { C::begin_encoding(self, label) } + } + + unsafe fn discard_encoding(&mut self) { + unsafe { C::discard_encoding(self) } + } + + unsafe fn end_encoding(&mut self) -> Result, DeviceError> { + unsafe { C::end_encoding(self) }.map(|cb| { + let boxed_command_buffer: Box<::CommandBuffer> = Box::new(cb); + let boxed_command_buffer: Box = boxed_command_buffer; + boxed_command_buffer + }) + } + + unsafe fn reset_all(&mut self, command_buffers: Vec>) { + unsafe { C::reset_all(self, command_buffers.into_iter().map(|cb| cb.unbox())) } + } + + unsafe fn transition_buffers(&mut self, barriers: &[BufferBarrier<'_, dyn DynBuffer>]) { + let barriers = barriers.iter().map(|barrier| BufferBarrier { + buffer: barrier.buffer.expect_downcast_ref(), + usage: barrier.usage.clone(), + }); + unsafe { self.transition_buffers(barriers) }; + } + + unsafe fn transition_textures(&mut self, barriers: &[TextureBarrier<'_, dyn DynTexture>]) { + let barriers = barriers.iter().map(|barrier| TextureBarrier { + texture: barrier.texture.expect_downcast_ref(), + usage: barrier.usage.clone(), + range: barrier.range, + }); + unsafe { self.transition_textures(barriers) }; + } + + unsafe fn clear_buffer(&mut self, buffer: &dyn DynBuffer, range: MemoryRange) { + let buffer = buffer.expect_downcast_ref(); + unsafe { C::clear_buffer(self, buffer, range) }; + } + + unsafe fn copy_buffer_to_buffer( + &mut self, + src: &dyn DynBuffer, + dst: &dyn DynBuffer, + regions: &[BufferCopy], + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { + C::copy_buffer_to_buffer(self, src, dst, regions.iter().copied()); + } + } + + unsafe fn copy_texture_to_texture( + &mut self, + src: &dyn DynTexture, + src_usage: TextureUses, + dst: &dyn DynTexture, + regions: &[TextureCopy], + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { + C::copy_texture_to_texture(self, src, src_usage, dst, regions.iter().cloned()); + } + } + + unsafe fn copy_buffer_to_texture( + &mut self, + src: &dyn DynBuffer, + dst: &dyn DynTexture, + regions: &[BufferTextureCopy], + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { + C::copy_buffer_to_texture(self, src, dst, regions.iter().cloned()); + } + } + + unsafe fn copy_texture_to_buffer( + &mut self, + src: &dyn DynTexture, + src_usage: TextureUses, + dst: &dyn DynBuffer, + regions: &[BufferTextureCopy], + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { + C::copy_texture_to_buffer(self, src, src_usage, dst, regions.iter().cloned()); + } + } + + unsafe fn set_bind_group( + &mut self, + layout: &dyn DynPipelineLayout, + index: u32, + group: &dyn DynBindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let layout = layout.expect_downcast_ref(); + let group = group.expect_downcast_ref(); + unsafe { C::set_bind_group(self, layout, index, group, dynamic_offsets) }; + } + + unsafe fn set_push_constants( + &mut self, + layout: &dyn DynPipelineLayout, + stages: wgt::ShaderStages, + 
offset_bytes: u32, + data: &[u32], + ) { + let layout = layout.expect_downcast_ref(); + unsafe { C::set_push_constants(self, layout, stages, offset_bytes, data) }; + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + unsafe { + C::insert_debug_marker(self, label); + } + } + + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + unsafe { + C::begin_debug_marker(self, group_label); + } + } + + unsafe fn end_debug_marker(&mut self) { + unsafe { + C::end_debug_marker(self); + } + } + + unsafe fn begin_query(&mut self, set: &dyn DynQuerySet, index: u32) { + let set = set.expect_downcast_ref(); + unsafe { C::begin_query(self, set, index) }; + } + + unsafe fn end_query(&mut self, set: &dyn DynQuerySet, index: u32) { + let set = set.expect_downcast_ref(); + unsafe { C::end_query(self, set, index) }; + } + + unsafe fn write_timestamp(&mut self, set: &dyn DynQuerySet, index: u32) { + let set = set.expect_downcast_ref(); + unsafe { C::write_timestamp(self, set, index) }; + } + + unsafe fn reset_queries(&mut self, set: &dyn DynQuerySet, range: Range) { + let set = set.expect_downcast_ref(); + unsafe { C::reset_queries(self, set, range) }; + } + + unsafe fn copy_query_results( + &mut self, + set: &dyn DynQuerySet, + range: Range, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + let set = set.expect_downcast_ref(); + let buffer = buffer.expect_downcast_ref(); + unsafe { C::copy_query_results(self, set, range, buffer, offset, stride) }; + } + + unsafe fn begin_render_pass( + &mut self, + desc: &RenderPassDescriptor, + ) { + let color_attachments = desc + .color_attachments + .iter() + .map(|attachment| { + attachment + .as_ref() + .map(|attachment| attachment.expect_downcast()) + }) + .collect::>(); + + let desc: RenderPassDescriptor<::QuerySet, ::TextureView> = + RenderPassDescriptor { + label: desc.label, + extent: desc.extent, + sample_count: desc.sample_count, + color_attachments: &color_attachments, + depth_stencil_attachment: desc + .depth_stencil_attachment + .as_ref() + .map(|ds| ds.expect_downcast()), + multiview: desc.multiview, + timestamp_writes: desc + .timestamp_writes + .as_ref() + .map(|writes| writes.expect_downcast()), + occlusion_query_set: desc + .occlusion_query_set + .map(|set| set.expect_downcast_ref()), + }; + unsafe { C::begin_render_pass(self, &desc) }; + } + + unsafe fn end_render_pass(&mut self) { + unsafe { + C::end_render_pass(self); + } + } + + unsafe fn set_viewport(&mut self, rect: &Rect, depth_range: Range) { + unsafe { + C::set_viewport(self, rect, depth_range); + } + } + + unsafe fn set_scissor_rect(&mut self, rect: &Rect) { + unsafe { + C::set_scissor_rect(self, rect); + } + } + + unsafe fn set_stencil_reference(&mut self, value: u32) { + unsafe { + C::set_stencil_reference(self, value); + } + } + + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + unsafe { C::set_blend_constants(self, color) }; + } + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ) { + unsafe { + C::draw( + self, + first_vertex, + vertex_count, + first_instance, + instance_count, + ) + }; + } + + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ) { + unsafe { + C::draw_indexed( + self, + first_index, + index_count, + base_vertex, + first_instance, + instance_count, + ) + }; + } + + unsafe fn draw_indirect( + &mut self, + buffer: &dyn DynBuffer, + offset: 
wgt::BufferAddress, + draw_count: u32, + ) { + let buffer = buffer.expect_downcast_ref(); + unsafe { C::draw_indirect(self, buffer, offset, draw_count) }; + } + + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + let buffer = buffer.expect_downcast_ref(); + unsafe { C::draw_indexed_indirect(self, buffer, offset, draw_count) }; + } + + unsafe fn draw_indirect_count( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + count_buffer: &dyn DynBuffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let buffer = buffer.expect_downcast_ref(); + let count_buffer = count_buffer.expect_downcast_ref(); + unsafe { + C::draw_indirect_count(self, buffer, offset, count_buffer, count_offset, max_count) + }; + } + + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + count_buffer: &dyn DynBuffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let buffer = buffer.expect_downcast_ref(); + let count_buffer = count_buffer.expect_downcast_ref(); + unsafe { + C::draw_indexed_indirect_count( + self, + buffer, + offset, + count_buffer, + count_offset, + max_count, + ) + }; + } + + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) { + let desc = ComputePassDescriptor { + label: desc.label, + timestamp_writes: desc + .timestamp_writes + .as_ref() + .map(|writes| writes.expect_downcast()), + }; + unsafe { C::begin_compute_pass(self, &desc) }; + } + + unsafe fn end_compute_pass(&mut self) { + unsafe { C::end_compute_pass(self) }; + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &dyn DynComputePipeline) { + let pipeline = pipeline.expect_downcast_ref(); + unsafe { C::set_compute_pipeline(self, pipeline) }; + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + unsafe { C::dispatch(self, count) }; + } + + unsafe fn dispatch_indirect(&mut self, buffer: &dyn DynBuffer, offset: wgt::BufferAddress) { + let buffer = buffer.expect_downcast_ref(); + unsafe { C::dispatch_indirect(self, buffer, offset) }; + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &dyn DynRenderPipeline) { + let pipeline = pipeline.expect_downcast_ref(); + unsafe { C::set_render_pipeline(self, pipeline) }; + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: BufferBinding<'a, dyn DynBuffer>, + format: wgt::IndexFormat, + ) { + let binding = binding.expect_downcast(); + unsafe { self.set_index_buffer(binding, format) }; + } + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: BufferBinding<'a, dyn DynBuffer>, + ) { + let binding = binding.expect_downcast(); + unsafe { self.set_vertex_buffer(index, binding) }; + } + + unsafe fn build_acceleration_structures<'a>( + &mut self, + descriptors: &'a [BuildAccelerationStructureDescriptor< + 'a, + dyn DynBuffer, + dyn DynAccelerationStructure, + >], + ) { + // Need to collect entries here so we can reference them in the descriptor. + // TODO: API should be redesigned to avoid this and other descriptor copies that happen due to the dyn api. 
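+        // Two passes: downcast and collect first, then build borrowing descriptors,
+        // so the concretely-typed entry lists outlive the references taken below.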
+ + unsafe fn build_acceleration_structures<'a>( + &mut self, + descriptors: &'a [BuildAccelerationStructureDescriptor< + 'a, + dyn DynBuffer, + dyn DynAccelerationStructure, + >], + ) { + // Need to collect entries here so we can reference them in the descriptor. + // TODO: API should be redesigned to avoid this and other descriptor copies that happen due to the dyn API. + let descriptor_entries = descriptors + .iter() + .map(|d| d.entries.expect_downcast()) + .collect::<Vec<_>>(); + let descriptors = descriptors + .iter() + .zip(descriptor_entries.iter()) + .map(|(d, entries)| BuildAccelerationStructureDescriptor::< + <C::A as Api>::Buffer, + <C::A as Api>::AccelerationStructure, + > { + entries, + mode: d.mode, + flags: d.flags, + source_acceleration_structure: d + .source_acceleration_structure + .map(|a| a.expect_downcast_ref()), + destination_acceleration_structure: d + .destination_acceleration_structure + .expect_downcast_ref(), + scratch_buffer: d.scratch_buffer.expect_downcast_ref(), + scratch_buffer_offset: d.scratch_buffer_offset, + }); + unsafe { C::build_acceleration_structures(self, descriptors.len() as _, descriptors) }; + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + barrier: AccelerationStructureBarrier, + ) { + unsafe { C::place_acceleration_structure_barrier(self, barrier) }; + } +} + +impl<'a> PassTimestampWrites<'a, dyn DynQuerySet> { + pub fn expect_downcast<B: DynQuerySet>(&self) -> PassTimestampWrites<'a, B> { + PassTimestampWrites { + query_set: self.query_set.expect_downcast_ref(), + beginning_of_pass_write_index: self.beginning_of_pass_write_index, + end_of_pass_write_index: self.end_of_pass_write_index, + } + } +} + +impl<'a> Attachment<'a, dyn DynTextureView> { + pub fn expect_downcast<B: DynTextureView>(&self) -> Attachment<'a, B> { + Attachment { + view: self.view.expect_downcast_ref(), + usage: self.usage, + } + } +} + +impl<'a> ColorAttachment<'a, dyn DynTextureView> { + pub fn expect_downcast<B: DynTextureView>(&self) -> ColorAttachment<'a, B> { + ColorAttachment { + target: self.target.expect_downcast(), + resolve_target: self.resolve_target.as_ref().map(|rt| rt.expect_downcast()), + ops: self.ops, + clear_value: self.clear_value, + } + } +} + +impl<'a> DepthStencilAttachment<'a, dyn DynTextureView> { + pub fn expect_downcast<B: DynTextureView>(&self) -> DepthStencilAttachment<'a, B> { + DepthStencilAttachment { + target: self.target.expect_downcast(), + depth_ops: self.depth_ops, + stencil_ops: self.stencil_ops, + clear_value: self.clear_value, + } + } +}
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs new file mode 100644 index 0000000000..c1baf5b76d --- /dev/null +++ b/wgpu-hal/src/dynamic/device.rs @@ -0,0 +1,526 @@ +// Box casts are needed; the alternative would be temporaries, which are more verbose and no more expressive. +#![allow(trivial_casts)] + +use crate::{ + AccelerationStructureBuildSizes, AccelerationStructureDescriptor, Api, BindGroupDescriptor, + BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping, CommandEncoderDescriptor, + ComputePipelineDescriptor, Device, DeviceError, FenceValue, + GetAccelerationStructureBuildSizesDescriptor, Label, MemoryRange, PipelineCacheDescriptor, + PipelineCacheError, PipelineError, PipelineLayoutDescriptor, RenderPipelineDescriptor, + SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor, TextureDescriptor, + TextureViewDescriptor, +}; + +use super::{ + DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandEncoder, + DynComputePipeline, DynFence, DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue, + DynRenderPipeline, DynResource, DynResourceExt as _, DynSampler, DynShaderModule, DynTexture, + DynTextureView, +}; + +pub trait DynDevice: DynResource { + unsafe fn exit(self: Box<Self>, queue: Box<dyn DynQueue>); + + unsafe fn create_buffer( + &self, + desc: &BufferDescriptor, + ) -> Result<Box<dyn DynBuffer>, DeviceError>; + + unsafe fn destroy_buffer(&self, buffer: Box<dyn DynBuffer>); + + unsafe fn map_buffer( + &self, + buffer: &dyn DynBuffer, + range: MemoryRange, + ) -> Result<BufferMapping, DeviceError>; + + unsafe fn unmap_buffer(&self, buffer: &dyn DynBuffer); + + unsafe fn flush_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]); + unsafe fn invalidate_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]); + + unsafe fn create_texture( + &self, + desc: &TextureDescriptor, + ) -> Result<Box<dyn DynTexture>, DeviceError>; + unsafe fn destroy_texture(&self, texture: Box<dyn DynTexture>); + unsafe fn create_texture_view( + &self, + texture: &dyn DynTexture, + desc: &TextureViewDescriptor, + ) -> Result<Box<dyn DynTextureView>, DeviceError>; + unsafe fn destroy_texture_view(&self, view: Box<dyn DynTextureView>); + unsafe fn create_sampler( + &self, + desc: &SamplerDescriptor, + ) -> Result<Box<dyn DynSampler>, DeviceError>; + unsafe fn destroy_sampler(&self, sampler: Box<dyn DynSampler>); + + unsafe fn create_command_encoder( + &self, + desc: &CommandEncoderDescriptor<dyn DynQueue>, + ) -> Result<Box<dyn DynCommandEncoder>, DeviceError>; + unsafe fn destroy_command_encoder(&self, pool: Box<dyn DynCommandEncoder>); + + unsafe fn create_bind_group_layout( + &self, + desc: &BindGroupLayoutDescriptor, + ) -> Result<Box<dyn DynBindGroupLayout>, DeviceError>; + unsafe fn destroy_bind_group_layout(&self, bg_layout: Box<dyn DynBindGroupLayout>); + + unsafe fn create_pipeline_layout( + &self, + desc: &PipelineLayoutDescriptor<dyn DynBindGroupLayout>, + ) -> Result<Box<dyn DynPipelineLayout>, DeviceError>; + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Box<dyn DynPipelineLayout>); + + unsafe fn create_bind_group( + &self, + desc: &BindGroupDescriptor< + dyn DynBindGroupLayout, + dyn DynBuffer, + dyn DynSampler, + dyn DynTextureView, + dyn DynAccelerationStructure, + >, + ) -> Result<Box<dyn DynBindGroup>, DeviceError>; + unsafe fn destroy_bind_group(&self, group: Box<dyn DynBindGroup>); + + unsafe fn create_shader_module( + &self, + desc: &ShaderModuleDescriptor, + shader: ShaderInput, + ) -> Result<Box<dyn DynShaderModule>, ShaderError>; + unsafe fn destroy_shader_module(&self, module: Box<dyn DynShaderModule>); + + unsafe fn create_render_pipeline( + &self, + desc: &RenderPipelineDescriptor< + dyn DynPipelineLayout, + dyn DynShaderModule, + dyn DynPipelineCache, + >, + ) -> Result<Box<dyn DynRenderPipeline>, PipelineError>; + unsafe fn destroy_render_pipeline(&self, pipeline: Box<dyn DynRenderPipeline>); + + unsafe fn create_compute_pipeline( + &self, + desc: &ComputePipelineDescriptor< + dyn DynPipelineLayout, + dyn DynShaderModule, + dyn DynPipelineCache, + >, + ) -> Result<Box<dyn DynComputePipeline>, PipelineError>; + unsafe fn destroy_compute_pipeline(&self, pipeline: Box<dyn DynComputePipeline>); + + unsafe fn create_pipeline_cache( + &self, + desc: &PipelineCacheDescriptor<'_>, + ) -> Result<Box<dyn DynPipelineCache>, PipelineCacheError>; + fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> { + None + } + unsafe fn destroy_pipeline_cache(&self, cache: Box<dyn DynPipelineCache>); + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor) -> Self { + Self { + adapter: Box::new(exposed_adapter.adapter), + info: exposed_adapter.info, + features: exposed_adapter.features, + capabilities: exposed_adapter.capabilities, + } + } +} + +pub trait DynInstance: DynResource { + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Box<dyn DynSurface>, InstanceError>; + + unsafe fn enumerate_adapters( + &self, + surface_hint: Option<&dyn DynSurface>, + ) -> Vec<DynExposedAdapter>; +} + +impl<I: Instance + DynResource> DynInstance for I { + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Box<dyn DynSurface>, InstanceError> { + unsafe { I::create_surface(self, display_handle, window_handle) } + .map(|surface| Box::new(surface) as Box<dyn DynSurface>) + } + + unsafe fn enumerate_adapters( + &self, + surface_hint: Option<&dyn DynSurface>, + ) -> Vec<DynExposedAdapter> { + let surface_hint = surface_hint.map(|s| s.expect_downcast_ref()); + unsafe { I::enumerate_adapters(self, surface_hint) } + .into_iter() + .map(|exposed| DynExposedAdapter { + adapter: Box::new(exposed.adapter), + info: exposed.info, + features: exposed.features, + capabilities: exposed.capabilities, + }) + .collect() + } +}
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs new file mode 100644 index 0000000000..5509d7cce6 --- /dev/null +++ b/wgpu-hal/src/dynamic/mod.rs @@ -0,0 +1,206 @@ +mod adapter; +mod command; +mod device; +mod instance; +mod queue; +mod surface; + +pub use adapter::{DynAdapter, DynOpenDevice}; +pub use command::DynCommandEncoder; +pub use device::DynDevice; +pub use instance::{DynExposedAdapter, DynInstance}; +pub use queue::DynQueue; +pub use surface::{DynAcquiredSurfaceTexture, DynSurface}; + +use std::any::Any; + +use wgt::WasmNotSendSync; + +use crate::{ + AccelerationStructureAABBs, AccelerationStructureEntries, AccelerationStructureInstances, + AccelerationStructureTriangleIndices, AccelerationStructureTriangleTransform, + AccelerationStructureTriangles, BufferBinding, ProgrammableStage, TextureBinding, +}; + +/// Base trait for all resources, allows downcasting via [`Any`]. +pub trait DynResource: Any + WasmNotSendSync + 'static { + fn as_any(&self) -> &dyn Any; + fn as_any_mut(&mut self) -> &mut dyn Any; +} + +/// Utility macro for implementing `DynResource` for a list of types. +macro_rules! impl_dyn_resource { + ($($type:ty),*) => { + $( + impl crate::DynResource for $type { + fn as_any(&self) -> &dyn ::std::any::Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn ::std::any::Any { + self + } + } + )* + }; +} +pub(crate) use impl_dyn_resource;
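For reference, a self-contained sketch of what one invocation of a macro shaped like `impl_dyn_resource!` above expands to, using a local stand-in trait so the example runs on its own:

```rust
use std::any::Any;

// Local stand-in for wgpu-hal's `DynResource`, to keep the sketch
// self-contained.
trait DynResource: Any {
    fn as_any(&self) -> &dyn Any;
    fn as_any_mut(&mut self) -> &mut dyn Any;
}

// Same shape as the macro above: one repetition per listed type.
macro_rules! impl_dyn_resource {
    ($($type:ty),*) => {
        $(
            impl DynResource for $type {
                fn as_any(&self) -> &dyn Any {
                    self
                }
                fn as_any_mut(&mut self) -> &mut dyn Any {
                    self
                }
            }
        )*
    };
}

struct Buffer;
struct Texture;

// One invocation covers a whole backend's resource types.
impl_dyn_resource!(Buffer, Texture);

fn main() {
    let buf = Buffer;
    assert!(buf.as_any().downcast_ref::<Buffer>().is_some());
    let mut tex = Texture;
    assert!(tex.as_any_mut().downcast_mut::<Texture>().is_some());
}
```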
+/// Extension trait for `DynResource` used by implementations of various dynamic resource traits. +trait DynResourceExt { + /// # Panics + /// + /// - Panics if `self` is not downcastable to `T`. + fn expect_downcast_ref<T: DynResource>(&self) -> &T; + /// # Panics + /// + /// - Panics if `self` is not downcastable to `T`. + fn expect_downcast_mut<T: DynResource>(&mut self) -> &mut T; + + /// Unboxes a `Box<Self>` to a concrete type. + /// + /// # Safety + /// + /// - `self` must be the correct concrete type. + unsafe fn unbox<T: DynResource>(self: Box<Self>) -> T; +} + +impl<R: DynResource> DynResourceExt for R { + fn expect_downcast_ref<'a, T: DynResource>(&'a self) -> &'a T { + self.as_any() + .downcast_ref() + .expect("Resource doesn't have the expected backend type.") + } + + fn expect_downcast_mut<'a, T: DynResource>(&'a mut self) -> &'a mut T { + self.as_any_mut() + .downcast_mut() + .expect("Resource doesn't have the expected backend type.") + } + + unsafe fn unbox<T: DynResource>(self: Box<Self>) -> T { + debug_assert!( + <Self as Any>::type_id(self.as_ref()) == std::any::TypeId::of::<T>(), + "Resource doesn't have the expected type, expected {:?}, got {:?}", + std::any::TypeId::of::<T>(), + <Self as Any>::type_id(self.as_ref()) + ); + + let casted_ptr = Box::into_raw(self).cast::<T>(); + // SAFETY: This adheres to the safety contract of `Box::from_raw` because: + // + // - We are casting the value of a previously `Box`ed value, which guarantees: + // - `casted_ptr` is not null. + // - `casted_ptr` is valid for reads and writes, though by itself this does not mean + // valid reads and writes for `T` (read on for that). + // - We don't change the allocator. + // - The contract of `Box::from_raw` requires that an initialized and aligned `T` is stored + // within `casted_ptr`. + *unsafe { Box::from_raw(casted_ptr) } + } +} + +pub trait DynAccelerationStructure: DynResource + std::fmt::Debug {} +pub trait DynBindGroup: DynResource + std::fmt::Debug {} +pub trait DynBindGroupLayout: DynResource + std::fmt::Debug {} +pub trait DynBuffer: DynResource + std::fmt::Debug {} +pub trait DynCommandBuffer: DynResource + std::fmt::Debug {} +pub trait DynComputePipeline: DynResource + std::fmt::Debug {} +pub trait DynFence: DynResource + std::fmt::Debug {} +pub trait DynPipelineCache: DynResource + std::fmt::Debug {} +pub trait DynPipelineLayout: DynResource + std::fmt::Debug {} +pub trait DynQuerySet: DynResource + std::fmt::Debug {} +pub trait DynRenderPipeline: DynResource + std::fmt::Debug {} +pub trait DynSampler: DynResource + std::fmt::Debug {} +pub trait DynShaderModule: DynResource + std::fmt::Debug {} +pub trait DynSurfaceTexture: + DynResource + std::borrow::Borrow<dyn DynTexture> + std::fmt::Debug +{ +} +pub trait DynTexture: DynResource + std::fmt::Debug {} +pub trait DynTextureView: DynResource + std::fmt::Debug {} + +impl<'a> BufferBinding<'a, dyn DynBuffer> { + pub fn expect_downcast<B: DynBuffer>(self) -> BufferBinding<'a, B> { + BufferBinding { + buffer: self.buffer.expect_downcast_ref(), + offset: self.offset, + size: self.size, + } + } +} + +impl<'a> TextureBinding<'a, dyn DynTextureView> { + pub fn expect_downcast<T: DynTextureView>(self) -> TextureBinding<'a, T> { + TextureBinding { + view: self.view.expect_downcast_ref(), + usage: self.usage, + } + } +} + +impl<'a> ProgrammableStage<'a, dyn DynShaderModule> { + fn expect_downcast<T: DynShaderModule>(self) -> ProgrammableStage<'a, T> { + ProgrammableStage { + module: self.module.expect_downcast_ref(), + entry_point: self.entry_point, + constants: self.constants, + zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, + } + } +}
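The `unbox` implementation above combines a `TypeId` check with `Box::into_raw`/`Box::from_raw` to move a value out of a type-erased box. The same trick in standalone, runnable form over `Box<dyn Any>`:

```rust
// Standalone sketch of the `unbox` trick above: move a value out of a
// type-erased `Box` by checking the `TypeId`, then reconstructing the
// concrete `Box` from the raw pointer (toy setup, same safety argument).
use std::any::{Any, TypeId};

fn unbox<T: Any>(erased: Box<dyn Any>) -> T {
    assert_eq!(
        (*erased).type_id(),
        TypeId::of::<T>(),
        "resource doesn't have the expected type"
    );
    let raw = Box::into_raw(erased).cast::<T>();
    // SAFETY: the pointer came from `Box::into_raw` and the TypeId check
    // proves an initialized, properly aligned `T` lives behind it.
    *unsafe { Box::from_raw(raw) }
}

fn main() {
    let erased: Box<dyn Any> = Box::new(42_u32);
    let value: u32 = unbox(erased);
    assert_eq!(value, 42);
}
```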
+ +impl<'a> AccelerationStructureEntries<'a, dyn DynBuffer> { + fn expect_downcast<B: DynBuffer>(&self) -> AccelerationStructureEntries<'a, B> { + match self { + AccelerationStructureEntries::Instances(instances) => { + AccelerationStructureEntries::Instances(AccelerationStructureInstances { + buffer: instances.buffer.map(|b| b.expect_downcast_ref()), + offset: instances.offset, + count: instances.count, + }) + } + AccelerationStructureEntries::Triangles(triangles) => { + AccelerationStructureEntries::Triangles( + triangles + .iter() + .map(|t| AccelerationStructureTriangles { + vertex_buffer: t.vertex_buffer.map(|b| b.expect_downcast_ref()), + vertex_format: t.vertex_format, + first_vertex: t.first_vertex, + vertex_count: t.vertex_count, + vertex_stride: t.vertex_stride, + indices: t.indices.as_ref().map(|i| { + AccelerationStructureTriangleIndices { + buffer: i.buffer.map(|b| b.expect_downcast_ref()), + format: i.format, + offset: i.offset, + count: i.count, + } + }), + transform: t.transform.as_ref().map(|t| { + AccelerationStructureTriangleTransform { + buffer: t.buffer.expect_downcast_ref(), + offset: t.offset, + } + }), + flags: t.flags, + }) + .collect(), + ) + } + AccelerationStructureEntries::AABBs(entries) => AccelerationStructureEntries::AABBs( + entries + .iter() + .map(|e| AccelerationStructureAABBs { + buffer: e.buffer.map(|b| b.expect_downcast_ref()), + offset: e.offset, + count: e.count, + stride: e.stride, + flags: e.flags, + }) + .collect(), + ), + } + } +}
diff --git a/wgpu-hal/src/dynamic/queue.rs b/wgpu-hal/src/dynamic/queue.rs new file mode 100644 index 0000000000..14d7e5a969 --- /dev/null +++ b/wgpu-hal/src/dynamic/queue.rs @@ -0,0 +1,54 @@ +use crate::{ + DeviceError, DynCommandBuffer, DynFence, DynResource, DynSurface, DynSurfaceTexture, + FenceValue, Queue, SurfaceError, +}; + +use super::DynResourceExt as _; + +pub trait DynQueue: DynResource { + unsafe fn submit( + &self, + command_buffers: &[&dyn DynCommandBuffer], + surface_textures: &[&dyn DynSurfaceTexture], + signal_fence: (&mut dyn DynFence, FenceValue), + ) -> Result<(), DeviceError>; + unsafe fn present( + &self, + surface: &dyn DynSurface, + texture: Box<dyn DynSurfaceTexture>, + ) -> Result<(), SurfaceError>; + unsafe fn get_timestamp_period(&self) -> f32; +} + +impl<Q: Queue + DynResource> DynQueue for Q { + unsafe fn submit( + &self, + command_buffers: &[&dyn DynCommandBuffer], + surface_textures: &[&dyn DynSurfaceTexture], + signal_fence: (&mut dyn DynFence, FenceValue), + ) -> Result<(), DeviceError> { + let command_buffers = command_buffers + .iter() + .map(|cb| (*cb).expect_downcast_ref()) + .collect::<Vec<_>>(); + let surface_textures = surface_textures + .iter() + .map(|surface| (*surface).expect_downcast_ref()) + .collect::<Vec<_>>(); + let signal_fence = (signal_fence.0.expect_downcast_mut(), signal_fence.1); + unsafe { Q::submit(self, &command_buffers, &surface_textures, signal_fence) } + } + + unsafe fn present( + &self, + surface: &dyn DynSurface, + texture: Box<dyn DynSurfaceTexture>, + ) -> Result<(), SurfaceError> { + let surface = surface.expect_downcast_ref(); + unsafe { Q::present(self, surface, texture.unbox()) } + } + + unsafe fn get_timestamp_period(&self) -> f32 { + unsafe { Q::get_timestamp_period(self) } + } +}
diff --git a/wgpu-hal/src/dynamic/surface.rs b/wgpu-hal/src/dynamic/surface.rs new file mode 100644 index 0000000000..d6c3dad623 --- /dev/null +++ b/wgpu-hal/src/dynamic/surface.rs @@ -0,0 +1,71 @@ +use crate::{ + DynDevice, DynFence, DynResource, DynSurfaceTexture, Surface, SurfaceConfiguration, + SurfaceError, +}; + +use super::DynResourceExt as _; + +#[derive(Debug)] +pub struct DynAcquiredSurfaceTexture { + pub texture: Box<dyn DynSurfaceTexture>, + /// The presentation configuration no longer matches + /// the surface properties exactly, but can still be used to present + /// to the surface successfully. + pub suboptimal: bool, +} + +pub trait DynSurface: DynResource { + unsafe fn configure( + &self, + device: &dyn DynDevice, + config: &SurfaceConfiguration, + ) -> Result<(), SurfaceError>; + + unsafe fn unconfigure(&self, device: &dyn DynDevice); + + unsafe fn acquire_texture( + &self, + timeout: Option<std::time::Duration>, + fence: &dyn DynFence, + ) -> Result<Option<DynAcquiredSurfaceTexture>, SurfaceError>; + + unsafe fn discard_texture(&self, texture: Box<dyn DynSurfaceTexture>); +} + +impl<S: Surface + DynResource> DynSurface for S { + unsafe fn configure( + &self, + device: &dyn DynDevice, + config: &SurfaceConfiguration, + ) -> Result<(), SurfaceError> { + let device = device.expect_downcast_ref(); + unsafe { S::configure(self, device, config) } + } + + unsafe fn unconfigure(&self, device: &dyn DynDevice) { + let device = device.expect_downcast_ref(); + unsafe { S::unconfigure(self, device) } + } + + unsafe fn acquire_texture( + &self, + timeout: Option<std::time::Duration>, + fence: &dyn DynFence, + ) -> Result<Option<DynAcquiredSurfaceTexture>, SurfaceError> { + let fence = fence.expect_downcast_ref(); + unsafe { S::acquire_texture(self, timeout, fence) }.map(|acquired| { + acquired.map(|ast| { + let texture = Box::new(ast.texture); + let suboptimal = ast.suboptimal; + DynAcquiredSurfaceTexture { + texture, + suboptimal, + } + }) + }) + } + + unsafe fn discard_texture(&self, texture: Box<dyn DynSurfaceTexture>) { + unsafe { S::discard_texture(self, texture.unbox()) } + } +}
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 5d6c42ab85..4d8868c360 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -40,6 +40,31 @@ impl crate::Api for Api { type ComputePipeline = Resource; } +crate::impl_dyn_resource!(Context, Encoder, Resource); + +impl crate::DynAccelerationStructure for Resource {} +impl crate::DynBindGroup for Resource {} +impl crate::DynBindGroupLayout for Resource {} +impl crate::DynBuffer for Resource {} +impl crate::DynCommandBuffer for Resource {} +impl crate::DynComputePipeline for Resource {} +impl crate::DynFence for Resource {} +impl crate::DynPipelineCache for Resource {} +impl crate::DynPipelineLayout for Resource {} +impl crate::DynQuerySet for Resource {} +impl crate::DynRenderPipeline for Resource {} +impl crate::DynSampler for Resource {} +impl crate::DynShaderModule for Resource {} +impl crate::DynSurfaceTexture for Resource {} +impl crate::DynTexture for Resource {} +impl crate::DynTextureView for Resource {} + +impl std::borrow::Borrow<dyn crate::DynTexture> for Resource { + fn borrow(&self) -> &dyn crate::DynTexture { + self + } +} + impl crate::Instance for Context { type A = Api; @@ -53,7 +78,6 @@ impl crate::Instance for Context { ) -> Result<Context, crate::InstanceError> { Ok(Context) } - unsafe fn destroy_surface(&self, surface: Context) {} unsafe fn enumerate_adapters( &self, _surface_hint: Option<&Context>, @@ -151,9 +175,7 @@ impl crate::Device for Context { ) -> DeviceResult<crate::BufferMapping> { Err(crate::DeviceError::Lost) } - unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { - Ok(()) - } + unsafe fn unmap_buffer(&self, buffer: &Resource) {} unsafe fn flush_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {} unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {} @@ -176,7 +198,7 @@ unsafe fn create_command_encoder( &self, - desc: &crate::CommandEncoderDescriptor<Api>, + desc: &crate::CommandEncoderDescriptor<Context>, ) -> DeviceResult<Encoder> { Ok(Encoder) } @@ -191,14 +213,14 @@ unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} unsafe fn create_pipeline_layout( &self, - desc: &crate::PipelineLayoutDescriptor<Api>, + desc: &crate::PipelineLayoutDescriptor<Resource>, ) -> DeviceResult<Resource> { Ok(Resource) } unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} unsafe fn create_bind_group( &self, - desc: &crate::BindGroupDescriptor<Api>, + desc: &crate::BindGroupDescriptor<Resource, Resource, Resource, Resource, Resource>, ) -> DeviceResult<Resource> { Ok(Resource) } @@ -214,14 +236,14 @@ unsafe fn destroy_shader_module(&self, module: Resource) {} unsafe fn create_render_pipeline( &self, - desc: &crate::RenderPipelineDescriptor<Api>, + desc: &crate::RenderPipelineDescriptor<Resource, Resource, Resource>, ) -> Result<Resource, crate::PipelineError> { Ok(Resource) } unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {} unsafe fn create_compute_pipeline( &self, - desc: &crate::ComputePipelineDescriptor<Api>, + desc: &crate::ComputePipelineDescriptor<Resource, Resource, Resource>, ) -> Result<Resource, crate::PipelineError> { Ok(Resource) } @@ -269,7 +291,7 @@ } unsafe fn get_acceleration_structure_build_sizes<'a>( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, Api>, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, Resource>, ) -> crate::AccelerationStructureBuildSizes { Default::default() } @@ -300,13 +322,13 @@ impl crate::CommandEncoder for Encoder { unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) where - T: Iterator<Item = crate::BufferBarrier<'a, Api>>, + T: Iterator<Item = crate::BufferBarrier<'a, Resource>>, { } unsafe fn transition_textures<'a, T>(&mut self, barriers: T) where - T: Iterator<Item = crate::TextureBarrier<'a, Api>>, + T: Iterator<Item = crate::TextureBarrier<'a, Resource>>, { } @@ -362,7 +384,8 @@ // render - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<Api>) {} + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<Resource, Resource>) { + } unsafe fn end_render_pass(&mut self) {} unsafe fn set_bind_group( @@ -390,11 +413,15 @@ unsafe fn set_index_buffer<'a>( &mut self, - binding: crate::BufferBinding<'a, Api>, + binding: crate::BufferBinding<'a, Resource>, format: wgt::IndexFormat, ) { } - unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: crate::BufferBinding<'a, Api>) { + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, Resource>, + ) { } unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) {} unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) {} @@ -453,7 +480,7 @@ // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<Api>) {} + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<Resource>) {} unsafe fn end_compute_pass(&mut self) {} unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} @@ -467,7 +494,7 @@ descriptors: T, ) where Api: 'a, - T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, Api>>, + T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, Resource, Resource>>, { }
diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 1cda99b338..e7ecacebe0 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -503,6 +503,10 @@ impl super::Adapter { wgt::Features::TEXTURE_COMPRESSION_BC, bcn_exts.iter().all(|&ext| extensions.contains(ext)), ); + features.set( + wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D, + bcn_exts.iter().all(|&ext| extensions.contains(ext)), // BC guaranteed Sliced 3D + ); let has_etc = if cfg!(any(webgl, Emscripten)) { extensions.contains("WEBGL_compressed_texture_etc") } else { @@ -1093,7 +1097,7 @@ impl crate::Adapter for super::Adapter { Tf::Rgba8Sint => renderable | storage, Tf::Rgb10a2Uint => renderable, Tf::Rgb10a2Unorm => filterable_renderable, - Tf::Rg11b10Float => filterable | float_renderable, + Tf::Rg11b10UFloat => filterable | float_renderable, Tf::Rg32Uint => renderable, Tf::Rg32Sint => renderable, Tf::Rg32Float => unfilterable | float_renderable | texture_float_linear,
diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 63a9b5496e..c002e76c1b 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -81,9 +81,8 @@ impl super::CommandBuffer { } fn add_push_constant_data(&mut self, data: &[u32]) -> Range<u32> { - let data_raw = unsafe { - std::slice::from_raw_parts(data.as_ptr() as *const _, mem::size_of_val(data)) - }; + let data_raw = + unsafe { std::slice::from_raw_parts(data.as_ptr().cast(), mem::size_of_val(data)) }; let start = self.data_bytes.len(); assert!(start < u32::MAX as usize); self.data_bytes.extend_from_slice(data_raw); @@ -274,7 +273,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) where - T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + T: Iterator<Item = crate::BufferBarrier<'a, super::Buffer>>, { if !self .private_caps @@ -299,7 +298,7 @@ unsafe fn transition_textures<'a, T>(&mut self, barriers: T) where - T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + T: Iterator<Item = crate::TextureBarrier<'a, super::Texture>>, { if !self .private_caps @@ -495,7 +494,10 @@ // render - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + unsafe fn begin_render_pass( + &mut self, + desc: &crate::RenderPassDescriptor<super::QuerySet, super::TextureView>, + ) { debug_assert!(self.state.end_of_pass_timestamp.is_none()); if let Some(ref t) = desc.timestamp_writes { if let Some(index) = t.beginning_of_pass_write_index { @@ -979,7 +981,7 @@ unsafe fn set_index_buffer<'a>( &mut self, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, format: wgt::IndexFormat, ) { self.state.index_offset = binding.offset; @@ -991,7 +993,7 @@ unsafe fn set_vertex_buffer<'a>( &mut self, index: u32, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, ) { self.state.dirty_vbuf_mask |= 1 << index; let (_, ref mut vb) = self.state.vertex_buffers[index as usize]; @@ -1138,7 +1140,7 @@ // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::Api>) { + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::QuerySet>) { debug_assert!(self.state.end_of_pass_timestamp.is_none()); if let Some(ref t) = desc.timestamp_writes { if let Some(index) = t.beginning_of_pass_write_index { @@ -1186,7 +1188,13 @@ _descriptors: T, ) where super::Api: 'a, - T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>, + T: IntoIterator< + Item = crate::BuildAccelerationStructureDescriptor< + 'a, + super::Buffer, + super::AccelerationStructure, + >, + >, { unimplemented!() }
diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs index a6c924f162..8733d54957 100644 --- a/wgpu-hal/src/gles/conv.rs +++ b/wgpu-hal/src/gles/conv.rs @@ -45,7 +45,7 @@ impl super::AdapterShared { glow::RGBA, glow::UNSIGNED_INT_2_10_10_10_REV, ), - Tf::Rg11b10Float => ( + Tf::Rg11b10UFloat => ( glow::R11F_G11F_B10F, glow::RGB, glow::UNSIGNED_INT_10F_11F_11F_REV,
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 66b34bcd13..ad092307e9 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -9,13 +9,11 @@ use std::{ }; use arrayvec::ArrayVec; -#[cfg(native)] -use std::mem; use std::sync::atomic::Ordering; type ShaderStage<'a> = ( naga::ShaderStage, - &'a crate::ProgrammableStage<'a, super::Api>, + &'a crate::ProgrammableStage<'a, super::ShaderModule>, ); type NameBindingMap = rustc_hash::FxHashMap<String, (super::BindingRegister, u8)>; @@ -178,9 +176,7 @@ impl super::Device { let raw = unsafe { gl.create_shader(target) }.unwrap(); #[cfg(native)] if gl.supports_debug() { - //TODO: remove all transmutes from `object_label` - // https://github.com/grovesNL/glow/issues/186 - let name = unsafe { mem::transmute(raw) }; + let name = raw.0.get(); unsafe { gl.object_label(glow::SHADER, name, label) }; } @@ -209,7 +205,7 @@ fn create_shader( gl: &glow::Context, naga_stage: naga::ShaderStage, - stage: &crate::ProgrammableStage<super::Api>, + stage: &crate::ProgrammableStage<super::ShaderModule>, context: CompilationContext, program: glow::Program, ) -> Result<glow::Shader, crate::PipelineError> { @@ -227,7 +223,7 @@ ) .map_err(|e| { let msg = format!("{e}"); - crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg) + crate::PipelineError::PipelineConstants(map_naga_stage(naga_stage), msg) })?; let entry_point_index = module @@ -250,7 +246,6 @@ index: BoundsCheckPolicy::Unchecked, buffer: BoundsCheckPolicy::Unchecked, image_load: image_check, - image_store: BoundsCheckPolicy::Unchecked, binding_array: BoundsCheckPolicy::Unchecked, }; @@ -366,7 +361,7 @@ #[cfg(native)] if let Some(label) = label { if private_caps.contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(program) }; + let name = program.0.get(); unsafe { gl.object_label(glow::PROGRAM, name, Some(label)) }; } } @@ -541,6 +536,7 @@ impl crate::Device for super::Device { size: desc.size, map_flags: 0, data: Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))), + offset_of_current_mapping: Arc::new(Mutex::new(0)), }); } @@ -621,7 +617,7 @@ .private_caps .contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(raw) }; + let name = raw.map_or(0, |buf| buf.0.get()); unsafe { gl.object_label(glow::BUFFER, name, Some(label)) }; } } @@ -640,6 +636,7 @@ size: desc.size, map_flags, data, + offset_of_current_mapping: Arc::new(Mutex::new(0)), }) } @@ -673,6 +670,7 @@ unsafe { self.shared.get_buffer_sub_data(gl, buffer.target, 0, slice) }; slice.as_mut_ptr() } else { + *buffer.offset_of_current_mapping.lock().unwrap() = range.start; unsafe { gl.map_buffer_range( buffer.target, @@ -691,32 +689,36 @@ is_coherent, }) } - unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) { if let Some(raw) = buffer.raw { if buffer.data.is_none() { let gl = &self.shared.context.lock(); unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; unsafe { gl.unmap_buffer(buffer.target) }; unsafe { gl.bind_buffer(buffer.target, None) }; + *buffer.offset_of_current_mapping.lock().unwrap() = 0; } } - Ok(()) }
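The hunk below changes `flush_mapped_ranges` to rebase each range against the start of the current mapping: `glFlushMappedBufferRange` takes offsets relative to the mapped range, while callers pass offsets relative to the start of the buffer. The arithmetic in isolation, with hypothetical numbers:

```rust
// Sketch of the offset rebasing introduced below. `glFlushMappedBufferRange`
// expects offsets relative to the *mapped* range, while wgpu-hal callers pass
// offsets relative to the buffer start, so the start of the current mapping
// has to be subtracted first (assumes range.start >= mapping_start).
fn relative_flush_range(
    mapping_start: u64,          // where `map_buffer` began, in buffer bytes
    range: std::ops::Range<u64>, // caller's range, also in buffer bytes
) -> (i32, i32) {
    let offset = (range.start - mapping_start) as i32;
    let length = (range.end - range.start) as i32;
    (offset, length)
}

fn main() {
    // Buffer mapped at byte 256; the caller flushes bytes 300..428.
    assert_eq!(relative_flush_range(256, 300..428), (44, 128));
}
```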
unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) where I: Iterator<Item = crate::MemoryRange>, { if let Some(raw) = buffer.raw { - let gl = &self.shared.context.lock(); - unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; - for range in ranges { - unsafe { - gl.flush_mapped_buffer_range( - buffer.target, - range.start as i32, - (range.end - range.start) as i32, - ) - }; + if buffer.data.is_none() { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + for range in ranges { + let offset_of_current_mapping = + *buffer.offset_of_current_mapping.lock().unwrap(); + unsafe { + gl.flush_mapped_buffer_range( + buffer.target, + (range.start - offset_of_current_mapping) as i32, + (range.end - range.start) as i32, + ) + }; + } } } } @@ -769,7 +771,7 @@ .private_caps .contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(raw) }; + let name = raw.0.get(); unsafe { gl.object_label(glow::RENDERBUFFER, name, Some(label)) }; } } @@ -937,7 +939,7 @@ .private_caps .contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(raw) }; + let name = raw.0.get(); unsafe { gl.object_label(glow::TEXTURE, name, Some(label)) }; } } @@ -1089,7 +1091,7 @@ .private_caps .contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(raw) }; + let name = raw.0.get(); unsafe { gl.object_label(glow::SAMPLER, name, Some(label)) }; } } @@ -1107,7 +1109,7 @@ unsafe fn create_command_encoder( &self, - _desc: &crate::CommandEncoderDescriptor<super::Api>, + _desc: &crate::CommandEncoderDescriptor<super::Queue>, ) -> Result<super::CommandEncoder, crate::DeviceError> { self.counters.command_encoders.add(1); @@ -1138,7 +1140,7 @@ unsafe fn create_pipeline_layout( &self, - desc: &crate::PipelineLayoutDescriptor<super::Api>, + desc: &crate::PipelineLayoutDescriptor<super::BindGroupLayout>, ) -> Result<super::PipelineLayout, crate::DeviceError> { use naga::back::glsl; @@ -1230,7 +1232,13 @@ unsafe fn create_bind_group( &self, - desc: &crate::BindGroupDescriptor<super::Api>, + desc: &crate::BindGroupDescriptor< + super::BindGroupLayout, + super::Buffer, + super::Sampler, + super::TextureView, + super::AccelerationStructure, + >, ) -> Result<super::BindGroup, crate::DeviceError> { let mut contents = Vec::new(); @@ -1338,7 +1346,11 @@ unsafe fn create_render_pipeline( &self, - desc: &crate::RenderPipelineDescriptor<super::Api>, + desc: &crate::RenderPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result<super::RenderPipeline, crate::PipelineError> { let gl = &self.shared.context.lock(); let mut shaders = ArrayVec::new(); @@ -1428,7 +1440,11 @@ unsafe fn create_compute_pipeline( &self, - desc: &crate::ComputePipelineDescriptor<super::Api>, + desc: &crate::ComputePipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result<super::ComputePipeline, crate::PipelineError> { let gl = &self.shared.context.lock(); let mut shaders = ArrayVec::new(); @@ -1461,12 +1477,12 @@ unsafe fn create_pipeline_cache( &self, _: &crate::PipelineCacheDescriptor<'_>, - ) -> Result<(), crate::PipelineCacheError> { + ) -> Result<super::PipelineCache, crate::PipelineCacheError> { // Even though the cache doesn't do anything, we still return something here // as the least bad option - Ok(()) + Ok(super::PipelineCache) } - unsafe fn destroy_pipeline_cache(&self, (): ()) {} + unsafe fn destroy_pipeline_cache(&self, _: super::PipelineCache) {} #[cfg_attr(target_arch = "wasm32", allow(unused))] unsafe fn create_query_set( @@ -1587,22 +1603,26 @@ unsafe fn create_acceleration_structure( &self, _desc: &crate::AccelerationStructureDescriptor, - ) -> Result<(), crate::DeviceError> { + ) -> Result<super::AccelerationStructure, crate::DeviceError> { unimplemented!() } unsafe fn get_acceleration_structure_build_sizes<'a>( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + _desc:
&crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>, ) -> crate::AccelerationStructureBuildSizes { unimplemented!() } unsafe fn get_acceleration_structure_device_address( &self, - _acceleration_structure: &(), + _acceleration_structure: &super::AccelerationStructure, ) -> wgt::BufferAddress { unimplemented!() } - unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: ()) {} + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + } fn get_internal_counters(&self) -> wgt::HalCounters { self.counters.clone() diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs index f35d697d5e..9a8639d5a8 100644 --- a/wgpu-hal/src/gles/egl.rs +++ b/wgpu-hal/src/gles/egl.rs @@ -550,26 +550,25 @@ impl Inner { let supports_khr_context = display_extensions.contains("EGL_KHR_create_context"); let mut context_attributes = vec![]; - if supports_opengl { - context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); - context_attributes.push(3); - context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); - context_attributes.push(3); - if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { - log::warn!("Ignoring specified GLES minor version as OpenGL is used"); - } - } else { - context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); - context_attributes.push(3); // Request GLES 3.0 or higher - if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { - context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); - context_attributes.push(match force_gles_minor_version { - wgt::Gles3MinorVersion::Automatic => unreachable!(), - wgt::Gles3MinorVersion::Version0 => 0, - wgt::Gles3MinorVersion::Version1 => 1, - wgt::Gles3MinorVersion::Version2 => 2, - }); - } + let mut gl_context_attributes = vec![]; + let mut gles_context_attributes = vec![]; + gl_context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); + gl_context_attributes.push(3); + gl_context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); + gl_context_attributes.push(3); + if supports_opengl && force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { + log::warn!("Ignoring specified GLES minor version as OpenGL is used"); + } + gles_context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); + gles_context_attributes.push(3); // Request GLES 3.0 or higher + if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { + gles_context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); + gles_context_attributes.push(match force_gles_minor_version { + wgt::Gles3MinorVersion::Automatic => unreachable!(), + wgt::Gles3MinorVersion::Version0 => 0, + wgt::Gles3MinorVersion::Version1 => 1, + wgt::Gles3MinorVersion::Version2 => 2, + }); } if flags.contains(wgt::InstanceFlags::DEBUG) { if version >= (1, 5) { @@ -606,15 +605,31 @@ impl Inner { context_attributes.push(khr_context_flags); } context_attributes.push(khronos_egl::NONE); - let context = match egl.create_context(display, config, None, &context_attributes) { - Ok(context) => context, - Err(e) => { - return Err(crate::InstanceError::with_source( - String::from("unable to create GLES 3.x context"), - e, - )); - } - }; + + gl_context_attributes.extend(&context_attributes); + gles_context_attributes.extend(&context_attributes); + + let context = if supports_opengl { + egl.create_context(display, config, None, &gl_context_attributes) + .or_else(|_| { + egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap(); + egl.create_context(display, config, None, 
&gles_context_attributes) + }) + .map_err(|e| { + crate::InstanceError::with_source( + String::from("unable to create OpenGL or GLES 3.x context"), + e, + ) + }) + } else { + egl.create_context(display, config, None, &gles_context_attributes) + .map_err(|e| { + crate::InstanceError::with_source( + String::from("unable to create GLES 3.x context"), + e, + ) + }) + }?; // Testing if context can be bound without surface // and creating dummy pbuffer surface if not. @@ -919,7 +934,10 @@ impl crate::Instance for Instance { let ret = unsafe { ndk_sys::ANativeWindow_setBuffersGeometry( - handle.a_native_window.as_ptr() as *mut ndk_sys::ANativeWindow, + handle + .a_native_window + .as_ptr() + .cast::<ndk_sys::ANativeWindow>(), 0, 0, format, @@ -999,8 +1017,6 @@ }) } - unsafe fn destroy_surface(&self, _surface: Surface) {} - unsafe fn enumerate_adapters( &self, _surface_hint: Option<&Surface>, @@ -1229,12 +1245,12 @@ impl crate::Surface for Surface { let native_window_ptr = match (self.wsi.kind, self.raw_window_handle) { (WindowKind::Unknown | WindowKind::X11, Rwh::Xlib(handle)) => { temp_xlib_handle = handle.window; - &mut temp_xlib_handle as *mut _ as *mut ffi::c_void + ptr::from_mut(&mut temp_xlib_handle).cast::<ffi::c_void>() } (WindowKind::AngleX11, Rwh::Xlib(handle)) => handle.window as *mut ffi::c_void, (WindowKind::Unknown | WindowKind::X11, Rwh::Xcb(handle)) => { temp_xcb_handle = handle.window; - &mut temp_xcb_handle as *mut _ as *mut ffi::c_void + ptr::from_mut(&mut temp_xcb_handle).cast::<ffi::c_void>() } (WindowKind::AngleX11, Rwh::Xcb(handle)) => { handle.window.get() as *mut ffi::c_void @@ -1248,7 +1264,7 @@ unsafe { library.get(b"wl_egl_window_create") }.unwrap(); let window = unsafe { wl_egl_window_create(handle.surface.as_ptr(), 640, 480) } - as *mut _; + .cast(); wl_window = Some(window); window } @@ -1265,8 +1281,8 @@ use objc::{msg_send, runtime::Object, sel, sel_impl}; // ns_view always has a layer, so we don't need to verify that it exists. let layer: *mut Object = - msg_send![handle.ns_view.as_ptr() as *mut Object, layer]; - layer as *mut ffi::c_void + msg_send![handle.ns_view.as_ptr().cast::<Object>(), layer]; + layer.cast::<ffi::c_void>() }; window_ptr }
diff --git a/wgpu-hal/src/gles/emscripten.rs b/wgpu-hal/src/gles/emscripten.rs index 7372dbd369..8a341d54d4 100644 --- a/wgpu-hal/src/gles/emscripten.rs +++ b/wgpu-hal/src/gles/emscripten.rs @@ -11,7 +11,7 @@ extern "C" { /// /// returns true on success /// -/// # Safety: +/// # Safety /// /// - opengl context MUST BE current /// - extension_name_null_terminated argument must be a valid string with null terminator.
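The safety note above requires a NUL-terminated extension name. A sketch of the two usual ways to provide one across FFI; `with_null_terminated` is a hypothetical helper, not part of wgpu-hal:

```rust
// Sketch (hypothetical helper) of the two common ways to hand a
// NUL-terminated string across FFI, as the safety note above requires.
use std::ffi::CString;

fn with_null_terminated(name: &str, f: impl Fn(*const u8)) {
    // Option 1: a literal that already carries its terminator.
    let literal = "OES_texture_float\0";
    f(literal.as_ptr());

    // Option 2: CString guarantees exactly one trailing NUL and no interior
    // NUL bytes, at the cost of an allocation.
    let owned = CString::new(name).expect("no interior NUL bytes");
    f(owned.as_ptr().cast::<u8>());
}

fn main() {
    with_null_terminated("OES_element_index_uint", |ptr| {
        // A real caller would hand `ptr` to the C side here.
        assert!(!ptr.is_null());
    });
}
```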
@@ -20,7 +20,7 @@ pub unsafe fn enable_extension(extension_name_null_terminated: &str) -> bool { unsafe { emscripten_webgl_enable_extension( emscripten_webgl_get_current_context(), - extension_name_null_terminated.as_ptr() as _, + extension_name_null_terminated.as_ptr().cast(), ) == 1 } }
diff --git a/wgpu-hal/src/gles/mod.rs index 73915d53e2..df59778065 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -153,8 +153,8 @@ impl crate::Api for Api { type Sampler = Sampler; type QuerySet = QuerySet; type Fence = Fence; - type AccelerationStructure = (); - type PipelineCache = (); + type AccelerationStructure = AccelerationStructure; + type PipelineCache = PipelineCache; type BindGroupLayout = BindGroupLayout; type BindGroup = BindGroup; @@ -164,6 +164,30 @@ type ComputePipeline = ComputePipeline; } +crate::impl_dyn_resource!( + Adapter, + AccelerationStructure, + BindGroup, + BindGroupLayout, + Buffer, + CommandBuffer, + CommandEncoder, + ComputePipeline, + Device, + Fence, + Instance, + PipelineCache, + PipelineLayout, + QuerySet, + Queue, + RenderPipeline, + Sampler, + ShaderModule, + Surface, + Texture, + TextureView +); + bitflags::bitflags! { /// Flags that affect internal code paths but do not /// change the exposed feature set. @@ -299,6 +323,7 @@ pub struct Buffer { size: wgt::BufferAddress, map_flags: u32, data: Option<Arc<Mutex<Vec<u8>>>>, + offset_of_current_mapping: Arc<Mutex<wgt::BufferAddress>>, } #[cfg(send_sync)] @@ -306,6 +331,8 @@ unsafe impl Sync for Buffer {} #[cfg(send_sync)] unsafe impl Send for Buffer {} +impl crate::DynBuffer for Buffer {} + #[derive(Clone, Debug)] pub enum TextureInner { Renderbuffer { @@ -352,6 +379,15 @@ pub struct Texture { pub copy_size: CopyExtent, } +impl crate::DynTexture for Texture {} +impl crate::DynSurfaceTexture for Texture {} + +impl std::borrow::Borrow<dyn crate::DynTexture> for Texture { + fn borrow(&self) -> &dyn crate::DynTexture { + self + } +} + impl Texture { pub fn default_framebuffer(format: wgt::TextureFormat) -> Self { Self { @@ -439,16 +475,22 @@ pub struct TextureView { format: wgt::TextureFormat, } +impl crate::DynTextureView for TextureView {} + #[derive(Debug)] pub struct Sampler { raw: glow::Sampler, } +impl crate::DynSampler for Sampler {} + #[derive(Debug)] pub struct BindGroupLayout { entries: Arc<[wgt::BindGroupLayoutEntry]>, } +impl crate::DynBindGroupLayout for BindGroupLayout {} + #[derive(Debug)] struct BindGroupLayoutInfo { entries: Arc<[wgt::BindGroupLayoutEntry]>, @@ -466,6 +508,8 @@ pub struct PipelineLayout { naga_options: naga::back::glsl::Options, } +impl crate::DynPipelineLayout for PipelineLayout {} + impl PipelineLayout { fn get_slot(&self, br: &naga::ResourceBinding) -> u8 { let group_info = &self.group_infos[br.group as usize]; @@ -504,6 +548,8 @@ pub struct BindGroup { contents: Box<[RawBinding]>, } +impl crate::DynBindGroup for BindGroup {} + type ShaderId = u32; #[derive(Debug)] @@ -513,6 +559,8 @@ pub struct ShaderModule { id: ShaderId, } +impl crate::DynShaderModule for ShaderModule {} + #[derive(Clone, Debug, Default)] struct VertexFormatDesc { element_count: i32, @@ -628,6 +676,8 @@ pub struct RenderPipeline { alpha_to_coverage_enabled: bool, } +impl crate::DynRenderPipeline for RenderPipeline {} + #[cfg(send_sync)] unsafe impl Sync for RenderPipeline {} #[cfg(send_sync)] @@ -638,6 +688,8 @@ pub struct ComputePipeline { inner: Arc<PipelineInner>, } +impl crate::DynComputePipeline for ComputePipeline {} + #[cfg(send_sync)] unsafe impl Sync for ComputePipeline {} #[cfg(send_sync)] @@ -649,12 +701,16 @@ pub struct QuerySet { target: BindTarget, } +impl crate::DynQuerySet for QuerySet {} + #[derive(Debug)] pub struct Fence { last_completed: crate::FenceValue, pending: Vec<(crate::FenceValue, glow::Fence)>, } +impl crate::DynFence for Fence {} + #[cfg(any( not(target_arch = "wasm32"), all( @@ -698,6 +754,16 @@ impl Fence { } } +#[derive(Debug)] +pub struct AccelerationStructure; + +impl crate::DynAccelerationStructure for AccelerationStructure {} + +#[derive(Debug)] +pub struct PipelineCache; + +impl crate::DynPipelineCache for PipelineCache {} + #[derive(Clone, Debug, PartialEq)] struct StencilOps { pass: u32, @@ -951,6 +1017,8 @@ pub struct CommandBuffer { queries: Vec<glow::Query>, } +impl crate::DynCommandBuffer for CommandBuffer {} + impl fmt::Debug for CommandBuffer { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut builder = f.debug_struct("CommandBuffer");
diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 95eff36d57..398e37ffe6 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -955,7 +955,7 @@ impl super::Queue { } let query_data = unsafe { slice::from_raw_parts( - temp_query_results.as_ptr() as *const u8, + temp_query_results.as_ptr().cast::<u8>(), temp_query_results.len() * mem::size_of::<u64>(), ) }; @@ -1526,8 +1526,7 @@ debug_assert_eq!(data_required, raw.len()); - let slice: &[T] = - unsafe { slice::from_raw_parts(raw.as_ptr() as *const _, COUNT) }; + let slice: &[T] = unsafe { slice::from_raw_parts(raw.as_ptr().cast(), COUNT) }; slice.try_into().unwrap() }
diff --git a/wgpu-hal/src/gles/web.rs b/wgpu-hal/src/gles/web.rs index a6c79721b4..99d4ff59b5 100644 --- a/wgpu-hal/src/gles/web.rs +++ b/wgpu-hal/src/gles/web.rs @@ -171,8 +171,6 @@ impl crate::Instance for Instance { self.create_surface_from_canvas(canvas) } - - unsafe fn destroy_surface(&self, _surface: Surface) {} } #[derive(Debug)]
diff --git a/wgpu-hal/src/gles/wgl.rs b/wgpu-hal/src/gles/wgl.rs index c221b3e59d..68bedb11d2 100644 --- a/wgpu-hal/src/gles/wgl.rs +++ b/wgpu-hal/src/gles/wgl.rs @@ -9,7 +9,6 @@ use raw_window_handle::{RawDisplayHandle, RawWindowHandle}; use std::{ collections::HashSet, ffi::{c_void, CStr, CString}, - io::Error, mem, os::raw::c_int, ptr, @@ -21,23 +20,13 @@ time::Duration, }; use wgt::InstanceFlags; -use winapi::{ - shared::{ - minwindef::{FALSE, HMODULE, LPARAM, LRESULT, UINT, WPARAM}, - windef::{HDC, HGLRC, HWND}, - }, - um::{ - libloaderapi::{GetModuleHandleA, GetProcAddress, LoadLibraryA}, - wingdi::{ - wglCreateContext, wglDeleteContext, wglGetCurrentContext, wglGetProcAddress, - wglMakeCurrent, ChoosePixelFormat, DescribePixelFormat, GetPixelFormat, SetPixelFormat, - SwapBuffers, PFD_DOUBLEBUFFER, PFD_DRAW_TO_WINDOW, PFD_SUPPORT_OPENGL, PFD_TYPE_RGBA, - PIXELFORMATDESCRIPTOR, - }, - winuser::{ - CreateWindowExA, DefWindowProcA, DestroyWindow, GetDC, RegisterClassExA, ReleaseDC, - CS_OWNDC, WNDCLASSEXA, - }, +use windows::{ + core::{Error, PCSTR}, + Win32::{ + Foundation, + Graphics::{Gdi, OpenGL}, + System::LibraryLoader, + UI::WindowsAndMessaging, }, }; @@ -59,7 +48,7 @@ impl AdapterContext { } pub fn raw_context(&self) -> *mut c_void { - self.inner.lock().context.context as *mut _ + self.inner.lock().context.context.0 } /// Obtain a lock to the WGL context and get handle to the [`glow::Context`] that can be used to @@ -84,7 +73,7 @@ /// Unlike [`lock`](Self::lock), this accepts a device to pass to `make_current` and exposes the error /// when `make_current` fails. #[track_caller] - fn lock_with_dc(&self, device: HDC) -> Result<AdapterContextLock<'_>, Error> { + fn lock_with_dc(&self, device: Gdi::HDC) -> windows::core::Result<AdapterContextLock<'_>> { let inner = self .inner .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) @@ -117,37 +106,27 @@ impl<'a> Drop for AdapterContextLock<'a> { } } struct WglContext { - context: HGLRC, + context: OpenGL::HGLRC, } impl WglContext { - fn make_current(&self, device: HDC) -> Result<(), Error> { - if unsafe { wglMakeCurrent(device, self.context) } == FALSE { - Err(Error::last_os_error()) - } else { - Ok(()) - } + fn make_current(&self, device: Gdi::HDC) -> windows::core::Result<()> { + unsafe { OpenGL::wglMakeCurrent(device, self.context) } } - fn unmake_current(&self) -> Result<(), Error> { - if unsafe { wglGetCurrentContext().is_null() } { + fn unmake_current(&self) -> windows::core::Result<()> { + if unsafe { OpenGL::wglGetCurrentContext() }.is_invalid() { return Ok(()); } - if unsafe { wglMakeCurrent(ptr::null_mut(), ptr::null_mut()) } == FALSE { - Err(Error::last_os_error()) - } else { - Ok(()) - } + unsafe { OpenGL::wglMakeCurrent(None, None) } } } impl Drop for WglContext { fn drop(&mut self) { - unsafe { - if wglDeleteContext(self.context) == FALSE { - log::error!("failed to delete WGL context {}", Error::last_os_error()); - } - }; + if let Err(e) = unsafe { OpenGL::wglDeleteContext(self.context) } { + log::error!("failed to delete WGL context: {e}"); + } } } @@ -171,20 +150,20 @@ pub struct Instance { unsafe impl Send for Instance {} unsafe impl Sync for Instance {} -fn load_gl_func(name: &str, module: Option<HMODULE>) -> *const c_void { +fn load_gl_func(name: &str, module: Option<Foundation::HMODULE>) -> *const c_void { let addr = CString::new(name.as_bytes()).unwrap(); - let mut ptr = unsafe { wglGetProcAddress(addr.as_ptr()) }; - if ptr.is_null() { + let mut ptr = unsafe { OpenGL::wglGetProcAddress(PCSTR(addr.as_ptr().cast())) }; + if ptr.is_none() { if let Some(module) = module { - ptr = unsafe { GetProcAddress(module, addr.as_ptr()) }; + ptr = unsafe { LibraryLoader::GetProcAddress(module, PCSTR(addr.as_ptr().cast())) }; } } - ptr.cast() + ptr.map_or_else(ptr::null_mut, |p| p as *mut c_void) } -fn get_extensions(extra: &Wgl, dc: HDC) -> HashSet<String> { +fn get_extensions(extra: &Wgl, dc: Gdi::HDC) -> HashSet<String> { if extra.GetExtensionsStringARB.is_loaded() { - unsafe { CStr::from_ptr(extra.GetExtensionsStringARB(dc as *const _)) } + unsafe { CStr::from_ptr(extra.GetExtensionsStringARB(dc.0)) } .to_str() .unwrap_or("") } else { @@ -195,63 +174,75 @@ .collect() } -unsafe fn setup_pixel_format(dc: HDC) -> Result<(), crate::InstanceError> { - let mut format: PIXELFORMATDESCRIPTOR = unsafe { mem::zeroed() }; - format.nVersion = 1; - format.nSize = mem::size_of_val(&format) as u16; - format.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; - format.iPixelType = PFD_TYPE_RGBA; - format.cColorBits = 8; +unsafe fn setup_pixel_format(dc: Gdi::HDC) -> Result<(), crate::InstanceError> { + { + let format = OpenGL::PIXELFORMATDESCRIPTOR { + nVersion: 1, + nSize: mem::size_of::<OpenGL::PIXELFORMATDESCRIPTOR>() as u16, + dwFlags: OpenGL::PFD_DRAW_TO_WINDOW + | OpenGL::PFD_SUPPORT_OPENGL + | OpenGL::PFD_DOUBLEBUFFER, + iPixelType: OpenGL::PFD_TYPE_RGBA, + cColorBits: 8, + ..unsafe { mem::zeroed() } + }; - let index = unsafe { ChoosePixelFormat(dc, &format) }; - if index == 0 { - return Err(crate::InstanceError::with_source( - String::from("unable to choose pixel format"), - Error::last_os_error(), - )); - } + let index = unsafe { OpenGL::ChoosePixelFormat(dc, &format) }; + if index == 0 { + return Err(crate::InstanceError::with_source( + String::from("unable to choose pixel format"), + Error::from_win32(), + )); + } - let current = unsafe { GetPixelFormat(dc) }; + let current = unsafe { OpenGL::GetPixelFormat(dc) }; - if index != current && unsafe { SetPixelFormat(dc, index, &format) } == FALSE { - return Err(crate::InstanceError::with_source( - String::from("unable to set pixel format"), - Error::last_os_error(), - )); + if index != current { + unsafe { OpenGL::SetPixelFormat(dc, index, &format) }.map_err(|e| { + crate::InstanceError::with_source(String::from("unable to set pixel format"), e) + })?; + } } - let index = unsafe { GetPixelFormat(dc) }; - if index == 0 { - return Err(crate::InstanceError::with_source( - String::from("unable to get pixel format index"), - Error::last_os_error(), - )); - } - if unsafe { DescribePixelFormat(dc, index, mem::size_of_val(&format) as UINT, &mut format) } - == 0 { - return Err(crate::InstanceError::with_source( - String::from("unable to read pixel format"), - Error::last_os_error(), - )); - } + let index = unsafe { OpenGL::GetPixelFormat(dc) }; + if index == 0 { + return Err(crate::InstanceError::with_source( + String::from("unable to get pixel format index"), + Error::from_win32(), + )); + } + let mut format = Default::default(); + if unsafe { + OpenGL::DescribePixelFormat( + dc, + index, + mem::size_of_val(&format) as u32, + Some(&mut format), + ) + } == 0 + { + return Err(crate::InstanceError::with_source( + String::from("unable to read pixel format"), + Error::from_win32(), + )); + } - if format.dwFlags & PFD_SUPPORT_OPENGL == 0 || format.iPixelType != PFD_TYPE_RGBA { - return Err(crate::InstanceError::new(String::from( - "unsuitable pixel format", - ))); + if !format.dwFlags.contains(OpenGL::PFD_SUPPORT_OPENGL) + || format.iPixelType != OpenGL::PFD_TYPE_RGBA + { + return Err(crate::InstanceError::new(String::from( + "unsuitable pixel format", + ))); + } } Ok(()) }
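The rewritten `setup_pixel_format` above sets only the fields it cares about and zero-fills the rest of the C-layout descriptor with functional-update syntax. The same pattern on a toy struct (not the real Win32 `PIXELFORMATDESCRIPTOR`):

```rust
// Sketch of the initialization pattern used above: set the meaningful fields,
// then zero-fill the remainder of a C-layout struct via `..` functional
// update (toy struct and flag value, not the Win32 definitions).
#[repr(C)]
#[derive(Debug)]
struct CDescriptor {
    n_size: u16,
    n_version: u16,
    dw_flags: u32,
    reserved: [u32; 8],
}

fn make_descriptor() -> CDescriptor {
    CDescriptor {
        n_size: std::mem::size_of::<CDescriptor>() as u16,
        n_version: 1,
        dw_flags: 0x25,
        // SAFETY: all-zeroes is a valid bit pattern for every field here.
        ..unsafe { std::mem::zeroed() }
    }
}

fn main() {
    let d = make_descriptor();
    assert_eq!(d.n_size as usize, std::mem::size_of::<CDescriptor>());
    assert_eq!(d.n_version, 1);
    assert_eq!(d.dw_flags, 0x25);
    assert_eq!(d.reserved, [0; 8]);
}
```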
unsafe extern "system" fn wnd_proc( - window: HWND, - msg: UINT, - wparam: WPARAM, - lparam: LPARAM, - ) -> LRESULT { - unsafe { DefWindowProcA(window, msg, wparam, lparam) } + window: Foundation::HWND, + msg: u32, + wparam: Foundation::WPARAM, + lparam: Foundation::LPARAM, + ) -> Foundation::LRESULT { + unsafe { WindowsAndMessaging::DefWindowProcA(window, msg, wparam, lparam) } } - let window_class = WNDCLASSEXA { - cbSize: mem::size_of::() as u32, - style: CS_OWNDC, + let window_class = WindowsAndMessaging::WNDCLASSEXA { + cbSize: mem::size_of::() as u32, + style: WindowsAndMessaging::CS_OWNDC, lpfnWndProc: Some(wnd_proc), cbClsExtra: 0, cbWndExtra: 0, - hInstance: instance, - hIcon: ptr::null_mut(), - hCursor: ptr::null_mut(), - hbrBackground: ptr::null_mut(), - lpszMenuName: ptr::null_mut(), - lpszClassName: name.as_ptr(), - hIconSm: ptr::null_mut(), + hInstance: instance.into(), + hIcon: WindowsAndMessaging::HICON::default(), + hCursor: WindowsAndMessaging::HCURSOR::default(), + hbrBackground: Gdi::HBRUSH::default(), + lpszMenuName: PCSTR::null(), + lpszClassName: PCSTR(name.as_ptr().cast()), + hIconSm: WindowsAndMessaging::HICON::default(), }; - let atom = unsafe { RegisterClassExA(&window_class) }; + let atom = unsafe { WindowsAndMessaging::RegisterClassExA(&window_class) }; if atom == 0 { return Err(crate::InstanceError::with_source( String::from("unable to register window class"), - Error::last_os_error(), + Error::from_win32(), )); } @@ -306,7 +297,7 @@ fn get_global_window_class() -> Result { } struct InstanceDevice { - dc: HDC, + dc: Gdi::HDC, /// This is used to keep the thread owning `dc` alive until this struct is dropped. _tx: SyncSender<()>, @@ -314,31 +305,19 @@ struct InstanceDevice { fn create_instance_device() -> Result { #[derive(Clone, Copy)] - struct SendDc(HDC); + // TODO: We can get these SendSync definitions in the upstream metadata if this is the case + struct SendDc(Gdi::HDC); unsafe impl Sync for SendDc {} unsafe impl Send for SendDc {} struct Window { - window: HWND, + window: Foundation::HWND, } impl Drop for Window { fn drop(&mut self) { - unsafe { - if DestroyWindow(self.window) == FALSE { - log::error!("failed to destroy window {}", Error::last_os_error()); - } - }; - } - } - struct DeviceContextHandle { - dc: HDC, - window: HWND, - } - impl Drop for DeviceContextHandle { - fn drop(&mut self) { - unsafe { - ReleaseDC(self.window, self.dc); - }; + if let Err(e) = unsafe { WindowsAndMessaging::DestroyWindow(self.window) } { + log::error!("failed to destroy window: {e}"); + } } } @@ -353,58 +332,57 @@ fn create_instance_device() -> Result { .name("wgpu-hal WGL Instance Thread".to_owned()) .spawn(move || { let setup = (|| { - let instance = unsafe { GetModuleHandleA(ptr::null()) }; - if instance.is_null() { - return Err(crate::InstanceError::with_source( + let instance = unsafe { LibraryLoader::GetModuleHandleA(None) }.map_err(|e| { + crate::InstanceError::with_source( String::from("unable to get executable instance"), - Error::last_os_error(), - )); - } + e, + ) + })?; // Create a hidden window since we don't pass `WS_VISIBLE`. 
let window = unsafe { - CreateWindowExA( - 0, - window_class.as_ptr(), - window_class.as_ptr(), - 0, + WindowsAndMessaging::CreateWindowExA( + WindowsAndMessaging::WINDOW_EX_STYLE::default(), + PCSTR(window_class.as_ptr().cast()), + PCSTR(window_class.as_ptr().cast()), + WindowsAndMessaging::WINDOW_STYLE::default(), 0, 0, 1, 1, - ptr::null_mut(), - ptr::null_mut(), + None, + None, instance, - ptr::null_mut(), + None, ) - }; - if window.is_null() { - return Err(crate::InstanceError::with_source( - String::from("unable to create hidden instance window"), - Error::last_os_error(), - )); } + .map_err(|e| { + crate::InstanceError::with_source( + String::from("unable to create hidden instance window"), + e, + ) + })?; let window = Window { window }; - let dc = unsafe { GetDC(window.window) }; - if dc.is_null() { + let dc = unsafe { Gdi::GetDC(window.window) }; + if dc.is_invalid() { return Err(crate::InstanceError::with_source( String::from("unable to create memory device"), - Error::last_os_error(), + Error::from_win32(), )); } let dc = DeviceContextHandle { - dc, + device: dc, window: window.window, }; - unsafe { setup_pixel_format(dc.dc)? }; + unsafe { setup_pixel_format(dc.device)? }; Ok((window, dc)) })(); match setup { Ok((_window, dc)) => { - setup_tx.send(Ok(SendDc(dc.dc))).unwrap(); + setup_tx.send(Ok(SendDc(dc.device))).unwrap(); // Wait for the shutdown event to free the window and device context handle. drop_rx.recv().ok(); } @@ -427,24 +405,25 @@ impl crate::Instance for Instance { unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { profiling::scope!("Init OpenGL (WGL) Backend"); - let opengl_module = unsafe { LoadLibraryA("opengl32.dll\0".as_ptr() as *const _) }; - if opengl_module.is_null() { - return Err(crate::InstanceError::with_source( - String::from("unable to load the OpenGL library"), - Error::last_os_error(), - )); - } + let opengl_module = + unsafe { LibraryLoader::LoadLibraryA(PCSTR("opengl32.dll\0".as_ptr())) }.map_err( + |e| { + crate::InstanceError::with_source( + String::from("unable to load the OpenGL library"), + e, + ) + }, + )?; let device = create_instance_device()?; let dc = device.dc; - let context = unsafe { wglCreateContext(dc) }; - if context.is_null() { - return Err(crate::InstanceError::with_source( + let context = unsafe { OpenGL::wglCreateContext(dc) }.map_err(|e| { + crate::InstanceError::with_source( String::from("unable to create initial OpenGL context"), - Error::last_os_error(), - )); - } + e, + ) + })?; let context = WglContext { context }; context.make_current(dc).map_err(|e| { crate::InstanceError::with_source( @@ -471,17 +450,16 @@ impl crate::Instance for Instance { }, 0, // End of list ]; - let context = unsafe { - extra.CreateContextAttribsARB(dc as *const _, ptr::null(), attributes.as_ptr()) - }; + let context = + unsafe { extra.CreateContextAttribsARB(dc.0, ptr::null(), attributes.as_ptr()) }; if context.is_null() { return Err(crate::InstanceError::with_source( String::from("unable to create OpenGL context"), - Error::last_os_error(), + Error::from_win32(), )); } WglContext { - context: context as *mut _, + context: OpenGL::HGLRC(context.cast_mut()), } } else { context @@ -550,13 +528,13 @@ impl crate::Instance for Instance { ))); }; Ok(Surface { - window: window.hwnd.get() as *mut _, + // This cast exists because of https://github.com/rust-windowing/raw-window-handle/issues/171 + window: Foundation::HWND(window.hwnd.get() as *mut _), presentable: true, swapchain: RwLock::new(None), srgb_capable: self.srgb_capable, }) } - unsafe 

     unsafe fn enumerate_adapters(
         &self,
@@ -573,14 +551,14 @@ impl crate::Instance for Instance {
 }

 struct DeviceContextHandle {
-    device: HDC,
-    window: HWND,
+    device: Gdi::HDC,
+    window: Foundation::HWND,
 }
 impl Drop for DeviceContextHandle {
     fn drop(&mut self) {
         unsafe {
-            ReleaseDC(self.window, self.device);
+            Gdi::ReleaseDC(self.window, self.device);
         };
     }
 }
@@ -599,7 +577,7 @@ pub struct Swapchain {
 }

 pub struct Surface {
-    window: HWND,
+    window: Foundation::HWND,
     pub(super) presentable: bool,
     swapchain: RwLock<Option<Swapchain>>,
     srgb_capable: bool,
@@ -616,11 +594,11 @@ impl Surface {
     ) -> Result<(), crate::SurfaceError> {
         let swapchain = self.swapchain.read();
         let sc = swapchain.as_ref().unwrap();
-        let dc = unsafe { GetDC(self.window) };
-        if dc.is_null() {
+        let dc = unsafe { Gdi::GetDC(self.window) };
+        if dc.is_invalid() {
             log::error!(
                 "unable to get the device context from window: {}",
-                Error::last_os_error()
+                Error::from_win32()
             );
             return Err(crate::SurfaceError::Other(
                 "unable to get the device context from window",
@@ -670,8 +648,8 @@ impl Surface {
         unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) };
         unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) };

-        if unsafe { SwapBuffers(dc.device) } == FALSE {
-            log::error!("unable to swap buffers: {}", Error::last_os_error());
+        if let Err(e) = unsafe { OpenGL::SwapBuffers(dc.device) } {
+            log::error!("unable to swap buffers: {e}");
             return Err(crate::SurfaceError::Other("unable to swap buffers"));
         }

@@ -694,11 +672,11 @@ impl crate::Surface for Surface {
         // Remove the old configuration.
         unsafe { self.unconfigure(device) };

-        let dc = unsafe { GetDC(self.window) };
-        if dc.is_null() {
+        let dc = unsafe { Gdi::GetDC(self.window) };
+        if dc.is_invalid() {
             log::error!(
                 "unable to get the device context from window: {}",
-                Error::last_os_error()
+                Error::from_win32()
             );
             return Err(crate::SurfaceError::Other(
                 "unable to get the device context from window",
@@ -771,8 +749,8 @@ impl crate::Surface for Surface {
             }
         };

-        if unsafe { extra.SwapIntervalEXT(if vsync { 1 } else { 0 }) } == FALSE {
-            log::error!("unable to set swap interval: {}", Error::last_os_error());
+        if unsafe { extra.SwapIntervalEXT(if vsync { 1 } else { 0 }) } == Foundation::FALSE.0 {
+            log::error!("unable to set swap interval: {}", Error::from_win32());
             return Err(crate::SurfaceError::Other("unable to set swap interval"));
         }

diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index e63f25ab07..f26b6925cc 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -1,214 +1,211 @@
-/*! A cross-platform unsafe graphics abstraction.
- *
- * This crate defines a set of traits abstracting over modern graphics APIs,
- * with implementations ("backends") for Vulkan, Metal, Direct3D, and GL.
- *
- * `wgpu-hal` is a spiritual successor to
- * [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and
- * oriented towards WebGPU implementation goals. It has no overhead for
- * validation or tracking, and the API translation overhead is kept to the bare
- * minimum by the design of WebGPU. This API can be used for resource-demanding
- * applications and engines.
- *
- * The `wgpu-hal` crate's main design choices:
- *
- * - Our traits are meant to be *portable*: proper use
- * should get equivalent results regardless of the backend.
- *
- * - Our traits' contracts are *unsafe*: implementations perform minimal
- * validation, if any, and incorrect use will often cause undefined behavior.
- * This allows us to minimize the overhead we impose over the underlying - * graphics system. If you need safety, the [`wgpu-core`] crate provides a - * safe API for driving `wgpu-hal`, implementing all necessary validation, - * resource state tracking, and so on. (Note that `wgpu-core` is designed for - * use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for - * `wgpu-core`.) Or, you can do your own validation. - * - * - In the same vein, returned errors *only cover cases the user can't - * anticipate*, like running out of memory or losing the device. Any errors - * that the user could reasonably anticipate are their responsibility to - * avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's - * not mappable: as the buffer creator, the user should already know if they - * can map it. - * - * - We use *static dispatch*. The traits are not - * generally object-safe. You must select a specific backend type - * like [`vulkan::Api`] or [`metal::Api`], and then use that - * according to the main traits, or call backend-specific methods. - * - * - We use *idiomatic Rust parameter passing*, - * taking objects by reference, returning them by value, and so on, - * unlike `wgpu-core`, which refers to objects by ID. - * - * - We map buffer contents *persistently*. This means that the buffer can - * remain mapped on the CPU while the GPU reads or writes to it. You must - * explicitly indicate when data might need to be transferred between CPU and - * GPU, if [`Device::map_buffer`] indicates that this is necessary. - * - * - You must record *explicit barriers* between different usages of a - * resource. For example, if a buffer is written to by a compute - * shader, and then used as and index buffer to a draw call, you - * must use [`CommandEncoder::transition_buffers`] between those two - * operations. - * - * - Pipeline layouts are *explicitly specified* when setting bind - * group. Incompatible layouts disturb groups bound at higher indices. - * - * - The API *accepts collections as iterators*, to avoid forcing the user to - * store data in particular containers. The implementation doesn't guarantee - * that any of the iterators are drained, unless stated otherwise by the - * function documentation. For this reason, we recommend that iterators don't - * do any mutating work. - * - * Unfortunately, `wgpu-hal`'s safety requirements are not fully documented. - * Ideally, all trait methods would have doc comments setting out the - * requirements users must meet to ensure correct and portable behavior. If you - * are aware of a specific requirement that a backend imposes that is not - * ensured by the traits' documented rules, please file an issue. Or, if you are - * a capable technical writer, please file a pull request! - * - * [`wgpu-core`]: https://crates.io/crates/wgpu-core - * [`wgpu`]: https://crates.io/crates/wgpu - * [`vulkan::Api`]: vulkan/struct.Api.html - * [`metal::Api`]: metal/struct.Api.html - * - * ## Primary backends - * - * The `wgpu-hal` crate has full-featured backends implemented on the following - * platform graphics APIs: - * - * - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's - * Vulkan bindings. It's also available on macOS, if you install [MoltenVK]. - * - * - Metal on macOS, using the [`metal`] crate's bindings. - * - * - Direct3D 12 on Windows, using the [`d3d12`] crate's bindings. 
- * - * [`ash`]: https://crates.io/crates/ash - * [MoltenVK]: https://github.com/KhronosGroup/MoltenVK - * [`metal`]: https://crates.io/crates/metal - * [`d3d12`]: ahttps://crates.io/crates/d3d12 - * - * ## Secondary backends - * - * The `wgpu-hal` crate has a partial implementation based on the following - * platform graphics API: - * - * - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are - * available. See the [`gles`] module documentation for details. - * - * [`gles`]: gles/index.html - * - * You can see what capabilities an adapter is missing by checking the - * [`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available - * from [`Instance::enumerate_adapters`]. - * - * The API is generally designed to fit the primary backends better than the - * secondary backends, so the latter may impose more overhead. - * - * [tdc]: wgt::DownlevelCapabilities - * - * ## Traits - * - * The `wgpu-hal` crate defines a handful of traits that together - * represent a cross-platform abstraction for modern GPU APIs. - * - * - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its - * own, only a collection of associated types. - * - * - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`] - * creates an instance value, which you can use to enumerate the adapters - * available on the system. For example, [`vulkan::Api::Instance::init`][Ii] - * returns an instance that can enumerate the Vulkan physical devices on your - * system. - * - * - [`Api::Adapter`] implements the [`Adapter`] trait, representing a - * particular device from a particular backend. For example, a Vulkan instance - * might have a Lavapipe software adapter and a GPU-based adapter. - * - * - [`Api::Device`] implements the [`Device`] trait, representing an active - * link to a device. You get a device value by calling [`Adapter::open`], and - * then use it to create buffers, textures, shader modules, and so on. - * - * - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit - * command buffers to a given device. - * - * - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you - * use to build buffers of commands to submit to a queue. This has all the - * methods for drawing and running compute shaders, which is presumably what - * you're here for. - * - * - [`Api::Surface`] implements the [`Surface`] trait, which represents a - * swapchain for presenting images on the screen, via interaction with the - * system's window manager. - * - * The [`Api`] trait has various other associated types like [`Api::Buffer`] and - * [`Api::Texture`] that represent resources the rest of the interface can - * operate on, but these generally do not have their own traits. - * - * [Ii]: Instance::init - * - * ## Validation is the calling code's responsibility, not `wgpu-hal`'s - * - * As much as possible, `wgpu-hal` traits place the burden of validation, - * resource tracking, and state tracking on the caller, not on the trait - * implementations themselves. Anything which can reasonably be handled in - * backend-independent code should be. A `wgpu_hal` backend's sole obligation is - * to provide portable behavior, and report conditions that the calling code - * can't reasonably anticipate, like device loss or running out of memory. - * - * The `wgpu` crate collection is intended for use in security-sensitive - * applications, like web browsers, where the API is available to untrusted - * code. 
This means that `wgpu-core`'s validation is not simply a service to - * developers, to be provided opportunistically when the performance costs are - * acceptable and the necessary data is ready at hand. Rather, `wgpu-core`'s - * validation must be exhaustive, to ensure that even malicious content cannot - * provoke and exploit undefined behavior in the platform's graphics API. - * - * Because graphics APIs' requirements are complex, the only practical way for - * `wgpu` to provide exhaustive validation is to comprehensively track the - * lifetime and state of all the resources in the system. Implementing this - * separately for each backend is infeasible; effort would be better spent - * making the cross-platform validation in `wgpu-core` legible and trustworthy. - * Fortunately, the requirements are largely similar across the various - * platforms, so cross-platform validation is practical. - * - * Some backends have specific requirements that aren't practical to foist off - * on the `wgpu-hal` user. For example, properly managing macOS Objective-C or - * Microsoft COM reference counts is best handled by using appropriate pointer - * types within the backend. - * - * A desire for "defense in depth" may suggest performing additional validation - * in `wgpu-hal` when the opportunity arises, but this must be done with - * caution. Even experienced contributors infer the expectations their changes - * must meet by considering not just requirements made explicit in types, tests, - * assertions, and comments, but also those implicit in the surrounding code. - * When one sees validation or state-tracking code in `wgpu-hal`, it is tempting - * to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry - * about it - that would be redundant!" The responsibility for exhaustive - * validation always rests with `wgpu-core`, regardless of what may or may not - * be checked in `wgpu-hal`. - * - * To this end, any "defense in depth" validation that does appear in `wgpu-hal` - * for requirements that `wgpu-core` should have enforced should report failure - * via the `unreachable!` macro, because problems detected at this stage always - * indicate a bug in `wgpu-core`. - * - * ## Debugging - * - * Most of the information on the wiki [Debugging wgpu Applications][wiki-debug] - * page still applies to this API, with the exception of API tracing/replay - * functionality, which is only available in `wgpu-core`. - * - * [wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications - */ +//! A cross-platform unsafe graphics abstraction. +//! +//! This crate defines a set of traits abstracting over modern graphics APIs, +//! with implementations ("backends") for Vulkan, Metal, Direct3D, and GL. +//! +//! `wgpu-hal` is a spiritual successor to +//! [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and +//! oriented towards WebGPU implementation goals. It has no overhead for +//! validation or tracking, and the API translation overhead is kept to the bare +//! minimum by the design of WebGPU. This API can be used for resource-demanding +//! applications and engines. +//! +//! The `wgpu-hal` crate's main design choices: +//! +//! - Our traits are meant to be *portable*: proper use +//! should get equivalent results regardless of the backend. +//! +//! - Our traits' contracts are *unsafe*: implementations perform minimal +//! validation, if any, and incorrect use will often cause undefined behavior. +//! 
This allows us to minimize the overhead we impose over the underlying
+//! graphics system. If you need safety, the [`wgpu-core`] crate provides a
+//! safe API for driving `wgpu-hal`, implementing all necessary validation,
+//! resource state tracking, and so on. (Note that `wgpu-core` is designed for
+//! use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for
+//! `wgpu-core`.) Or, you can do your own validation.
+//!
+//! - In the same vein, returned errors *only cover cases the user can't
+//! anticipate*, like running out of memory or losing the device. Any errors
+//! that the user could reasonably anticipate are their responsibility to
+//! avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's
+//! not mappable: as the buffer creator, the user should already know if they
+//! can map it.
+//!
+//! - We use *static dispatch*. The traits are not
+//! generally object-safe. You must select a specific backend type
+//! like [`vulkan::Api`] or [`metal::Api`], and then use that
+//! according to the main traits, or call backend-specific methods.
+//!
+//! - We use *idiomatic Rust parameter passing*,
+//! taking objects by reference, returning them by value, and so on,
+//! unlike `wgpu-core`, which refers to objects by ID.
+//!
+//! - We map buffer contents *persistently*. This means that the buffer can
+//! remain mapped on the CPU while the GPU reads or writes to it. You must
+//! explicitly indicate when data might need to be transferred between CPU and
+//! GPU, if [`Device::map_buffer`] indicates that this is necessary.
+//!
+//! - You must record *explicit barriers* between different usages of a
+//! resource. For example, if a buffer is written to by a compute
+//! shader, and then used as an index buffer to a draw call, you
+//! must use [`CommandEncoder::transition_buffers`] between those two
+//! operations.
+//!
+//! - Pipeline layouts are *explicitly specified* when setting bind
+//! groups. Incompatible layouts disturb groups bound at higher indices.
+//!
+//! - The API *accepts collections as iterators*, to avoid forcing the user to
+//! store data in particular containers. The implementation doesn't guarantee
+//! that any of the iterators are drained, unless stated otherwise by the
+//! function documentation. For this reason, we recommend that iterators don't
+//! do any mutating work.
+//!
+//! Unfortunately, `wgpu-hal`'s safety requirements are not fully documented.
+//! Ideally, all trait methods would have doc comments setting out the
+//! requirements users must meet to ensure correct and portable behavior. If you
+//! are aware of a specific requirement that a backend imposes that is not
+//! ensured by the traits' documented rules, please file an issue. Or, if you are
+//! a capable technical writer, please file a pull request!
+//!
+//! [`wgpu-core`]: https://crates.io/crates/wgpu-core
+//! [`wgpu`]: https://crates.io/crates/wgpu
+//! [`vulkan::Api`]: vulkan/struct.Api.html
+//! [`metal::Api`]: metal/struct.Api.html
+//!
+//! ## Primary backends
+//!
+//! The `wgpu-hal` crate has full-featured backends implemented on the following
+//! platform graphics APIs:
+//!
+//! - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's
+//! Vulkan bindings. It's also available on macOS, if you install [MoltenVK].
+//!
+//! - Metal on macOS, using the [`metal`] crate's bindings.
+//!
+//! - Direct3D 12 on Windows, using the [`d3d12`] crate's bindings.
+//!
+//! [`ash`]: https://crates.io/crates/ash
[MoltenVK]: https://github.com/KhronosGroup/MoltenVK
+//! [`metal`]: https://crates.io/crates/metal
+//! [`d3d12`]: https://crates.io/crates/d3d12
+//!
+//! ## Secondary backends
+//!
+//! The `wgpu-hal` crate has a partial implementation based on the following
+//! platform graphics API:
+//!
+//! - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are
+//! available. See the [`gles`] module documentation for details.
+//!
+//! [`gles`]: gles/index.html
+//!
+//! You can see what capabilities an adapter is missing by checking the
+//! [`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available
+//! from [`Instance::enumerate_adapters`].
+//!
+//! The API is generally designed to fit the primary backends better than the
+//! secondary backends, so the latter may impose more overhead.
+//!
+//! [tdc]: wgt::DownlevelCapabilities
+//!
+//! ## Traits
+//!
+//! The `wgpu-hal` crate defines a handful of traits that together
+//! represent a cross-platform abstraction for modern GPU APIs.
+//!
+//! - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its
+//! own, only a collection of associated types.
+//!
+//! - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`]
+//! creates an instance value, which you can use to enumerate the adapters
+//! available on the system. For example, [`vulkan::Api::Instance::init`][Ii]
+//! returns an instance that can enumerate the Vulkan physical devices on your
+//! system.
+//!
+//! - [`Api::Adapter`] implements the [`Adapter`] trait, representing a
+//! particular device from a particular backend. For example, a Vulkan instance
+//! might have a Lavapipe software adapter and a GPU-based adapter.
+//!
+//! - [`Api::Device`] implements the [`Device`] trait, representing an active
+//! link to a device. You get a device value by calling [`Adapter::open`], and
+//! then use it to create buffers, textures, shader modules, and so on.
+//!
+//! - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit
+//! command buffers to a given device.
+//!
+//! - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you
+//! use to build buffers of commands to submit to a queue. This has all the
+//! methods for drawing and running compute shaders, which is presumably what
+//! you're here for.
+//!
+//! - [`Api::Surface`] implements the [`Surface`] trait, which represents a
+//! swapchain for presenting images on the screen, via interaction with the
+//! system's window manager.
+//!
+//! The [`Api`] trait has various other associated types like [`Api::Buffer`] and
+//! [`Api::Texture`] that represent resources the rest of the interface can
+//! operate on, but these generally do not have their own traits.
+//!
+//! [Ii]: Instance::init
+//!
+//! ## Validation is the calling code's responsibility, not `wgpu-hal`'s
+//!
+//! As much as possible, `wgpu-hal` traits place the burden of validation,
+//! resource tracking, and state tracking on the caller, not on the trait
+//! implementations themselves. Anything which can reasonably be handled in
+//! backend-independent code should be. A `wgpu_hal` backend's sole obligation is
+//! to provide portable behavior, and report conditions that the calling code
+//! can't reasonably anticipate, like device loss or running out of memory.
+//!
+//! The `wgpu` crate collection is intended for use in security-sensitive
+//! applications, like web browsers, where the API is available to untrusted
+//! code.
This means that `wgpu-core`'s validation is not simply a service to +//! developers, to be provided opportunistically when the performance costs are +//! acceptable and the necessary data is ready at hand. Rather, `wgpu-core`'s +//! validation must be exhaustive, to ensure that even malicious content cannot +//! provoke and exploit undefined behavior in the platform's graphics API. +//! +//! Because graphics APIs' requirements are complex, the only practical way for +//! `wgpu` to provide exhaustive validation is to comprehensively track the +//! lifetime and state of all the resources in the system. Implementing this +//! separately for each backend is infeasible; effort would be better spent +//! making the cross-platform validation in `wgpu-core` legible and trustworthy. +//! Fortunately, the requirements are largely similar across the various +//! platforms, so cross-platform validation is practical. +//! +//! Some backends have specific requirements that aren't practical to foist off +//! on the `wgpu-hal` user. For example, properly managing macOS Objective-C or +//! Microsoft COM reference counts is best handled by using appropriate pointer +//! types within the backend. +//! +//! A desire for "defense in depth" may suggest performing additional validation +//! in `wgpu-hal` when the opportunity arises, but this must be done with +//! caution. Even experienced contributors infer the expectations their changes +//! must meet by considering not just requirements made explicit in types, tests, +//! assertions, and comments, but also those implicit in the surrounding code. +//! When one sees validation or state-tracking code in `wgpu-hal`, it is tempting +//! to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry +//! about it - that would be redundant!" The responsibility for exhaustive +//! validation always rests with `wgpu-core`, regardless of what may or may not +//! be checked in `wgpu-hal`. +//! +//! To this end, any "defense in depth" validation that does appear in `wgpu-hal` +//! for requirements that `wgpu-core` should have enforced should report failure +//! via the `unreachable!` macro, because problems detected at this stage always +//! indicate a bug in `wgpu-core`. +//! +//! ## Debugging +//! +//! Most of the information on the wiki [Debugging wgpu Applications][wiki-debug] +//! page still applies to this API, with the exception of API tracing/replay +//! functionality, which is only available in `wgpu-core`. +//! +//! [wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![allow( // this happens on the GL backend, where it is both thread safe and non-thread safe in the same code. clippy::arc_with_non_send_sync, - // for `if_then_panic` until it reaches stable - unknown_lints, // We don't use syntax sugar where it's not necessary. clippy::match_like_matches_macro, // Redundant matching is more explicit. @@ -221,8 +218,6 @@ clippy::single_match, // Push commands are more regular than macros. clippy::vec_init_then_push, - // "if panic" is a good uniform construct. - clippy::if_then_panic, // We unsafe impl `Send` for a reason. clippy::non_send_fields_in_send_ty, // TODO! 
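The next hunks introduce the `Dyn*` traits that back the new dynamic-dispatch mechanism. A minimal, std-only sketch of the erasure pattern they enable — illustrative names only, not wgpu-hal's actual definitions:

use std::fmt;

// Object-safe marker trait standing in for a wgpu-hal `Dyn*` resource trait.
trait DynBuffer: fmt::Debug {}

// A hypothetical backend-specific resource type.
#[derive(Debug)]
struct MetalBuffer;
impl DynBuffer for MetalBuffer {}

// A caller such as wgpu-core can now store buffers from any backend uniformly,
// without being generic over an `A: Api` type parameter.
fn erase(buffer: MetalBuffer) -> Box<dyn DynBuffer> {
    Box::new(buffer)
}

fn main() {
    let erased: Box<dyn DynBuffer> = erase(MetalBuffer);
    println!("{erased:?}");
}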
@@ -231,6 +226,7 @@
     clippy::pattern_type_mismatch,
 )]
 #![warn(
+    clippy::ptr_as_ptr,
     trivial_casts,
     trivial_numeric_casts,
     unsafe_op_in_unsafe_fn,
@@ -266,6 +262,17 @@ pub mod api {
     pub use super::vulkan::Api as Vulkan;
 }

+mod dynamic;
+
+pub(crate) use dynamic::impl_dyn_resource;
+pub use dynamic::{
+    DynAccelerationStructure, DynAcquiredSurfaceTexture, DynAdapter, DynBindGroup,
+    DynBindGroupLayout, DynBuffer, DynCommandBuffer, DynCommandEncoder, DynComputePipeline,
+    DynDevice, DynExposedAdapter, DynFence, DynInstance, DynOpenDevice, DynPipelineCache,
+    DynPipelineLayout, DynQuerySet, DynQueue, DynRenderPipeline, DynResource, DynSampler,
+    DynShaderModule, DynSurface, DynSurfaceTexture, DynTexture, DynTextureView,
+};
+
 use std::{
     borrow::{Borrow, Cow},
     fmt,
@@ -294,6 +301,7 @@ pub const QUERY_SIZE: wgt::BufferAddress = 8;
 pub type Label<'a> = Option<&'a str>;
 pub type MemoryRange = Range<wgt::BufferAddress>;
 pub type FenceValue = u64;
+pub type AtomicFenceValue = std::sync::atomic::AtomicU64;

 /// Drop guard to signal wgpu-hal is no longer using an externally created object.
 pub type DropGuard = Box<dyn std::any::Any + Send + Sync>;
@@ -324,6 +332,8 @@ pub enum PipelineError {
     EntryPoint(naga::ShaderStage),
     #[error(transparent)]
     Device(#[from] DeviceError),
+    #[error("Pipeline constant error for stage {0:?}: {1}")]
+    PipelineConstants(wgt::ShaderStages, String),
 }

 #[derive(Clone, Debug, Eq, PartialEq, Error)]
@@ -382,13 +392,13 @@ impl InstanceError {
 }

 pub trait Api: Clone + fmt::Debug + Sized {
-    type Instance: Instance<A = Self>;
-    type Surface: Surface<A = Self>;
-    type Adapter: Adapter<A = Self>;
-    type Device: Device<A = Self>;
+    type Instance: DynInstance + Instance<A = Self>;
+    type Surface: DynSurface + Surface<A = Self>;
+    type Adapter: DynAdapter + Adapter<A = Self>;
+    type Device: DynDevice + Device<A = Self>;

-    type Queue: Queue<A = Self>;
-    type CommandEncoder: CommandEncoder<A = Self>;
+    type Queue: DynQueue + Queue<A = Self>;
+    type CommandEncoder: DynCommandEncoder + CommandEncoder<A = Self>;

     /// This API's command buffer type.
     ///
@@ -398,14 +408,14 @@ pub trait Api: Clone + fmt::Debug + Sized {
     /// them to [`CommandEncoder::reset_all`].
     ///
     /// [`CommandEncoder`]: Api::CommandEncoder
-    type CommandBuffer: WasmNotSendSync + fmt::Debug;
+    type CommandBuffer: DynCommandBuffer;

-    type Buffer: fmt::Debug + WasmNotSendSync + 'static;
-    type Texture: fmt::Debug + WasmNotSendSync + 'static;
-    type SurfaceTexture: fmt::Debug + WasmNotSendSync + Borrow<Self::Texture>;
-    type TextureView: fmt::Debug + WasmNotSendSync;
-    type Sampler: fmt::Debug + WasmNotSendSync;
-    type QuerySet: fmt::Debug + WasmNotSendSync;
+    type Buffer: DynBuffer;
+    type Texture: DynTexture;
+    type SurfaceTexture: DynSurfaceTexture + Borrow<Self::Texture>;
+    type TextureView: DynTextureView;
+    type Sampler: DynSampler;
+    type QuerySet: DynQuerySet;

     /// A value you can block on to wait for something to finish.
     ///
@@ -424,17 +434,17 @@ pub trait Api: Clone + fmt::Debug + Sized {
     /// before a lower-valued operation, then waiting for the fence to reach the
     /// lower value could return before the lower-valued operation has actually
     /// finished.
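A std-only model (not the hal implementation) of the fence contract described in the doc comment above — values only ever increase, and a wait for `v` is satisfied once the fence reaches *or passes* `v`:

use std::sync::atomic::{AtomicU64, Ordering};

struct FenceModel(AtomicU64);

impl FenceModel {
    // Signal completion of the work associated with `value`.
    // `fetch_max` keeps the stored value monotonic even if signals race.
    fn signal(&self, value: u64) {
        self.0.fetch_max(value, Ordering::Release);
    }

    // A wait for `value` may return as soon as this is true, which is why
    // operations must complete in order of increasing fence values.
    fn reached(&self, value: u64) -> bool {
        self.0.load(Ordering::Acquire) >= value
    }
}

fn main() {
    let fence = FenceModel(AtomicU64::new(0));
    fence.signal(2); // work for values 1 and 2 is done
    assert!(fence.reached(1) && fence.reached(2));
    assert!(!fence.reached(3));
}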
-    type Fence: fmt::Debug + WasmNotSendSync;
+    type Fence: DynFence;

-    type BindGroupLayout: fmt::Debug + WasmNotSendSync;
-    type BindGroup: fmt::Debug + WasmNotSendSync;
-    type PipelineLayout: fmt::Debug + WasmNotSendSync;
-    type ShaderModule: fmt::Debug + WasmNotSendSync;
-    type RenderPipeline: fmt::Debug + WasmNotSendSync;
-    type ComputePipeline: fmt::Debug + WasmNotSendSync;
-    type PipelineCache: fmt::Debug + WasmNotSendSync;
+    type BindGroupLayout: DynBindGroupLayout;
+    type BindGroup: DynBindGroup;
+    type PipelineLayout: DynPipelineLayout;
+    type ShaderModule: DynShaderModule;
+    type RenderPipeline: DynRenderPipeline;
+    type ComputePipeline: DynComputePipeline;
+    type PipelineCache: DynPipelineCache;

-    type AccelerationStructure: fmt::Debug + WasmNotSendSync + 'static;
+    type AccelerationStructure: DynAccelerationStructure + 'static;
 }

 pub trait Instance: Sized + WasmNotSendSync {
@@ -446,7 +456,6 @@ pub trait Instance: Sized + WasmNotSendSync {
         display_handle: raw_window_handle::RawDisplayHandle,
         window_handle: raw_window_handle::RawWindowHandle,
     ) -> Result<<Self::A as Api>::Surface, InstanceError>;
-    unsafe fn destroy_surface(&self, surface: <Self::A as Api>::Surface);
     /// `surface_hint` is only used by the GLES backend targeting WebGL2
     unsafe fn enumerate_adapters(
         &self,
@@ -714,9 +723,13 @@ pub trait Device: WasmNotSendSync {
     ///   be ordered, so it is meaningful to talk about what must occur
     ///   "between" them.
     ///
+    /// - Zero-sized mappings are not allowed.
+    ///
+    /// - The returned [`BufferMapping::ptr`] must not be used after a call to
+    ///   [`Device::unmap_buffer`].
+    ///
     /// [`MAP_READ`]: BufferUses::MAP_READ
     /// [`MAP_WRITE`]: BufferUses::MAP_WRITE
-    //TODO: clarify if zero-sized mapping is allowed
     unsafe fn map_buffer(
         &self,
         buffer: &<Self::A as Api>::Buffer,
@@ -728,7 +741,7 @@ pub trait Device: WasmNotSendSync {
     /// # Safety
     ///
     /// - The given `buffer` must be currently mapped.
-    unsafe fn unmap_buffer(&self, buffer: &<Self::A as Api>::Buffer) -> Result<(), DeviceError>;
+    unsafe fn unmap_buffer(&self, buffer: &<Self::A as Api>::Buffer);

     /// Indicate that CPU writes to mapped buffer memory should be made visible to the GPU.
     ///
@@ -777,7 +790,7 @@ pub trait Device: WasmNotSendSync {
     /// The new `CommandEncoder` is in the "closed" state.
     unsafe fn create_command_encoder(
         &self,
-        desc: &CommandEncoderDescriptor<Self::A>,
+        desc: &CommandEncoderDescriptor<<Self::A as Api>::Queue>,
     ) -> Result<<Self::A as Api>::CommandEncoder, DeviceError>;
     unsafe fn destroy_command_encoder(&self, pool: <Self::A as Api>::CommandEncoder);

@@ -789,12 +802,20 @@ pub trait Device: WasmNotSendSync {
     unsafe fn destroy_bind_group_layout(&self, bg_layout: <Self::A as Api>::BindGroupLayout);
     unsafe fn create_pipeline_layout(
         &self,
-        desc: &PipelineLayoutDescriptor<Self::A>,
+        desc: &PipelineLayoutDescriptor<<Self::A as Api>::BindGroupLayout>,
     ) -> Result<<Self::A as Api>::PipelineLayout, DeviceError>;
     unsafe fn destroy_pipeline_layout(&self, pipeline_layout: <Self::A as Api>::PipelineLayout);
+
+    #[allow(clippy::type_complexity)]
     unsafe fn create_bind_group(
         &self,
-        desc: &BindGroupDescriptor<Self::A>,
+        desc: &BindGroupDescriptor<
+            <Self::A as Api>::BindGroupLayout,
+            <Self::A as Api>::Buffer,
+            <Self::A as Api>::Sampler,
+            <Self::A as Api>::TextureView,
+            <Self::A as Api>::AccelerationStructure,
+        >,
     ) -> Result<<Self::A as Api>::BindGroup, DeviceError>;
     unsafe fn destroy_bind_group(&self, group: <Self::A as Api>::BindGroup);

@@ -804,16 +825,29 @@ pub trait Device: WasmNotSendSync {
         shader: ShaderInput,
     ) -> Result<<Self::A as Api>::ShaderModule, ShaderError>;
     unsafe fn destroy_shader_module(&self, module: <Self::A as Api>::ShaderModule);
+
+    #[allow(clippy::type_complexity)]
     unsafe fn create_render_pipeline(
         &self,
-        desc: &RenderPipelineDescriptor<Self::A>,
+        desc: &RenderPipelineDescriptor<
+            <Self::A as Api>::PipelineLayout,
+            <Self::A as Api>::ShaderModule,
+            <Self::A as Api>::PipelineCache,
+        >,
     ) -> Result<<Self::A as Api>::RenderPipeline, PipelineError>;
     unsafe fn destroy_render_pipeline(&self, pipeline: <Self::A as Api>::RenderPipeline);
+
+    #[allow(clippy::type_complexity)]
     unsafe fn create_compute_pipeline(
         &self,
-        desc: &ComputePipelineDescriptor<Self::A>,
+        desc: &ComputePipelineDescriptor<
+            <Self::A as Api>::PipelineLayout,
+            <Self::A as Api>::ShaderModule,
+            <Self::A as Api>::PipelineCache,
+        >,
     ) -> Result<<Self::A as Api>::ComputePipeline, PipelineError>;
     unsafe fn destroy_compute_pipeline(&self, pipeline: <Self::A as Api>::ComputePipeline);
+
     unsafe fn create_pipeline_cache(
         &self,
         desc: &PipelineCacheDescriptor<'_>,
@@ -877,7 +911,7 @@ pub trait Device: WasmNotSendSync {
     ) -> Result<<Self::A as Api>::AccelerationStructure, DeviceError>;
     unsafe fn get_acceleration_structure_build_sizes(
         &self,
-        desc: &GetAccelerationStructureBuildSizesDescriptor<Self::A>,
+        desc: &GetAccelerationStructureBuildSizesDescriptor<<Self::A as Api>::Buffer>,
     ) -> AccelerationStructureBuildSizes;
     unsafe fn get_acceleration_structure_device_address(
         &self,
@@ -889,6 +923,10 @@ pub trait Device: WasmNotSendSync {
     );

     fn get_internal_counters(&self) -> wgt::HalCounters;
+
+    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        None
+    }
 }

 pub trait Queue: WasmNotSendSync {
@@ -951,6 +989,9 @@ pub trait Queue: WasmNotSendSync {
     /// - All calls to this function that include a given [`SurfaceTexture`][st]
     ///   in `surface_textures` must use the same [`Fence`].
     ///
+    /// - The [`Fence`] passed as `signal_fence.0` must remain alive until
+    ///   all submissions that will signal it have completed.
+    ///
     /// [`Fence`]: Api::Fence
     /// [cb]: Api::CommandBuffer
     /// [ce]: Api::CommandEncoder
@@ -1089,11 +1130,11 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {

     unsafe fn transition_buffers<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = BufferBarrier<'a, Self::A>>;
+        T: Iterator<Item = BufferBarrier<'a, <Self::A as Api>::Buffer>>;

     unsafe fn transition_textures<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = TextureBarrier<'a, Self::A>>;
+        T: Iterator<Item = TextureBarrier<'a, <Self::A as Api>::Texture>>;

     // copy operations

@@ -1214,17 +1255,24 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
     // render passes

     // Begins a render pass, clears all active bindings.
-    unsafe fn begin_render_pass(&mut self, desc: &RenderPassDescriptor<Self::A>);
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &RenderPassDescriptor<<Self::A as Api>::QuerySet, <Self::A as Api>::TextureView>,
+    );
     unsafe fn end_render_pass(&mut self);

     unsafe fn set_render_pipeline(&mut self, pipeline: &<Self::A as Api>::RenderPipeline);

     unsafe fn set_index_buffer<'a>(
         &mut self,
-        binding: BufferBinding<'a, Self::A>,
+        binding: BufferBinding<'a, <Self::A as Api>::Buffer>,
         format: wgt::IndexFormat,
     );
-    unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: BufferBinding<'a, Self::A>);
+    unsafe fn set_vertex_buffer<'a>(
+        &mut self,
+        index: u32,
+        binding: BufferBinding<'a, <Self::A as Api>::Buffer>,
+    );
     unsafe fn set_viewport(&mut self, rect: &Rect<f32>, depth_range: Range<f32>);
     unsafe fn set_scissor_rect(&mut self, rect: &Rect<u32>);
     unsafe fn set_stencil_reference(&mut self, value: u32);
@@ -1277,7 +1325,10 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
     // compute passes

     // Begins a compute pass, clears all active bindings.
-    unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<Self::A>);
+    unsafe fn begin_compute_pass(
+        &mut self,
+        desc: &ComputePassDescriptor<<Self::A as Api>::QuerySet>,
+    );
     unsafe fn end_compute_pass(&mut self);

     unsafe fn set_compute_pipeline(&mut self, pipeline: &<Self::A as Api>::ComputePipeline);

@@ -1302,7 +1353,13 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
         descriptors: T,
     ) where
         Self::A: 'a,
-        T: IntoIterator<Item = BuildAccelerationStructureDescriptor<'a, Self::A>>;
+        T: IntoIterator<
+            Item = BuildAccelerationStructureDescriptor<
+                'a,
+                <Self::A as Api>::Buffer,
+                <Self::A as Api>::AccelerationStructure,
+            >,
+        >;

     unsafe fn place_acceleration_structure_barrier(
         &mut self,
@@ -1714,17 +1771,17 @@ pub struct BindGroupLayoutDescriptor<'a> {
 }

 #[derive(Clone, Debug)]
-pub struct PipelineLayoutDescriptor<'a, A: Api> {
+pub struct PipelineLayoutDescriptor<'a, B: DynBindGroupLayout + ?Sized> {
     pub label: Label<'a>,
     pub flags: PipelineLayoutFlags,
-    pub bind_group_layouts: &'a [&'a A::BindGroupLayout],
+    pub bind_group_layouts: &'a [&'a B],
     pub push_constant_ranges: &'a [wgt::PushConstantRange],
 }

 #[derive(Debug)]
-pub struct BufferBinding<'a, A: Api> {
+pub struct BufferBinding<'a, B: DynBuffer + ?Sized> {
     /// The buffer being bound.
-    pub buffer: &'a A::Buffer,
+    pub buffer: &'a B,

     /// The offset at which the bound region starts.
     ///
@@ -1747,10 +1804,9 @@ pub struct BufferBinding<'a, A: Api> {
     pub size: Option<wgt::BufferSize>,
 }

-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for BufferBinding<'_, A> {
+impl<'a, T: DynBuffer + ?Sized> Clone for BufferBinding<'a, T> {
     fn clone(&self) -> Self {
-        Self {
+        BufferBinding {
             buffer: self.buffer,
             offset: self.offset,
             size: self.size,
@@ -1759,15 +1815,14 @@ impl<A: Api> Clone for BufferBinding<'_, A> {
 }

 #[derive(Debug)]
-pub struct TextureBinding<'a, A: Api> {
-    pub view: &'a A::TextureView,
+pub struct TextureBinding<'a, T: DynTextureView + ?Sized> {
+    pub view: &'a T,
     pub usage: TextureUses,
 }

-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for TextureBinding<'_, A> {
+impl<'a, T: DynTextureView + ?Sized> Clone for TextureBinding<'a, T> {
     fn clone(&self) -> Self {
-        Self {
+        TextureBinding {
             view: self.view,
             usage: self.usage,
         }
@@ -1791,20 +1846,27 @@ pub struct BindGroupEntry {
 /// of the corresponding resource array, selected by the relevant
 /// `BindGroupLayoutEntry`.
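A std-only sketch of the descriptor refactor shown here: instead of a single `A: Api` parameter dragging in every associated type, each descriptor is parameterized only over the leaf resource traits it borrows, which also admits `?Sized` trait objects. The types below are illustrative stand-ins, not the real definitions:

trait DynBuffer {}

struct Buffer; // hypothetical backend buffer
impl DynBuffer for Buffer {}

struct BufferBinding<'a, B: DynBuffer + ?Sized> {
    buffer: &'a B,
    offset: u64,
}

// The same binding type works for a concrete backend buffer...
fn bind_concrete(buffer: &Buffer) -> BufferBinding<'_, Buffer> {
    BufferBinding { buffer, offset: 0 }
}

// ...and for a type-erased one, which an `A: Api` bound could not express.
fn bind_erased(buffer: &dyn DynBuffer) -> BufferBinding<'_, dyn DynBuffer> {
    BufferBinding { buffer, offset: 0 }
}

fn main() {
    let b = Buffer;
    let _ = bind_concrete(&b);
    let _ = bind_erased(&b);
}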
 #[derive(Clone, Debug)]
-pub struct BindGroupDescriptor<'a, A: Api> {
+pub struct BindGroupDescriptor<
+    'a,
+    Bgl: DynBindGroupLayout + ?Sized,
+    B: DynBuffer + ?Sized,
+    S: DynSampler + ?Sized,
+    T: DynTextureView + ?Sized,
+    A: DynAccelerationStructure + ?Sized,
+> {
     pub label: Label<'a>,
-    pub layout: &'a A::BindGroupLayout,
-    pub buffers: &'a [BufferBinding<'a, A>],
-    pub samplers: &'a [&'a A::Sampler],
-    pub textures: &'a [TextureBinding<'a, A>],
+    pub layout: &'a Bgl,
+    pub buffers: &'a [BufferBinding<'a, B>],
+    pub samplers: &'a [&'a S],
+    pub textures: &'a [TextureBinding<'a, T>],
     pub entries: &'a [BindGroupEntry],
-    pub acceleration_structures: &'a [&'a A::AccelerationStructure],
+    pub acceleration_structures: &'a [&'a A],
 }

 #[derive(Clone, Debug)]
-pub struct CommandEncoderDescriptor<'a, A: Api> {
+pub struct CommandEncoderDescriptor<'a, Q: DynQueue + ?Sized> {
     pub label: Label<'a>,
-    pub queue: &'a A::Queue,
+    pub queue: &'a Q,
 }

 /// Naga shader module.
@@ -1845,9 +1907,9 @@ pub struct DebugSource {

 /// Describes a programmable pipeline stage.
 #[derive(Debug)]
-pub struct ProgrammableStage<'a, A: Api> {
+pub struct ProgrammableStage<'a, M: DynShaderModule + ?Sized> {
     /// The compiled shader module for this stage.
-    pub module: &'a A::ShaderModule,
+    pub module: &'a M,
     /// The name of the entry point in the compiled shader. There must be a function with this name
     /// in the shader.
     pub entry_point: &'a str,
@@ -1858,33 +1920,34 @@ pub struct ProgrammableStage<'a, A: Api> {
     /// This is required by the WebGPU spec, but may have overhead which can be avoided
     /// for cross-platform applications
     pub zero_initialize_workgroup_memory: bool,
-    /// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
-    pub vertex_pulling_transform: bool,
 }

-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for ProgrammableStage<'_, A> {
+impl<M: DynShaderModule + ?Sized> Clone for ProgrammableStage<'_, M> {
     fn clone(&self) -> Self {
         Self {
             module: self.module,
             entry_point: self.entry_point,
             constants: self.constants,
             zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
-            vertex_pulling_transform: self.vertex_pulling_transform,
         }
     }
 }

 /// Describes a compute pipeline.
 #[derive(Clone, Debug)]
-pub struct ComputePipelineDescriptor<'a, A: Api> {
+pub struct ComputePipelineDescriptor<
+    'a,
+    Pl: DynPipelineLayout + ?Sized,
+    M: DynShaderModule + ?Sized,
+    Pc: DynPipelineCache + ?Sized,
+> {
     pub label: Label<'a>,
     /// The layout of bind groups for this pipeline.
-    pub layout: &'a A::PipelineLayout,
+    pub layout: &'a Pl,
     /// The compiled compute stage and its entry point.
-    pub stage: ProgrammableStage<'a, A>,
+    pub stage: ProgrammableStage<'a, M>,
     /// The cache which will be used and filled when compiling this pipeline
-    pub cache: Option<&'a A::PipelineCache>,
+    pub cache: Option<&'a Pc>,
 }

 pub struct PipelineCacheDescriptor<'a> {
@@ -1905,14 +1968,19 @@ pub struct VertexBufferLayout<'a> {

 /// Describes a render (graphics) pipeline.
 #[derive(Clone, Debug)]
-pub struct RenderPipelineDescriptor<'a, A: Api> {
+pub struct RenderPipelineDescriptor<
+    'a,
+    Pl: DynPipelineLayout + ?Sized,
+    M: DynShaderModule + ?Sized,
+    Pc: DynPipelineCache + ?Sized,
+> {
     pub label: Label<'a>,
     /// The layout of bind groups for this pipeline.
-    pub layout: &'a A::PipelineLayout,
+    pub layout: &'a Pl,
     /// The format of any vertex buffers used with this pipeline.
     pub vertex_buffers: &'a [VertexBufferLayout<'a>],
     /// The vertex stage for this pipeline.
-    pub vertex_stage: ProgrammableStage<'a, A>,
+    pub vertex_stage: ProgrammableStage<'a, M>,
     /// The properties of the pipeline at the primitive assembly and rasterization level.
     pub primitive: wgt::PrimitiveState,
     /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
@@ -1920,14 +1988,14 @@ pub struct RenderPipelineDescriptor<'a, A: Api> {
     /// The multi-sampling properties of the pipeline.
     pub multisample: wgt::MultisampleState,
     /// The fragment stage for this pipeline.
-    pub fragment_stage: Option<ProgrammableStage<'a, A>>,
+    pub fragment_stage: Option<ProgrammableStage<'a, M>>,
     /// The effect of draw calls on the color aspect of the output target.
     pub color_targets: &'a [Option<wgt::ColorTargetState>],
     /// If the pipeline will be used with a multiview render pass, this indicates how many array
     /// layers the attachments will have.
     pub multiview: Option<NonZeroU32>,
     /// The cache which will be used and filled when compiling this pipeline
-    pub cache: Option<&'a A::PipelineCache>,
+    pub cache: Option<&'a Pc>,
 }

 #[derive(Debug, Clone)]
@@ -1960,14 +2028,14 @@ pub struct Rect<T> {
 }

 #[derive(Debug, Clone)]
-pub struct BufferBarrier<'a, A: Api> {
-    pub buffer: &'a A::Buffer,
+pub struct BufferBarrier<'a, B: DynBuffer + ?Sized> {
+    pub buffer: &'a B,
     pub usage: Range<BufferUses>,
 }

 #[derive(Debug, Clone)]
-pub struct TextureBarrier<'a, A: Api> {
-    pub texture: &'a A::Texture,
+pub struct TextureBarrier<'a, T: DynTexture + ?Sized> {
+    pub texture: &'a T,
     pub range: wgt::ImageSubresourceRange,
     pub usage: Range<TextureUses>,
 }
@@ -2010,104 +2078,53 @@ pub struct BufferTextureCopy {
     pub size: CopyExtent,
 }

-#[derive(Debug)]
-pub struct Attachment<'a, A: Api> {
-    pub view: &'a A::TextureView,
+#[derive(Clone, Debug)]
+pub struct Attachment<'a, T: DynTextureView + ?Sized> {
+    pub view: &'a T,
     /// Contains either a single mutating usage as a target,
     /// or a valid combination of read-only usages.
     pub usage: TextureUses,
 }

-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for Attachment<'_, A> {
-    fn clone(&self) -> Self {
-        Self {
-            view: self.view,
-            usage: self.usage,
-        }
-    }
-}
-
-#[derive(Debug)]
-pub struct ColorAttachment<'a, A: Api> {
-    pub target: Attachment<'a, A>,
-    pub resolve_target: Option<Attachment<'a, A>>,
+#[derive(Clone, Debug)]
+pub struct ColorAttachment<'a, T: DynTextureView + ?Sized> {
+    pub target: Attachment<'a, T>,
+    pub resolve_target: Option<Attachment<'a, T>>,
     pub ops: AttachmentOps,
     pub clear_value: wgt::Color,
 }

-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for ColorAttachment<'_, A> {
-    fn clone(&self) -> Self {
-        Self {
-            target: self.target.clone(),
-            resolve_target: self.resolve_target.clone(),
-            ops: self.ops,
-            clear_value: self.clear_value,
-        }
-    }
-}
-
 #[derive(Clone, Debug)]
-pub struct DepthStencilAttachment<'a, A: Api> {
-    pub target: Attachment<'a, A>,
+pub struct DepthStencilAttachment<'a, T: DynTextureView + ?Sized> {
+    pub target: Attachment<'a, T>,
     pub depth_ops: AttachmentOps,
     pub stencil_ops: AttachmentOps,
     pub clear_value: (f32, u32),
 }

-#[derive(Debug)]
-pub struct RenderPassTimestampWrites<'a, A: Api> {
-    pub query_set: &'a A::QuerySet,
+#[derive(Clone, Debug)]
+pub struct PassTimestampWrites<'a, Q: DynQuerySet + ?Sized> {
+    pub query_set: &'a Q,
     pub beginning_of_pass_write_index: Option<u32>,
     pub end_of_pass_write_index: Option<u32>,
 }

-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for RenderPassTimestampWrites<'_, A> {
-    fn clone(&self) -> Self {
-        Self {
-            query_set: self.query_set,
-            beginning_of_pass_write_index: self.beginning_of_pass_write_index,
-            end_of_pass_write_index: self.end_of_pass_write_index,
-        }
-    }
-}
-
 #[derive(Clone, Debug)]
-pub struct RenderPassDescriptor<'a, A: Api> {
+pub struct RenderPassDescriptor<'a, Q: DynQuerySet + ?Sized, T: DynTextureView + ?Sized> {
     pub label: Label<'a>,
     pub extent: wgt::Extent3d,
     pub sample_count: u32,
-    pub color_attachments: &'a [Option<ColorAttachment<'a, A>>],
-    pub depth_stencil_attachment: Option<DepthStencilAttachment<'a, A>>,
+    pub color_attachments: &'a [Option<ColorAttachment<'a, T>>],
+    pub depth_stencil_attachment: Option<DepthStencilAttachment<'a, T>>,
     pub multiview: Option<NonZeroU32>,
-    pub timestamp_writes: Option<RenderPassTimestampWrites<'a, A>>,
-    pub occlusion_query_set: Option<&'a A::QuerySet>,
-}
-
-#[derive(Debug)]
-pub struct ComputePassTimestampWrites<'a, A: Api> {
-    pub query_set: &'a A::QuerySet,
-    pub beginning_of_pass_write_index: Option<u32>,
-    pub end_of_pass_write_index: Option<u32>,
-}
-
-// Rust gets confused about the impl requirements for `A`
-impl<A: Api> Clone for ComputePassTimestampWrites<'_, A> {
-    fn clone(&self) -> Self {
-        Self {
-            query_set: self.query_set,
-            beginning_of_pass_write_index: self.beginning_of_pass_write_index,
-            end_of_pass_write_index: self.end_of_pass_write_index,
-        }
-    }
+    pub timestamp_writes: Option<PassTimestampWrites<'a, Q>>,
+    pub occlusion_query_set: Option<&'a Q>,
 }

 #[derive(Clone, Debug)]
-pub struct ComputePassDescriptor<'a, A: Api> {
+pub struct ComputePassDescriptor<'a, Q: DynQuerySet + ?Sized> {
     pub label: Label<'a>,
-    pub timestamp_writes: Option<ComputePassTimestampWrites<'a, A>>,
+    pub timestamp_writes: Option<PassTimestampWrites<'a, Q>>,
 }

 /// Stores the text of any validation errors that have occurred since
@@ -2182,24 +2199,28 @@ pub struct AccelerationStructureBuildSizes {

 /// Updates use source_acceleration_structure if present, else the update will be performed in place.
 /// For updates, only the data is allowed to change (not the meta data or sizes).
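A std-only sketch of the unification above: the render- and compute-pass timestamp-write structs were field-for-field identical, so one struct, generic over the query-set type, now serves both kinds of pass. Illustrative types, not the real definitions:

trait DynQuerySet {}

struct PassTimestampWrites<'a, Q: DynQuerySet + ?Sized> {
    query_set: &'a Q,
    beginning_of_pass_write_index: Option<u32>,
    end_of_pass_write_index: Option<u32>,
}

// Both pass descriptors embed the same struct, where previously each had its own copy.
struct ComputePassDescriptor<'a, Q: DynQuerySet + ?Sized> {
    timestamp_writes: Option<PassTimestampWrites<'a, Q>>,
}

fn timestamps_at_both_ends<Q: DynQuerySet + ?Sized>(qs: &Q) -> ComputePassDescriptor<'_, Q> {
    ComputePassDescriptor {
        timestamp_writes: Some(PassTimestampWrites {
            query_set: qs,
            beginning_of_pass_write_index: Some(0),
            end_of_pass_write_index: Some(1),
        }),
    }
}

fn main() {
    struct QuerySet;
    impl DynQuerySet for QuerySet {}
    let _ = timestamps_at_both_ends(&QuerySet);
}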
 #[derive(Clone, Debug)]
-pub struct BuildAccelerationStructureDescriptor<'a, A: Api> {
-    pub entries: &'a AccelerationStructureEntries<'a, A>,
+pub struct BuildAccelerationStructureDescriptor<
+    'a,
+    B: DynBuffer + ?Sized,
+    A: DynAccelerationStructure + ?Sized,
+> {
+    pub entries: &'a AccelerationStructureEntries<'a, B>,
     pub mode: AccelerationStructureBuildMode,
     pub flags: AccelerationStructureBuildFlags,
-    pub source_acceleration_structure: Option<&'a A::AccelerationStructure>,
-    pub destination_acceleration_structure: &'a A::AccelerationStructure,
-    pub scratch_buffer: &'a A::Buffer,
+    pub source_acceleration_structure: Option<&'a A>,
+    pub destination_acceleration_structure: &'a A,
+    pub scratch_buffer: &'a B,
     pub scratch_buffer_offset: wgt::BufferAddress,
 }

 /// - All buffers, buffer addresses and offsets will be ignored.
 /// - The build mode will be ignored.
 /// - Reducing the amount of Instances, Triangle groups or AABB groups (or the number of Triangles/AABBs in corresponding groups),
-/// may result in reduced size requirements.
+///   may result in reduced size requirements.
 /// - Any other change may result in a bigger or smaller size requirement.
 #[derive(Clone, Debug)]
-pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> {
-    pub entries: &'a AccelerationStructureEntries<'a, A>,
+pub struct GetAccelerationStructureBuildSizesDescriptor<'a, B: DynBuffer + ?Sized> {
+    pub entries: &'a AccelerationStructureEntries<'a, B>,
     pub flags: AccelerationStructureBuildFlags,
 }

@@ -2208,31 +2229,31 @@ pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> {
 /// * `Triangles` - Multiple triangle meshes for a bottom level acceleration structure
 /// * `AABBs` - List of list of axis aligned bounding boxes for a bottom level acceleration structure
 #[derive(Debug)]
-pub enum AccelerationStructureEntries<'a, A: Api> {
-    Instances(AccelerationStructureInstances<'a, A>),
-    Triangles(Vec<AccelerationStructureTriangles<'a, A>>),
-    AABBs(Vec<AccelerationStructureAABBs<'a, A>>),
+pub enum AccelerationStructureEntries<'a, B: DynBuffer + ?Sized> {
+    Instances(AccelerationStructureInstances<'a, B>),
+    Triangles(Vec<AccelerationStructureTriangles<'a, B>>),
+    AABBs(Vec<AccelerationStructureAABBs<'a, B>>),
 }

 /// * `first_vertex` - offset in the vertex buffer (as number of vertices)
 /// * `indices` - optional index buffer with attributes
 /// * `transform` - optional transform
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureTriangles<'a, A: Api> {
-    pub vertex_buffer: Option<&'a A::Buffer>,
+pub struct AccelerationStructureTriangles<'a, B: DynBuffer + ?Sized> {
+    pub vertex_buffer: Option<&'a B>,
     pub vertex_format: wgt::VertexFormat,
     pub first_vertex: u32,
     pub vertex_count: u32,
     pub vertex_stride: wgt::BufferAddress,
-    pub indices: Option<AccelerationStructureTriangleIndices<'a, A>>,
-    pub transform: Option<AccelerationStructureTriangleTransform<'a, A>>,
+    pub indices: Option<AccelerationStructureTriangleIndices<'a, B>>,
+    pub transform: Option<AccelerationStructureTriangleTransform<'a, B>>,
     pub flags: AccelerationStructureGeometryFlags,
 }

 /// * `offset` - offset in bytes
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureAABBs<'a, A: Api> {
-    pub buffer: Option<&'a A::Buffer>,
+pub struct AccelerationStructureAABBs<'a, B: DynBuffer + ?Sized> {
+    pub buffer: Option<&'a B>,
     pub offset: u32,
     pub count: u32,
     pub stride: wgt::BufferAddress,
@@ -2241,25 +2262,25 @@ pub struct AccelerationStructureAABBs<'a, A: Api> {

 /// * `offset` - offset in bytes
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureInstances<'a, A: Api> {
-    pub buffer: Option<&'a A::Buffer>,
+pub struct AccelerationStructureInstances<'a, B: DynBuffer + ?Sized> {
+    pub buffer: Option<&'a B>,
     pub offset: u32,
     pub count: u32,
 }

 /// * `offset` - offset in bytes
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureTriangleIndices<'a, A: Api> {
+pub struct AccelerationStructureTriangleIndices<'a, B: DynBuffer + ?Sized> {
     pub format: wgt::IndexFormat,
-    pub buffer: Option<&'a A::Buffer>,
+    pub buffer: Option<&'a B>,
     pub offset: u32,
     pub count: u32,
 }

 /// * `offset` - offset in bytes
 #[derive(Clone, Debug)]
-pub struct AccelerationStructureTriangleTransform<'a, A: Api> {
-    pub buffer: &'a A::Buffer,
+pub struct AccelerationStructureTriangleTransform<'a, B: DynBuffer + ?Sized> {
+    pub buffer: &'a B,
     pub offset: u32,
 }

diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
index 924902517f..5ef6d358b8 100644
--- a/wgpu-hal/src/metal/adapter.rs
+++ b/wgpu-hal/src/metal/adapter.rs
@@ -178,7 +178,7 @@ impl crate::Adapter for super::Adapter {
                 flags.set(Tfc::STORAGE, pc.format_rgb10a2_unorm_all);
                 flags
             }
-            Tf::Rg11b10Float => {
+            Tf::Rg11b10UFloat => {
                 let mut flags = all_caps;
                 flags.set(Tfc::STORAGE, pc.format_rg11b10_all);
                 flags
@@ -876,6 +876,7 @@ impl super::PrivateCapabilities {
         features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc);
         features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr);
         features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc);
+        features.set(F::TEXTURE_COMPRESSION_BC_SLICED_3D, self.format_bc); // BC guarantees Sliced 3D
         features.set(F::TEXTURE_COMPRESSION_ETC2, self.format_eac_etc);
         features.set(F::DEPTH_CLIP_CONTROL, self.supports_depth_clip_control);
@@ -1035,7 +1036,7 @@ impl super::PrivateCapabilities {
             Tf::Rgba8Sint => RGBA8Sint,
             Tf::Rgb10a2Uint => RGB10A2Uint,
             Tf::Rgb10a2Unorm => RGB10A2Unorm,
-            Tf::Rg11b10Float => RG11B10Float,
+            Tf::Rg11b10UFloat => RG11B10Float,
             Tf::Rg32Uint => RG32Uint,
             Tf::Rg32Sint => RG32Sint,
             Tf::Rg32Float => RG32Float,
diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs
index fb9c7e9c0e..7eea069a81 100644
--- a/wgpu-hal/src/metal/command.rs
+++ b/wgpu-hal/src/metal/command.rs
@@ -241,13 +241,13 @@ impl crate::CommandEncoder for super::CommandEncoder {

     unsafe fn transition_buffers<'a, T>(&mut self, _barriers: T)
     where
-        T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::BufferBarrier<'a, super::Buffer>>,
     {
     }

     unsafe fn transition_textures<'a, T>(&mut self, _barriers: T)
     where
-        T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::TextureBarrier<'a, super::Texture>>,
     {
     }

@@ -501,7 +501,10 @@ impl crate::CommandEncoder for super::CommandEncoder {

     // render

-    unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) {
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &crate::RenderPassDescriptor<super::QuerySet, super::TextureView>,
+    ) {
         self.begin_pass();
         self.state.index = None;

@@ -679,7 +682,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 encoder.set_vertex_bytes(
                     index as _,
                     (sizes.len() * WORD_SIZE) as u64,
-                    sizes.as_ptr() as _,
+                    sizes.as_ptr().cast(),
                 );
             }
         }
@@ -713,7 +716,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 encoder.set_fragment_bytes(
                     index as _,
                     (sizes.len() * WORD_SIZE) as u64,
-                    sizes.as_ptr() as _,
+                    sizes.as_ptr().cast(),
                 );
             }
         }
@@ -785,7 +788,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 encoder.set_bytes(
                     index as _,
                     (sizes.len() * WORD_SIZE) as u64,
-                    sizes.as_ptr() as _,
+                    sizes.as_ptr().cast(),
                 );
             }
         }
@@ -827,21 +830,21 @@ impl crate::CommandEncoder for super::CommandEncoder {
             self.state.compute.as_ref().unwrap().set_bytes(
                 layout.push_constants_infos.cs.unwrap().buffer_index as _,
                 (layout.total_push_constants as usize * WORD_SIZE) as _,
-                state_pc.as_ptr() as _,
+                state_pc.as_ptr().cast(),
             )
         }
         if stages.contains(wgt::ShaderStages::VERTEX) {
             self.state.render.as_ref().unwrap().set_vertex_bytes(
                layout.push_constants_infos.vs.unwrap().buffer_index as _,
                 (layout.total_push_constants as usize * WORD_SIZE) as _,
-                state_pc.as_ptr() as _,
+                state_pc.as_ptr().cast(),
             )
         }
         if stages.contains(wgt::ShaderStages::FRAGMENT) {
             self.state.render.as_ref().unwrap().set_fragment_bytes(
                 layout.push_constants_infos.fs.unwrap().buffer_index as _,
                 (layout.total_push_constants as usize * WORD_SIZE) as _,
-                state_pc.as_ptr() as _,
+                state_pc.as_ptr().cast(),
             )
         }
     }
@@ -895,7 +898,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 encoder.set_vertex_bytes(
                     index as _,
                     (sizes.len() * WORD_SIZE) as u64,
-                    sizes.as_ptr() as _,
+                    sizes.as_ptr().cast(),
                 );
             }
         }
@@ -907,7 +910,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 encoder.set_fragment_bytes(
                     index as _,
                     (sizes.len() * WORD_SIZE) as u64,
-                    sizes.as_ptr() as _,
+                    sizes.as_ptr().cast(),
                 );
             }
         }
@@ -915,7 +918,7 @@ impl crate::CommandEncoder for super::CommandEncoder {

     unsafe fn set_index_buffer<'a>(
         &mut self,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
         format: wgt::IndexFormat,
     ) {
         let (stride, raw_type) = match format {
@@ -933,7 +936,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
     unsafe fn set_vertex_buffer<'a>(
         &mut self,
         index: u32,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
     ) {
         let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64;
         let encoder = self.state.render.as_ref().unwrap();
@@ -956,7 +959,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 encoder.set_vertex_bytes(
                     index as _,
                     (sizes.len() * WORD_SIZE) as u64,
-                    sizes.as_ptr() as _,
+                    sizes.as_ptr().cast(),
                 );
             }
         }
@@ -1128,7 +1131,7 @@ impl crate::CommandEncoder for super::CommandEncoder {

     // compute

-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::Api>) {
+    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::QuerySet>) {
         self.begin_pass();

         debug_assert!(self.state.blit.is_none());
@@ -1212,7 +1215,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
             encoder.set_bytes(
                 index as _,
                 (sizes.len() * WORD_SIZE) as u64,
-                sizes.as_ptr() as _,
+                sizes.as_ptr().cast(),
             );
         }

@@ -1257,7 +1260,13 @@ impl crate::CommandEncoder for super::CommandEncoder {
         _descriptors: T,
     ) where
         super::Api: 'a,
-        T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>,
+        T: IntoIterator<
+            Item = crate::BuildAccelerationStructureDescriptor<
+                'a,
+                super::Buffer,
+                super::AccelerationStructure,
+            >,
+        >,
     {
         unimplemented!()
     }
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index 6af82e1e62..077c10f517 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -99,7 +99,7 @@ const fn convert_vertex_format_to_naga(format: wgt::VertexFormat) -> naga::back::msl::VertexFormat {
 impl super::Device {
     fn load_shader(
         &self,
-        stage: &crate::ProgrammableStage<super::Api>,
+        stage: &crate::ProgrammableStage<super::ShaderModule>,
         vertex_buffer_mappings: &[naga::back::msl::VertexBufferMapping],
         layout: &super::PipelineLayout,
         primitive_class: metal::MTLPrimitiveTopologyClass,
@@ -112,7 +112,7 @@ impl super::Device {
             &stage.module.naga.info,
             stage.constants,
         )
-        .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?;
+        .map_err(|e| crate::PipelineError::PipelineConstants(stage_bit, format!("MSL: {:?}", e)))?;

         let ep_resources = &layout.per_stage_map[naga_stage];

@@ -146,7 +146,6 @@ impl super::Device {
                 index: bounds_check_policy,
                 buffer: bounds_check_policy,
                 image_load: bounds_check_policy,
-                image_store: naga::proc::BoundsCheckPolicy::Unchecked,
                 // TODO: support bounds checks on binding arrays
                 binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
             },
@@ -158,7 +157,7 @@ impl super::Device {
                 metal::MTLPrimitiveTopologyClass::Point => true,
                 _ => false,
             },
-            vertex_pulling_transform: stage.vertex_pulling_transform,
+            vertex_pulling_transform: true,
             vertex_buffer_mappings: vertex_buffer_mappings.to_vec(),
         };

@@ -362,7 +361,7 @@ impl crate::Device for super::Device {
         buffer: &super::Buffer,
         range: crate::MemoryRange,
     ) -> DeviceResult<crate::BufferMapping> {
-        let ptr = buffer.raw.contents() as *mut u8;
+        let ptr = buffer.raw.contents().cast::<u8>();
         assert!(!ptr.is_null());
         Ok(crate::BufferMapping {
             ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize) }).unwrap(),
@@ -370,9 +369,7 @@ impl crate::Device for super::Device {
         })
     }

-    unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) -> DeviceResult<()> {
-        Ok(())
-    }
+    unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) {}

     unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {}
     unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {}
@@ -572,7 +569,7 @@ impl crate::Device for super::Device {

     unsafe fn create_command_encoder(
         &self,
-        desc: &crate::CommandEncoderDescriptor<super::Api>,
+        desc: &crate::CommandEncoderDescriptor<super::Queue>,
     ) -> Result<super::CommandEncoder, DeviceError> {
         self.counters.command_encoders.add(1);
         Ok(super::CommandEncoder {
@@ -605,7 +602,7 @@ impl crate::Device for super::Device {

     unsafe fn create_pipeline_layout(
         &self,
-        desc: &crate::PipelineLayoutDescriptor<super::Api>,
+        desc: &crate::PipelineLayoutDescriptor<super::BindGroupLayout>,
     ) -> DeviceResult<super::PipelineLayout> {
         #[derive(Debug)]
         struct StageInfo {
@@ -779,7 +776,13 @@ impl crate::Device for super::Device {

     unsafe fn create_bind_group(
         &self,
-        desc: &crate::BindGroupDescriptor<super::Api>,
+        desc: &crate::BindGroupDescriptor<
+            super::BindGroupLayout,
+            super::Buffer,
+            super::Sampler,
+            super::TextureView,
+            super::AccelerationStructure,
+        >,
     ) -> DeviceResult<super::BindGroup> {
         let mut bg = super::BindGroup::default();
         for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) {
@@ -895,7 +898,11 @@ impl crate::Device for super::Device {

     unsafe fn create_render_pipeline(
         &self,
-        desc: &crate::RenderPipelineDescriptor<super::Api>,
+        desc: &crate::RenderPipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::RenderPipeline, crate::PipelineError> {
         objc::rc::autoreleasepool(|| {
             let descriptor = metal::RenderPipelineDescriptor::new();
@@ -1166,7 +1173,11 @@ impl crate::Device for super::Device {

     unsafe fn create_compute_pipeline(
         &self,
-        desc: &crate::ComputePipelineDescriptor<super::Api>,
+        desc: &crate::ComputePipelineDescriptor<
+            super::PipelineLayout,
+            super::ShaderModule,
+            super::PipelineCache,
+        >,
     ) -> Result<super::ComputePipeline, crate::PipelineError> {
         objc::rc::autoreleasepool(|| {
             let descriptor = metal::ComputePipelineDescriptor::new();
@@ -1229,10 +1240,10 @@ impl crate::Device for super::Device {
     unsafe fn create_pipeline_cache(
         &self,
         _desc: &crate::PipelineCacheDescriptor<'_>,
-    ) -> Result<(), crate::PipelineCacheError> {
-        Ok(())
+    ) -> Result<super::PipelineCache, crate::PipelineCacheError> {
+        Ok(super::PipelineCache)
     }
-    unsafe fn destroy_pipeline_cache(&self, (): ()) {}
+    unsafe fn destroy_pipeline_cache(&self, _: super::PipelineCache) {}

     unsafe fn create_query_set(
         &self,
@@ -1383,7 +1394,7 @@ impl crate::Device for super::Device {
     unsafe fn get_acceleration_structure_build_sizes(
         &self,
-        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<super::Api>,
+        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<super::Buffer>,
     ) -> crate::AccelerationStructureBuildSizes {
         unimplemented!()
     }
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 177b02569a..62d409a8ff 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -66,11 +66,36 @@ impl crate::Api for Api {
     type ShaderModule = ShaderModule;
     type RenderPipeline = RenderPipeline;
     type ComputePipeline = ComputePipeline;
-    type PipelineCache = ();
+    type PipelineCache = PipelineCache;

     type AccelerationStructure = AccelerationStructure;
 }

+crate::impl_dyn_resource!(
+    Adapter,
+    AccelerationStructure,
+    BindGroup,
+    BindGroupLayout,
+    Buffer,
+    CommandBuffer,
+    CommandEncoder,
+    ComputePipeline,
+    Device,
+    Fence,
+    Instance,
+    PipelineCache,
+    PipelineLayout,
+    QuerySet,
+    Queue,
+    RenderPipeline,
+    Sampler,
+    ShaderModule,
+    Surface,
+    SurfaceTexture,
+    Texture,
+    TextureView
+);
+
 pub struct Instance {
     managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate,
 }
@@ -117,10 +142,6 @@ impl crate::Instance for Instance {
         }
     }

-    unsafe fn destroy_surface(&self, surface: Surface) {
-        unsafe { surface.dispose() };
-    }
-
     unsafe fn enumerate_adapters(
         &self,
         _surface_hint: Option<&Surface>,
@@ -366,12 +387,20 @@ pub struct SurfaceTexture {
     present_with_transaction: bool,
 }

+impl crate::DynSurfaceTexture for SurfaceTexture {}
+
 impl std::borrow::Borrow<Texture> for SurfaceTexture {
     fn borrow(&self) -> &Texture {
         &self.texture
     }
 }

+impl std::borrow::Borrow<dyn crate::DynTexture> for SurfaceTexture {
+    fn borrow(&self) -> &dyn crate::DynTexture {
+        &self.texture
+    }
+}
+
 unsafe impl Send for SurfaceTexture {}
 unsafe impl Sync for SurfaceTexture {}
@@ -464,13 +493,15 @@ pub struct Buffer {
 unsafe impl Send for Buffer {}
 unsafe impl Sync for Buffer {}

+impl crate::DynBuffer for Buffer {}
+
 impl Buffer {
     fn as_raw(&self) -> BufferPtr {
         unsafe { NonNull::new_unchecked(self.raw.as_ptr()) }
     }
 }

-impl crate::BufferBinding<'_, Api> {
+impl crate::BufferBinding<'_, Buffer> {
     fn resolve_size(&self) -> wgt::BufferAddress {
         match self.size {
             Some(size) => size.get(),
@@ -489,6 +520,8 @@ pub struct Texture {
     copy_size: crate::CopyExtent,
 }

+impl crate::DynTexture for Texture {}
+
 unsafe impl Send for Texture {}
 unsafe impl Sync for Texture {}
@@ -498,6 +531,8 @@ pub struct TextureView {
     aspects: crate::FormatAspects,
 }

+impl crate::DynTextureView for TextureView {}
+
 unsafe impl Send for TextureView {}
 unsafe impl Sync for TextureView {}
@@ -512,6 +547,8 @@ pub struct Sampler {
     raw: metal::SamplerState,
 }

+impl crate::DynSampler for Sampler {}
+
 unsafe impl Send for Sampler {}
 unsafe impl Sync for Sampler {}
@@ -527,6 +564,8 @@ pub struct BindGroupLayout {
     entries: Arc<[wgt::BindGroupLayoutEntry]>,
 }

+impl crate::DynBindGroupLayout for BindGroupLayout {}
+
 #[derive(Clone, Debug, Default)]
 struct ResourceData<T> {
     buffers: T,
@@ -608,6 +647,8 @@ pub struct PipelineLayout {
     per_stage_map: MultiStageResources,
 }

+impl crate::DynPipelineLayout for PipelineLayout {}
+
 trait AsNative {
     type Native;
     fn from(native: &Self::Native) -> Self;
@@ -681,6 +722,8 @@ pub struct BindGroup {
     textures: Vec<TexturePtr>,
 }

+impl crate::DynBindGroup for BindGroup {}
+
 unsafe impl Send for BindGroup {}
 unsafe impl Sync for BindGroup {}
@@ -690,6 +733,8 @@ pub struct ShaderModule {
     runtime_checks: bool,
 }

+impl crate::DynShaderModule for ShaderModule {}
+
 #[derive(Debug, Default)]
 struct PipelineStageInfo {
     push_constants: Option<PushConstantsInfo>,
@@ -747,6 +792,8 @@ pub struct RenderPipeline {
 unsafe impl Send for RenderPipeline {}
 unsafe impl Sync for RenderPipeline {}

+impl crate::DynRenderPipeline for RenderPipeline {}
+
 #[derive(Debug)]
 pub struct ComputePipeline {
     raw: metal::ComputePipelineState,
@@ -760,6 +807,8 @@ pub struct ComputePipeline {

 unsafe impl Send for ComputePipeline {}
 unsafe impl Sync for ComputePipeline {}

+impl crate::DynComputePipeline for ComputePipeline {}
+
 #[derive(Debug, Clone)]
 pub struct QuerySet {
     raw_buffer: metal::Buffer,
@@ -768,6 +817,8 @@ pub struct QuerySet {
     ty: wgt::QueryType,
 }

+impl crate::DynQuerySet for QuerySet {}
+
 unsafe impl Send for QuerySet {}
 unsafe impl Sync for QuerySet {}
@@ -778,6 +829,8 @@ pub struct Fence {
     pending_command_buffers: Vec<(crate::FenceValue, metal::CommandBuffer)>,
 }

+impl crate::DynFence for Fence {}
+
 unsafe impl Send for Fence {}
 unsafe impl Sync for Fence {}
@@ -875,8 +928,17 @@ pub struct CommandBuffer {
     raw: metal::CommandBuffer,
 }

+impl crate::DynCommandBuffer for CommandBuffer {}
+
 unsafe impl Send for CommandBuffer {}
 unsafe impl Sync for CommandBuffer {}

+#[derive(Debug)]
+pub struct PipelineCache;
+
+impl crate::DynPipelineCache for PipelineCache {}
+
 #[derive(Debug)]
 pub struct AccelerationStructure;
+
+impl crate::DynAccelerationStructure for AccelerationStructure {}
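Each concrete resource type above opts into the new `wgpu-hal` dynamic-dispatch layer through a `Dyn*` trait impl, with `impl_dyn_resource!` generating the shared plumbing. A simplified, standalone sketch of the general pattern (the real `Dyn*` traits carry more supertraits than shown here):

```rust
// Resources are stored as trait objects; a backend downcasts them back
// to its own concrete types when it needs the raw handle.
use std::any::Any;

trait DynBuffer: Any + std::fmt::Debug {
    fn as_any(&self) -> &dyn Any;
}

#[derive(Debug)]
struct MetalBuffer {
    len: usize,
}

impl DynBuffer for MetalBuffer {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

// Backend-agnostic code holds `dyn DynBuffer`; backend code recovers the
// concrete type, panicking if handed a resource from another backend.
fn expect_metal(buf: &dyn DynBuffer) -> &MetalBuffer {
    buf.as_any()
        .downcast_ref::<MetalBuffer>()
        .expect("buffer was created by a different backend")
}

fn main() {
    let b: Box<dyn DynBuffer> = Box::new(MetalBuffer { len: 64 });
    assert_eq!(expect_metal(b.as_ref()).len, 64);
}
```

The trade-off is a virtual call at the boundary in exchange for each front-end function being compiled once, rather than once per enabled backend.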
diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs
index 1a11056609..8bbdb63786 100644
--- a/wgpu-hal/src/metal/surface.rs
+++ b/wgpu-hal/src/metal/surface.rs
@@ -1,6 +1,6 @@
 #![allow(clippy::let_unit_value)] // `let () =` being used to constrain result type

-use std::{mem, os::raw::c_void, ptr::NonNull, sync::Once, thread};
+use std::{os::raw::c_void, ptr::NonNull, sync::Once, thread};

 use core_graphics_types::{
     base::CGFloat,
@@ -70,22 +70,25 @@ impl super::Surface {
         }
     }

-    pub unsafe fn dispose(self) {
-        if let Some(view) = self.view {
-            let () = msg_send![view.as_ptr(), release];
-        }
-    }
-
     /// If not called on the main thread, this will panic.
     #[allow(clippy::transmute_ptr_to_ref)]
     pub unsafe fn from_view(
         view: *mut c_void,
         delegate: Option<&HalManagedMetalLayerDelegate>,
     ) -> Self {
-        let view = view as *mut Object;
+        let view = view.cast::<Object>();
         let render_layer = {
             let layer = unsafe { Self::get_metal_layer(view, delegate) };
-            unsafe { mem::transmute::<_, &metal::MetalLayerRef>(layer) }
+            let layer = layer.cast::<metal::MetalLayerRef>();
+            // SAFETY: This pointer…
+            //
+            // - …is properly aligned.
+            // - …is dereferenceable to a `MetalLayerRef` as an invariant of the `metal`
+            //   field.
+            // - …points to an _initialized_ `MetalLayerRef`.
+            // - …is only ever aliased via an immutable reference that lives within this
+            //   lexical scope.
+            unsafe { &*layer }
         }
         .to_owned();
         let _: *mut c_void = msg_send![view, retain];
@@ -169,6 +172,16 @@ impl super::Surface {
     }
 }

+impl Drop for super::Surface {
+    fn drop(&mut self) {
+        if let Some(view) = self.view {
+            unsafe {
+                let () = msg_send![view.as_ptr(), release];
+            }
+        }
+    }
+}
+
 impl crate::Surface for super::Surface {
     type A = super::Api;
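`Surface::dispose` (and the `Instance::destroy_surface` entry point removed earlier) give way to a `Drop` impl, so releasing the retained view can no longer be forgotten. A self-contained sketch of the same RAII move, with stand-in types:

```rust
// `RawHandle` stands in for the backend object (e.g. a retained NSView
// on Metal or a VkSurfaceKHR on Vulkan).
struct RawHandle(u64);

fn release(h: &RawHandle) {
    println!("releasing surface handle {}", h.0);
}

struct Surface {
    view: Option<RawHandle>,
}

impl Drop for Surface {
    fn drop(&mut self) {
        // Runs on every exit path, including unwinding, so a surface can
        // no longer leak by a caller skipping an explicit destroy method.
        if let Some(view) = self.view.take() {
            release(&view);
        }
    }
}

fn main() {
    let _s = Surface { view: Some(RawHandle(7)) };
} // `release` runs here
```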
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 3a2cdee55f..9015bca7fc 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -253,6 +253,7 @@ impl PhysicalDeviceFeatures {
             )
             .texture_compression_bc(
                 requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_BC),
+                // BC provides formats for Sliced 3D
             )
             //.occlusion_query_precise(requested_features.contains(wgt::Features::PRECISE_OCCLUSION_QUERY))
             .pipeline_statistics_query(
@@ -428,12 +429,14 @@ impl PhysicalDeviceFeatures {
             shader_atomic_int64: if device_api_version >= vk::API_VERSION_1_2
                 || enabled_extensions.contains(&khr::shader_atomic_int64::NAME)
             {
+                let needed = requested_features.intersects(
+                    wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
+                        | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX,
+                );
                 Some(
                     vk::PhysicalDeviceShaderAtomicInt64Features::default()
-                        .shader_buffer_int64_atomics(requested_features.intersects(
-                            wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
-                                | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX,
-                        )),
+                        .shader_buffer_int64_atomics(needed)
+                        .shader_shared_int64_atomics(needed),
                 )
             } else {
                 None
@@ -537,6 +540,10 @@ impl PhysicalDeviceFeatures {
             F::TEXTURE_COMPRESSION_BC,
             self.core.texture_compression_bc != 0,
         );
+        features.set(
+            F::TEXTURE_COMPRESSION_BC_SLICED_3D,
+            self.core.texture_compression_bc != 0, // BC guarantees Sliced 3D
+        );
         features.set(
             F::PIPELINE_STATISTICS_QUERY,
             self.core.pipeline_statistics_query != 0,
@@ -1231,6 +1238,17 @@ impl super::InstanceShared {
             features2 = features2.push_next(next);
         }

+        // `VK_KHR_shader_atomic_int64` is promoted to 1.2, but has no
+        // changes, so we can keep using the extension unconditionally.
+        if capabilities.device_api_version >= vk::API_VERSION_1_2
+            || capabilities.supports_extension(khr::shader_atomic_int64::NAME)
+        {
+            let next = features
+                .shader_atomic_int64
+                .insert(vk::PhysicalDeviceShaderAtomicInt64Features::default());
+            features2 = features2.push_next(next);
+        }
+
         if capabilities.supports_extension(ext::image_robustness::NAME) {
             let next = features
                 .image_robustness
@@ -1763,7 +1781,6 @@ impl super::Adapter {
                 } else {
                     naga::proc::BoundsCheckPolicy::Restrict
                 },
-                image_store: naga::proc::BoundsCheckPolicy::Unchecked,
                 // TODO: support bounds checks on binding arrays
                 binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
             },
diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs
index 5f3fdc5959..0c81321c93 100644
--- a/wgpu-hal/src/vulkan/command.rs
+++ b/wgpu-hal/src/vulkan/command.rs
@@ -116,7 +116,7 @@ impl crate::CommandEncoder for super::CommandEncoder {

     unsafe fn transition_buffers<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::BufferBarrier<'a, super::Buffer>>,
     {
         //Note: this is done so that we never end up with empty stage flags
         let mut src_stages = vk::PipelineStageFlags::TOP_OF_PIPE;
@@ -156,7 +156,7 @@ impl crate::CommandEncoder for super::CommandEncoder {

     unsafe fn transition_textures<'a, T>(&mut self, barriers: T)
     where
-        T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>,
+        T: Iterator<Item = crate::TextureBarrier<'a, super::Texture>>,
     {
         let mut src_stages = vk::PipelineStageFlags::empty();
         let mut dst_stages = vk::PipelineStageFlags::empty();
@@ -408,7 +408,13 @@ impl crate::CommandEncoder for super::CommandEncoder {
     unsafe fn build_acceleration_structures<'a, T>(&mut self, descriptor_count: u32, descriptors: T)
     where
         super::Api: 'a,
-        T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>,
+        T: IntoIterator<
+            Item = crate::BuildAccelerationStructureDescriptor<
+                'a,
+                super::Buffer,
+                super::AccelerationStructure,
+            >,
+        >,
     {
         const CAPACITY_OUTER: usize = 8;
         const CAPACITY_INNER: usize = 1;
@@ -644,7 +650,10 @@ impl crate::CommandEncoder for super::CommandEncoder {
     }

     // render

-    unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) {
+    unsafe fn begin_render_pass(
+        &mut self,
+        desc: &crate::RenderPassDescriptor<super::QuerySet, super::TextureView>,
+    ) {
         let mut vk_clear_values = ArrayVec::<vk::ClearValue, { super::MAX_TOTAL_ATTACHMENTS }>::new();
         let mut vk_image_views = ArrayVec::<vk::ImageView, { super::MAX_TOTAL_ATTACHMENTS }>::new();
@@ -833,7 +842,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
                 layout.raw,
                 conv::map_shader_stage(stages),
                 offset_bytes,
-                slice::from_raw_parts(data.as_ptr() as _, data.len() * 4),
+                slice::from_raw_parts(data.as_ptr().cast(), data.len() * 4),
             )
         };
     }
@@ -870,7 +879,7 @@ impl crate::CommandEncoder for super::CommandEncoder {

     unsafe fn set_index_buffer<'a>(
         &mut self,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
         format: wgt::IndexFormat,
     ) {
         unsafe {
@@ -885,7 +894,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
     unsafe fn set_vertex_buffer<'a>(
         &mut self,
         index: u32,
-        binding: crate::BufferBinding<'a, super::Api>,
+        binding: crate::BufferBinding<'a, super::Buffer>,
     ) {
         let vk_buffers = [binding.buffer.raw];
         let vk_offsets = [binding.offset];
@@ -1067,7 +1076,10 @@ impl crate::CommandEncoder for super::CommandEncoder {

     // compute

-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<'_, super::Api>) {
+    unsafe fn begin_compute_pass(
+        &mut self,
+        desc: &crate::ComputePassDescriptor<'_, super::QuerySet>,
+    ) {
         self.bind_point = vk::PipelineBindPoint::COMPUTE;
         if let Some(label) = desc.label {
             unsafe { self.begin_debug_marker(label) };
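The adapter change above computes the requested state once as `needed` and applies it to both `shader_buffer_int64_atomics` and the newly added `shader_shared_int64_atomics` (workgroup-scope atomics). Extracted as a sketch, using the `wgt` alias this repo uses for `wgpu-types`:

```rust
// Mirror of the `needed` computation above, as a free function.
fn wants_int64_atomics(requested: wgt::Features) -> bool {
    requested.intersects(
        wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
            | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX,
    )
}
```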
diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs
index fe284f32a9..38642ba082 100644
--- a/wgpu-hal/src/vulkan/conv.rs
+++ b/wgpu-hal/src/vulkan/conv.rs
@@ -36,7 +36,7 @@ impl super::PrivateCapabilities {
             Tf::Rgba8Sint => F::R8G8B8A8_SINT,
             Tf::Rgb10a2Uint => F::A2B10G10R10_UINT_PACK32,
             Tf::Rgb10a2Unorm => F::A2B10G10R10_UNORM_PACK32,
-            Tf::Rg11b10Float => F::B10G11R11_UFLOAT_PACK32,
+            Tf::Rg11b10UFloat => F::B10G11R11_UFLOAT_PACK32,
             Tf::Rg32Uint => F::R32G32_UINT,
             Tf::Rg32Sint => F::R32G32_SINT,
             Tf::Rg32Float => F::R32G32_SFLOAT,
@@ -178,7 +178,7 @@ pub fn map_vk_surface_formats(sf: vk::SurfaceFormatKHR) -> Option<wgt::TextureFormat> {

-impl crate::Attachment<'_, super::Api> {
+impl crate::Attachment<'_, super::TextureView> {
     pub(super) fn make_attachment_key(
         &self,
         ops: crate::AttachmentOps,
@@ -192,7 +192,7 @@ impl crate::Attachment<'_, super::Api> {
     }
 }

-impl crate::ColorAttachment<'_, super::Api> {
+impl crate::ColorAttachment<'_, super::TextureView> {
     pub(super) unsafe fn make_vk_clear_color(&self) -> vk::ClearColorValue {
         let cv = &self.clear_value;
         match self
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index d088314609..c42cace857 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -1,4 +1,4 @@
-use super::{conv, PipelineCache};
+use super::conv;

 use arrayvec::ArrayVec;
 use ash::{khr, vk};
@@ -343,7 +343,7 @@ impl gpu_alloc::MemoryDevice<vk::DeviceMemory> for super::DeviceShared {
             self.raw
                 .map_memory(*memory, offset, size, vk::MemoryMapFlags::empty())
         } {
-            Ok(ptr) => Ok(ptr::NonNull::new(ptr as *mut u8)
+            Ok(ptr) => Ok(ptr::NonNull::new(ptr.cast::<u8>())
                 .expect("Pointer to memory mapping must not be null")),
             Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => {
                 Err(gpu_alloc::DeviceMapError::OutOfDeviceMemory)
@@ -709,7 +709,7 @@ impl super::Device {

     fn compile_stage(
         &self,
-        stage: &crate::ProgrammableStage<super::Api>,
+        stage: &crate::ProgrammableStage<super::ShaderModule>,
         naga_stage: naga::ShaderStage,
         binding_map: &naga::back::spv::BindingMap,
     ) -> Result<CompiledStage, crate::PipelineError> {
@@ -736,7 +736,6 @@ impl super::Device {
                 index: naga::proc::BoundsCheckPolicy::Unchecked,
                 buffer: naga::proc::BoundsCheckPolicy::Unchecked,
                 image_load: naga::proc::BoundsCheckPolicy::Unchecked,
-                image_store: naga::proc::BoundsCheckPolicy::Unchecked,
                 binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
             };
         }
@@ -765,7 +764,9 @@ impl super::Device {
             &naga_shader.info,
             stage.constants,
         )
-        .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?;
+        .map_err(|e| {
+            crate::PipelineError::PipelineConstants(stage_flags, format!("{e}"))
+        })?;

         let spv = {
             profiling::scope!("naga::spv::write_vec");
@@ -951,12 +952,10 @@ impl crate::Device for super::Device {
             Err(crate::DeviceError::OutOfMemory)
         }
     }
-    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> {
+    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) {
+        // We can only unmap the buffer if it was already mapped successfully.
if let Some(ref block) = buffer.block { unsafe { block.lock().unmap(&*self.shared) }; - Ok(()) - } else { - Err(crate::DeviceError::OutOfMemory) } } @@ -1216,7 +1215,7 @@ impl crate::Device for super::Device { unsafe fn create_command_encoder( &self, - desc: &crate::CommandEncoderDescriptor, + desc: &crate::CommandEncoderDescriptor, ) -> Result { let vk_info = vk::CommandPoolCreateInfo::default() .queue_family_index(desc.queue.family_index) @@ -1388,7 +1387,7 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_layout( &self, - desc: &crate::PipelineLayoutDescriptor, + desc: &crate::PipelineLayoutDescriptor, ) -> Result { //Note: not bothering with on stack array here as it's low frequency let vk_set_layouts = desc @@ -1454,7 +1453,13 @@ impl crate::Device for super::Device { unsafe fn create_bind_group( &self, - desc: &crate::BindGroupDescriptor, + desc: &crate::BindGroupDescriptor< + super::BindGroupLayout, + super::Buffer, + super::Sampler, + super::TextureView, + super::AccelerationStructure, + >, ) -> Result { let mut vk_sets = unsafe { self.desc_allocator.lock().allocate( @@ -1515,7 +1520,7 @@ impl crate::Device for super::Device { // SAFETY: similar to safety notes for `slice_get_ref`, but we have a // mutable reference which is also guaranteed to be valid for writes. unsafe { - &mut *(to_init as *mut [MaybeUninit] as *mut [T]) + &mut *(ptr::from_mut::<[MaybeUninit]>(to_init) as *mut [T]) } }; (Self { remainder }, init) @@ -1680,7 +1685,6 @@ impl crate::Device for super::Device { index: naga::proc::BoundsCheckPolicy::Unchecked, buffer: naga::proc::BoundsCheckPolicy::Unchecked, image_load: naga::proc::BoundsCheckPolicy::Unchecked, - image_store: naga::proc::BoundsCheckPolicy::Unchecked, binding_array: naga::proc::BoundsCheckPolicy::Unchecked, }; } @@ -1721,7 +1725,11 @@ impl crate::Device for super::Device { unsafe fn create_render_pipeline( &self, - desc: &crate::RenderPipelineDescriptor, + desc: &crate::RenderPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { let dynamic_states = [ vk::DynamicState::VIEWPORT, @@ -1951,6 +1959,7 @@ impl crate::Device for super::Device { Ok(super::RenderPipeline { raw }) } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; @@ -1959,7 +1968,11 @@ impl crate::Device for super::Device { unsafe fn create_compute_pipeline( &self, - desc: &crate::ComputePipelineDescriptor, + desc: &crate::ComputePipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { let compiled = self.compile_stage( &desc.stage, @@ -2011,7 +2024,7 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_cache( &self, desc: &crate::PipelineCacheDescriptor<'_>, - ) -> Result { + ) -> Result { let mut info = vk::PipelineCacheCreateInfo::default(); if let Some(data) = desc.data { info = info.initial_data(data) @@ -2020,12 +2033,12 @@ impl crate::Device for super::Device { let raw = unsafe { self.shared.raw.create_pipeline_cache(&info, None) } .map_err(crate::DeviceError::from)?; - Ok(PipelineCache { raw }) + Ok(super::PipelineCache { raw }) } fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> { Some(self.shared.pipeline_cache_validation_key) } - unsafe fn destroy_pipeline_cache(&self, cache: PipelineCache) { + unsafe fn destroy_pipeline_cache(&self, cache: super::PipelineCache) { unsafe { self.shared.raw.destroy_pipeline_cache(cache.raw, None) } } unsafe 
fn create_query_set( @@ -2156,14 +2169,14 @@ impl crate::Device for super::Device { } } - unsafe fn pipeline_cache_get_data(&self, cache: &PipelineCache) -> Option> { + unsafe fn pipeline_cache_get_data(&self, cache: &super::PipelineCache) -> Option> { let data = unsafe { self.raw_device().get_pipeline_cache_data(cache.raw) }; data.ok() } unsafe fn get_acceleration_structure_build_sizes<'a>( &self, - desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>, ) -> crate::AccelerationStructureBuildSizes { const CAPACITY: usize = 8; diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index ec720f3788..1d7386e623 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -23,7 +23,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( } let cd = unsafe { &*callback_data_ptr }; - let user_data = unsafe { &*(user_data as *mut super::DebugUtilsMessengerUserData) }; + let user_data = unsafe { &*user_data.cast::() }; const VUID_VKCMDENDDEBUGUTILSLABELEXT_COMMANDBUFFER_01912: i32 = 0x56146426; if cd.message_id_number == VUID_VKCMDENDDEBUGUTILSLABELEXT_COMMANDBUFFER_01912 { @@ -515,7 +515,7 @@ impl super::Instance { } let layer = unsafe { - crate::metal::Surface::get_metal_layer(view as *mut objc::runtime::Object, None) + crate::metal::Surface::get_metal_layer(view.cast::(), None) }; let surface = { @@ -523,7 +523,7 @@ impl super::Instance { ext::metal_surface::Instance::new(&self.shared.entry, &self.shared.raw); let vk_info = vk::MetalSurfaceCreateInfoEXT::default() .flags(vk::MetalSurfaceCreateFlagsEXT::empty()) - .layer(layer as *mut _); + .layer(layer.cast()); unsafe { metal_loader.create_metal_surface(&vk_info, None).unwrap() } }; @@ -880,10 +880,6 @@ impl crate::Instance for super::Instance { } } - unsafe fn destroy_surface(&self, surface: super::Surface) { - unsafe { surface.functor.destroy_surface(surface.raw, None) }; - } - unsafe fn enumerate_adapters( &self, _surface_hint: Option<&super::Surface>, @@ -942,6 +938,12 @@ impl crate::Instance for super::Instance { } } +impl Drop for super::Surface { + fn drop(&mut self) { + unsafe { self.functor.destroy_surface(self.raw, None) }; + } +} + impl crate::Surface for super::Surface { type A = super::Api; @@ -950,7 +952,7 @@ impl crate::Surface for super::Surface { device: &super::Device, config: &crate::SurfaceConfiguration, ) -> Result<(), crate::SurfaceError> { - // Safety: `configure`'s contract guarantees there are no resources derived from the swapchain in use. + // SAFETY: `configure`'s contract guarantees there are no resources derived from the swapchain in use. let mut swap_chain = self.swapchain.write(); let old = swap_chain .take() @@ -964,7 +966,7 @@ impl crate::Surface for super::Surface { unsafe fn unconfigure(&self, device: &super::Device) { if let Some(sc) = self.swapchain.write().take() { - // Safety: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use. + // SAFETY: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use. 
            let swapchain = unsafe { sc.release_resources(&device.shared.raw) };
            unsafe { swapchain.functor.destroy_swapchain(swapchain.raw, None) };
        }
    }
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index f0d881614c..0b024b80a7 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -7,7 +7,6 @@ Ash expects slices, which we don't generally have available.
 We cope with this requirement by the combination of the following ways:
   - temporarily allocating `Vec` on heap, where overhead is permitted
   - growing temporary local storage
-  - using `implace_it` on iterators

 ## Framebuffers and Render passes
@@ -79,6 +78,31 @@ impl crate::Api for Api {
     type ComputePipeline = ComputePipeline;
 }

+crate::impl_dyn_resource!(
+    Adapter,
+    AccelerationStructure,
+    BindGroup,
+    BindGroupLayout,
+    Buffer,
+    CommandBuffer,
+    CommandEncoder,
+    ComputePipeline,
+    Device,
+    Fence,
+    Instance,
+    PipelineCache,
+    PipelineLayout,
+    QuerySet,
+    Queue,
+    RenderPipeline,
+    Sampler,
+    ShaderModule,
+    Surface,
+    SurfaceTexture,
+    Texture,
+    TextureView
+);
+
 struct DebugUtils {
     extension: ext::debug_utils::Instance,
     messenger: vk::DebugUtilsMessengerEXT,
@@ -358,12 +382,20 @@ pub struct SurfaceTexture {
     surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
 }

+impl crate::DynSurfaceTexture for SurfaceTexture {}
+
 impl Borrow<Texture> for SurfaceTexture {
     fn borrow(&self) -> &Texture {
         &self.texture
     }
 }

+impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
+    fn borrow(&self) -> &dyn crate::DynTexture {
+        &self.texture
+    }
+}
+
 pub struct Adapter {
     raw: vk::PhysicalDevice,
     instance: Arc<InstanceShared>,
@@ -632,6 +664,8 @@ pub struct Buffer {
     block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
 }

+impl crate::DynBuffer for Buffer {}
+
 #[derive(Debug)]
 pub struct AccelerationStructure {
     raw: vk::AccelerationStructureKHR,
@@ -639,6 +673,8 @@ pub struct AccelerationStructure {
     block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
 }

+impl crate::DynAccelerationStructure for AccelerationStructure {}
+
 #[derive(Debug)]
 pub struct Texture {
     raw: vk::Image,
@@ -651,6 +687,8 @@ pub struct Texture {
     view_formats: Vec<wgt::TextureFormat>,
 }

+impl crate::DynTexture for Texture {}
+
 impl Texture {
     /// # Safety
     ///
@@ -667,6 +705,8 @@ pub struct TextureView {
     attachment: FramebufferAttachment,
 }

+impl crate::DynTextureView for TextureView {}
+
 impl TextureView {
     /// # Safety
     ///
@@ -681,6 +721,8 @@ pub struct Sampler {
     raw: vk::Sampler,
 }

+impl crate::DynSampler for Sampler {}
+
 #[derive(Debug)]
 pub struct BindGroupLayout {
     raw: vk::DescriptorSetLayout,
@@ -690,17 +732,23 @@ pub struct BindGroupLayout {
     binding_arrays: Vec<(u32, NonZeroU32)>,
 }

+impl crate::DynBindGroupLayout for BindGroupLayout {}
+
 #[derive(Debug)]
 pub struct PipelineLayout {
     raw: vk::PipelineLayout,
     binding_arrays: naga::back::spv::BindingMap,
 }

+impl crate::DynPipelineLayout for PipelineLayout {}
+
 #[derive(Debug)]
 pub struct BindGroup {
     set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
 }

+impl crate::DynBindGroup for BindGroup {}
+
 /// Miscellaneous allocation recycling pool for `CommandAllocator`.
 #[derive(Default)]
 struct Temp {
@@ -714,7 +762,6 @@ impl Temp {
         self.marker.clear();
         self.buffer_barriers.clear();
         self.image_barriers.clear();
-        //see also - https://github.com/NotIntMan/inplace_it/issues/8
     }

     fn make_c_str(&mut self, name: &str) -> &CStr {
@@ -784,6 +831,8 @@ pub struct CommandBuffer {
     raw: vk::CommandBuffer,
 }

+impl crate::DynCommandBuffer for CommandBuffer {}
+
 #[derive(Debug)]
 #[allow(clippy::large_enum_variant)]
 pub enum ShaderModule {
@@ -794,26 +843,36 @@ pub enum ShaderModule {
     },
 }

+impl crate::DynShaderModule for ShaderModule {}
+
 #[derive(Debug)]
 pub struct RenderPipeline {
     raw: vk::Pipeline,
 }

+impl crate::DynRenderPipeline for RenderPipeline {}
+
 #[derive(Debug)]
 pub struct ComputePipeline {
     raw: vk::Pipeline,
 }

+impl crate::DynComputePipeline for ComputePipeline {}
+
 #[derive(Debug)]
 pub struct PipelineCache {
     raw: vk::PipelineCache,
 }

+impl crate::DynPipelineCache for PipelineCache {}
+
 #[derive(Debug)]
 pub struct QuerySet {
     raw: vk::QueryPool,
 }

+impl crate::DynQuerySet for QuerySet {}
+
 /// The [`Api::Fence`] type for [`vulkan::Api`].
 ///
 /// This is an `enum` because there are two possible implementations of
@@ -863,6 +922,8 @@ pub enum Fence {
     },
 }

+impl crate::DynFence for Fence {}
+
 impl Fence {
     /// Return the highest [`FenceValue`] among the signalled fences in `active`.
     ///
diff --git a/wgpu-info/src/texture.rs b/wgpu-info/src/texture.rs
index b6f79c0482..40771d067d 100644
--- a/wgpu-info/src/texture.rs
+++ b/wgpu-info/src/texture.rs
@@ -1,6 +1,6 @@
 // Let's keep these on one line
 #[rustfmt::skip]
-pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [
+pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 116] = [
     wgpu::TextureFormat::R8Unorm,
     wgpu::TextureFormat::R8Snorm,
     wgpu::TextureFormat::R8Uint,
@@ -29,9 +29,10 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [
     wgpu::TextureFormat::Rgba8Sint,
     wgpu::TextureFormat::Bgra8Unorm,
     wgpu::TextureFormat::Bgra8UnormSrgb,
+    wgpu::TextureFormat::Rgb9e5Ufloat,
     wgpu::TextureFormat::Rgb10a2Uint,
     wgpu::TextureFormat::Rgb10a2Unorm,
-    wgpu::TextureFormat::Rg11b10Float,
+    wgpu::TextureFormat::Rg11b10UFloat,
     wgpu::TextureFormat::Rg32Uint,
     wgpu::TextureFormat::Rg32Sint,
     wgpu::TextureFormat::Rg32Float,
@@ -45,14 +46,10 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [
     wgpu::TextureFormat::Rgba32Float,
     wgpu::TextureFormat::Stencil8,
     wgpu::TextureFormat::Depth16Unorm,
-    wgpu::TextureFormat::Depth32Float,
-    wgpu::TextureFormat::Depth32FloatStencil8,
     wgpu::TextureFormat::Depth24Plus,
     wgpu::TextureFormat::Depth24PlusStencil8,
-    wgpu::TextureFormat::Rgb9e5Ufloat,
-    wgpu::TextureFormat::Rgb10a2Uint,
-    wgpu::TextureFormat::Rgb10a2Unorm,
-    wgpu::TextureFormat::Rg11b10Float,
+    wgpu::TextureFormat::Depth32Float,
+    wgpu::TextureFormat::Depth32FloatStencil8,
     wgpu::TextureFormat::NV12,
     wgpu::TextureFormat::Bc1RgbaUnorm,
     wgpu::TextureFormat::Bc1RgbaUnormSrgb,
@@ -122,6 +119,19 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [
     wgpu::TextureFormat::Astc { block: wgpu::AstcBlock::B12x12, channel: wgpu::AstcChannel::Hdr },
 ];

+#[test]
+fn test_uniqueness_in_texture_format_list() {
+    use std::collections::HashSet;
+
+    let uniq: HashSet<wgpu::TextureFormat> = TEXTURE_FORMAT_LIST.into_iter().collect();
+    let mut duplicated = TEXTURE_FORMAT_LIST.to_vec();
+    uniq.iter().for_each(|u| {
+        let first_occurrence = duplicated.iter().position(|el| u == el).unwrap();
+        duplicated.remove(first_occurrence);
+    });
+    assert_eq!(duplicated, vec![]);
+}
+
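The new test catches exactly the copy-paste duplication this hunk removes (`Rgb9e5Ufloat`, `Rgb10a2Uint`, `Rgb10a2Unorm`, and `Rg11b10Float` each appeared twice). An equivalent, more direct formulation of the same check, relying on `HashSet::insert` returning `false` for elements it already contains:

```rust
// Standalone sketch of an alternative duplicate check.
fn find_duplicates<T: std::hash::Hash + Eq + Copy>(items: &[T]) -> Vec<T> {
    let mut seen = std::collections::HashSet::new();
    // `insert` returns false when the value was already present,
    // so the filter keeps exactly the repeated occurrences.
    items.iter().copied().filter(|i| !seen.insert(*i)).collect()
}

fn main() {
    assert_eq!(find_duplicates(&[1, 2, 2, 3, 1]), vec![2, 1]);
}
```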
 pub fn max_texture_format_string_size() -> usize {
     TEXTURE_FORMAT_LIST
         .into_iter()
diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml
index 7accda274a..387e41a475 100644
--- a/wgpu-types/Cargo.toml
+++ b/wgpu-types/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "wgpu-types"
-version = "0.20.0"
+version = "22.0.0"
 authors = ["gfx-rs developers"]
 edition = "2021"
 description = "WebGPU types"
@@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0"
 # copy the crates it actually uses out of the workspace, so it's meaningful for
 # them to have less restrictive MSRVs individually than the workspace as a
 # whole, if their code permits. See `../README.md` for details.
-rust-version = "1.74"
+rust-version = "1.76"

 [package.metadata.docs.rs]
 all-features = true
@@ -30,16 +30,17 @@ targets = [
 [features]
 strict_asserts = []
 fragile-send-sync-non-atomic-wasm = []
+serde = ["dep:serde"]

 # Enables some internal instrumentation for debugging purposes.
 counters = []

 [dependencies]
-bitflags = "2"
-serde = { version = "1", features = ["serde_derive"], optional = true }
+bitflags.workspace = true
+serde = { workspace = true, features = ["derive"], optional = true }

 [target.'cfg(target_arch = "wasm32")'.dependencies]
-js-sys = "0.3.69"
-web-sys = { version = "0.3.69", features = [
+js-sys.workspace = true
+web-sys = { workspace = true, features = [
     "ImageBitmap",
     "HtmlVideoElement",
     "HtmlCanvasElement",
@@ -47,5 +48,5 @@ web-sys = { version = "0.3.69", features = [
 ] }

 [dev-dependencies]
-serde = { version = "1", features = ["serde_derive"] }
-serde_json = "1.0.119"
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
diff --git a/wgpu-types/src/counters.rs b/wgpu-types/src/counters.rs
index 9dfa739f8b..d0f9a5ea18 100644
--- a/wgpu-types/src/counters.rs
+++ b/wgpu-types/src/counters.rs
@@ -1,5 +1,6 @@
 #[cfg(feature = "counters")]
 use std::sync::atomic::{AtomicIsize, Ordering};
+use std::{fmt, ops::Range};

 /// An internal counter for debugging purposes
 ///
@@ -128,7 +129,7 @@ pub struct HalCounters {
 /// `wgpu-core`'s internal counters.
 #[derive(Clone, Default)]
 pub struct CoreCounters {
-    // TODO
+    // TODO #[cfg(features=)]
 }

 /// All internal counters, exposed for debugging purposes.
@@ -139,3 +140,90 @@ pub struct InternalCounters {
     /// `wgpu-hal` counters.
     pub hal: HalCounters,
 }
+
+/// Describes an allocation in the [`AllocatorReport`].
+#[derive(Clone)]
+pub struct AllocationReport {
+    /// The name provided to the `allocate()` function.
+    pub name: String,
+    /// The offset in bytes of the allocation in its memory block.
+    pub offset: u64,
+    /// The size in bytes of the allocation.
+    pub size: u64,
+}
+
+/// Describes a memory block in the [`AllocatorReport`].
+#[derive(Clone)]
+pub struct MemoryBlockReport {
+    /// The size in bytes of this memory block.
+    pub size: u64,
+    /// The range of allocations in [`AllocatorReport::allocations`] that are associated
+    /// to this memory block.
+    pub allocations: Range<usize>,
+}
+
+/// A report that can be generated for informational purposes using `Allocator::generate_report()`.
+#[derive(Clone)]
+pub struct AllocatorReport {
+    /// All live allocations, sub-allocated from memory blocks.
+    pub allocations: Vec<AllocationReport>,
+    /// All memory blocks.
+    pub blocks: Vec<MemoryBlockReport>,
+    /// Sum of the memory used by all allocations, in bytes.
+    pub total_allocated_bytes: u64,
+    /// Sum of the memory reserved by all memory blocks including unallocated regions, in bytes.
+ pub total_reserved_bytes: u64, +} + +impl fmt::Debug for AllocationReport { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let name = if !self.name.is_empty() { + self.name.as_str() + } else { + "--" + }; + write!(f, "{name:?}: {}", FmtBytes(self.size)) + } +} + +impl fmt::Debug for AllocatorReport { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut allocations = self.allocations.clone(); + allocations.sort_by_key(|alloc| std::cmp::Reverse(alloc.size)); + + let max_num_allocations_to_print = f.precision().unwrap_or(usize::MAX); + allocations.truncate(max_num_allocations_to_print); + + f.debug_struct("AllocatorReport") + .field( + "summary", + &std::format_args!( + "{} / {}", + FmtBytes(self.total_allocated_bytes), + FmtBytes(self.total_reserved_bytes) + ), + ) + .field("blocks", &self.blocks.len()) + .field("allocations", &self.allocations.len()) + .field("largest", &allocations.as_slice()) + .finish() + } +} + +struct FmtBytes(u64); + +impl fmt::Display for FmtBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + const SUFFIX: [&str; 5] = ["B", "KB", "MB", "GB", "TB"]; + let mut idx = 0; + let mut amount = self.0 as f64; + loop { + if amount < 1024.0 || idx == SUFFIX.len() - 1 { + return write!(f, "{:.2} {}", amount, SUFFIX[idx]); + } + + amount /= 1024.0; + idx += 1; + } + } +} diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 84c319bee1..f88d87299a 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1,13 +1,12 @@ -/*! This library describes the API surface of WebGPU that is agnostic of the backend. - * This API is used for targeting both Web and Native. - */ +//! This library describes the API surface of WebGPU that is agnostic of the backend. +//! This API is used for targeting both Web and Native. #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![allow( // We don't use syntax sugar where it's not necessary. clippy::match_like_matches_macro, )] -#![warn(missing_docs, unsafe_op_in_unsafe_fn)] +#![warn(clippy::ptr_as_ptr, missing_docs, unsafe_op_in_unsafe_fn)] #[cfg(any(feature = "serde", test))] use serde::Deserialize; @@ -291,12 +290,28 @@ bitflags::bitflags! { /// Support for this feature guarantees availability of [`TextureUsages::COPY_SRC | TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING`] for BCn formats. /// [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] may enable additional usages. /// + /// This feature guarantees availability of sliced-3d textures for BC formats when combined with TEXTURE_COMPRESSION_BC_SLICED_3D. + /// /// Supported Platforms: /// - desktops + /// - Mobile (All Apple9 and some Apple7 and Apple8 devices) /// /// This is a web and native feature. const TEXTURE_COMPRESSION_BC = 1 << 2; + + /// Allows the 3d dimension for textures with BC compressed formats. + /// + /// This feature must be used in combination with TEXTURE_COMPRESSION_BC to enable 3D textures with BC compression. + /// It does not enable the BC formats by itself. + /// + /// Supported Platforms: + /// - desktops + /// - Mobile (All Apple9 and some Apple7 and Apple8 devices) + /// + /// This is a web and native feature. + const TEXTURE_COMPRESSION_BC_SLICED_3D = 1 << 3; + /// Enables ETC family of compressed textures. All ETC textures use 4x4 pixel blocks. /// ETC2 RGB and RGBA1 are 8 bytes per block. RTC2 RGBA8 and EAC are 16 bytes per block. /// @@ -311,7 +326,7 @@ bitflags::bitflags! { /// - Mobile (some) /// /// This is a web and native feature. 
-        const TEXTURE_COMPRESSION_ETC2 = 1 << 3;
+        const TEXTURE_COMPRESSION_ETC2 = 1 << 4;

         /// Enables ASTC family of compressed textures. ASTC textures use pixel blocks varying from 4x4 to 12x12.
         /// Blocks are always 16 bytes.
@@ -327,7 +342,7 @@ bitflags::bitflags! {
         /// - Mobile (some)
         ///
         /// This is a web and native feature.
-        const TEXTURE_COMPRESSION_ASTC = 1 << 4;
+        const TEXTURE_COMPRESSION_ASTC = 1 << 5;

         /// Enables use of Timestamp Queries. These queries tell the current gpu timestamp when
         /// all work before the query is finished.
@@ -351,7 +366,7 @@ bitflags::bitflags! {
         /// - Metal
         ///
         /// This is a web and native feature.
-        const TIMESTAMP_QUERY = 1 << 5;
+        const TIMESTAMP_QUERY = 1 << 6;

         /// Allows non-zero value for the `first_instance` member in indirect draw calls.
         ///
@@ -370,7 +385,7 @@ bitflags::bitflags! {
         /// - OpenGL ES / WebGL
         ///
         /// This is a web and native feature.
-        const INDIRECT_FIRST_INSTANCE = 1 << 6;
+        const INDIRECT_FIRST_INSTANCE = 1 << 7;

         /// Allows shaders to acquire the FP16 ability
         ///
@@ -381,10 +396,10 @@ bitflags::bitflags! {
         /// - Metal
         ///
         /// This is a web and native feature.
-        const SHADER_F16 = 1 << 7;
+        const SHADER_F16 = 1 << 8;

-        /// Allows for usage of textures of format [`TextureFormat::Rg11b10Float`] as a render target
+        /// Allows for usage of textures of format [`TextureFormat::Rg11b10UFloat`] as a render target
         ///
         /// Supported platforms:
         /// - Vulkan
@@ -392,7 +407,7 @@ bitflags::bitflags! {
         /// - Metal
         ///
         /// This is a web and native feature.
-        const RG11B10UFLOAT_RENDERABLE = 1 << 8;
+        const RG11B10UFLOAT_RENDERABLE = 1 << 9;

         /// Allows the [`wgpu::TextureUsages::STORAGE_BINDING`] usage on textures with format [`TextureFormat::Bgra8unorm`]
         ///
@@ -402,7 +417,7 @@ bitflags::bitflags! {
         /// - Metal
         ///
         /// This is a web and native feature.
-        const BGRA8UNORM_STORAGE = 1 << 9;
+        const BGRA8UNORM_STORAGE = 1 << 10;

         /// Allows textures with formats "r32float", "rg32float", and "rgba32float" to be filterable.
         ///
@@ -414,9 +429,9 @@ bitflags::bitflags! {
         /// - GL with one of `GL_ARB_color_buffer_float`/`GL_EXT_color_buffer_float`/`OES_texture_float_linear`
         ///
         /// This is a web and native feature.
-        const FLOAT32_FILTERABLE = 1 << 10;
+        const FLOAT32_FILTERABLE = 1 << 11;

-        // Bits 11-19 available for webgpu features. Should you choose to use some of them
+        // Bits 12-19 available for webgpu features. Should you choose to use some of them
         // for native features, don't forget to update `all_webgpu_mask` and `all_native_mask`
         // accordingly.
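With the new bit inserted, code that wants 3D textures in BC formats must check both flags; `TEXTURE_COMPRESSION_BC_SLICED_3D` on its own enables no formats. A small sketch of the gating, again using the `wgt` alias for `wgpu-types`:

```rust
// Both bits are required before creating a 3D texture with a BC format.
fn supports_bc_sliced_3d(features: wgt::Features) -> bool {
    features.contains(
        wgt::Features::TEXTURE_COMPRESSION_BC
            | wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D,
    )
}
```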
@@ -1445,6 +1460,7 @@ impl Limits { compare!(max_texture_dimension_3d, Less); compare!(max_texture_array_layers, Less); compare!(max_bind_groups, Less); + compare!(max_bindings_per_bind_group, Less); compare!(max_dynamic_uniform_buffers_per_pipeline_layout, Less); compare!(max_dynamic_storage_buffers_per_pipeline_layout, Less); compare!(max_sampled_textures_per_shader_stage, Less); @@ -1455,23 +1471,25 @@ impl Limits { compare!(max_uniform_buffer_binding_size, Less); compare!(max_storage_buffer_binding_size, Less); compare!(max_vertex_buffers, Less); + compare!(max_buffer_size, Less); compare!(max_vertex_attributes, Less); compare!(max_vertex_buffer_array_stride, Less); - if self.min_subgroup_size > 0 && self.max_subgroup_size > 0 { - compare!(min_subgroup_size, Greater); - compare!(max_subgroup_size, Less); - } - compare!(max_push_constant_size, Less); compare!(min_uniform_buffer_offset_alignment, Greater); compare!(min_storage_buffer_offset_alignment, Greater); compare!(max_inter_stage_shader_components, Less); + compare!(max_color_attachments, Less); + compare!(max_color_attachment_bytes_per_sample, Less); compare!(max_compute_workgroup_storage_size, Less); compare!(max_compute_invocations_per_workgroup, Less); compare!(max_compute_workgroup_size_x, Less); compare!(max_compute_workgroup_size_y, Less); compare!(max_compute_workgroup_size_z, Less); compare!(max_compute_workgroups_per_dimension, Less); - compare!(max_buffer_size, Less); + if self.min_subgroup_size > 0 && self.max_subgroup_size > 0 { + compare!(min_subgroup_size, Greater); + compare!(max_subgroup_size, Less); + } + compare!(max_push_constant_size, Less); compare!(max_non_sampler_bindings, Less); } } @@ -1482,7 +1500,6 @@ impl Limits { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct DownlevelLimits {} -#[allow(unknown_lints)] // derivable_impls is nightly only currently #[allow(clippy::derivable_impls)] impl Default for DownlevelLimits { fn default() -> Self { @@ -2498,7 +2515,7 @@ pub enum TextureFormat { /// Red, green, blue, and alpha channels. 10 bit integer for RGB channels, 2 bit integer for alpha channel. [0, 1023] ([0, 3] for alpha) converted to/from float [0, 1] in shader. Rgb10a2Unorm, /// Red, green, and blue channels. 11 bit float with no sign bit for RG channels. 10 bit float with no sign bit for blue channel. Float in shader. - Rg11b10Float, + Rg11b10UFloat, // Normal 64 bit formats /// Red and green channels. 32 bit integer per channel. Unsigned in shader. @@ -2561,13 +2578,14 @@ pub enum TextureFormat { /// [`Features::TEXTURE_FORMAT_NV12`] must be enabled to use this texture format. NV12, - // Compressed textures usable with `TEXTURE_COMPRESSION_BC` feature. + // Compressed textures usable with `TEXTURE_COMPRESSION_BC` feature. `TEXTURE_COMPRESSION_SLICED_3D` is required to use with 3D textures. /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 4 color + alpha pallet. 5 bit R + 6 bit G + 5 bit B + 1 bit alpha. /// [0, 63] ([0, 1] for alpha) converted to/from float [0, 1] in shader. /// /// Also known as DXT1. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc1RgbaUnorm, /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 4 color + alpha pallet. 5 bit R + 6 bit G + 5 bit B + 1 bit alpha. /// Srgb-color [0, 63] ([0, 1] for alpha) converted to/from linear-color float [0, 1] in shader. 
@@ -2575,6 +2593,7 @@ pub enum TextureFormat { /// Also known as DXT1. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc1RgbaUnormSrgb, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet. 5 bit R + 6 bit G + 5 bit B + 4 bit alpha. /// [0, 63] ([0, 15] for alpha) converted to/from float [0, 1] in shader. @@ -2582,6 +2601,7 @@ pub enum TextureFormat { /// Also known as DXT3. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc2RgbaUnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet. 5 bit R + 6 bit G + 5 bit B + 4 bit alpha. /// Srgb-color [0, 63] ([0, 255] for alpha) converted to/from linear-color float [0, 1] in shader. @@ -2589,6 +2609,7 @@ pub enum TextureFormat { /// Also known as DXT3. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc2RgbaUnormSrgb, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet + 8 alpha pallet. 5 bit R + 6 bit G + 5 bit B + 8 bit alpha. /// [0, 63] ([0, 255] for alpha) converted to/from float [0, 1] in shader. @@ -2596,6 +2617,7 @@ pub enum TextureFormat { /// Also known as DXT5. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc3RgbaUnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet + 8 alpha pallet. 5 bit R + 6 bit G + 5 bit B + 8 bit alpha. /// Srgb-color [0, 63] ([0, 255] for alpha) converted to/from linear-color float [0, 1] in shader. @@ -2603,6 +2625,7 @@ pub enum TextureFormat { /// Also known as DXT5. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc3RgbaUnormSrgb, /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 8 color pallet. 8 bit R. /// [0, 255] converted to/from float [0, 1] in shader. @@ -2610,6 +2633,7 @@ pub enum TextureFormat { /// Also known as RGTC1. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc4RUnorm, /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 8 color pallet. 8 bit R. /// [-127, 127] converted to/from float [-1, 1] in shader. @@ -2617,6 +2641,7 @@ pub enum TextureFormat { /// Also known as RGTC1. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc4RSnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 8 color red pallet + 8 color green pallet. 8 bit RG. /// [0, 255] converted to/from float [0, 1] in shader. @@ -2624,6 +2649,7 @@ pub enum TextureFormat { /// Also known as RGTC2. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. 
+ /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc5RgUnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 8 color red pallet + 8 color green pallet. 8 bit RG. /// [-127, 127] converted to/from float [-1, 1] in shader. @@ -2631,18 +2657,21 @@ pub enum TextureFormat { /// Also known as RGTC2. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc5RgSnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 16 bit unsigned float RGB. Float in shader. /// /// Also known as BPTC (float). /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc6hRgbUfloat, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 16 bit signed float RGB. Float in shader. /// /// Also known as BPTC (float). /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc6hRgbFloat, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 8 bit integer RGBA. /// [0, 255] converted to/from float [0, 1] in shader. @@ -2650,6 +2679,7 @@ pub enum TextureFormat { /// Also known as BPTC (unorm). /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc7RgbaUnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 8 bit integer RGBA. /// Srgb-color [0, 255] converted to/from linear-color float [0, 1] in shader. @@ -2657,6 +2687,7 @@ pub enum TextureFormat { /// Also known as BPTC (unorm). /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc7RgbaUnormSrgb, /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). Complex pallet. 8 bit integer RGB. /// [0, 255] converted to/from float [0, 1] in shader. 
@@ -2772,7 +2803,7 @@ impl<'de> Deserialize<'de> for TextureFormat { "bgra8unorm-srgb" => TextureFormat::Bgra8UnormSrgb, "rgb10a2uint" => TextureFormat::Rgb10a2Uint, "rgb10a2unorm" => TextureFormat::Rgb10a2Unorm, - "rg11b10ufloat" => TextureFormat::Rg11b10Float, + "rg11b10ufloat" => TextureFormat::Rg11b10UFloat, "rg32uint" => TextureFormat::Rg32Uint, "rg32sint" => TextureFormat::Rg32Sint, "rg32float" => TextureFormat::Rg32Float, @@ -2900,7 +2931,7 @@ impl Serialize for TextureFormat { TextureFormat::Bgra8UnormSrgb => "bgra8unorm-srgb", TextureFormat::Rgb10a2Uint => "rgb10a2uint", TextureFormat::Rgb10a2Unorm => "rgb10a2unorm", - TextureFormat::Rg11b10Float => "rg11b10ufloat", + TextureFormat::Rg11b10UFloat => "rg11b10ufloat", TextureFormat::Rg32Uint => "rg32uint", TextureFormat::Rg32Sint => "rg32sint", TextureFormat::Rg32Float => "rg32float", @@ -3130,7 +3161,7 @@ impl TextureFormat { | Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm - | Self::Rg11b10Float + | Self::Rg11b10UFloat | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -3200,6 +3231,11 @@ impl TextureFormat { self.block_dimensions() != (1, 1) } + /// Returns `true` for BCn compressed formats. + pub fn is_bcn(&self) -> bool { + self.required_features() == Features::TEXTURE_COMPRESSION_BC + } + /// Returns the required features (if any) in order to use the texture. pub fn required_features(&self) -> Features { match *self { @@ -3230,7 +3266,7 @@ impl TextureFormat { | Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm - | Self::Rg11b10Float + | Self::Rg11b10UFloat | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -3348,7 +3384,7 @@ impl TextureFormat { Self::Bgra8UnormSrgb => (msaa_resolve, attachment), Self::Rgb10a2Uint => ( msaa, attachment), Self::Rgb10a2Unorm => (msaa_resolve, attachment), - Self::Rg11b10Float => ( msaa, rg11b10f), + Self::Rg11b10UFloat => ( msaa, rg11b10f), Self::Rg32Uint => ( noaa, all_flags), Self::Rg32Sint => ( noaa, all_flags), Self::Rg32Float => ( noaa, all_flags), @@ -3458,7 +3494,7 @@ impl TextureFormat { | Self::Rg16Float | Self::Rgba16Float | Self::Rgb10a2Unorm - | Self::Rg11b10Float => Some(float), + | Self::Rg11b10UFloat => Some(float), Self::R32Float | Self::Rg32Float | Self::Rgba32Float => Some(float32_sample_type), @@ -3588,7 +3624,7 @@ impl TextureFormat { | Self::Rg16Sint | Self::Rg16Float => Some(4), Self::R32Uint | Self::R32Sint | Self::R32Float => Some(4), - Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm | Self::Rg11b10Float => { + Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm | Self::Rg11b10UFloat => { Some(4) } @@ -3655,43 +3691,82 @@ impl TextureFormat { /// pub fn target_pixel_byte_cost(&self) -> Option { match *self { - Self::R8Unorm | Self::R8Uint | Self::R8Sint => Some(1), + Self::R8Unorm | Self::R8Snorm | Self::R8Uint | Self::R8Sint => Some(1), Self::Rg8Unorm + | Self::Rg8Snorm | Self::Rg8Uint | Self::Rg8Sint | Self::R16Uint | Self::R16Sint + | Self::R16Unorm + | Self::R16Snorm | Self::R16Float => Some(2), Self::Rgba8Uint | Self::Rgba8Sint | Self::Rg16Uint | Self::Rg16Sint + | Self::Rg16Unorm + | Self::Rg16Snorm | Self::Rg16Float | Self::R32Uint | Self::R32Sint | Self::R32Float => Some(4), Self::Rgba8Unorm | Self::Rgba8UnormSrgb + | Self::Rgba8Snorm | Self::Bgra8Unorm | Self::Bgra8UnormSrgb | Self::Rgba16Uint | Self::Rgba16Sint + | Self::Rgba16Unorm + | Self::Rgba16Snorm | Self::Rgba16Float | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float | Self::Rgb10a2Uint | Self::Rgb10a2Unorm - | Self::Rg11b10Float => Some(8), + | 
Self::Rg11b10UFloat => Some(8), Self::Rgba32Uint | Self::Rgba32Sint | Self::Rgba32Float => Some(16), - Self::Rgba8Snorm | Self::Rg8Snorm | Self::R8Snorm => None, - _ => None, + Self::Stencil8 + | Self::Depth16Unorm + | Self::Depth24Plus + | Self::Depth24PlusStencil8 + | Self::Depth32Float + | Self::Depth32FloatStencil8 + | Self::NV12 + | Self::Rgb9e5Ufloat + | Self::Bc1RgbaUnorm + | Self::Bc1RgbaUnormSrgb + | Self::Bc2RgbaUnorm + | Self::Bc2RgbaUnormSrgb + | Self::Bc3RgbaUnorm + | Self::Bc3RgbaUnormSrgb + | Self::Bc4RUnorm + | Self::Bc4RSnorm + | Self::Bc5RgUnorm + | Self::Bc5RgSnorm + | Self::Bc6hRgbUfloat + | Self::Bc6hRgbFloat + | Self::Bc7RgbaUnorm + | Self::Bc7RgbaUnormSrgb + | Self::Etc2Rgb8Unorm + | Self::Etc2Rgb8UnormSrgb + | Self::Etc2Rgb8A1Unorm + | Self::Etc2Rgb8A1UnormSrgb + | Self::Etc2Rgba8Unorm + | Self::Etc2Rgba8UnormSrgb + | Self::EacR11Unorm + | Self::EacR11Snorm + | Self::EacRg11Unorm + | Self::EacRg11Snorm + | Self::Astc { .. } => None, } } /// See pub fn target_component_alignment(&self) -> Option { - match self { + match *self { Self::R8Unorm | Self::R8Snorm | Self::R8Uint @@ -3709,12 +3784,18 @@ impl TextureFormat { | Self::Bgra8UnormSrgb => Some(1), Self::R16Uint | Self::R16Sint + | Self::R16Unorm + | Self::R16Snorm | Self::R16Float | Self::Rg16Uint | Self::Rg16Sint + | Self::Rg16Unorm + | Self::Rg16Snorm | Self::Rg16Float | Self::Rgba16Uint | Self::Rgba16Sint + | Self::Rgba16Unorm + | Self::Rgba16Snorm | Self::Rgba16Float => Some(2), Self::R32Uint | Self::R32Sint @@ -3727,8 +3808,40 @@ impl TextureFormat { | Self::Rgba32Float | Self::Rgb10a2Uint | Self::Rgb10a2Unorm - | Self::Rg11b10Float => Some(4), - _ => None, + | Self::Rg11b10UFloat => Some(4), + Self::Stencil8 + | Self::Depth16Unorm + | Self::Depth24Plus + | Self::Depth24PlusStencil8 + | Self::Depth32Float + | Self::Depth32FloatStencil8 + | Self::NV12 + | Self::Rgb9e5Ufloat + | Self::Bc1RgbaUnorm + | Self::Bc1RgbaUnormSrgb + | Self::Bc2RgbaUnorm + | Self::Bc2RgbaUnormSrgb + | Self::Bc3RgbaUnorm + | Self::Bc3RgbaUnormSrgb + | Self::Bc4RUnorm + | Self::Bc4RSnorm + | Self::Bc5RgUnorm + | Self::Bc5RgSnorm + | Self::Bc6hRgbUfloat + | Self::Bc6hRgbFloat + | Self::Bc7RgbaUnorm + | Self::Bc7RgbaUnormSrgb + | Self::Etc2Rgb8Unorm + | Self::Etc2Rgb8UnormSrgb + | Self::Etc2Rgb8A1Unorm + | Self::Etc2Rgb8A1UnormSrgb + | Self::Etc2Rgba8Unorm + | Self::Etc2Rgba8UnormSrgb + | Self::EacR11Unorm + | Self::EacR11Snorm + | Self::EacRg11Unorm + | Self::EacRg11Snorm + | Self::Astc { .. } => None, } } @@ -3784,7 +3897,7 @@ impl TextureFormat { | Self::Rgba32Sint | Self::Rgba32Float => 4, - Self::Rgb9e5Ufloat | Self::Rg11b10Float => 3, + Self::Rgb9e5Ufloat | Self::Rg11b10UFloat => 3, Self::Rgb10a2Uint | Self::Rgb10a2Unorm => 4, Self::Stencil8 | Self::Depth16Unorm | Self::Depth24Plus | Self::Depth32Float => 1, @@ -3999,7 +4112,7 @@ fn texture_format_serialize() { "\"rgb10a2unorm\"".to_string() ); assert_eq!( - serde_json::to_string(&TextureFormat::Rg11b10Float).unwrap(), + serde_json::to_string(&TextureFormat::Rg11b10UFloat).unwrap(), "\"rg11b10ufloat\"".to_string() ); assert_eq!( @@ -4296,7 +4409,7 @@ fn texture_format_deserialize() { ); assert_eq!( serde_json::from_str::("\"rg11b10ufloat\"").unwrap(), - TextureFormat::Rg11b10Float + TextureFormat::Rg11b10UFloat ); assert_eq!( serde_json::from_str::("\"rg32uint\"").unwrap(), @@ -4772,7 +4885,7 @@ pub enum StencilOperation { pub struct StencilFaceState { /// Comparison function that determines if the fail_op or pass_op is used on the stencil buffer. 
pub compare: CompareFunction, - /// Operation that is preformed when stencil test fails. + /// Operation that is performed when stencil test fails. pub fail_op: StencilOperation, /// Operation that is performed when depth test fails but stencil test succeeds. pub depth_fail_op: StencilOperation, @@ -5373,13 +5486,13 @@ pub struct SurfaceConfiguration { /// /// Typical values range from 3 to 1, but higher values are possible: /// * Choose 2 or higher for potentially smoother frame display, as it allows to be at least one frame - /// to be queued up. This typically avoids starving the GPU's work queue. - /// Higher values are useful for achieving a constant flow of frames to the display under varying load. + /// to be queued up. This typically avoids starving the GPU's work queue. + /// Higher values are useful for achieving a constant flow of frames to the display under varying load. /// * Choose 1 for low latency from frame recording to frame display. - /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU - /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`, - /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle). - /// It is currently not possible to query this. See . + /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU + /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`, + /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle). + /// It is currently not possible to query this. See . /// * A value of 0 is generally not supported and always clamped to a higher value. pub desired_maximum_frame_latency: u32, /// Specifies how the alpha channel of the textures should be handled during compositing. @@ -6997,7 +7110,7 @@ impl DrawIndirectArgs { pub fn as_bytes(&self) -> &[u8] { unsafe { std::mem::transmute(std::slice::from_raw_parts( - self as *const _ as *const u8, + std::ptr::from_ref(self).cast::(), std::mem::size_of::(), )) } @@ -7027,7 +7140,7 @@ impl DrawIndexedIndirectArgs { pub fn as_bytes(&self) -> &[u8] { unsafe { std::mem::transmute(std::slice::from_raw_parts( - self as *const _ as *const u8, + std::ptr::from_ref(self).cast::(), std::mem::size_of::(), )) } @@ -7051,7 +7164,7 @@ impl DispatchIndirectArgs { pub fn as_bytes(&self) -> &[u8] { unsafe { std::mem::transmute(std::slice::from_raw_parts( - self as *const _ as *const u8, + std::ptr::from_ref(self).cast::(), std::mem::size_of::(), )) } diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml index d8538a6ed9..cd73f5dc9e 100644 --- a/wgpu/Cargo.toml +++ b/wgpu/Cargo.toml @@ -87,8 +87,9 @@ strict_asserts = ["wgc?/strict_asserts", "wgt/strict_asserts"] ## Enables serialization via `serde` on common wgpu types. serde = ["dep:serde", "wgc/serde"] -## Allow writing of trace capture files. See [`Adapter::request_device`]. -trace = ["serde", "wgc/trace"] +# Uncomment once we get to https://github.com/gfx-rs/wgpu/issues/5974 +# ## Allow writing of trace capture files. See [`Adapter::request_device`]. +# trace = ["serde", "wgc/trace"] ## Allow deserializing of trace capture files that were written with the `trace` feature. ## To replay a trace file use the [wgpu player](https://github.com/gfx-rs/wgpu/tree/trunk/player). 
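The `as_bytes` hunks above replace the double cast `self as *const _ as *const u8` with `std::ptr::from_ref(self).cast::<u8>()`; `ptr::from_ref` is stable since Rust 1.76, matching this PR's MSRV bump, and each step now changes exactly one property of the pointer: first reference to pointer, then the pointee type. A standalone sketch of the pattern:

```rust
// Illustrative stand-in for the indirect-args structs; two u32 fields,
// `repr(C)`, so there is no padding and the byte view is well defined.
#[repr(C)]
struct Args {
    vertex_count: u32,
    instance_count: u32,
}

fn as_bytes(args: &Args) -> &[u8] {
    unsafe {
        std::slice::from_raw_parts(
            // reference -> pointer, then pointee cast; an accidental
            // `&&Args` here would no longer type-check silently.
            std::ptr::from_ref(args).cast::<u8>(),
            std::mem::size_of::<Args>(),
        )
    }
}

fn main() {
    let a = Args { vertex_count: 3, instance_count: 1 };
    assert_eq!(as_bytes(&a).len(), 8);
}
```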
diff --git a/wgpu/src/api/adapter.rs b/wgpu/src/api/adapter.rs
new file mode 100644
index 0000000000..5f43a461f1
--- /dev/null
+++ b/wgpu/src/api/adapter.rs
@@ -0,0 +1,255 @@
+use std::{future::Future, sync::Arc, thread};
+
+use crate::context::{DeviceRequest, DynContext, ObjectId};
+use crate::*;
+
+/// Handle to a physical graphics and/or compute device.
+///
+/// Adapters can be used to open a connection to the corresponding [`Device`]
+/// on the host system by using [`Adapter::request_device`].
+///
+/// Does not have to be kept alive.
+///
+/// Corresponds to [WebGPU `GPUAdapter`](https://gpuweb.github.io/gpuweb/#gpu-adapter).
+#[derive(Debug)]
+pub struct Adapter {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Adapter: Send, Sync);
+
+impl Drop for Adapter {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.adapter_drop(&self.id, self.data.as_ref())
+        }
+    }
+}
+
+pub use wgt::RequestAdapterOptions as RequestAdapterOptionsBase;
+/// Additional information required when requesting an adapter.
+///
+/// For use with [`Instance::request_adapter`].
+///
+/// Corresponds to [WebGPU `GPURequestAdapterOptions`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurequestadapteroptions).
+pub type RequestAdapterOptions<'a, 'b> = RequestAdapterOptionsBase<&'a Surface<'b>>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RequestAdapterOptions<'_, '_>: Send, Sync);
+
+impl Adapter {
+    /// Returns a globally-unique identifier for this `Adapter`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Requests a connection to a physical device, creating a logical device.
+    ///
+    /// Returns the [`Device`] together with a [`Queue`] that executes command buffers.
+    ///
+    /// [Per the WebGPU specification], an [`Adapter`] may only be used once to create a device.
+    /// If another device is wanted, call [`Instance::request_adapter()`] again to get a fresh
+    /// [`Adapter`].
+    /// However, `wgpu` does not currently enforce this restriction.
+    ///
+    /// # Arguments
+    ///
+    /// - `desc` - Description of the features and limits requested from the given device.
+    /// - `trace_path` - Can be used for API call tracing, if that feature is
+    ///   enabled in `wgpu-core`.
+    ///
+    /// # Panics
+    ///
+    /// - `request_device()` was already called on this `Adapter`.
+    /// - Features specified by `desc` are not supported by this adapter.
+    /// - Unsafe features were requested but not enabled when requesting the adapter.
+    /// - Limits requested exceed the values provided by the adapter.
+    /// - Adapter does not support all features wgpu requires to safely operate.
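+    ///
+    /// A minimal sketch of typical usage, using a default descriptor and no
+    /// trace path (the surrounding async context is assumed):
+    ///
+    /// ```no_run
+    /// # async fn example(adapter: wgpu::Adapter) {
+    /// let (device, queue) = adapter
+    ///     .request_device(&wgpu::DeviceDescriptor::default(), None)
+    ///     .await
+    ///     .expect("failed to create device");
+    /// # }
+    /// ```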
+    ///
+    /// [Per the WebGPU specification]: https://www.w3.org/TR/webgpu/#dom-gpuadapter-requestdevice
+    pub fn request_device(
+        &self,
+        desc: &DeviceDescriptor<'_>,
+        trace_path: Option<&std::path::Path>,
+    ) -> impl Future<Output = Result<(Device, Queue), RequestDeviceError>> + WasmNotSend {
+        let context = Arc::clone(&self.context);
+        let device = DynContext::adapter_request_device(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+            trace_path,
+        );
+        async move {
+            device.await.map(
+                |DeviceRequest {
+                     device_id,
+                     device_data,
+                     queue_id,
+                     queue_data,
+                 }| {
+                    (
+                        Device {
+                            context: Arc::clone(&context),
+                            id: device_id,
+                            data: device_data,
+                        },
+                        Queue {
+                            context,
+                            id: queue_id,
+                            data: queue_data,
+                        },
+                    )
+                },
+            )
+        }
+    }
+
+    /// Create a wgpu [`Device`] and [`Queue`] from a wgpu-hal `OpenDevice`
+    ///
+    /// # Safety
+    ///
+    /// - `hal_device` must be created from this adapter internal handle.
+    /// - `desc.features` must be a subset of `hal_device` features.
+    #[cfg(wgpu_core)]
+    pub unsafe fn create_device_from_hal<A: wgc::hal_api::HalApi>(
+        &self,
+        hal_device: hal::OpenDevice<A>,
+        desc: &DeviceDescriptor<'_>,
+        trace_path: Option<&std::path::Path>,
+    ) -> Result<(Device, Queue), RequestDeviceError> {
+        let context = Arc::clone(&self.context);
+        unsafe {
+            self.context
+                .as_any()
+                .downcast_ref::<crate::backend::ContextWgpuCore>()
+                // Part of the safety requirements is that the device was generated from the same adapter.
+                // Therefore, unwrap is fine here since only WgpuCoreContext based adapters have the ability to create hal devices.
+                .unwrap()
+                .create_device_from_hal(&self.id.into(), hal_device, desc, trace_path)
+        }
+        .map(|(device, queue)| {
+            (
+                Device {
+                    context: Arc::clone(&context),
+                    id: device.id().into(),
+                    data: Box::new(device),
+                },
+                Queue {
+                    context,
+                    id: queue.id().into(),
+                    data: Box::new(queue),
+                },
+            )
+        })
+    }
+
+    /// Apply a callback to this `Adapter`'s underlying backend adapter.
+    ///
+    /// If this `Adapter` is implemented by the backend API given by `A` (Vulkan,
+    /// Dx12, etc.), then apply `hal_adapter_callback` to `Some(&adapter)`, where
+    /// `adapter` is the underlying backend adapter type, [`A::Adapter`].
+    ///
+    /// If this `Adapter` uses a different backend, apply `hal_adapter_callback`
+    /// to `None`.
+    ///
+    /// The adapter is locked for reading while `hal_adapter_callback` runs. If
+    /// the callback attempts to perform any `wgpu` operations that require
+    /// write access to the adapter, deadlock will occur. The locks are
+    /// automatically released when the callback returns.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle passed to the callback must not be manually destroyed.
+    ///
+    /// [`A::Adapter`]: hal::Api::Adapter
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Adapter>) -> R, R>(
+        &self,
+        hal_adapter_callback: F,
+    ) -> R {
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            unsafe { ctx.adapter_as_hal::<A, F, R>(self.id.into(), hal_adapter_callback) }
+        } else {
+            hal_adapter_callback(None)
+        }
+    }
+
+    /// Returns whether this adapter may present to the passed surface.
+    pub fn is_surface_supported(&self, surface: &Surface<'_>) -> bool {
+        DynContext::adapter_is_surface_supported(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            &surface.id,
+            surface.surface_data.as_ref(),
+        )
+    }
+
+    /// The features which can be used to create devices on this adapter.
+    pub fn features(&self) -> Features {
+        DynContext::adapter_features(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// The best limits which can be used to create devices on this adapter.
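+    ///
+    /// For example, one might pass the adapter's own limits straight through to
+    /// [`Adapter::request_device`], so the request cannot fail due to limits
+    /// (a sketch; the surrounding async context is assumed):
+    ///
+    /// ```no_run
+    /// # async fn example(adapter: wgpu::Adapter) {
+    /// let desc = wgpu::DeviceDescriptor {
+    ///     required_limits: adapter.limits(),
+    ///     ..Default::default()
+    /// };
+    /// let (device, queue) = adapter.request_device(&desc, None).await.unwrap();
+    /// # }
+    /// ```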
+    pub fn limits(&self) -> Limits {
+        DynContext::adapter_limits(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Get info about the adapter itself.
+    pub fn get_info(&self) -> AdapterInfo {
+        DynContext::adapter_get_info(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Get the downlevel capabilities of the adapter itself.
+    pub fn get_downlevel_capabilities(&self) -> DownlevelCapabilities {
+        DynContext::adapter_downlevel_capabilities(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Returns the features supported for a given texture format by this adapter.
+    ///
+    /// Note that the WebGPU spec further restricts the available usages/features.
+    /// To disable these restrictions on a device, request the [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] feature.
+    pub fn get_texture_format_features(&self, format: TextureFormat) -> TextureFormatFeatures {
+        DynContext::adapter_get_texture_format_features(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            format,
+        )
+    }
+
+    /// Generates a timestamp using the clock used by the presentation engine.
+    ///
+    /// When comparing completely opaque timestamp systems, we need a way of generating timestamps that signal
+    /// the exact same time. You can do this by calling your own timestamp function immediately after a call to
+    /// this function. This should result in timestamps that are 0.5 to 5 microseconds apart. There are locks
+    /// that must be taken during the call, so don't call your own timestamp function before it.
+    ///
+    /// ```no_run
+    /// # let adapter: wgpu::Adapter = panic!();
+    /// # let some_code = || wgpu::PresentationTimestamp::INVALID_TIMESTAMP;
+    /// use std::time::{Duration, Instant};
+    /// let presentation = adapter.get_presentation_timestamp();
+    /// let instant = Instant::now();
+    ///
+    /// // We can now turn a new presentation timestamp into an Instant.
+    /// let some_pres_timestamp = some_code();
+    /// let duration = Duration::from_nanos((some_pres_timestamp.0 - presentation.0) as u64);
+    /// let new_instant: Instant = instant + duration;
+    /// ```
+    ///
+    /// [Instant]: std::time::Instant
+    pub fn get_presentation_timestamp(&self) -> PresentationTimestamp {
+        DynContext::adapter_get_presentation_timestamp(&*self.context, &self.id, self.data.as_ref())
+    }
+}
diff --git a/wgpu/src/api/bind_group.rs b/wgpu/src/api/bind_group.rs
new file mode 100644
index 0000000000..05e47511db
--- /dev/null
+++ b/wgpu/src/api/bind_group.rs
@@ -0,0 +1,153 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a binding group.
+///
+/// A `BindGroup` represents the set of resources bound to the bindings described by a
+/// [`BindGroupLayout`]. It can be created with [`Device::create_bind_group`]. A `BindGroup` can
+/// be bound to a particular [`RenderPass`] with [`RenderPass::set_bind_group`], or to a
+/// [`ComputePass`] with [`ComputePass::set_bind_group`].
+///
+/// Corresponds to [WebGPU `GPUBindGroup`](https://gpuweb.github.io/gpuweb/#gpubindgroup).
+#[derive(Debug)]
+pub struct BindGroup {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroup: Send, Sync);
+
+impl BindGroup {
+    /// Returns a globally-unique identifier for this `BindGroup`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for BindGroup {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.bind_group_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Resource that can be bound to a pipeline.
+///
+/// Corresponds to [WebGPU `GPUBindingResource`](
+/// https://gpuweb.github.io/gpuweb/#typedefdef-gpubindingresource).
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub enum BindingResource<'a> {
+    /// Binding is backed by a buffer.
+    ///
+    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
+    /// with [`BindGroupLayoutEntry::count`] set to None.
+    Buffer(BufferBinding<'a>),
+    /// Binding is backed by an array of buffers.
+    ///
+    /// [`Features::BUFFER_BINDING_ARRAY`] must be supported to use this feature.
+    ///
+    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
+    /// with [`BindGroupLayoutEntry::count`] set to Some.
+    BufferArray(&'a [BufferBinding<'a>]),
+    /// Binding is a sampler.
+    ///
+    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set to None.
+    Sampler(&'a Sampler),
+    /// Binding is backed by an array of samplers.
+    ///
+    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
+    ///
+    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set
+    /// to Some.
+    SamplerArray(&'a [&'a Sampler]),
+    /// Binding is backed by a texture.
+    ///
+    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
+    /// [`BindGroupLayoutEntry::count`] set to None.
+    TextureView(&'a TextureView),
+    /// Binding is backed by an array of textures.
+    ///
+    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
+    ///
+    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
+    /// [`BindGroupLayoutEntry::count`] set to Some.
+    TextureViewArray(&'a [&'a TextureView]),
+    /// Todo
+    AccelerationStructure(&'a ray_tracing::Tlas),
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindingResource<'_>: Send, Sync);
+
+/// Describes the segment of a buffer to bind.
+///
+/// Corresponds to [WebGPU `GPUBufferBinding`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferbinding).
+#[derive(Clone, Debug)]
+pub struct BufferBinding<'a> {
+    /// The buffer to bind.
+    pub buffer: &'a Buffer,
+
+    /// Base offset of the buffer, in bytes.
+    ///
+    /// If the [`has_dynamic_offset`] field of this buffer's layout entry is
+    /// `true`, the offset here will be added to the dynamic offset passed to
+    /// [`RenderPass::set_bind_group`] or [`ComputePass::set_bind_group`].
+    ///
+    /// If the buffer was created with [`BufferUsages::UNIFORM`], then this
+    /// offset must be a multiple of
+    /// [`Limits::min_uniform_buffer_offset_alignment`].
+    ///
+    /// If the buffer was created with [`BufferUsages::STORAGE`], then this
+    /// offset must be a multiple of
+    /// [`Limits::min_storage_buffer_offset_alignment`].
+    ///
+    /// [`has_dynamic_offset`]: BindingType::Buffer::has_dynamic_offset
+    pub offset: BufferAddress,
+
+    /// Size of the binding in bytes, or `None` for using the rest of the buffer.
+    pub size: Option<BufferSize>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BufferBinding<'_>: Send, Sync);
+
+/// An element of a [`BindGroupDescriptor`], consisting of a bindable resource
+/// and the slot to bind it to.
+///
+/// Corresponds to [WebGPU `GPUBindGroupEntry`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupentry).
+#[derive(Clone, Debug)]
+pub struct BindGroupEntry<'a> {
+    /// Slot for which binding provides resource. Corresponds to an entry of the same
+    /// binding index in the [`BindGroupLayoutDescriptor`].
+    pub binding: u32,
+    /// Resource to attach to the binding
+    pub resource: BindingResource<'a>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroupEntry<'_>: Send, Sync);
+
+/// Describes a group of bindings and the resources to be bound.
+///
+/// For use with [`Device::create_bind_group`].
+///
+/// Corresponds to [WebGPU `GPUBindGroupDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupdescriptor).
+#[derive(Clone, Debug)]
+pub struct BindGroupDescriptor<'a> {
+    /// Debug label of the bind group. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The [`BindGroupLayout`] that corresponds to this bind group.
+    pub layout: &'a BindGroupLayout,
+    /// The resources to bind to this bind group.
+    pub entries: &'a [BindGroupEntry<'a>],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroupDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/bind_group_layout.rs b/wgpu/src/api/bind_group_layout.rs
new file mode 100644
index 0000000000..1268c664f1
--- /dev/null
+++ b/wgpu/src/api/bind_group_layout.rs
@@ -0,0 +1,59 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a binding group layout.
+///
+/// A `BindGroupLayout` is a handle to the GPU-side layout of a binding group. It can be used to
+/// create a [`BindGroupDescriptor`] object, which in turn can be used to create a [`BindGroup`]
+/// object with [`Device::create_bind_group`]. A series of `BindGroupLayout`s can also be used to
+/// create a [`PipelineLayoutDescriptor`], which can be used to create a [`PipelineLayout`].
+///
+/// It can be created with [`Device::create_bind_group_layout`].
+///
+/// Corresponds to [WebGPU `GPUBindGroupLayout`](
+/// https://gpuweb.github.io/gpuweb/#gpubindgrouplayout).
+#[derive(Debug)]
+pub struct BindGroupLayout {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroupLayout: Send, Sync);
+
+impl BindGroupLayout {
+    /// Returns a globally-unique identifier for this `BindGroupLayout`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for BindGroupLayout {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .bind_group_layout_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`BindGroupLayout`].
+///
+/// For use with [`Device::create_bind_group_layout`].
+///
+/// Corresponds to [WebGPU `GPUBindGroupLayoutDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgrouplayoutdescriptor).
+#[derive(Clone, Debug)]
+pub struct BindGroupLayoutDescriptor<'a> {
+    /// Debug label of the bind group layout. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+
+    /// Array of entries in this BindGroupLayout
+    pub entries: &'a [BindGroupLayoutEntry],
+}
+static_assertions::assert_impl_all!(BindGroupLayoutDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs
new file mode 100644
index 0000000000..6f54637994
--- /dev/null
+++ b/wgpu/src/api/buffer.rs
@@ -0,0 +1,730 @@
+use std::{
+    error, fmt,
+    ops::{Bound, Deref, DerefMut, Range, RangeBounds},
+    sync::Arc,
+    thread,
+};
+
+use parking_lot::Mutex;
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Handle to a GPU-accessible buffer.
+///
+/// Created with [`Device::create_buffer`] or
+/// [`DeviceExt::create_buffer_init`](util::DeviceExt::create_buffer_init).
+///
+/// Corresponds to [WebGPU `GPUBuffer`](https://gpuweb.github.io/gpuweb/#buffer-interface).
+///
+/// A `Buffer`'s bytes have "interior mutability": functions like
+/// [`Queue::write_buffer`] or [mapping] a buffer for writing only require a
+/// `&Buffer`, not a `&mut Buffer`, even though they modify its contents. `wgpu`
+/// prevents simultaneous reads and writes of buffer contents using run-time
+/// checks.
+///
+/// [mapping]: Buffer#mapping-buffers
+///
+/// # Mapping buffers
+///
+/// If a `Buffer` is created with the appropriate [`usage`], it can be *mapped*:
+/// you can make its contents accessible to the CPU as an ordinary `&[u8]` or
+/// `&mut [u8]` slice of bytes. Buffers created with the
+/// [`mapped_at_creation`][mac] flag set are also mapped initially.
+///
+/// Depending on the hardware, the buffer could be memory shared between CPU and
+/// GPU, so that the CPU has direct access to the same bytes the GPU will
+/// consult; or it may be ordinary CPU memory, whose contents the system must
+/// copy to/from the GPU as needed. This crate's API is designed to work the
+/// same way in either case: at any given time, a buffer is either mapped and
+/// available to the CPU, or unmapped and ready for use by the GPU, but never
+/// both. This makes it impossible for either side to observe changes by the
+/// other immediately, and any necessary transfers can be carried out when the
+/// buffer transitions from one state to the other.
+///
+/// There are two ways to map a buffer:
+///
+/// - If [`BufferDescriptor::mapped_at_creation`] is `true`, then the entire
+///   buffer is mapped when it is created. This is the easiest way to initialize
+///   a new buffer. You can set `mapped_at_creation` on any kind of buffer,
+///   regardless of its [`usage`] flags.
+///
+/// - If the buffer's [`usage`] includes the [`MAP_READ`] or [`MAP_WRITE`]
+///   flags, then you can call `buffer.slice(range).map_async(mode, callback)`
+///   to map the portion of `buffer` given by `range`. This waits for the GPU to
+///   finish using the buffer, and invokes `callback` as soon as the buffer is
+///   safe for the CPU to access.
+///
+/// Once a buffer is mapped:
+///
+/// - You can call `buffer.slice(range).get_mapped_range()` to obtain a
+///   [`BufferView`], which dereferences to a `&[u8]` that you can use to read
+///   the buffer's contents.
+///
+/// - Or, you can call `buffer.slice(range).get_mapped_range_mut()` to obtain a
+///   [`BufferViewMut`], which dereferences to a `&mut [u8]` that you can use to
+///   read and write the buffer's contents.
+///
+/// The given `range` must fall within the mapped portion of the buffer. If you
+/// attempt to access overlapping ranges, even for shared access only, these
+/// methods panic.
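+///
+/// For instance, reading through a mapped range is just a slice view (a sketch;
+/// `buffer` is assumed to be mapped already):
+///
+/// ```no_run
+/// # let buffer: wgpu::Buffer = todo!();
+/// let view = buffer.slice(0..16).get_mapped_range();
+/// let first_four_bytes: &[u8] = &view[..4];
+/// # let _ = first_four_bytes;
+/// ```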
+///
+/// While a buffer is mapped, you may not submit any commands to the GPU that
+/// access it. You may record command buffers that use the buffer, but if you
+/// submit them while the buffer is mapped, submission will panic.
+///
+/// When you are done using the buffer on the CPU, you must call
+/// [`Buffer::unmap`] to make it available for use by the GPU again. All
+/// [`BufferView`] and [`BufferViewMut`] views referring to the buffer must be
+/// dropped before you unmap it; otherwise, [`Buffer::unmap`] will panic.
+///
+/// # Example
+///
+/// If `buffer` was created with [`BufferUsages::MAP_WRITE`], we could fill it
+/// with `f32` values like this:
+///
+/// ```no_run
+/// # mod bytemuck {
+/// #     pub fn cast_slice_mut(bytes: &mut [u8]) -> &mut [f32] { todo!() }
+/// # }
+/// # let device: wgpu::Device = todo!();
+/// # let buffer: wgpu::Buffer = todo!();
+/// let buffer = std::sync::Arc::new(buffer);
+/// let capturable = buffer.clone();
+/// buffer.slice(..).map_async(wgpu::MapMode::Write, move |result| {
+///     if result.is_ok() {
+///         let mut view = capturable.slice(..).get_mapped_range_mut();
+///         let floats: &mut [f32] = bytemuck::cast_slice_mut(&mut view);
+///         floats.fill(42.0);
+///         drop(view);
+///         capturable.unmap();
+///     }
+/// });
+/// ```
+///
+/// This code takes the following steps:
+///
+/// - First, it moves `buffer` into an [`Arc`], and makes a clone for capture by
+///   the callback passed to [`map_async`]. Since a [`map_async`] callback may be
+///   invoked from another thread, interaction between the callback and the
+///   thread calling [`map_async`] generally requires some sort of shared heap
+///   data like this. In real code, the [`Arc`] would probably own some larger
+///   structure that itself owns `buffer`.
+///
+/// - Then, it calls [`Buffer::slice`] to make a [`BufferSlice`] referring to
+///   the buffer's entire contents.
+///
+/// - Next, it calls [`BufferSlice::map_async`] to request that the bytes to
+///   which the slice refers be made accessible to the CPU ("mapped"). This may
+///   entail waiting for previously enqueued operations on `buffer` to finish.
+///   Although [`map_async`] itself always returns immediately, it saves the
+///   callback function to be invoked later.
+///
+/// - When some later call to [`Device::poll`] or [`Instance::poll_all`] (not
+///   shown in this example) determines that the buffer is mapped and ready for
+///   the CPU to use, it invokes the callback function.
+///
+/// - The callback function calls [`Buffer::slice`] and then
+///   [`BufferSlice::get_mapped_range_mut`] to obtain a [`BufferViewMut`], which
+///   dereferences to a `&mut [u8]` slice referring to the buffer's bytes.
+///
+/// - It then uses the [`bytemuck`] crate to turn the `&mut [u8]` into a `&mut
+///   [f32]`, and calls the slice [`fill`] method to fill the buffer with a
+///   useful value.
+///
+/// - Finally, the callback drops the view and calls [`Buffer::unmap`] to unmap
+///   the buffer. In real code, the callback would also need to do some sort of
+///   synchronization to let the rest of the program know that it has completed
+///   its work.
+///
+/// If using [`map_async`] directly is awkward, you may find it more convenient to
+/// use [`Queue::write_buffer`] and [`util::DownloadBuffer::read_buffer`].
+/// However, those each have their own tradeoffs; the asynchronous nature of GPU
+/// execution makes it hard to avoid friction altogether.
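+///
+/// For example, the [`Queue::write_buffer`] route is a single call (a sketch;
+/// `queue` is assumed, and the buffer needs [`BufferUsages::COPY_DST`]):
+///
+/// ```no_run
+/// # let queue: wgpu::Queue = todo!();
+/// # let buffer: wgpu::Buffer = todo!();
+/// queue.write_buffer(&buffer, 0, &[1, 2, 3, 4]);
+/// ```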
+///
+/// [`Arc`]: std::sync::Arc
+/// [`map_async`]: BufferSlice::map_async
+/// [`bytemuck`]: https://crates.io/crates/bytemuck
+/// [`fill`]: slice::fill
+///
+/// ## Mapping buffers on the web
+///
+/// When compiled to WebAssembly and running in a browser content process,
+/// `wgpu` implements its API in terms of the browser's WebGPU implementation.
+/// In this context, `wgpu` is further isolated from the GPU:
+///
+/// - Depending on the browser's WebGPU implementation, mapping and unmapping
+///   buffers probably entails copies between WebAssembly linear memory and the
+///   graphics driver's buffers.
+///
+/// - All modern web browsers isolate web content in its own sandboxed process,
+///   which can only interact with the GPU via interprocess communication (IPC).
+///   Although most browsers' IPC systems use shared memory for large data
+///   transfers, there will still probably need to be copies into and out of the
+///   shared memory buffers.
+///
+/// All of these copies contribute to the cost of buffer mapping in this
+/// configuration.
+///
+/// [`usage`]: BufferDescriptor::usage
+/// [mac]: BufferDescriptor::mapped_at_creation
+/// [`MAP_READ`]: BufferUsages::MAP_READ
+/// [`MAP_WRITE`]: BufferUsages::MAP_WRITE
+#[derive(Debug)]
+pub struct Buffer {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) map_context: Mutex<MapContext>,
+    pub(crate) size: wgt::BufferAddress,
+    pub(crate) usage: BufferUsages,
+    // Todo: missing map_state https://www.w3.org/TR/webgpu/#dom-gpubuffer-mapstate
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Buffer: Send, Sync);
+
+impl Buffer {
+    /// Returns a globally-unique identifier for this `Buffer`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Return the binding view of the entire buffer.
+    pub fn as_entire_binding(&self) -> BindingResource<'_> {
+        BindingResource::Buffer(self.as_entire_buffer_binding())
+    }
+
+    /// Return the binding view of the entire buffer.
+    pub fn as_entire_buffer_binding(&self) -> BufferBinding<'_> {
+        BufferBinding {
+            buffer: self,
+            offset: 0,
+            size: None,
+        }
+    }
+
+    /// Returns the inner hal Buffer using a callback. The hal buffer will be `None` if the
+    /// backend type argument does not match with this wgpu Buffer
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal Buffer must not be manually destroyed
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Buffer>) -> R, R>(
+        &self,
+        hal_buffer_callback: F,
+    ) -> R {
+        let id = self.id;
+
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            unsafe { ctx.buffer_as_hal::<A, F, R>(id.into(), hal_buffer_callback) }
+        } else {
+            hal_buffer_callback(None)
+        }
+    }
+
+    /// Return a slice of a [`Buffer`]'s bytes.
+    ///
+    /// Return a [`BufferSlice`] referring to the portion of `self`'s contents
+    /// indicated by `bounds`. Regardless of what sort of data `self` stores,
+    /// `bounds` start and end are given in bytes.
+    ///
+    /// A [`BufferSlice`] can be used to supply vertex and index data, or to map
+    /// buffer contents for access from the CPU. See the [`BufferSlice`]
+    /// documentation for details.
+    ///
+    /// The `bounds` argument can be half or fully unbounded: for example,
+    /// `buffer.slice(..)` refers to the entire buffer, and `buffer.slice(n..)`
+    /// refers to the portion starting at the `n`th byte and extending to the
+    /// end of the buffer.
+    pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
+        let (offset, size) = range_to_offset_size(bounds);
+        BufferSlice {
+            buffer: self,
+            offset,
+            size,
+        }
+    }
+
+    /// Flushes any pending write operations and unmaps the buffer from host memory.
+    pub fn unmap(&self) {
+        self.map_context.lock().reset();
+        DynContext::buffer_unmap(&*self.context, &self.id, self.data.as_ref());
+    }
+
+    /// Destroy the associated native resources as soon as possible.
+    pub fn destroy(&self) {
+        DynContext::buffer_destroy(&*self.context, &self.id, self.data.as_ref());
+    }
+
+    /// Returns the length of the buffer allocation in bytes.
+    ///
+    /// This is always equal to the `size` that was specified when creating the buffer.
+    pub fn size(&self) -> BufferAddress {
+        self.size
+    }
+
+    /// Returns the allowed usages for this `Buffer`.
+    ///
+    /// This is always equal to the `usage` that was specified when creating the buffer.
+    pub fn usage(&self) -> BufferUsages {
+        self.usage
+    }
+}
+
+/// A slice of a [`Buffer`], to be mapped, used for vertex or index data, or the like.
+///
+/// You can create a `BufferSlice` by calling [`Buffer::slice`]:
+///
+/// ```no_run
+/// # let buffer: wgpu::Buffer = todo!();
+/// let slice = buffer.slice(10..20);
+/// ```
+///
+/// This returns a slice referring to the second ten bytes of `buffer`. To get a
+/// slice of the entire `Buffer`:
+///
+/// ```no_run
+/// # let buffer: wgpu::Buffer = todo!();
+/// let whole_buffer_slice = buffer.slice(..);
+/// ```
+///
+/// You can pass buffer slices to methods like [`RenderPass::set_vertex_buffer`]
+/// and [`RenderPass::set_index_buffer`] to indicate which portion of the buffer
+/// a draw call should consult.
+///
+/// To access the slice's contents on the CPU, you must first [map] the buffer,
+/// and then call [`BufferSlice::get_mapped_range`] or
+/// [`BufferSlice::get_mapped_range_mut`] to obtain a view of the slice's
+/// contents. See the documentation on [mapping][map] for more details,
+/// including example code.
+///
+/// Unlike a Rust shared slice `&[T]`, whose existence guarantees that
+/// nobody else is modifying the `T` values to which it refers, a
+/// [`BufferSlice`] doesn't guarantee that the buffer's contents aren't
+/// changing. You can still record and submit commands operating on the
+/// buffer while holding a [`BufferSlice`]. A [`BufferSlice`] simply
+/// represents a certain range of the buffer's bytes.
+///
+/// The `BufferSlice` type is unique to the Rust API of `wgpu`. In the WebGPU
+/// specification, an offset and size are specified as arguments to each call
+/// working with the [`Buffer`], instead.
+///
+/// [map]: Buffer#mapping-buffers
+#[derive(Copy, Clone, Debug)]
+pub struct BufferSlice<'a> {
+    pub(crate) buffer: &'a Buffer,
+    pub(crate) offset: BufferAddress,
+    pub(crate) size: Option<BufferSize>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BufferSlice<'_>: Send, Sync);
+
+impl<'a> BufferSlice<'a> {
+    /// Map the buffer. The buffer is ready to map once the callback is called.
+    ///
+    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
+    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
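+    ///
+    /// A common blocking pattern on native is to poll immediately after
+    /// requesting the map (a sketch; `device` and `buffer` are assumed, with
+    /// the buffer created with `MAP_READ` usage):
+    ///
+    /// ```no_run
+    /// # let device: wgpu::Device = todo!();
+    /// # let buffer: wgpu::Buffer = todo!();
+    /// buffer.slice(..).map_async(wgpu::MapMode::Read, |result| {
+    ///     result.expect("failed to map buffer");
+    /// });
+    /// device.poll(wgpu::Maintain::Wait); // blocks until the callback has run
+    /// let data = buffer.slice(..).get_mapped_range();
+    /// # drop(data);
+    /// ```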
+    ///
+    /// The callback will be called on the thread that first calls the above functions after the gpu work
+    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
+    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
+    /// and used to set flags, send messages, etc.
+    pub fn map_async(
+        &self,
+        mode: MapMode,
+        callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
+    ) {
+        let mut mc = self.buffer.map_context.lock();
+        assert_eq!(
+            mc.initial_range,
+            0..0,
+            "Buffer {:?} is already mapped",
+            self.buffer.id
+        );
+        let end = match self.size {
+            Some(s) => self.offset + s.get(),
+            None => mc.total_size,
+        };
+        mc.initial_range = self.offset..end;
+
+        DynContext::buffer_map_async(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            mode,
+            self.offset..end,
+            Box::new(callback),
+        )
+    }
+
+    /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
+    ///
+    /// Return a [`BufferView`] referring to the buffer range represented by
+    /// `self`. See the documentation for [`BufferView`] for details.
+    ///
+    /// # Panics
+    ///
+    /// - This panics if the buffer to which `self` refers is not currently
+    ///   [mapped].
+    ///
+    /// - If you try to create overlapping views of a buffer, mutable or
+    ///   otherwise, `get_mapped_range` will panic.
+    ///
+    /// [mapped]: Buffer#mapping-buffers
+    pub fn get_mapped_range(&self) -> BufferView<'a> {
+        let end = self.buffer.map_context.lock().add(self.offset, self.size);
+        let data = DynContext::buffer_get_mapped_range(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            self.offset..end,
+        );
+        BufferView { slice: *self, data }
+    }
+
+    /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable
+    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], this will fail.
+    ///
+    /// This is useful when targeting WebGPU and you want to pass mapped data directly to js.
+    /// Unlike `get_mapped_range` which unconditionally copies mapped data into the wasm heap,
+    /// this function directly hands you the ArrayBuffer that we mapped the data into in js.
+    ///
+    /// This is only available on WebGPU, on any other backends this will return `None`.
+    #[cfg(webgpu)]
+    pub fn get_mapped_range_as_array_buffer(&self) -> Option<js_sys::ArrayBuffer> {
+        self.buffer
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWebGpu>()
+            .map(|ctx| {
+                let buffer_data = crate::context::downcast_ref(self.buffer.data.as_ref());
+                let end = self.buffer.map_context.lock().add(self.offset, self.size);
+                ctx.buffer_get_mapped_range_as_array_buffer(buffer_data, self.offset..end)
+            })
+    }
+
+    /// Gain write access to the bytes of a [mapped] [`Buffer`].
+    ///
+    /// Return a [`BufferViewMut`] referring to the buffer range represented by
+    /// `self`. See the documentation for [`BufferViewMut`] for more details.
+    ///
+    /// # Panics
+    ///
+    /// - This panics if the buffer to which `self` refers is not currently
+    ///   [mapped].
+    ///
+    /// - If you try to create overlapping views of a buffer, mutable or
+    ///   otherwise, `get_mapped_range_mut` will panic.
+    ///
+    /// [mapped]: Buffer#mapping-buffers
+    pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> {
+        let end = self.buffer.map_context.lock().add(self.offset, self.size);
+        let data = DynContext::buffer_get_mapped_range(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            self.offset..end,
+        );
+        BufferViewMut {
+            slice: *self,
+            data,
+            readable: self.buffer.usage.contains(BufferUsages::MAP_READ),
+        }
+    }
+}
+
+/// The mapped portion of a buffer, if any, and its outstanding views.
+///
+/// This ensures that views fall within the mapped range and don't overlap, and
+/// also takes care of turning `Option<BufferSize>` sizes into actual buffer
+/// offsets.
+#[derive(Debug)]
+pub(crate) struct MapContext {
+    /// The overall size of the buffer.
+    ///
+    /// This is just a convenient copy of [`Buffer::size`].
+    pub(crate) total_size: BufferAddress,
+
+    /// The range of the buffer that is mapped.
+    ///
+    /// This is `0..0` if the buffer is not mapped. This becomes non-empty when
+    /// the buffer is mapped at creation time, and when you call `map_async` on
+    /// some [`BufferSlice`] (so technically, it indicates the portion that is
+    /// *or has been requested to be* mapped.)
+    ///
+    /// All [`BufferView`]s and [`BufferViewMut`]s must fall within this range.
+    pub(crate) initial_range: Range<BufferAddress>,
+
+    /// The ranges covered by all outstanding [`BufferView`]s and
+    /// [`BufferViewMut`]s. These are non-overlapping, and are all contained
+    /// within `initial_range`.
+    sub_ranges: Vec<Range<BufferAddress>>,
+}
+
+impl MapContext {
+    pub(crate) fn new(total_size: BufferAddress) -> Self {
+        Self {
+            total_size,
+            initial_range: 0..0,
+            sub_ranges: Vec::new(),
+        }
+    }
+
+    /// Record that the buffer is no longer mapped.
+    fn reset(&mut self) {
+        self.initial_range = 0..0;
+
+        assert!(
+            self.sub_ranges.is_empty(),
+            "You cannot unmap a buffer that still has accessible mapped views"
+        );
+    }
+
+    /// Record that the `size` bytes of the buffer at `offset` are now viewed.
+    ///
+    /// Return the byte offset within the buffer of the end of the viewed range.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given range overlaps with any existing range.
+    fn add(&mut self, offset: BufferAddress, size: Option<BufferSize>) -> BufferAddress {
+        let end = match size {
+            Some(s) => offset + s.get(),
+            None => self.initial_range.end,
+        };
+        assert!(self.initial_range.start <= offset && end <= self.initial_range.end);
+        // This check is essential for avoiding undefined behavior: it is the
+        // only thing that ensures that `&mut` references to the buffer's
+        // contents don't alias anything else.
+        for sub in self.sub_ranges.iter() {
+            assert!(
+                end <= sub.start || offset >= sub.end,
+                "Intersecting map range with {sub:?}"
+            );
+        }
+        self.sub_ranges.push(offset..end);
+        end
+    }
+
+    /// Record that the `size` bytes of the buffer at `offset` are no longer viewed.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given range does not exactly match one previously
+    /// passed to [`add`].
+    ///
+    /// [`add`]: MapContext::add
+    fn remove(&mut self, offset: BufferAddress, size: Option<BufferSize>) {
+        let end = match size {
+            Some(s) => offset + s.get(),
+            None => self.initial_range.end,
+        };
+
+        let index = self
+            .sub_ranges
+            .iter()
+            .position(|r| *r == (offset..end))
+            .expect("unable to remove range from map context");
+        self.sub_ranges.swap_remove(index);
+    }
+}
+
+/// Describes a [`Buffer`].
+///
+/// For use with [`Device::create_buffer`].
+///
+/// Corresponds to [WebGPU `GPUBufferDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferdescriptor).
+pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(BufferDescriptor<'_>: Send, Sync);
+
+/// Error occurred when trying to async map a buffer.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct BufferAsyncError;
+static_assertions::assert_impl_all!(BufferAsyncError: Send, Sync);
+
+impl fmt::Display for BufferAsyncError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Error occurred when trying to async map a buffer")
+    }
+}
+
+impl error::Error for BufferAsyncError {}
+
+/// Type of buffer mapping.
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+pub enum MapMode {
+    /// Map only for reading
+    Read,
+    /// Map only for writing
+    Write,
+}
+static_assertions::assert_impl_all!(MapMode: Send, Sync);
+
+/// A read-only view of a mapped buffer's bytes.
+///
+/// To get a `BufferView`, first [map] the buffer, and then
+/// call `buffer.slice(range).get_mapped_range()`.
+///
+/// `BufferView` dereferences to `&[u8]`, so you can use all the usual Rust
+/// slice methods to access the buffer's contents. It also implements
+/// `AsRef<[u8]>`, if that's more convenient.
+///
+/// Before the buffer can be unmapped, all `BufferView`s observing it
+/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
+///
+/// For example code, see the documentation on [mapping buffers][map].
+///
+/// [map]: Buffer#mapping-buffers
+/// [`map_async`]: BufferSlice::map_async
+#[derive(Debug)]
+pub struct BufferView<'a> {
+    slice: BufferSlice<'a>,
+    data: Box<dyn crate::context::BufferMappedRange>,
+}
+
+impl std::ops::Deref for BufferView<'_> {
+    type Target = [u8];
+
+    #[inline]
+    fn deref(&self) -> &[u8] {
+        self.data.slice()
+    }
+}
+
+impl AsRef<[u8]> for BufferView<'_> {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        self.data.slice()
+    }
+}
+
+/// A write-only view of a mapped buffer's bytes.
+///
+/// To get a `BufferViewMut`, first [map] the buffer, and then
+/// call `buffer.slice(range).get_mapped_range_mut()`.
+///
+/// `BufferViewMut` dereferences to `&mut [u8]`, so you can use all the usual
+/// Rust slice methods to access the buffer's contents. It also implements
+/// `AsMut<[u8]>`, if that's more convenient.
+///
+/// It is possible to read the buffer using this view, but doing so is not
+/// recommended, as it is likely to be slow.
+///
+/// Before the buffer can be unmapped, all `BufferViewMut`s observing it
+/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
+///
+/// For example code, see the documentation on [mapping buffers][map].
+///
+/// [map]: Buffer#mapping-buffers
+#[derive(Debug)]
+pub struct BufferViewMut<'a> {
+    slice: BufferSlice<'a>,
+    data: Box<dyn crate::context::BufferMappedRange>,
+    readable: bool,
+}
+
+impl AsMut<[u8]> for BufferViewMut<'_> {
+    #[inline]
+    fn as_mut(&mut self) -> &mut [u8] {
+        self.data.slice_mut()
+    }
+}
+
+impl Deref for BufferViewMut<'_> {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        if !self.readable {
+            log::warn!("Reading from a BufferViewMut is slow and not recommended.");
+        }
+
+        self.data.slice()
+    }
+}
+
+impl DerefMut for BufferViewMut<'_> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.data.slice_mut()
+    }
+}
+
+impl Drop for BufferView<'_> {
+    fn drop(&mut self) {
+        self.slice
+            .buffer
+            .map_context
+            .lock()
+            .remove(self.slice.offset, self.slice.size);
+    }
+}
+
+impl Drop for BufferViewMut<'_> {
+    fn drop(&mut self) {
+        self.slice
+            .buffer
+            .map_context
+            .lock()
+            .remove(self.slice.offset, self.slice.size);
+    }
+}
+
+impl Drop for Buffer {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.buffer_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
+    bounds: S,
+) -> (BufferAddress, Option<BufferSize>) {
+    let offset = match bounds.start_bound() {
+        Bound::Included(&bound) => bound,
+        Bound::Excluded(&bound) => bound + 1,
+        Bound::Unbounded => 0,
+    };
+    let size = match bounds.end_bound() {
+        Bound::Included(&bound) => Some(bound + 1 - offset),
+        Bound::Excluded(&bound) => Some(bound - offset),
+        Bound::Unbounded => None,
+    }
+    .map(|size| BufferSize::new(size).expect("Buffer slices can not be empty"));
+
+    (offset, size)
+}
+#[cfg(test)]
+mod tests {
+    use super::{range_to_offset_size, BufferSize};
+
+    #[test]
+    fn range_to_offset_size_works() {
+        assert_eq!(range_to_offset_size(0..2), (0, BufferSize::new(2)));
+        assert_eq!(range_to_offset_size(2..5), (2, BufferSize::new(3)));
+        assert_eq!(range_to_offset_size(..), (0, None));
+        assert_eq!(range_to_offset_size(21..), (21, None));
+        assert_eq!(range_to_offset_size(0..), (0, None));
+        assert_eq!(range_to_offset_size(..21), (0, BufferSize::new(21)));
+    }
+
+    #[test]
+    #[should_panic]
+    fn range_to_offset_size_panics_for_empty_range() {
+        range_to_offset_size(123..123);
+    }
+
+    #[test]
+    #[should_panic]
+    fn range_to_offset_size_panics_for_unbounded_empty_range() {
+        range_to_offset_size(..0);
+    }
+}
diff --git a/wgpu/src/api/command_buffer.rs b/wgpu/src/api/command_buffer.rs
new file mode 100644
index 0000000000..4d56fe9b2f
--- /dev/null
+++ b/wgpu/src/api/command_buffer.rs
@@ -0,0 +1,31 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a command buffer on the GPU.
+///
+/// A `CommandBuffer` represents a complete sequence of commands that may be submitted to a command
+/// queue with [`Queue::submit`]. A `CommandBuffer` is obtained by recording a series of commands to
+/// a [`CommandEncoder`] and then calling [`CommandEncoder::finish`].
+///
+/// Corresponds to [WebGPU `GPUCommandBuffer`](https://gpuweb.github.io/gpuweb/#command-buffer).
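+///
+/// A typical recording flow, as a sketch (the `device` and `queue` bindings
+/// are assumed):
+///
+/// ```no_run
+/// # let device: wgpu::Device = todo!();
+/// # let queue: wgpu::Queue = todo!();
+/// let mut encoder =
+///     device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+/// // ... record passes and copies on `encoder` here ...
+/// queue.submit([encoder.finish()]);
+/// ```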
+#[derive(Debug)]
+pub struct CommandBuffer {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: Option<ObjectId>,
+    pub(crate) data: Option<Box<Data>>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(CommandBuffer: Send, Sync);
+
+impl Drop for CommandBuffer {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            if let Some(id) = self.id.take() {
+                self.context
+                    .command_buffer_drop(&id, self.data.take().unwrap().as_ref());
+            }
+        }
+    }
+}
diff --git a/wgpu/src/api/command_encoder.rs b/wgpu/src/api/command_encoder.rs
new file mode 100644
index 0000000000..d8e8594a89
--- /dev/null
+++ b/wgpu/src/api/command_encoder.rs
@@ -0,0 +1,382 @@
+use std::{marker::PhantomData, ops::Range, sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Encodes a series of GPU operations.
+///
+/// A command encoder can record [`RenderPass`]es, [`ComputePass`]es,
+/// and transfer operations between driver-managed resources like [`Buffer`]s and [`Texture`]s.
+///
+/// When finished recording, call [`CommandEncoder::finish`] to obtain a [`CommandBuffer`] which may
+/// be submitted for execution.
+///
+/// Corresponds to [WebGPU `GPUCommandEncoder`](https://gpuweb.github.io/gpuweb/#command-encoder).
+#[derive(Debug)]
+pub struct CommandEncoder {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: Option<ObjectId>,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(CommandEncoder: Send, Sync);
+
+impl Drop for CommandEncoder {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            if let Some(id) = self.id.take() {
+                self.context.command_encoder_drop(&id, self.data.as_ref());
+            }
+        }
+    }
+}
+
+/// Describes a [`CommandEncoder`].
+///
+/// For use with [`Device::create_command_encoder`].
+///
+/// Corresponds to [WebGPU `GPUCommandEncoderDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucommandencoderdescriptor).
+pub type CommandEncoderDescriptor<'a> = wgt::CommandEncoderDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(CommandEncoderDescriptor<'_>: Send, Sync);
+
+pub use wgt::ImageCopyBuffer as ImageCopyBufferBase;
+/// View of a buffer which can be used to copy to/from a texture.
+///
+/// Corresponds to [WebGPU `GPUImageCopyBuffer`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopybuffer).
+pub type ImageCopyBuffer<'a> = ImageCopyBufferBase<&'a Buffer>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ImageCopyBuffer<'_>: Send, Sync);
+
+pub use wgt::ImageCopyTexture as ImageCopyTextureBase;
+/// View of a texture which can be used to copy to/from a buffer/texture.
+///
+/// Corresponds to [WebGPU `GPUImageCopyTexture`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexture).
+pub type ImageCopyTexture<'a> = ImageCopyTextureBase<&'a Texture>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ImageCopyTexture<'_>: Send, Sync);
+
+pub use wgt::ImageCopyTextureTagged as ImageCopyTextureTaggedBase;
+/// View of a texture which can be used to copy to a texture, including
+/// color space and alpha premultiplication information.
+///
+/// Corresponds to [WebGPU `GPUImageCopyTextureTagged`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexturetagged).
+pub type ImageCopyTextureTagged<'a> = ImageCopyTextureTaggedBase<&'a Texture>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ImageCopyTextureTagged<'_>: Send, Sync);
+
+impl CommandEncoder {
+    /// Finishes recording and returns a [`CommandBuffer`] that can be submitted for execution.
+    pub fn finish(mut self) -> CommandBuffer {
+        let (id, data) = DynContext::command_encoder_finish(
+            &*self.context,
+            self.id.take().unwrap(),
+            self.data.as_mut(),
+        );
+        CommandBuffer {
+            context: Arc::clone(&self.context),
+            id: Some(id),
+            data: Some(data),
+        }
+    }
+
+    /// Begins recording of a render pass.
+    ///
+    /// This function returns a [`RenderPass`] object which records a single render pass.
+    ///
+    /// As long as the returned [`RenderPass`] has not ended,
+    /// any mutating operation on this command encoder causes an error and invalidates it.
+    /// Note that the `'encoder` lifetime relationship protects against this,
+    /// but it is possible to opt out of it by calling [`RenderPass::forget_lifetime`].
+    /// This can be useful for runtime handling of the encoder->pass
+    /// dependency e.g. when pass and encoder are stored in the same data structure.
+    pub fn begin_render_pass<'encoder>(
+        &'encoder mut self,
+        desc: &RenderPassDescriptor<'_>,
+    ) -> RenderPass<'encoder> {
+        let id = self.id.as_ref().unwrap();
+        let (id, data) = DynContext::command_encoder_begin_render_pass(
+            &*self.context,
+            id,
+            self.data.as_ref(),
+            desc,
+        );
+        RenderPass {
+            inner: RenderPassInner {
+                id,
+                data,
+                context: self.context.clone(),
+            },
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Begins recording of a compute pass.
+    ///
+    /// This function returns a [`ComputePass`] object which records a single compute pass.
+    ///
+    /// As long as the returned [`ComputePass`] has not ended,
+    /// any mutating operation on this command encoder causes an error and invalidates it.
+    /// Note that the `'encoder` lifetime relationship protects against this,
+    /// but it is possible to opt out of it by calling [`ComputePass::forget_lifetime`].
+    /// This can be useful for runtime handling of the encoder->pass
+    /// dependency e.g. when pass and encoder are stored in the same data structure.
+    pub fn begin_compute_pass<'encoder>(
+        &'encoder mut self,
+        desc: &ComputePassDescriptor<'_>,
+    ) -> ComputePass<'encoder> {
+        let id = self.id.as_ref().unwrap();
+        let (id, data) = DynContext::command_encoder_begin_compute_pass(
+            &*self.context,
+            id,
+            self.data.as_ref(),
+            desc,
+        );
+        ComputePass {
+            inner: ComputePassInner {
+                id,
+                data,
+                context: self.context.clone(),
+            },
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Copy data from one buffer to another.
+    ///
+    /// # Panics
+    ///
+    /// - Buffer offsets or copy size not a multiple of [`COPY_BUFFER_ALIGNMENT`].
+    /// - Copy would overrun buffer.
+    /// - Copy within the same buffer.
+    pub fn copy_buffer_to_buffer(
+        &mut self,
+        source: &Buffer,
+        source_offset: BufferAddress,
+        destination: &Buffer,
+        destination_offset: BufferAddress,
+        copy_size: BufferAddress,
+    ) {
+        DynContext::command_encoder_copy_buffer_to_buffer(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            &source.id,
+            source.data.as_ref(),
+            source_offset,
+            &destination.id,
+            destination.data.as_ref(),
+            destination_offset,
+            copy_size,
+        );
+    }
+
+    /// Copy data from a buffer to a texture.
+    pub fn copy_buffer_to_texture(
+        &mut self,
+        source: ImageCopyBuffer<'_>,
+        destination: ImageCopyTexture<'_>,
+        copy_size: Extent3d,
+    ) {
+        DynContext::command_encoder_copy_buffer_to_texture(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            source,
+            destination,
+            copy_size,
+        );
+    }
+
+    /// Copy data from a texture to a buffer.
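+    ///
+    /// For example, a sketch of reading back a whole 256×256 RGBA8 texture;
+    /// `bytes_per_row` must be a multiple of [`COPY_BYTES_PER_ROW_ALIGNMENT`],
+    /// and the bindings are assumed:
+    ///
+    /// ```no_run
+    /// # let mut encoder: wgpu::CommandEncoder = todo!();
+    /// # let texture: wgpu::Texture = todo!();
+    /// # let readback_buffer: wgpu::Buffer = todo!();
+    /// encoder.copy_texture_to_buffer(
+    ///     texture.as_image_copy(),
+    ///     wgpu::ImageCopyBuffer {
+    ///         buffer: &readback_buffer,
+    ///         layout: wgpu::ImageDataLayout {
+    ///             offset: 0,
+    ///             bytes_per_row: Some(256 * 4), // 1024, a multiple of 256
+    ///             rows_per_image: None,
+    ///         },
+    ///     },
+    ///     wgpu::Extent3d {
+    ///         width: 256,
+    ///         height: 256,
+    ///         depth_or_array_layers: 1,
+    ///     },
+    /// );
+    /// ```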
+    pub fn copy_texture_to_buffer(
+        &mut self,
+        source: ImageCopyTexture<'_>,
+        destination: ImageCopyBuffer<'_>,
+        copy_size: Extent3d,
+    ) {
+        DynContext::command_encoder_copy_texture_to_buffer(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            source,
+            destination,
+            copy_size,
+        );
+    }
+
+    /// Copy data from one texture to another.
+    ///
+    /// # Panics
+    ///
+    /// - Textures are not the same type
+    /// - If copying a depth or multisampled texture, the entire texture must be copied
+    /// - Copy would overrun either texture
+    pub fn copy_texture_to_texture(
+        &mut self,
+        source: ImageCopyTexture<'_>,
+        destination: ImageCopyTexture<'_>,
+        copy_size: Extent3d,
+    ) {
+        DynContext::command_encoder_copy_texture_to_texture(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            source,
+            destination,
+            copy_size,
+        );
+    }
+
+    /// Clears texture to zero.
+    ///
+    /// Note that unlike with clear_buffer, `COPY_DST` usage is not required.
+    ///
+    /// # Implementation notes
+    ///
+    /// - implemented either via buffer copies and render/depth target clear, path depends on texture usages
+    /// - behaves like texture zero init, but is performed immediately (clearing is *not* delayed via marking it as uninitialized)
+    ///
+    /// # Panics
+    ///
+    /// - `CLEAR_TEXTURE` extension not enabled
+    /// - Range is out of bounds
+    pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) {
+        DynContext::command_encoder_clear_texture(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            texture,
+            subresource_range,
+        );
+    }
+
+    /// Clears buffer to zero.
+    ///
+    /// # Panics
+    ///
+    /// - Buffer does not have `COPY_DST` usage.
+    /// - Range is out of bounds
+    pub fn clear_buffer(
+        &mut self,
+        buffer: &Buffer,
+        offset: BufferAddress,
+        size: Option<BufferSize>,
+    ) {
+        DynContext::command_encoder_clear_buffer(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            buffer,
+            offset,
+            size,
+        );
+    }
+
+    /// Inserts a debug marker.
+    pub fn insert_debug_marker(&mut self, label: &str) {
+        let id = self.id.as_ref().unwrap();
+        DynContext::command_encoder_insert_debug_marker(
+            &*self.context,
+            id,
+            self.data.as_ref(),
+            label,
+        );
+    }
+
+    /// Starts a new debug marker group; subsequently recorded commands are
+    /// grouped under `label` until the matching `pop_debug_group`.
+    pub fn push_debug_group(&mut self, label: &str) {
+        let id = self.id.as_ref().unwrap();
+        DynContext::command_encoder_push_debug_group(&*self.context, id, self.data.as_ref(), label);
+    }
+
+    /// Ends the debug marker group most recently started with `push_debug_group`.
+    pub fn pop_debug_group(&mut self) {
+        let id = self.id.as_ref().unwrap();
+        DynContext::command_encoder_pop_debug_group(&*self.context, id, self.data.as_ref());
+    }
+
+    /// Resolves a query set, writing the results into the supplied destination buffer.
+    ///
+    /// Occlusion and timestamp queries are 8 bytes each (see [`crate::QUERY_SIZE`]). For pipeline statistics queries,
+    /// see [`PipelineStatisticsTypes`] for more information.
+    pub fn resolve_query_set(
+        &mut self,
+        query_set: &QuerySet,
+        query_range: Range<u32>,
+        destination: &Buffer,
+        destination_offset: BufferAddress,
+    ) {
+        DynContext::command_encoder_resolve_query_set(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_range.start,
+            query_range.end - query_range.start,
+            &destination.id,
+            destination.data.as_ref(),
+            destination_offset,
+        )
+    }
+
+    /// Returns the inner hal CommandEncoder using a callback. The hal command encoder will be `None` if the
+    /// backend type argument does not match with this wgpu CommandEncoder
+    ///
+    /// This method will start the wgpu_core level command recording.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal CommandEncoder must not be manually destroyed
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal_mut<
+        A: wgc::hal_api::HalApi,
+        F: FnOnce(Option<&mut A::CommandEncoder>) -> R,
+        R,
+    >(
+        &mut self,
+        hal_command_encoder_callback: F,
+    ) -> Option<R> {
+        use wgc::id::CommandEncoderId;
+
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| unsafe {
+                ctx.command_encoder_as_hal_mut::<A, F, R>(
+                    CommandEncoderId::from(self.id.unwrap()),
+                    hal_command_encoder_callback,
+                )
+            })
+    }
+}
+
+/// [`Features::TIMESTAMP_QUERY_INSIDE_ENCODERS`] must be enabled on the device in order to call these functions.
+impl CommandEncoder {
+    /// Issue a timestamp command at this point in the queue.
+    /// The timestamp will be written to the specified query set, at the specified index.
+    ///
+    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    ///
+    /// Attention: Since commands within a command recorder may be reordered,
+    /// there is no strict guarantee that timestamps are taken after all commands
+    /// recorded so far and before all commands recorded after.
+    /// This may depend both on the backend and the driver.
+    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::command_encoder_write_timestamp(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        )
+    }
+}
diff --git a/wgpu/src/api/common_pipeline.rs b/wgpu/src/api/common_pipeline.rs
new file mode 100644
index 0000000000..697507bca2
--- /dev/null
+++ b/wgpu/src/api/common_pipeline.rs
@@ -0,0 +1,64 @@
+use std::collections::HashMap;
+
+use crate::*;
+
+#[derive(Clone, Debug)]
+/// Advanced options for use when a pipeline is compiled
+///
+/// This implements `Default`, and for most users can be set to `Default::default()`
+pub struct PipelineCompilationOptions<'a> {
+    /// Specifies the values of pipeline-overridable constants in the shader module.
+    ///
+    /// If an `@id` attribute was specified on the declaration,
+    /// the key must be the pipeline constant ID as a decimal ASCII number; if not,
+    /// the key must be the constant's identifier name.
+    ///
+    /// The value may represent any of WGSL's concrete scalar types.
+    pub constants: &'a HashMap<String, f64>,
+    /// Whether workgroup scoped memory will be initialized with zero values for this stage.
+    ///
+    /// This is required by the WebGPU spec, but may have overhead which can be avoided
+    /// for cross-platform applications
+    pub zero_initialize_workgroup_memory: bool,
+}
+
+impl<'a> Default for PipelineCompilationOptions<'a> {
+    fn default() -> Self {
+        // HashMap doesn't have a const constructor, due to the use of RandomState
+        // This does introduce some synchronisation costs, but these should be minor,
+        // and might be cheaper than the alternative of getting new random state
+        static DEFAULT_CONSTANTS: std::sync::OnceLock<HashMap<String, f64>> =
+            std::sync::OnceLock::new();
+        let constants = DEFAULT_CONSTANTS.get_or_init(Default::default);
+        Self {
+            constants,
+            zero_initialize_workgroup_memory: true,
+        }
+    }
+}
+
+/// Describes a pipeline cache, which allows reusing compilation work
+/// between program runs.
+///
+/// For use with [`Device::create_pipeline_cache`]
+///
+/// This type is unique to the Rust API of `wgpu`.
+#[derive(Clone, Debug)]
+pub struct PipelineCacheDescriptor<'a> {
+    /// Debug label of the pipeline cache. This might show up in some logs from `wgpu`
+    pub label: Label<'a>,
+    /// The data used to initialise the cache
+    ///
+    /// # Safety
+    ///
+    /// This data must have been provided from a previous call to
+    /// [`PipelineCache::get_data`], if not `None`
+    pub data: Option<&'a [u8]>,
+    /// Whether to create a cache without data when the provided data
+    /// is invalid.
+    ///
+    /// Recommended to set to true
+    pub fallback: bool,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(PipelineCacheDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/compute_pass.rs b/wgpu/src/api/compute_pass.rs
new file mode 100644
index 0000000000..30123b8052
--- /dev/null
+++ b/wgpu/src/api/compute_pass.rs
@@ -0,0 +1,256 @@
+use std::{marker::PhantomData, sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// In-progress recording of a compute pass.
+///
+/// It can be created with [`CommandEncoder::begin_compute_pass`].
+///
+/// Corresponds to [WebGPU `GPUComputePassEncoder`](
+/// https://gpuweb.github.io/gpuweb/#compute-pass-encoder).
+#[derive(Debug)]
+pub struct ComputePass<'encoder> {
+    /// The inner data of the compute pass, separated out so it's easy to replace the lifetime with 'static if desired.
+    pub(crate) inner: ComputePassInner,
+
+    /// This lifetime is used to protect the [`CommandEncoder`] from being used
+    /// while the pass is alive.
+    pub(crate) encoder_guard: PhantomData<&'encoder ()>,
+}
+
+impl<'encoder> ComputePass<'encoder> {
+    /// Drops the lifetime relationship to the parent command encoder, making usage of
+    /// the encoder while this pass is recorded a run-time error instead.
+    ///
+    /// Attention: As long as the compute pass has not been ended, any mutating operation on the parent
+    /// command encoder will cause a run-time error and invalidate it!
+    /// By default, the lifetime constraint prevents this, but it can be useful
+    /// to handle this at run time, such as when storing the pass and encoder in the same
+    /// data structure.
+    ///
+    /// This operation has no effect on pass recording.
+    /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
+    /// regardless of the lifetime constraint or its absence.
+    pub fn forget_lifetime(self) -> ComputePass<'static> {
+        ComputePass {
+            inner: self.inner,
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Sets the active bind group for a given bind group index. The bind group layout
The bind group layout
+    /// in the active pipeline when the `dispatch()` function is called must match the layout of this bind group.
+    ///
+    /// If the bind group has dynamic offsets, provide them in the binding order.
+    /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
+    /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
+    pub fn set_bind_group(
+        &mut self,
+        index: u32,
+        bind_group: &BindGroup,
+        offsets: &[DynamicOffset],
+    ) {
+        DynContext::compute_pass_set_bind_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            index,
+            &bind_group.id,
+            bind_group.data.as_ref(),
+            offsets,
+        );
+    }
+
+    /// Sets the active compute pipeline.
+    pub fn set_pipeline(&mut self, pipeline: &ComputePipeline) {
+        DynContext::compute_pass_set_pipeline(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &pipeline.id,
+            pipeline.data.as_ref(),
+        );
+    }
+
+    /// Inserts a debug marker.
+    pub fn insert_debug_marker(&mut self, label: &str) {
+        DynContext::compute_pass_insert_debug_marker(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Starts recording commands and groups them into a debug marker group.
+    pub fn push_debug_group(&mut self, label: &str) {
+        DynContext::compute_pass_push_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Stops recording commands into the current debug marker group.
+    pub fn pop_debug_group(&mut self) {
+        DynContext::compute_pass_pop_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+
+    /// Dispatches compute work operations.
+    ///
+    /// `x`, `y` and `z` denote the number of work groups to dispatch in each dimension.
+    pub fn dispatch_workgroups(&mut self, x: u32, y: u32, z: u32) {
+        DynContext::compute_pass_dispatch_workgroups(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            x,
+            y,
+            z,
+        );
+    }
+
+    /// Dispatches compute work operations, based on the contents of the `indirect_buffer`.
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DispatchIndirectArgs`](crate::util::DispatchIndirectArgs).
+    pub fn dispatch_workgroups_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+    ) {
+        DynContext::compute_pass_dispatch_workgroups_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+}
+
+/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
+impl<'encoder> ComputePass<'encoder> {
+    /// Set push constant data for subsequent dispatch calls.
+    ///
+    /// Write the bytes in `data` at offset `offset` within push constant
+    /// storage. Both `offset` and the length of `data` must be
+    /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
+    ///
+    /// For example, if `offset` is `4` and `data` is eight bytes long, this
+    /// call will write `data` to bytes `4..12` of push constant storage.
+    pub fn set_push_constants(&mut self, offset: u32, data: &[u8]) {
+        DynContext::compute_pass_set_push_constants(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            offset,
+            data,
+        );
+    }
+}
+
+/// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
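+///
+/// As a hedged sketch of how pass-internal timestamps might be used (the
+/// `query_set` and `pass` names are illustrative, not part of this API):
+///
+/// ```ignore
+/// pass.write_timestamp(&query_set, 0); // before the dispatches of interest
+/// pass.dispatch_workgroups(64, 1, 1);
+/// pass.write_timestamp(&query_set, 1); // after them
+/// // Later, resolve the query set into a buffer and multiply the tick delta
+/// // by Queue::get_timestamp_period() to obtain nanoseconds.
+/// ```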
+impl<'encoder> ComputePass<'encoder> {
+    /// Issue a timestamp command at this point in the queue. The timestamp will be written to the specified query set, at the specified index.
+    ///
+    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::compute_pass_write_timestamp(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        )
+    }
+}
+
+/// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
+impl<'encoder> ComputePass<'encoder> {
+    /// Start a pipeline statistics query on this compute pass. It can be ended with
+    /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::compute_pass_begin_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        );
+    }
+
+    /// End the pipeline statistics query on this compute pass. It can be started with
+    /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn end_pipeline_statistics_query(&mut self) {
+        DynContext::compute_pass_end_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct ComputePassInner {
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) context: Arc<C>,
+}
+
+impl Drop for ComputePassInner {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .compute_pass_end(&mut self.id, self.data.as_mut());
+        }
+    }
+}
+
+/// Describes the timestamp writes of a compute pass.
+///
+/// For use with [`ComputePassDescriptor`].
+/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
+///
+/// Corresponds to [WebGPU `GPUComputePassTimestampWrites`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrites).
+#[derive(Clone, Debug)]
+pub struct ComputePassTimestampWrites<'a> {
+    /// The query set to write to.
+    pub query_set: &'a QuerySet,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePassTimestampWrites<'_>: Send, Sync);
+
+/// Describes the attachments of a compute pass.
+///
+/// For use with [`CommandEncoder::begin_compute_pass`].
+///
+/// Corresponds to [WebGPU `GPUComputePassDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor).
+#[derive(Clone, Default, Debug)]
+pub struct ComputePassDescriptor<'a> {
+    /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Defines which timestamp values will be written for this pass, and where to write them to.
+    ///
+    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
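+    ///
+    /// A hedged example of filling this field (assuming a suitable timestamp
+    /// `query_set`; the indices are illustrative):
+    ///
+    /// ```ignore
+    /// let descriptor = wgpu::ComputePassDescriptor {
+    ///     label: Some("timed compute pass"),
+    ///     timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
+    ///         query_set: &query_set,
+    ///         beginning_of_pass_write_index: Some(0),
+    ///         end_of_pass_write_index: Some(1),
+    ///     }),
+    /// };
+    /// ```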
+    pub timestamp_writes: Option<ComputePassTimestampWrites<'a>>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePassDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/compute_pipeline.rs b/wgpu/src/api/compute_pipeline.rs
new file mode 100644
index 0000000000..ea2de4b8b2
--- /dev/null
+++ b/wgpu/src/api/compute_pipeline.rs
@@ -0,0 +1,81 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a compute pipeline.
+///
+/// A `ComputePipeline` object represents a compute pipeline and its single shader stage.
+/// It can be created with [`Device::create_compute_pipeline`].
+///
+/// Corresponds to [WebGPU `GPUComputePipeline`](https://gpuweb.github.io/gpuweb/#compute-pipeline).
+#[derive(Debug)]
+pub struct ComputePipeline {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePipeline: Send, Sync);
+
+impl ComputePipeline {
+    /// Returns a globally-unique identifier for this `ComputePipeline`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<ComputePipeline> {
+        Id::new(self.id)
+    }
+
+    /// Get an object representing the bind group layout at a given index.
+    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
+        let context = Arc::clone(&self.context);
+        let (id, data) = self.context.compute_pipeline_get_bind_group_layout(
+            &self.id,
+            self.data.as_ref(),
+            index,
+        );
+        BindGroupLayout { context, id, data }
+    }
+}
+
+impl Drop for ComputePipeline {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .compute_pipeline_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a compute pipeline.
+///
+/// For use with [`Device::create_compute_pipeline`].
+///
+/// Corresponds to [WebGPU `GPUComputePipelineDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepipelinedescriptor).
+#[derive(Clone, Debug)]
+pub struct ComputePipelineDescriptor<'a> {
+    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The layout of bind groups for this pipeline.
+    pub layout: Option<&'a PipelineLayout>,
+    /// The compiled shader module for this stage.
+    pub module: &'a ShaderModule,
+    /// The name of the entry point in the compiled shader to use.
+    ///
+    /// If [`Some`], there must be a compute shader entry point with this name in `module`.
+    /// Otherwise, expect exactly one compute shader entry point in `module`, which will be
+    /// selected.
+    // NOTE: keep phrasing in sync. with `FragmentState::entry_point`
+    // NOTE: keep phrasing in sync. with `VertexState::entry_point`
+    pub entry_point: Option<&'a str>,
+    /// Advanced options for when this pipeline is compiled.
+    ///
+    /// This implements `Default`, and for most users can be set to `Default::default()`.
+    pub compilation_options: PipelineCompilationOptions<'a>,
+    /// The pipeline cache to use when creating this pipeline.
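+    ///
+    /// For illustration, a plausible full descriptor might look as follows
+    /// (the module, layout, and variable names are assumptions, not part of
+    /// this API):
+    ///
+    /// ```ignore
+    /// let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+    ///     label: Some("main compute pipeline"),
+    ///     layout: Some(&pipeline_layout),
+    ///     module: &shader_module,
+    ///     entry_point: Some("main"),
+    ///     compilation_options: Default::default(),
+    ///     cache: None, // or Some(&pipeline_cache) when using pipeline caching
+    /// });
+    /// ```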
+    pub cache: Option<&'a PipelineCache>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/device.rs b/wgpu/src/api/device.rs
new file mode 100644
index 0000000000..fff1cf1bb2
--- /dev/null
+++ b/wgpu/src/api/device.rs
@@ -0,0 +1,727 @@
+use std::{error, fmt, future::Future, sync::Arc, thread};
+
+use parking_lot::Mutex;
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Open connection to a graphics and/or compute device.
+///
+/// Responsible for the creation of most rendering and compute resources.
+/// These are then used in commands, which are submitted to a [`Queue`].
+///
+/// A device may be requested from an adapter with [`Adapter::request_device`].
+///
+/// Corresponds to [WebGPU `GPUDevice`](https://gpuweb.github.io/gpuweb/#gpu-device).
+#[derive(Debug)]
+pub struct Device {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Device: Send, Sync);
+
+/// Describes a [`Device`].
+///
+/// For use with [`Adapter::request_device`].
+///
+/// Corresponds to [WebGPU `GPUDeviceDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpudevicedescriptor).
+pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(DeviceDescriptor<'_>: Send, Sync);
+
+impl Device {
+    /// Returns a globally-unique identifier for this `Device`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Device> {
+        Id::new(self.id)
+    }
+
+    /// Check for resource cleanups and mapping callbacks. Will block if [`Maintain::Wait`] is passed.
+    ///
+    /// Return `true` if the queue is empty, or `false` if there are more queue
+    /// submissions still in flight. (Note that, unless access to the [`Queue`] is
+    /// coordinated somehow, this information could be out of date by the time
+    /// the caller receives it. `Queue`s can be shared between threads, so
+    /// other threads could submit new work at any time.)
+    ///
+    /// When running on WebGPU, this is a no-op. `Device`s are automatically polled.
+    pub fn poll(&self, maintain: Maintain) -> MaintainResult {
+        DynContext::device_poll(&*self.context, &self.id, self.data.as_ref(), maintain)
+    }
+
+    /// The features which can be used on this device.
+    ///
+    /// No additional features can be used, even if the underlying adapter can support them.
+    pub fn features(&self) -> Features {
+        DynContext::device_features(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// The limits which can be used on this device.
+    ///
+    /// No better limits can be used, even if the underlying adapter can support them.
+    pub fn limits(&self) -> Limits {
+        DynContext::device_limits(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Creates a shader module from either SPIR-V or WGSL source code.
+    ///
+    /// <div class="warning">
+    // NOTE: Keep this in sync with `naga::front::wgsl::parse_str`!
+    // NOTE: Keep this in sync with `wgpu_core::Global::device_create_shader_module`!
+    ///
+    /// This function may consume a lot of stack space. Compiler-enforced limits for parsing
+    /// recursion exist; if shader compilation runs into them, it will return an error gracefully.
+    /// However, on some build profiles and platforms, the default stack size for a thread may be
+    /// exceeded before this limit is reached during parsing. Callers should ensure that there is
+    /// enough stack space for this, particularly if calls to this method are exposed to user
+    /// input.
+    ///
+    /// </div>
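+    ///
+    /// A small usage sketch (the WGSL source shown here is illustrative):
+    ///
+    /// ```ignore
+    /// let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+    ///     label: Some("trivial shader"),
+    ///     source: wgpu::ShaderSource::Wgsl("@compute @workgroup_size(1) fn main() {}".into()),
+    /// });
+    /// ```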
+    pub fn create_shader_module(&self, desc: ShaderModuleDescriptor<'_>) -> ShaderModule {
+        let (id, data) = DynContext::device_create_shader_module(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+            wgt::ShaderBoundChecks::new(),
+        );
+        ShaderModule {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a shader module from either SPIR-V or WGSL source code without runtime checks.
+    ///
+    /// # Safety
+    /// In contrast with [`create_shader_module`](Self::create_shader_module) this function
+    /// creates a shader module without runtime checks which allows shaders to perform
+    /// operations which can lead to undefined behavior like indexing out of bounds, thus it's
+    /// the caller's responsibility to pass a shader which doesn't perform any of these
+    /// operations.
+    ///
+    /// This has no effect on web.
+    pub unsafe fn create_shader_module_unchecked(
+        &self,
+        desc: ShaderModuleDescriptor<'_>,
+    ) -> ShaderModule {
+        let (id, data) = DynContext::device_create_shader_module(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+            unsafe { wgt::ShaderBoundChecks::unchecked() },
+        );
+        ShaderModule {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a shader module from SPIR-V binary directly.
+    ///
+    /// # Safety
+    ///
+    /// This function passes binary data to the backend as-is and can potentially result in a
+    /// driver crash or bogus behaviour. No attempt is made to ensure that data is valid SPIR-V.
+    ///
+    /// See also [`include_spirv_raw!`] and [`util::make_spirv_raw`].
+    pub unsafe fn create_shader_module_spirv(
+        &self,
+        desc: &ShaderModuleDescriptorSpirV<'_>,
+    ) -> ShaderModule {
+        let (id, data) = unsafe {
+            DynContext::device_create_shader_module_spirv(
+                &*self.context,
+                &self.id,
+                self.data.as_ref(),
+                desc,
+            )
+        };
+        ShaderModule {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates an empty [`CommandEncoder`].
+    pub fn create_command_encoder(&self, desc: &CommandEncoderDescriptor<'_>) -> CommandEncoder {
+        let (id, data) = DynContext::device_create_command_encoder(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        CommandEncoder {
+            context: Arc::clone(&self.context),
+            id: Some(id),
+            data,
+        }
+    }
+
+    /// Creates an empty [`RenderBundleEncoder`].
+    pub fn create_render_bundle_encoder(
+        &self,
+        desc: &RenderBundleEncoderDescriptor<'_>,
+    ) -> RenderBundleEncoder<'_> {
+        let (id, data) = DynContext::device_create_render_bundle_encoder(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        RenderBundleEncoder {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+            parent: self,
+            _p: Default::default(),
+        }
+    }
+
+    /// Creates a new [`BindGroup`].
+    pub fn create_bind_group(&self, desc: &BindGroupDescriptor<'_>) -> BindGroup {
+        let (id, data) = DynContext::device_create_bind_group(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        BindGroup {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a [`BindGroupLayout`].
+    pub fn create_bind_group_layout(
+        &self,
+        desc: &BindGroupLayoutDescriptor<'_>,
+    ) -> BindGroupLayout {
+        let (id, data) = DynContext::device_create_bind_group_layout(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+        );
+        BindGroupLayout {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a [`PipelineLayout`].
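+    ///
+    /// For example, a layout with a single bind group layout might be created
+    /// like this (a sketch; `bind_group_layout` is assumed to exist):
+    ///
+    /// ```ignore
+    /// let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+    ///     label: Some("main pipeline layout"),
+    ///     bind_group_layouts: &[&bind_group_layout],
+    ///     push_constant_ranges: &[],
+    /// });
+    /// ```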
+ pub fn create_pipeline_layout(&self, desc: &PipelineLayoutDescriptor<'_>) -> PipelineLayout { + let (id, data) = DynContext::device_create_pipeline_layout( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + PipelineLayout { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a [`RenderPipeline`]. + pub fn create_render_pipeline(&self, desc: &RenderPipelineDescriptor<'_>) -> RenderPipeline { + let (id, data) = DynContext::device_create_render_pipeline( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + RenderPipeline { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a [`ComputePipeline`]. + pub fn create_compute_pipeline(&self, desc: &ComputePipelineDescriptor<'_>) -> ComputePipeline { + let (id, data) = DynContext::device_create_compute_pipeline( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + ComputePipeline { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a [`Buffer`]. + pub fn create_buffer(&self, desc: &BufferDescriptor<'_>) -> Buffer { + let mut map_context = MapContext::new(desc.size); + if desc.mapped_at_creation { + map_context.initial_range = 0..desc.size; + } + + let (id, data) = + DynContext::device_create_buffer(&*self.context, &self.id, self.data.as_ref(), desc); + + Buffer { + context: Arc::clone(&self.context), + id, + data, + map_context: Mutex::new(map_context), + size: desc.size, + usage: desc.usage, + } + } + + /// Creates a new [`Texture`]. + /// + /// `desc` specifies the general format of the texture. + pub fn create_texture(&self, desc: &TextureDescriptor<'_>) -> Texture { + let (id, data) = + DynContext::device_create_texture(&*self.context, &self.id, self.data.as_ref(), desc); + Texture { + context: Arc::clone(&self.context), + id, + data, + owned: true, + descriptor: TextureDescriptor { + label: None, + view_formats: &[], + ..desc.clone() + }, + } + } + + /// Creates a [`Texture`] from a wgpu-hal Texture. + /// + /// # Safety + /// + /// - `hal_texture` must be created from this device internal handle + /// - `hal_texture` must be created respecting `desc` + /// - `hal_texture` must be initialized + #[cfg(wgpu_core)] + pub unsafe fn create_texture_from_hal( + &self, + hal_texture: A::Texture, + desc: &TextureDescriptor<'_>, + ) -> Texture { + let texture = unsafe { + self.context + .as_any() + .downcast_ref::() + // Part of the safety requirements is that the texture was generated from the same hal device. + // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal textures. + .unwrap() + .create_texture_from_hal::
( + hal_texture, + self.data.as_ref().downcast_ref().unwrap(), + desc, + ) + }; + Texture { + context: Arc::clone(&self.context), + id: ObjectId::from(texture.id()), + data: Box::new(texture), + owned: true, + descriptor: TextureDescriptor { + label: None, + view_formats: &[], + ..desc.clone() + }, + } + } + + /// Creates a [`Buffer`] from a wgpu-hal Buffer. + /// + /// # Safety + /// + /// - `hal_buffer` must be created from this device internal handle + /// - `hal_buffer` must be created respecting `desc` + /// - `hal_buffer` must be initialized + #[cfg(wgpu_core)] + pub unsafe fn create_buffer_from_hal( + &self, + hal_buffer: A::Buffer, + desc: &BufferDescriptor<'_>, + ) -> Buffer { + let mut map_context = MapContext::new(desc.size); + if desc.mapped_at_creation { + map_context.initial_range = 0..desc.size; + } + + let (id, buffer) = unsafe { + self.context + .as_any() + .downcast_ref::() + // Part of the safety requirements is that the buffer was generated from the same hal device. + // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal buffers. + .unwrap() + .create_buffer_from_hal::( + hal_buffer, + self.data.as_ref().downcast_ref().unwrap(), + desc, + ) + }; + + Buffer { + context: Arc::clone(&self.context), + id: ObjectId::from(id), + data: Box::new(buffer), + map_context: Mutex::new(map_context), + size: desc.size, + usage: desc.usage, + } + } + + /// Creates a new [`Sampler`]. + /// + /// `desc` specifies the behavior of the sampler. + pub fn create_sampler(&self, desc: &SamplerDescriptor<'_>) -> Sampler { + let (id, data) = + DynContext::device_create_sampler(&*self.context, &self.id, self.data.as_ref(), desc); + Sampler { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a new [`QuerySet`]. + pub fn create_query_set(&self, desc: &QuerySetDescriptor<'_>) -> QuerySet { + let (id, data) = + DynContext::device_create_query_set(&*self.context, &self.id, self.data.as_ref(), desc); + QuerySet { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Set a callback for errors that are not handled in error scopes. + pub fn on_uncaptured_error(&self, handler: Box) { + self.context + .device_on_uncaptured_error(&self.id, self.data.as_ref(), handler); + } + + /// Push an error scope. + pub fn push_error_scope(&self, filter: ErrorFilter) { + self.context + .device_push_error_scope(&self.id, self.data.as_ref(), filter); + } + + /// Pop an error scope. + pub fn pop_error_scope(&self) -> impl Future> + WasmNotSend { + self.context + .device_pop_error_scope(&self.id, self.data.as_ref()) + } + + /// Starts frame capture. + pub fn start_capture(&self) { + DynContext::device_start_capture(&*self.context, &self.id, self.data.as_ref()) + } + + /// Stops frame capture. + pub fn stop_capture(&self) { + DynContext::device_stop_capture(&*self.context, &self.id, self.data.as_ref()) + } + + /// Query internal counters from the native backend for debugging purposes. + /// + /// Some backends may not set all counters, or may not set any counter at all. + /// The `counters` cargo feature must be enabled for any counter to be set. + /// + /// If a counter is not set, its contains its default value (zero). + pub fn get_internal_counters(&self) -> wgt::InternalCounters { + DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref()) + } + + /// Generate an GPU memory allocation report if the underlying backend supports it. + /// + /// Backends that do not support producing these reports return `None`. 
A backend may
+    /// support it and still return `None` if it is not performing sub-allocation,
+    /// for example as a workaround for driver issues.
+    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Apply a callback to this `Device`'s underlying backend device.
+    ///
+    /// If this `Device` is implemented by the backend API given by `A` (Vulkan,
+    /// Dx12, etc.), then apply `hal_device_callback` to `Some(&device)`, where
+    /// `device` is the underlying backend device type, [`A::Device`].
+    ///
+    /// If this `Device` uses a different backend, apply `hal_device_callback`
+    /// to `None`.
+    ///
+    /// The device is locked for reading while `hal_device_callback` runs. If
+    /// the callback attempts to perform any `wgpu` operations that require
+    /// write access to the device (destroying a buffer, say), deadlock will
+    /// occur. The locks are automatically released when the callback returns.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle passed to the callback must not be manually destroyed.
+    ///
+    /// [`A::Device`]: hal::Api::Device
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Device>) -> R, R>(
+        &self,
+        hal_device_callback: F,
+    ) -> Option<R> {
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| unsafe {
+                ctx.device_as_hal::<A, F, R>(
+                    self.data.as_ref().downcast_ref().unwrap(),
+                    hal_device_callback,
+                )
+            })
+    }
+
+    /// Destroy this device.
+    pub fn destroy(&self) {
+        DynContext::device_destroy(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Set a `DeviceLostCallback` on this device.
+    pub fn set_device_lost_callback(
+        &self,
+        callback: impl Fn(DeviceLostReason, String) + Send + 'static,
+    ) {
+        DynContext::device_set_device_lost_callback(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            Box::new(callback),
+        )
+    }
+
+    /// Test-only function to make this device invalid.
+    #[doc(hidden)]
+    pub fn make_invalid(&self) {
+        DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Create a [`PipelineCache`] with initial data
+    ///
+    /// This can be passed to [`Device::create_compute_pipeline`]
+    /// and [`Device::create_render_pipeline`] either to accelerate those calls
+    /// or to add the results of those calls to the cache.
+    ///
+    /// # Safety
+    ///
+    /// If the `data` field of `desc` is set, it must have previously been returned from a call
+    /// to [`PipelineCache::get_data`][^saving]. This `data` will only be used if it came
+    /// from an adapter with the same [`util::pipeline_cache_key`].
+    /// This *is* compatible across wgpu versions, as any data format change will
+    /// be accounted for.
+    ///
+    /// It is *not* supported to bring caches from previous direct uses of backend APIs
+    /// into this method.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error value if:
+    /// * the [`PIPELINE_CACHE`](wgt::Features::PIPELINE_CACHE) feature is not enabled
+    /// * this device is invalid; or
+    /// * the device is out of memory
+    ///
+    /// This method also returns an error value if:
+    /// * The `fallback` field on `desc` is false; and
+    /// * the `data` provided would not be used[^data_not_used]
+    ///
+    /// If an error value is used in subsequent calls, default caching will be used.
+    ///
+    /// [^saving]: We do recognise that saving this data to disk means this condition
+    /// is impossible to fully prove. Consider the risks for your own application in this case.
+    ///
+    /// [^data_not_used]: This data may not be used if: the data was produced by a prior
+    /// version of wgpu; or was created for an incompatible adapter, or there was a GPU driver
+    /// update. In some cases, the data might not be used even though a real value is returned;
+    /// this is left to the discretion of GPU drivers.
+    pub unsafe fn create_pipeline_cache(
+        &self,
+        desc: &PipelineCacheDescriptor<'_>,
+    ) -> PipelineCache {
+        let (id, data) = unsafe {
+            DynContext::device_create_pipeline_cache(
+                &*self.context,
+                &self.id,
+                self.data.as_ref(),
+                desc,
+            )
+        };
+        PipelineCache {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+}
+
+impl Drop for Device {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.device_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Requesting a device from an [`Adapter`] failed.
+#[derive(Clone, Debug)]
+pub struct RequestDeviceError {
+    pub(crate) inner: RequestDeviceErrorKind,
+}
+#[derive(Clone, Debug)]
+pub(crate) enum RequestDeviceErrorKind {
+    /// Error from [`wgpu_core`].
+    // must match dependency cfg
+    #[cfg(wgpu_core)]
+    Core(wgc::instance::RequestDeviceError),
+
+    /// Error from web API that was called by `wgpu` to request a device.
+    ///
+    /// (This is currently never used by the webgl backend, but it could be.)
+    #[cfg(webgpu)]
+    WebGpu(wasm_bindgen::JsValue),
+}
+
+#[cfg(send_sync)]
+unsafe impl Send for RequestDeviceErrorKind {}
+#[cfg(send_sync)]
+unsafe impl Sync for RequestDeviceErrorKind {}
+
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RequestDeviceError: Send, Sync);
+
+impl fmt::Display for RequestDeviceError {
+    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            RequestDeviceErrorKind::Core(error) => error.fmt(_f),
+            #[cfg(webgpu)]
+            RequestDeviceErrorKind::WebGpu(error_js_value) => {
+                // wasm-bindgen provides a reasonable error stringification via `Debug` impl
+                write!(_f, "{error_js_value:?}")
+            }
+            #[cfg(not(any(webgpu, wgpu_core)))]
+            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
+        }
+    }
+}
+
+impl error::Error for RequestDeviceError {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            RequestDeviceErrorKind::Core(error) => error.source(),
+            #[cfg(webgpu)]
+            RequestDeviceErrorKind::WebGpu(_) => None,
+            #[cfg(not(any(webgpu, wgpu_core)))]
+            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
+        }
+    }
+}
+
+#[cfg(wgpu_core)]
+impl From<wgc::instance::RequestDeviceError> for RequestDeviceError {
+    fn from(error: wgc::instance::RequestDeviceError) -> Self {
+        Self {
+            inner: RequestDeviceErrorKind::Core(error),
+        }
+    }
+}
+
+/// Type for the callback of the uncaptured error handler.
+pub trait UncapturedErrorHandler: Fn(Error) + Send + 'static {}
+impl<T> UncapturedErrorHandler for T where T: Fn(Error) + Send + 'static {}
+
+/// Filter for error scopes.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
+pub enum ErrorFilter {
+    /// Catch only out-of-memory errors.
+    OutOfMemory,
+    /// Catch only validation errors.
+    Validation,
+    /// Catch only internal errors.
+    Internal,
+}
+static_assertions::assert_impl_all!(ErrorFilter: Send, Sync);
+
+/// Error type
+#[derive(Debug)]
+pub enum Error {
+    /// Out of memory error
+    OutOfMemory {
+        /// Lower level source of the error.
+        #[cfg(send_sync)]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + Send + Sync + 'static>,
+        /// Lower level source of the error.
+        #[cfg(not(send_sync))]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + 'static>,
+    },
+    /// Validation error, signifying a bug in code or data
+    Validation {
+        /// Lower level source of the error.
+        #[cfg(send_sync)]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + Send + Sync + 'static>,
+        /// Lower level source of the error.
+        #[cfg(not(send_sync))]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + 'static>,
+        /// Description of the validation error.
+        description: String,
+    },
+    /// Internal error. Used for signalling any failures not explicitly expected by WebGPU.
+    ///
+    /// These could be due to internal implementation or system limits being reached.
+    Internal {
+        /// Lower level source of the error.
+        #[cfg(send_sync)]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + Send + Sync + 'static>,
+        /// Lower level source of the error.
+        #[cfg(not(send_sync))]
+        #[cfg_attr(docsrs, doc(cfg(all())))]
+        source: Box<dyn error::Error + 'static>,
+        /// Description of the internal GPU error.
+        description: String,
+    },
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Error: Send, Sync);
+
+impl error::Error for Error {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match self {
+            Error::OutOfMemory { source } => Some(source.as_ref()),
+            Error::Validation { source, .. } => Some(source.as_ref()),
+            Error::Internal { source, .. } => Some(source.as_ref()),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Error::OutOfMemory { .. } => f.write_str("Out of Memory"),
+            Error::Validation { description, .. } => f.write_str(description),
+            Error::Internal { description, .. } => f.write_str(description),
+        }
+    }
+}
diff --git a/wgpu/src/api/id.rs b/wgpu/src/api/id.rs
new file mode 100644
index 0000000000..d9041883b2
--- /dev/null
+++ b/wgpu/src/api/id.rs
@@ -0,0 +1,67 @@
+use std::{cmp::Ordering, fmt, marker::PhantomData, num::NonZeroU64};
+
+use crate::context::ObjectId;
+
+/// Opaque globally-unique identifier
+#[repr(transparent)]
+pub struct Id<T>(NonZeroU64, PhantomData<*mut T>);
+
+impl<T> Id<T> {
+    /// Create a new `Id` from an `ObjectId`.
+    pub(crate) fn new(id: ObjectId) -> Self {
+        Id(id.global_id(), PhantomData)
+    }
+
+    /// For testing use only. We provide no guarantees about the actual value of the ids.
+    #[doc(hidden)]
+    pub fn inner(&self) -> u64 {
+        self.0.get()
+    }
+}
+
+// SAFETY: `Id` is a bare `NonZeroU64`, the type parameter is a marker purely to avoid confusing Ids
+// returned for different types, so `Id` can safely implement Send and Sync.
+unsafe impl<T> Send for Id<T> {}
+
+// SAFETY: See the implementation for `Send`.
+unsafe impl<T> Sync for Id<T> {}
+
+impl<T> Clone for Id<T> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T> Copy for Id<T> {}
+
+impl<T> fmt::Debug for Id<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("Id").field(&self.0).finish()
+    }
+}
+
+impl<T> PartialEq for Id<T> {
+    fn eq(&self, other: &Id<T>) -> bool {
+        self.0 == other.0
+    }
+}
+
+impl<T> Eq for Id<T> {}
+
+impl<T> PartialOrd for Id<T> {
+    fn partial_cmp(&self, other: &Id<T>) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<T> Ord for Id<T> {
+    fn cmp(&self, other: &Id<T>) -> Ordering {
+        self.0.cmp(&other.0)
+    }
+}
+
+impl<T> std::hash::Hash for Id<T> {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.0.hash(state)
+    }
+}
diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs
new file mode 100644
index 0000000000..26d8b863b1
--- /dev/null
+++ b/wgpu/src/api/instance.rs
@@ -0,0 +1,400 @@
+use parking_lot::Mutex;
+
+use crate::*;
+
+use std::{future::Future, sync::Arc};
+
+/// Context for all other wgpu objects.
Instance of wgpu.
+///
+/// This is the first thing you create when using wgpu.
+/// Its primary use is to create [`Adapter`]s and [`Surface`]s.
+///
+/// Does not have to be kept alive.
+///
+/// Corresponds to [WebGPU `GPU`](https://gpuweb.github.io/gpuweb/#gpu-interface).
+#[derive(Debug)]
+pub struct Instance {
+    context: Arc<C>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Instance: Send, Sync);
+
+impl Default for Instance {
+    /// Creates a new instance of wgpu with default options.
+    ///
+    /// Backends are set to `Backends::all()`, and FXC is chosen as the `dx12_shader_compiler`.
+    ///
+    /// # Panics
+    ///
+    /// If no backend feature for the active target platform is enabled,
+    /// this method will panic, see [`Instance::enabled_backend_features()`].
+    fn default() -> Self {
+        Self::new(InstanceDescriptor::default())
+    }
+}
+
+impl Instance {
+    /// Returns which backends can be picked for the current build configuration.
+    ///
+    /// The returned set depends on a combination of target platform and enabled features.
+    /// This does *not* do any runtime checks and is exclusively based on compile time information.
+    ///
+    /// `InstanceDescriptor::backends` does not need to be a subset of this,
+    /// but any backend that is not in this set will not be picked.
+    ///
+    /// TODO: Right now it's otherwise not possible yet to opt-out of all features on some platforms.
+    /// See
+    /// * Windows/Linux/Android: always enables Vulkan and GLES with no way to opt out
+    pub const fn enabled_backend_features() -> Backends {
+        let mut backends = Backends::empty();
+
+        if cfg!(native) {
+            if cfg!(metal) {
+                backends = backends.union(Backends::METAL);
+            }
+            if cfg!(dx12) {
+                backends = backends.union(Backends::DX12);
+            }
+
+            // Windows, Android, Linux currently always enable Vulkan and OpenGL.
+            // See
+            if cfg!(target_os = "windows") || cfg!(unix) {
+                backends = backends.union(Backends::VULKAN).union(Backends::GL);
+            }
+
+            // Vulkan on Mac/iOS is only available through vulkan-portability.
+            if (cfg!(target_os = "ios") || cfg!(target_os = "macos"))
+                && cfg!(feature = "vulkan-portability")
+            {
+                backends = backends.union(Backends::VULKAN);
+            }
+
+            // GL on Mac is only available through angle.
+            if cfg!(target_os = "macos") && cfg!(feature = "angle") {
+                backends = backends.union(Backends::GL);
+            }
+        } else {
+            if cfg!(webgpu) {
+                backends = backends.union(Backends::BROWSER_WEBGPU);
+            }
+            if cfg!(webgl) {
+                backends = backends.union(Backends::GL);
+            }
+        }
+
+        backends
+    }
+
+    /// Create a new instance of wgpu.
+    ///
+    /// # Arguments
+    ///
+    /// - `instance_desc` - Has fields for which [backends][Backends] wgpu will choose
+    /// during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use.
+    ///
+    /// [`Backends::BROWSER_WEBGPU`] takes a special role:
+    /// If it is set and WebGPU support is detected, this instance will *only* be able to create
+    /// WebGPU adapters. If you instead want to force use of WebGL, either
+    /// disable the `webgpu` compile-time feature or do not add the [`Backends::BROWSER_WEBGPU`]
+    /// flag to the `instance_desc`'s `backends` field.
+    /// If it is set and WebGPU support is *not* detected, the instance will use wgpu-core
+    /// to create adapters, meaning that if the `webgl` feature is enabled, it is able to create
+    /// a WebGL adapter.
+    ///
+    /// # Panics
+    ///
+    /// If no backend feature for the active target platform is enabled,
+    /// this method will panic, see [`Instance::enabled_backend_features()`].
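+    ///
+    /// A typical invocation, shown as a sketch:
+    ///
+    /// ```ignore
+    /// let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
+    ///     backends: wgpu::Backends::PRIMARY,
+    ///     ..Default::default()
+    /// });
+    /// ```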
+    #[allow(unreachable_code)]
+    pub fn new(_instance_desc: InstanceDescriptor) -> Self {
+        if Self::enabled_backend_features().is_empty() {
+            panic!(
+                "No wgpu backend feature that is implemented for the target platform was enabled. \
+                See `wgpu::Instance::enabled_backend_features()` for more information."
+            );
+        }
+
+        #[cfg(webgpu)]
+        {
+            let is_only_available_backend = !cfg!(wgpu_core);
+            let requested_webgpu = _instance_desc.backends.contains(Backends::BROWSER_WEBGPU);
+            let support_webgpu =
+                crate::backend::get_browser_gpu_property().map_or(false, |gpu| !gpu.is_undefined());
+
+            if is_only_available_backend || (requested_webgpu && support_webgpu) {
+                return Self {
+                    context: Arc::from(crate::backend::ContextWebGpu::init(_instance_desc)),
+                };
+            }
+        }
+
+        #[cfg(wgpu_core)]
+        {
+            return Self {
+                context: Arc::from(crate::backend::ContextWgpuCore::init(_instance_desc)),
+            };
+        }
+
+        unreachable!(
+            "Earlier check of `enabled_backend_features` should have prevented getting here!"
+        );
+    }
+
+    /// Create a new instance of wgpu from a wgpu-hal instance.
+    ///
+    /// # Arguments
+    ///
+    /// - `hal_instance` - wgpu-hal instance.
+    ///
+    /// # Safety
+    ///
+    /// Refer to the creation of wgpu-hal Instance for every backend.
+    #[cfg(wgpu_core)]
+    pub unsafe fn from_hal<A: wgc::hal_api::HalApi>(hal_instance: A::Instance) -> Self {
+        Self {
+            context: Arc::new(unsafe {
+                crate::backend::ContextWgpuCore::from_hal_instance::<A>(hal_instance)
+            }),
+        }
+    }
+
+    /// Return a reference to a specific backend instance, if available.
+    ///
+    /// If this `Instance` has a wgpu-hal [`Instance`] for backend
+    /// `A`, return a reference to it. Otherwise, return `None`.
+    ///
+    /// # Safety
+    ///
+    /// - The raw instance handle returned must not be manually destroyed.
+    ///
+    /// [`Instance`]: hal::Api::Instance
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi>(&self) -> Option<&A::Instance> {
+        self.context
+            .as_any()
+            // If we don't have a wgpu-core instance, we don't have a hal instance either.
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .and_then(|ctx| unsafe { ctx.instance_as_hal::<A>() })
+    }
+
+    /// Create a new instance of wgpu from a wgpu-core instance.
+    ///
+    /// # Arguments
+    ///
+    /// - `core_instance` - wgpu-core instance.
+    ///
+    /// # Safety
+    ///
+    /// Refer to the creation of wgpu-core Instance.
+    #[cfg(wgpu_core)]
+    pub unsafe fn from_core(core_instance: wgc::instance::Instance) -> Self {
+        Self {
+            context: Arc::new(unsafe {
+                crate::backend::ContextWgpuCore::from_core_instance(core_instance)
+            }),
+        }
+    }
+
+    /// Retrieves all available [`Adapter`]s that match the given [`Backends`].
+    ///
+    /// # Arguments
+    ///
+    /// - `backends` - Backends from which to enumerate adapters.
+    #[cfg(native)]
+    pub fn enumerate_adapters(&self, backends: Backends) -> Vec<Adapter> {
+        use crate::context::ObjectId;
+
+        let context = Arc::clone(&self.context);
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| {
+                ctx.enumerate_adapters(backends)
+                    .into_iter()
+                    .map(move |id| crate::Adapter {
+                        context: Arc::clone(&context),
+                        id: ObjectId::from(id),
+                        data: Box::new(()),
+                    })
+                    .collect()
+            })
+            .unwrap()
+    }
+
+    /// Retrieves an [`Adapter`] which matches the given [`RequestAdapterOptions`].
+    ///
+    /// Some options are "soft", meaning they are treated as non-mandatory. Others are "hard".
+    ///
+    /// If no adapters are found that satisfy all the "hard" options, `None` is returned.
+    ///
+    /// A `compatible_surface` is required when targeting WebGL2.
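+    ///
+    /// A sketch of a typical request (the blocking strategy is up to the
+    /// caller; `pollster` is just one option for running the future):
+    ///
+    /// ```ignore
+    /// let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions {
+    ///     power_preference: wgpu::PowerPreference::HighPerformance,
+    ///     force_fallback_adapter: false,
+    ///     compatible_surface: Some(&surface),
+    /// }))
+    /// .expect("no suitable adapter found");
+    /// ```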
+ pub fn request_adapter( + &self, + options: &RequestAdapterOptions<'_, '_>, + ) -> impl Future> + WasmNotSend { + let context = Arc::clone(&self.context); + let adapter = self.context.instance_request_adapter(options); + async move { + adapter + .await + .map(|(id, data)| Adapter { context, id, data }) + } + } + + /// Converts a wgpu-hal `ExposedAdapter` to a wgpu [`Adapter`]. + /// + /// # Safety + /// + /// `hal_adapter` must be created from this instance internal handle. + #[cfg(wgpu_core)] + pub unsafe fn create_adapter_from_hal( + &self, + hal_adapter: hal::ExposedAdapter, + ) -> Adapter { + let context = Arc::clone(&self.context); + let id = unsafe { + context + .as_any() + .downcast_ref::() + .unwrap() + .create_adapter_from_hal(hal_adapter) + .into() + }; + Adapter { + context, + id, + data: Box::new(()), + } + } + + /// Creates a new surface targeting a given window/canvas/surface/etc.. + /// + /// Internally, this creates surfaces for all backends that are enabled for this instance. + /// + /// See [`SurfaceTarget`] for what targets are supported. + /// See [`Instance::create_surface_unsafe`] for surface creation with unsafe target variants. + /// + /// Most commonly used are window handles (or provider of windows handles) + /// which can be passed directly as they're automatically converted to [`SurfaceTarget`]. + pub fn create_surface<'window>( + &self, + target: impl Into>, + ) -> Result, CreateSurfaceError> { + // Handle origin (i.e. window) to optionally take ownership of to make the surface outlast the window. + let handle_source; + + let target = target.into(); + let mut surface = match target { + SurfaceTarget::Window(window) => unsafe { + let surface = self.create_surface_unsafe( + SurfaceTargetUnsafe::from_window(&window).map_err(|e| CreateSurfaceError { + inner: CreateSurfaceErrorKind::RawHandle(e), + })?, + ); + handle_source = Some(window); + + surface + }?, + + #[cfg(any(webgpu, webgl))] + SurfaceTarget::Canvas(canvas) => { + handle_source = None; + + let value: &wasm_bindgen::JsValue = &canvas; + let obj = std::ptr::NonNull::from(value).cast(); + let raw_window_handle = raw_window_handle::WebCanvasWindowHandle::new(obj).into(); + let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into(); + + // Note that we need to call this while we still have `value` around. + // This is safe without storing canvas to `handle_origin` since the surface will create a copy internally. + unsafe { + self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle { + raw_display_handle, + raw_window_handle, + }) + }? + } + + #[cfg(any(webgpu, webgl))] + SurfaceTarget::OffscreenCanvas(canvas) => { + handle_source = None; + + let value: &wasm_bindgen::JsValue = &canvas; + let obj = std::ptr::NonNull::from(value).cast(); + let raw_window_handle = + raw_window_handle::WebOffscreenCanvasWindowHandle::new(obj).into(); + let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into(); + + // Note that we need to call this while we still have `value` around. + // This is safe without storing canvas to `handle_origin` since the surface will create a copy internally. + unsafe { + self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle { + raw_display_handle, + raw_window_handle, + }) + }? + } + }; + + surface._handle_source = handle_source; + + Ok(surface) + } + + /// Creates a new surface targeting a given window/canvas/surface/etc. using an unsafe target. + /// + /// Internally, this creates surfaces for all backends that are enabled for this instance. 
+ /// + /// See [`SurfaceTargetUnsafe`] for what targets are supported. + /// See [`Instance::create_surface`] for surface creation with safe target variants. + /// + /// # Safety + /// + /// - See respective [`SurfaceTargetUnsafe`] variants for safety requirements. + pub unsafe fn create_surface_unsafe<'window>( + &self, + target: SurfaceTargetUnsafe, + ) -> Result, CreateSurfaceError> { + let (id, data) = unsafe { self.context.instance_create_surface(target) }?; + + Ok(Surface { + context: Arc::clone(&self.context), + _handle_source: None, + id, + surface_data: data, + config: Mutex::new(None), + }) + } + + /// Polls all devices. + /// + /// If `force_wait` is true and this is not running on the web, then this + /// function will block until all in-flight buffers have been mapped and + /// all submitted commands have finished execution. + /// + /// Return `true` if all devices' queues are empty, or `false` if there are + /// queue submissions still in flight. (Note that, unless access to all + /// [`Queue`s] associated with this [`Instance`] is coordinated somehow, + /// this information could be out of date by the time the caller receives + /// it. `Queue`s can be shared between threads, and other threads could + /// submit new work at any time.) + /// + /// On the web, this is a no-op. `Device`s are automatically polled. + /// + /// [`Queue`s]: Queue + pub fn poll_all(&self, force_wait: bool) -> bool { + self.context.instance_poll_all_devices(force_wait) + } + + /// Generates memory report. + /// + /// Returns `None` if the feature is not supported by the backend + /// which happens only when WebGPU is pre-selected by the instance creation. + #[cfg(wgpu_core)] + pub fn generate_report(&self) -> Option { + self.context + .as_any() + .downcast_ref::() + .map(|ctx| ctx.generate_report()) + } +} diff --git a/wgpu/src/api/mod.rs b/wgpu/src/api/mod.rs new file mode 100644 index 0000000000..819f6847cf --- /dev/null +++ b/wgpu/src/api/mod.rs @@ -0,0 +1,80 @@ +//! Types and functions which define our public api and their +//! helper functionality. +//! +//! # Conventions +//! +//! Each major type gets its own module. The module is laid out as follows: +//! +//! - The type itself +//! - `impl` block for the type +//! - `Drop` implementation for the type (if needed) +//! - Descriptor types and their subtypes. +//! - Any non-public helper types or functions. +//! +//! # Imports +//! +//! Because our public api is "flat" (i.e. all types are directly under the `wgpu` module), +//! we use a single `crate::*` import at the top of each module to bring in all the types in +//! the public api. This is done to: +//! - Avoid having to write out a long list of imports for each module. +//! - Allow docs to be written naturally, without needing to worry about needing dedicated doc imports. +//! - Treat wgpu-types types and wgpu-core types as a single set. +//! + +mod adapter; +mod bind_group; +mod bind_group_layout; +mod buffer; +mod command_buffer; +mod command_encoder; +// Not a root type, but common descriptor types for pipelines. 
+mod common_pipeline; +mod compute_pass; +mod compute_pipeline; +mod device; +mod id; +mod instance; +mod pipeline_cache; +mod pipeline_layout; +mod query_set; +mod queue; +mod render_bundle; +mod render_bundle_encoder; +mod render_pass; +mod render_pipeline; +mod sampler; +mod shader_module; +mod surface; +mod surface_texture; +mod texture; +mod texture_view; + +pub use adapter::*; +pub use bind_group::*; +pub use bind_group_layout::*; +pub use buffer::*; +pub use command_buffer::*; +pub use command_encoder::*; +pub use common_pipeline::*; +pub use compute_pass::*; +pub use compute_pipeline::*; +pub use device::*; +pub use id::*; +pub use instance::*; +pub use pipeline_cache::*; +pub use pipeline_layout::*; +pub use query_set::*; +pub use queue::*; +pub use render_bundle::*; +pub use render_bundle_encoder::*; +pub use render_pass::*; +pub use render_pipeline::*; +pub use sampler::*; +pub use shader_module::*; +pub use surface::*; +pub use surface_texture::*; +pub use texture::*; +pub use texture_view::*; + +/// Object debugging label. +pub type Label<'a> = Option<&'a str>; diff --git a/wgpu/src/api/pipeline_cache.rs b/wgpu/src/api/pipeline_cache.rs new file mode 100644 index 0000000000..42ab15b8ba --- /dev/null +++ b/wgpu/src/api/pipeline_cache.rs @@ -0,0 +1,98 @@ +use std::{sync::Arc, thread}; + +use crate::context::ObjectId; +use crate::*; + +/// Handle to a pipeline cache, which is used to accelerate +/// creating [`RenderPipeline`]s and [`ComputePipeline`]s +/// in subsequent executions +/// +/// This reuse is only applicable for the same or similar devices. +/// See [`util::pipeline_cache_key`] for some details. +/// +/// # Background +/// +/// In most GPU drivers, shader code must be converted into a machine code +/// which can be executed on the GPU. +/// Generating this machine code can require a lot of computation. +/// Pipeline caches allow this computation to be reused between executions +/// of the program. +/// This can be very useful for reducing program startup time. +/// +/// Note that most desktop GPU drivers will manage their own caches, +/// meaning that little advantage can be gained from this on those platforms. +/// However, on some platforms, especially Android, drivers leave this to the +/// application to implement. +/// +/// Unfortunately, drivers do not expose whether they manage their own caches. +/// Some reasonable policies for applications to use are: +/// - Manage their own pipeline cache on all platforms +/// - Only manage pipeline caches on Android +/// +/// # Usage +/// +/// It is valid to use this resource when creating multiple pipelines, in +/// which case it will likely cache each of those pipelines. +/// It is also valid to create a new cache for each pipeline. +/// +/// This resource is most useful when the data produced from it (using +/// [`PipelineCache::get_data`]) is persisted. +/// Care should be taken that pipeline caches are only used for the same device, +/// as pipeline caches from compatible devices are unlikely to provide any advantage. +/// `util::pipeline_cache_key` can be used as a file/directory name to help ensure that. +/// +/// It is recommended to store pipeline caches atomically. If persisting to disk, +/// this can usually be achieved by creating a temporary file, then moving/[renaming] +/// the temporary file over the existing cache +/// +/// # Storage Usage +/// +/// There is not currently an API available to reduce the size of a cache. +/// This is due to limitations in the underlying graphics APIs used. 
+/// This is especially impactful if your application is being updated, so
+/// previous caches are no longer being used.
+///
+/// One option to work around this is to regenerate the cache.
+/// That is, creating the pipelines your program uses
+/// with the stored cache data, then recreating the *same* pipelines
+/// using a new cache, which your application then stores.
+///
+/// # Implementations
+///
+/// This resource currently only works on the following backends:
+/// - Vulkan
+///
+/// This type is unique to the Rust API of `wgpu`.
+///
+/// [renaming]: std::fs::rename
+#[derive(Debug)]
+pub struct PipelineCache {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(PipelineCache: Send, Sync);
+
+impl PipelineCache {
+    /// Get the data associated with this pipeline cache.
+    /// The data format is an implementation detail of `wgpu`.
+    /// The only defined operation on this data is setting it as the `data` field
+    /// on [`PipelineCacheDescriptor`], which is then passed to [`Device::create_pipeline_cache`].
+    ///
+    /// This function is unique to the Rust API of `wgpu`.
+    pub fn get_data(&self) -> Option<Vec<u8>> {
+        self.context
+            .pipeline_cache_get_data(&self.id, self.data.as_ref())
+    }
+}
+
+impl Drop for PipelineCache {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .pipeline_cache_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
diff --git a/wgpu/src/api/pipeline_layout.rs b/wgpu/src/api/pipeline_layout.rs
new file mode 100644
index 0000000000..f47ea1a174
--- /dev/null
+++ b/wgpu/src/api/pipeline_layout.rs
@@ -0,0 +1,61 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a pipeline layout.
+///
+/// A `PipelineLayout` object describes the available binding groups of a pipeline.
+/// It can be created with [`Device::create_pipeline_layout`].
+///
+/// Corresponds to [WebGPU `GPUPipelineLayout`](https://gpuweb.github.io/gpuweb/#gpupipelinelayout).
+#[derive(Debug)]
+pub struct PipelineLayout {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(PipelineLayout: Send, Sync);
+
+impl PipelineLayout {
+    /// Returns a globally-unique identifier for this `PipelineLayout`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<PipelineLayout> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for PipelineLayout {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .pipeline_layout_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`PipelineLayout`].
+///
+/// For use with [`Device::create_pipeline_layout`].
+///
+/// Corresponds to [WebGPU `GPUPipelineLayoutDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpupipelinelayoutdescriptor).
+#[derive(Clone, Debug, Default)]
+pub struct PipelineLayoutDescriptor<'a> {
+    /// Debug label of the pipeline layout. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Bind groups that this pipeline uses. The first entry will provide all the bindings for
+    /// "set = 0", the second entry will provide all the bindings for "set = 1", etc.
+    pub bind_group_layouts: &'a [&'a BindGroupLayout],
+    /// Set of push constant ranges this pipeline uses.
Each shader stage that uses push constants
+    /// must define the range in push constant memory that corresponds to its single `layout(push_constant)`
+    /// uniform block.
+    ///
+    /// If this array is non-empty, the [`Features::PUSH_CONSTANTS`] feature must be enabled.
+    pub push_constant_ranges: &'a [PushConstantRange],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(PipelineLayoutDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/query_set.rs b/wgpu/src/api/query_set.rs
new file mode 100644
index 0000000000..41c262bd98
--- /dev/null
+++ b/wgpu/src/api/query_set.rs
@@ -0,0 +1,46 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a query set.
+///
+/// It can be created with [`Device::create_query_set`].
+///
+/// Corresponds to [WebGPU `GPUQuerySet`](https://gpuweb.github.io/gpuweb/#queryset).
+#[derive(Debug)]
+pub struct QuerySet {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(QuerySet: Send, Sync);
+
+impl QuerySet {
+    /// Returns a globally-unique identifier for this `QuerySet`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<QuerySet> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for QuerySet {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.query_set_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`QuerySet`].
+///
+/// For use with [`Device::create_query_set`].
+///
+/// Corresponds to [WebGPU `GPUQuerySetDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuquerysetdescriptor).
+pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(QuerySetDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs
new file mode 100644
index 0000000000..c675f9f926
--- /dev/null
+++ b/wgpu/src/api/queue.rs
@@ -0,0 +1,300 @@
+use std::{
+    ops::{Deref, DerefMut},
+    sync::Arc,
+    thread,
+};
+
+use crate::context::{DynContext, ObjectId, QueueWriteBuffer};
+use crate::*;
+
+/// Handle to a command queue on a device.
+///
+/// A `Queue` executes recorded [`CommandBuffer`] objects and provides convenience methods
+/// for writing to [buffers](Queue::write_buffer) and [textures](Queue::write_texture).
+/// It can be created along with a [`Device`] by calling [`Adapter::request_device`].
+///
+/// Corresponds to [WebGPU `GPUQueue`](https://gpuweb.github.io/gpuweb/#gpu-queue).
+#[derive(Debug)]
+pub struct Queue {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Queue: Send, Sync);
+
+impl Drop for Queue {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.queue_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Identifier for a particular call to [`Queue::submit`]. Can be used
+/// as part of an argument to [`Device::poll`] to block for a particular
+/// submission to finish.
+///
+/// This type is unique to the Rust API of `wgpu`.
+/// There is no analogue in the WebGPU specification.
+#[derive(Debug, Clone)]
+pub struct SubmissionIndex(pub(crate) Arc<crate::Data>);
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(SubmissionIndex: Send, Sync);
+
+pub use wgt::Maintain as MaintainBase;
+/// Passed to [`Device::poll`] to control how and if it should block.
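+///
+/// For example, a frame loop might use the non-blocking variant, while teardown
+/// waits for the queue to drain (a sketch):
+///
+/// ```ignore
+/// device.poll(wgpu::Maintain::Poll); // process callbacks without blocking
+/// device.poll(wgpu::Maintain::Wait); // block until all submitted work is done
+/// ```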
+pub type Maintain = wgt::Maintain<SubmissionIndex>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Maintain: Send, Sync);
+
+/// A write-only view into a staging buffer.
+///
+/// Reading into this buffer won't yield the contents of the buffer from the
+/// GPU and is likely to be slow. Because of this, although [`AsMut`] is
+/// implemented for this type, [`AsRef`] is not.
+pub struct QueueWriteBufferView<'a> {
+    queue: &'a Queue,
+    buffer: &'a Buffer,
+    offset: BufferAddress,
+    inner: Box<dyn QueueWriteBuffer>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(QueueWriteBufferView<'_>: Send, Sync);
+
+impl Deref for QueueWriteBufferView<'_> {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        log::warn!("Reading from a QueueWriteBufferView won't yield the contents of the buffer and may be slow.");
+        self.inner.slice()
+    }
+}
+
+impl DerefMut for QueueWriteBufferView<'_> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.inner.slice_mut()
+    }
+}
+
+impl<'a> AsMut<[u8]> for QueueWriteBufferView<'a> {
+    fn as_mut(&mut self) -> &mut [u8] {
+        self.inner.slice_mut()
+    }
+}
+
+impl<'a> Drop for QueueWriteBufferView<'a> {
+    fn drop(&mut self) {
+        DynContext::queue_write_staging_buffer(
+            &*self.queue.context,
+            &self.queue.id,
+            self.queue.data.as_ref(),
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            self.offset,
+            &*self.inner,
+        );
+    }
+}
+
+impl Queue {
+    /// Schedule a data write into `buffer` starting at `offset`.
+    ///
+    /// This method fails if `data` overruns the size of `buffer` starting at `offset`.
+    ///
+    /// This does *not* submit the transfer to the GPU immediately. Calls to
+    /// `write_buffer` begin execution only on the next call to
+    /// [`Queue::submit`]. To get a set of scheduled transfers started
+    /// immediately, it's fine to call `submit` with no command buffers at all:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// queue.submit([]);
+    /// ```
+    ///
+    /// However, `data` will be immediately copied into staging memory, so the
+    /// caller may discard it any time after this call completes.
+    ///
+    /// If possible, consider using [`Queue::write_buffer_with`] instead. That
+    /// method avoids an intermediate copy and is often able to transfer data
+    /// more efficiently than this one.
+    pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) {
+        DynContext::queue_write_buffer(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            &buffer.id,
+            buffer.data.as_ref(),
+            offset,
+            data,
+        )
+    }
+
+    /// Write to a buffer via a directly mapped staging buffer.
+    ///
+    /// Return a [`QueueWriteBufferView`] which, when dropped, schedules a copy
+    /// of its contents into `buffer` at `offset`. The returned view
+    /// dereferences to a `size`-byte long `&mut [u8]`, in which you should
+    /// store the data you would like written to `buffer`.
+    ///
+    /// This method may perform transfers faster than [`Queue::write_buffer`],
+    /// because the returned [`QueueWriteBufferView`] is actually the staging
+    /// buffer for the write, mapped into the caller's address space. Writing
+    /// your data directly into this staging buffer avoids the temporary
+    /// CPU-side buffer needed by `write_buffer`.
+    ///
+    /// Reading from the returned view is slow, and will not yield the current
+    /// contents of `buffer`.
+    ///
+    /// Note that dropping the [`QueueWriteBufferView`] does *not* submit the
+    /// transfer to the GPU immediately. The transfer begins only on the next
+    /// call to [`Queue::submit`] after the view is dropped.
+    /// To get a set of
+    /// scheduled transfers started immediately, it's fine to call `submit` with
+    /// no command buffers at all:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// queue.submit([]);
+    /// ```
+    ///
+    /// This method fails if `size` is greater than the size of `buffer` starting at `offset`.
+    #[must_use]
+    pub fn write_buffer_with<'a>(
+        &'a self,
+        buffer: &'a Buffer,
+        offset: BufferAddress,
+        size: BufferSize,
+    ) -> Option<QueueWriteBufferView<'a>> {
+        profiling::scope!("Queue::write_buffer_with");
+        DynContext::queue_validate_write_buffer(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            &buffer.id,
+            buffer.data.as_ref(),
+            offset,
+            size,
+        )?;
+        let staging_buffer = DynContext::queue_create_staging_buffer(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            size,
+        )?;
+        Some(QueueWriteBufferView {
+            queue: self,
+            buffer,
+            offset,
+            inner: staging_buffer,
+        })
+    }
+
+    /// Schedule a write of some data into a texture.
+    ///
+    /// * `data` contains the texels to be written, which must be in
+    ///   [the same format as the texture](TextureFormat).
+    /// * `data_layout` describes the memory layout of `data`, which does not necessarily
+    ///   have to have tightly packed rows.
+    /// * `texture` specifies the texture to write into, and the location within the
+    ///   texture (coordinate offset, mip level) that will be overwritten.
+    /// * `size` is the size, in texels, of the region to be written.
+    ///
+    /// This method fails if `size` overruns the size of `texture`, or if `data` is too short.
+    ///
+    /// This does *not* submit the transfer to the GPU immediately. Calls to
+    /// `write_texture` begin execution only on the next call to
+    /// [`Queue::submit`]. To get a set of scheduled transfers started
+    /// immediately, it's fine to call `submit` with no command buffers at all:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// queue.submit([]);
+    /// ```
+    ///
+    /// However, `data` will be immediately copied into staging memory, so the
+    /// caller may discard it any time after this call completes.
+    pub fn write_texture(
+        &self,
+        texture: ImageCopyTexture<'_>,
+        data: &[u8],
+        data_layout: ImageDataLayout,
+        size: Extent3d,
+    ) {
+        DynContext::queue_write_texture(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            texture,
+            data,
+            data_layout,
+            size,
+        )
+    }
+
+    /// Schedule a copy of data from `image` into `texture`.
+    #[cfg(any(webgpu, webgl))]
+    pub fn copy_external_image_to_texture(
+        &self,
+        source: &wgt::ImageCopyExternalImage,
+        dest: crate::ImageCopyTextureTagged<'_>,
+        size: Extent3d,
+    ) {
+        DynContext::queue_copy_external_image_to_texture(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            source,
+            dest,
+            size,
+        )
+    }
+
+    /// Submits a series of finished command buffers for execution.
+    pub fn submit<I: IntoIterator<Item = CommandBuffer>>(
+        &self,
+        command_buffers: I,
+    ) -> SubmissionIndex {
+        let mut command_buffers = command_buffers
+            .into_iter()
+            .map(|mut comb| (comb.id.take().unwrap(), comb.data.take().unwrap()));
+
+        let data = DynContext::queue_submit(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            &mut command_buffers,
+        );
+
+        SubmissionIndex(data)
+    }
+
+    /// Gets the number of nanoseconds each tick of a timestamp query represents.
+    ///
+    /// Returns zero if timestamp queries are unsupported.
+    ///
+    /// Timestamp values are represented in nanoseconds on WebGPU (see the WebGPU
+    /// specification); therefore, this is always 1.0 on the web, but on wgpu-core
+    /// a manual conversion is required.
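+    ///
+    /// A hedged sketch of converting a timestamp delta to milliseconds; `start_ticks`
+    /// and `end_ticks` are hypothetical values resolved from a timestamp [`QuerySet`]:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// # let (start_ticks, end_ticks): (u64, u64) = (0, 0);
+    /// // Each tick is `period_ns` nanoseconds; scale the delta accordingly.
+    /// let period_ns = queue.get_timestamp_period() as f64;
+    /// let elapsed_ms = (end_ticks - start_ticks) as f64 * period_ns / 1.0e6;
+    /// ```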
+    pub fn get_timestamp_period(&self) -> f32 {
+        DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Registers a callback that is invoked when the previous call to submit finishes running on the GPU.
+    /// This callback being called implies that all mapped buffer callbacks which were registered before this
+    /// call will have been called.
+    ///
+    /// For the callback to be invoked, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
+    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
+    ///
+    /// The callback will be called on the thread that first calls the above functions after the GPU work
+    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
+    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
+    /// and used to set flags, send messages, etc.
+    pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) {
+        DynContext::queue_on_submitted_work_done(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            Box::new(callback),
+        )
+    }
+}
diff --git a/wgpu/src/api/render_bundle.rs b/wgpu/src/api/render_bundle.rs
new file mode 100644
index 0000000000..e80da93e2d
--- /dev/null
+++ b/wgpu/src/api/render_bundle.rs
@@ -0,0 +1,50 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Pre-prepared reusable bundle of GPU operations.
+///
+/// It only supports a handful of render commands, but it makes them reusable. Executing a
+/// [`RenderBundle`] is often more efficient than issuing the underlying commands manually.
+///
+/// It can be created by use of a [`RenderBundleEncoder`], and executed onto a [`CommandEncoder`]
+/// using [`RenderPass::execute_bundles`].
+///
+/// Corresponds to [WebGPU `GPURenderBundle`](https://gpuweb.github.io/gpuweb/#render-bundle).
+#[derive(Debug)]
+pub struct RenderBundle {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderBundle: Send, Sync);
+
+impl RenderBundle {
+    /// Returns a globally-unique identifier for this `RenderBundle`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for RenderBundle {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .render_bundle_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`RenderBundle`].
+///
+/// For use with [`RenderBundleEncoder::finish`].
+///
+/// Corresponds to [WebGPU `GPURenderBundleDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundledescriptor).
+pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(RenderBundleDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/render_bundle_encoder.rs b/wgpu/src/api/render_bundle_encoder.rs
new file mode 100644
index 0000000000..ae5829bee1
--- /dev/null
+++ b/wgpu/src/api/render_bundle_encoder.rs
@@ -0,0 +1,278 @@
+use std::{marker::PhantomData, num::NonZeroU32, ops::Range, sync::Arc};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Encodes a series of GPU operations into a reusable "render bundle".
+///
+/// It only supports a handful of render commands, but it makes them reusable.
+/// It can be created with [`Device::create_render_bundle_encoder`].
+/// It can be executed onto a [`CommandEncoder`] using [`RenderPass::execute_bundles`].
+///
+/// Executing a [`RenderBundle`] is often more efficient than issuing the underlying commands
+/// manually.
+///
+/// Corresponds to [WebGPU `GPURenderBundleEncoder`](
+/// https://gpuweb.github.io/gpuweb/#gpurenderbundleencoder).
+#[derive(Debug)]
+pub struct RenderBundleEncoder<'a> {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) parent: &'a Device,
+    /// This type should be !Send !Sync, because it represents an allocation on this thread's
+    /// command buffer.
+    pub(crate) _p: PhantomData<*const u8>,
+}
+static_assertions::assert_not_impl_any!(RenderBundleEncoder<'_>: Send, Sync);
+
+/// Describes a [`RenderBundleEncoder`].
+///
+/// For use with [`Device::create_render_bundle_encoder`].
+///
+/// Corresponds to [WebGPU `GPURenderBundleEncoderDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundleencoderdescriptor).
+#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
+pub struct RenderBundleEncoderDescriptor<'a> {
+    /// Debug label of the render bundle encoder. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The formats of the color attachments that this render bundle is capable of rendering to. This
+    /// must match the formats of the color attachments in the render pass this render bundle is executed in.
+    pub color_formats: &'a [Option<TextureFormat>],
+    /// Information about the depth attachment that this render bundle is capable of rendering to. This
+    /// must match the format of the depth attachments in the render pass this render bundle is executed in.
+    pub depth_stencil: Option<RenderBundleDepthStencil>,
+    /// Sample count this render bundle is capable of rendering to. This must match the pipelines and
+    /// the render passes it is used in.
+    pub sample_count: u32,
+    /// Whether this render bundle will render to multiple array layers in the attachments at the same time.
+    pub multiview: Option<NonZeroU32>,
+}
+static_assertions::assert_impl_all!(RenderBundleEncoderDescriptor<'_>: Send, Sync);
+
+impl<'a> RenderBundleEncoder<'a> {
+    /// Finishes recording and returns a [`RenderBundle`] that can be executed in other render passes.
+    pub fn finish(self, desc: &RenderBundleDescriptor<'_>) -> RenderBundle {
+        let (id, data) =
+            DynContext::render_bundle_encoder_finish(&*self.context, self.id, self.data, desc);
+        RenderBundle {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Sets the active bind group for a given bind group index. The bind group layout
+    /// in the active pipeline when any `draw()` function is called must match the layout of this bind group.
+    ///
+    /// If the bind group has dynamic offsets, provide them in the binding order.
+    pub fn set_bind_group(
+        &mut self,
+        index: u32,
+        bind_group: &'a BindGroup,
+        offsets: &[DynamicOffset],
+    ) {
+        DynContext::render_bundle_encoder_set_bind_group(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            index,
+            &bind_group.id,
+            bind_group.data.as_ref(),
+            offsets,
+        )
+    }
+
+    /// Sets the active render pipeline.
+    ///
+    /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
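+    ///
+    /// A brief sketch of typical recording order (`pipeline` and `encoder` are
+    /// hypothetical values created elsewhere):
+    ///
+    /// ```no_run
+    /// # let pipeline: wgpu::RenderPipeline = todo!();
+    /// # let mut encoder: wgpu::RenderBundleEncoder = todo!();
+    /// // Set the pipeline first, then record draws that use it.
+    /// encoder.set_pipeline(&pipeline);
+    /// encoder.draw(0..3, 0..1);
+    /// ```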
+    pub fn set_pipeline(&mut self, pipeline: &'a RenderPipeline) {
+        DynContext::render_bundle_encoder_set_pipeline(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &pipeline.id,
+            pipeline.data.as_ref(),
+        )
+    }
+
+    /// Sets the active index buffer.
+    ///
+    /// Subsequent calls to [`draw_indexed`](RenderBundleEncoder::draw_indexed) on this [`RenderBundleEncoder`] will
+    /// use `buffer` as the source index buffer.
+    pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'a>, index_format: IndexFormat) {
+        DynContext::render_bundle_encoder_set_index_buffer(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            index_format,
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Assign a vertex buffer to a slot.
+    ///
+    /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
+    /// [`RenderBundleEncoder`] will use `buffer` as one of the source vertex buffers.
+    ///
+    /// The `slot` refers to the index of the matching descriptor in
+    /// [`VertexState::buffers`].
+    ///
+    /// [`draw`]: RenderBundleEncoder::draw
+    /// [`draw_indexed`]: RenderBundleEncoder::draw_indexed
+    pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'a>) {
+        DynContext::render_bundle_encoder_set_vertex_buffer(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            slot,
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Draws primitives from the active vertex buffer(s).
+    ///
+    /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    /// Does not use an index buffer. If you need one, see [`RenderBundleEncoder::draw_indexed`].
+    ///
+    /// Panics if `vertices` is outside the range of any set vertex buffer.
+    ///
+    /// - `vertices`: the range of vertices to draw.
+    /// - `instances`: the range of instances to draw. Use `0..1` if instance buffers are not used.
+    ///
+    /// Example of how this works internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for vertex_id in vertex_range {
+    ///         let vertex = vertex[vertex_id];
+    ///         vertex_shader(vertex, vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
+        DynContext::render_bundle_encoder_draw(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            vertices,
+            instances,
+        )
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffer(s).
+    ///
+    /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`].
+    /// The active vertex buffer(s) can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    ///
+    /// Panics if `indices` is outside the range of the set index buffer.
+    ///
+    /// - `indices`: the range of indices to draw.
+    /// - `base_vertex`: value added to each index value before indexing into the vertex buffers.
+    /// - `instances`: the range of instances to draw. Use `0..1` if instance buffers are not used.
+    ///
+    /// Example of how this works internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for index_index in index_range {
+    ///         let vertex_id = index_buffer[index_index];
+    ///         let adjusted_vertex_id = vertex_id + base_vertex;
+    ///         let vertex = vertex[adjusted_vertex_id];
+    ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
+        DynContext::render_bundle_encoder_draw_indexed(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            indices,
+            base_vertex,
+            instances,
+        );
+    }
+
+    /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    ///
+    /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: BufferAddress) {
+        DynContext::render_bundle_encoder_draw_indirect(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`.
+    ///
+    /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`], while the active
+    /// vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    pub fn draw_indexed_indirect(
+        &mut self,
+        indirect_buffer: &'a Buffer,
+        indirect_offset: BufferAddress,
+    ) {
+        DynContext::render_bundle_encoder_draw_indexed_indirect(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+}
+
+/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
+impl<'a> RenderBundleEncoder<'a> {
+    /// Set push constant data.
+    ///
+    /// Offset is measured in bytes, but must be a multiple of [`PUSH_CONSTANT_ALIGNMENT`].
+    ///
+    /// Data size must be a multiple of 4 and must have an alignment of 4.
+    /// For example, with an offset of 4 and an array of `[u8; 8]`, that will write to the range
+    /// of 4..12.
+    ///
+    /// For each byte in the range of push constant data written, the union of the stages of all push constant
+    /// ranges that covers that byte must be exactly `stages`. There's no good way of explaining this simply,
+    /// so here are some examples:
+    ///
+    /// ```text
+    /// For the given ranges:
+    /// - 0..4 Vertex
+    /// - 4..8 Fragment
+    /// ```
+    ///
+    /// You would need to upload this in two `set_push_constants` calls. First for the `Vertex` range, second for the `Fragment` range.
+    ///
+    /// ```text
+    /// For the given ranges:
+    /// - 0..8  Vertex
+    /// - 4..12 Fragment
+    /// ```
+    ///
+    /// You would need to upload this in three `set_push_constants` calls. First for the `Vertex` only range 0..4, second
+    /// for the `Vertex | Fragment` range 4..8, third for the `Fragment` range 8..12.
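+    ///
+    /// A hedged sketch of the first layout above (eight bytes uploaded in two calls;
+    /// `encoder` is a hypothetical recording [`RenderBundleEncoder`]):
+    ///
+    /// ```no_run
+    /// # let mut encoder: wgpu::RenderBundleEncoder = todo!();
+    /// // Each call covers exactly one stage's range.
+    /// encoder.set_push_constants(wgpu::ShaderStages::VERTEX, 0, &[0u8; 4]);
+    /// encoder.set_push_constants(wgpu::ShaderStages::FRAGMENT, 4, &[0u8; 4]);
+    /// ```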
+    pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
+        DynContext::render_bundle_encoder_set_push_constants(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            stages,
+            offset,
+            data,
+        );
+    }
+}
diff --git a/wgpu/src/api/render_pass.rs b/wgpu/src/api/render_pass.rs
new file mode 100644
index 0000000000..bdb8ebe372
--- /dev/null
+++ b/wgpu/src/api/render_pass.rs
@@ -0,0 +1,817 @@
+use std::{marker::PhantomData, ops::Range, sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+#[derive(Debug)]
+pub(crate) struct RenderPassInner {
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) context: Arc<C>,
+}
+
+impl Drop for RenderPassInner {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .render_pass_end(&mut self.id, self.data.as_mut());
+        }
+    }
+}
+
+/// In-progress recording of a render pass: a list of render commands in a [`CommandEncoder`].
+///
+/// It can be created with [`CommandEncoder::begin_render_pass()`], whose [`RenderPassDescriptor`]
+/// specifies the attachments (textures) that will be rendered to.
+///
+/// Most of the methods on `RenderPass` serve one of two purposes, identifiable by their names:
+///
+/// * `draw_*()`: Drawing (that is, encoding a render command, which, when executed by the GPU, will
+///   rasterize something and execute shaders).
+/// * `set_*()`: Setting part of the [render state](https://gpuweb.github.io/gpuweb/#renderstate)
+///   for future drawing commands.
+///
+/// A render pass may contain any number of drawing commands, and before/between each command the
+/// render state may be updated however you wish; each drawing command will be executed using the
+/// render state that has been set when the `draw_*()` function is called.
+///
+/// Corresponds to [WebGPU `GPURenderPassEncoder`](
+/// https://gpuweb.github.io/gpuweb/#render-pass-encoder).
+#[derive(Debug)]
+pub struct RenderPass<'encoder> {
+    /// The inner data of the render pass, separated out so it's easy to replace the lifetime with 'static if desired.
+    pub(crate) inner: RenderPassInner,
+
+    /// This lifetime is used to protect the [`CommandEncoder`] from being used
+    /// while the pass is alive.
+    pub(crate) encoder_guard: PhantomData<&'encoder ()>,
+}
+
+impl<'encoder> RenderPass<'encoder> {
+    /// Drops the lifetime relationship to the parent command encoder, making use of
+    /// the encoder while this pass is recorded a run-time error instead.
+    ///
+    /// Attention: As long as the render pass has not been ended, any mutating operation on the parent
+    /// command encoder will cause a run-time error and invalidate it!
+    /// By default, the lifetime constraint prevents this, but it can be useful
+    /// to handle this at run time, such as when storing the pass and encoder in the same
+    /// data structure.
+    ///
+    /// This operation has no effect on pass recording.
+    /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
+    /// regardless of the lifetime constraint or its absence.
+    pub fn forget_lifetime(self) -> RenderPass<'static> {
+        RenderPass {
+            inner: self.inner,
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Sets the active bind group for a given bind group index. The bind group layout
+    /// in the active pipeline when any `draw_*()` method is called must match the layout of
+    /// this bind group.
+    ///
+    /// If the bind group has dynamic offsets, provide them in binding order.
+    /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
+    /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
+    ///
+    /// Subsequent draw calls’ shader executions will be able to access data in these bind groups.
+    pub fn set_bind_group(
+        &mut self,
+        index: u32,
+        bind_group: &BindGroup,
+        offsets: &[DynamicOffset],
+    ) {
+        DynContext::render_pass_set_bind_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            index,
+            &bind_group.id,
+            bind_group.data.as_ref(),
+            offsets,
+        )
+    }
+
+    /// Sets the active render pipeline.
+    ///
+    /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
+    pub fn set_pipeline(&mut self, pipeline: &RenderPipeline) {
+        DynContext::render_pass_set_pipeline(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &pipeline.id,
+            pipeline.data.as_ref(),
+        )
+    }
+
+    /// Sets the blend color as used by some of the blending modes.
+    ///
+    /// Subsequent blending tests will test against this value.
+    /// If this method has not been called, the blend constant defaults to [`Color::TRANSPARENT`]
+    /// (all components zero).
+    pub fn set_blend_constant(&mut self, color: Color) {
+        DynContext::render_pass_set_blend_constant(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            color,
+        )
+    }
+
+    /// Sets the active index buffer.
+    ///
+    /// Subsequent calls to [`draw_indexed`](RenderPass::draw_indexed) on this [`RenderPass`] will
+    /// use `buffer` as the source index buffer.
+    pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'_>, index_format: IndexFormat) {
+        DynContext::render_pass_set_index_buffer(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            index_format,
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Assign a vertex buffer to a slot.
+    ///
+    /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
+    /// [`RenderPass`] will use `buffer` as one of the source vertex buffers.
+    ///
+    /// The `slot` refers to the index of the matching descriptor in
+    /// [`VertexState::buffers`].
+    ///
+    /// [`draw`]: RenderPass::draw
+    /// [`draw_indexed`]: RenderPass::draw_indexed
+    pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'_>) {
+        DynContext::render_pass_set_vertex_buffer(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            slot,
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Sets the scissor rectangle used during the rasterization stage,
+    /// after transformation into [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
+    ///
+    /// Subsequent draw calls will discard any fragments which fall outside the scissor rectangle.
+    /// If this method has not been called, the scissor rectangle defaults to the entire bounds of
+    /// the render targets.
+    ///
+    /// The function of the scissor rectangle resembles [`set_viewport()`](Self::set_viewport),
+    /// but it does not affect the coordinate system, only which fragments are discarded.
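+    ///
+    /// For example, a sketch restricting rendering to the top-left 100×100 pixels of the
+    /// attachment (`render_pass` is a hypothetical pass that is currently recording):
+    ///
+    /// ```no_run
+    /// # let mut render_pass: wgpu::RenderPass = todo!();
+    /// // Fragments outside x in 0..100, y in 0..100 are discarded.
+    /// render_pass.set_scissor_rect(0, 0, 100, 100);
+    /// ```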
+    pub fn set_scissor_rect(&mut self, x: u32, y: u32, width: u32, height: u32) {
+        DynContext::render_pass_set_scissor_rect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            x,
+            y,
+            width,
+            height,
+        );
+    }
+
+    /// Sets the viewport used during the rasterization stage to linearly map
+    /// from [normalized device coordinates](https://www.w3.org/TR/webgpu/#ndc) to [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
+    ///
+    /// Subsequent draw calls will only draw within this region.
+    /// If this method has not been called, the viewport defaults to the entire bounds of the render
+    /// targets.
+    pub fn set_viewport(&mut self, x: f32, y: f32, w: f32, h: f32, min_depth: f32, max_depth: f32) {
+        DynContext::render_pass_set_viewport(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            x,
+            y,
+            w,
+            h,
+            min_depth,
+            max_depth,
+        );
+    }
+
+    /// Sets the stencil reference.
+    ///
+    /// Subsequent stencil tests will test against this value.
+    /// If this method has not been called, the stencil reference value defaults to `0`.
+    pub fn set_stencil_reference(&mut self, reference: u32) {
+        DynContext::render_pass_set_stencil_reference(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            reference,
+        );
+    }
+
+    /// Inserts a debug marker.
+    pub fn insert_debug_marker(&mut self, label: &str) {
+        DynContext::render_pass_insert_debug_marker(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Starts recording commands into a new debug marker group.
+    pub fn push_debug_group(&mut self, label: &str) {
+        DynContext::render_pass_push_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Stops recording commands into the current debug marker group, closing it.
+    pub fn pop_debug_group(&mut self) {
+        DynContext::render_pass_pop_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+
+    /// Draws primitives from the active vertex buffer(s).
+    ///
+    /// The active vertex buffer(s) can be set with [`RenderPass::set_vertex_buffer`].
+    /// Does not use an index buffer. If you need one, see [`RenderPass::draw_indexed`].
+    ///
+    /// Panics if `vertices` is outside the range of any set vertex buffer.
+    ///
+    /// - `vertices`: the range of vertices to draw.
+    /// - `instances`: the range of instances to draw. Use `0..1` if instance buffers are not used.
+    ///
+    /// Example of how this works internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for vertex_id in vertex_range {
+    ///         let vertex = vertex[vertex_id];
+    ///         vertex_shader(vertex, vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
+        DynContext::render_pass_draw(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            vertices,
+            instances,
+        )
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffers.
+    ///
+    /// The active index buffer can be set with [`RenderPass::set_index_buffer`].
+    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// Panics if `indices` is outside the range of the set index buffer.
+    ///
+    /// - `indices`: the range of indices to draw.
+    /// - `base_vertex`: value added to each index value before indexing into the vertex buffers.
+    /// - `instances`: the range of instances to draw. Use `0..1` if instance buffers are not used.
+    ///
+    /// Example of how this works internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for index_index in index_range {
+    ///         let vertex_id = index_buffer[index_index];
+    ///         let adjusted_vertex_id = vertex_id + base_vertex;
+    ///         let vertex = vertex[adjusted_vertex_id];
+    ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
+        DynContext::render_pass_draw_indexed(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            indices,
+            base_vertex,
+            instances,
+        );
+    }
+
+    /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    ///
+    /// This is like calling [`RenderPass::draw`] but the contents of the call are specified in the `indirect_buffer`.
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    ///
+    /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
+    /// these and issue an error.
+    /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
+    ///   [`DrawIndirect::first_instance`](crate::util::DrawIndirectArgs::first_instance) will be ignored.
+    /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
+    ///   any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
+    ///
+    /// See details on the individual flags for more information.
+    pub fn draw_indirect(&mut self, indirect_buffer: &Buffer, indirect_offset: BufferAddress) {
+        DynContext::render_pass_draw_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`.
+    ///
+    /// This is like calling [`RenderPass::draw_indexed`] but the contents of the call are specified in the `indirect_buffer`.
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    ///
+    /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
+    /// these and issue an error.
+    /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
+    ///   [`DrawIndexedIndirect::first_instance`](crate::util::DrawIndexedIndirectArgs::first_instance) will be ignored.
+    /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
+    ///   any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
+    ///
+    /// See details on the individual flags for more information.
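+    ///
+    /// A hedged sketch of filling `indirect_buffer` from the CPU first; buffer creation is
+    /// elided, and `util::DrawIndexedIndirectArgs` is the layout named above:
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// # let indirect_buffer: wgpu::Buffer = todo!();
+    /// // One indexed draw: 6 indices, 1 instance, no offsets.
+    /// let args = wgpu::util::DrawIndexedIndirectArgs {
+    ///     index_count: 6,
+    ///     instance_count: 1,
+    ///     first_index: 0,
+    ///     base_vertex: 0,
+    ///     first_instance: 0,
+    /// };
+    /// queue.write_buffer(&indirect_buffer, 0, args.as_bytes());
+    /// ```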
+    pub fn draw_indexed_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+    ) {
+        DynContext::render_pass_draw_indexed_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+
+    /// Execute a [render bundle][RenderBundle], which is a set of pre-recorded commands
+    /// that can be run together.
+    ///
+    /// Commands in the bundle do not inherit this render pass's current render state, and after the
+    /// bundle has executed, the state is **cleared** (reset to defaults, not the previous state).
+    pub fn execute_bundles<'a, I: IntoIterator<Item = &'a RenderBundle>>(
+        &mut self,
+        render_bundles: I,
+    ) {
+        let mut render_bundles = render_bundles
+            .into_iter()
+            .map(|rb| (&rb.id, rb.data.as_ref()));
+
+        DynContext::render_pass_execute_bundles(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &mut render_bundles,
+        )
+    }
+}
+
+/// [`Features::MULTI_DRAW_INDIRECT`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    /// `count` draw calls are issued.
+    ///
+    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            count,
+        );
+    }
+
+    /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`. `count` draw calls are issued.
+    ///
+    /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
+    /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indexed_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indexed_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            count,
+        );
+    }
+}
+
+/// [`Features::MULTI_DRAW_INDIRECT_COUNT`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    /// The count buffer is read to determine how many draws to issue.
+    ///
+    /// The indirect buffer must be long enough to account for `max_count` draws; however, only `count`
+    /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
+    ///
+    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// The structure expected in `count_buffer` is the following:
+    ///
+    /// ```rust
+    /// #[repr(C)]
+    /// struct DrawIndirectCount {
+    ///     count: u32, // Number of draw calls to issue.
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indirect_count(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count_buffer: &Buffer,
+        count_offset: BufferAddress,
+        max_count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indirect_count(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            &count_buffer.id,
+            count_buffer.data.as_ref(),
+            count_offset,
+            max_count,
+        );
+    }
+
+    /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`. The count buffer is read to determine how many draws to issue.
+    ///
+    /// The indirect buffer must be long enough to account for `max_count` draws; however, only `count`
+    /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
+    ///
+    /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
+    /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// The structure expected in `count_buffer` is the following:
+    ///
+    /// ```rust
+    /// #[repr(C)]
+    /// struct DrawIndexedIndirectCount {
+    ///     count: u32, // Number of draw calls to issue.
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indexed_indirect_count(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count_buffer: &Buffer,
+        count_offset: BufferAddress,
+        max_count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indexed_indirect_count(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            &count_buffer.id,
+            count_buffer.data.as_ref(),
+            count_offset,
+            max_count,
+        );
+    }
+}
+
+/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Set push constant data for subsequent draw calls.
+    ///
+    /// Write the bytes in `data` at offset `offset` within push constant
+    /// storage, all of which are accessible by all the pipeline stages in
+    /// `stages`, and no others.
+    /// Both `offset` and the length of `data` must be
+    /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
+    ///
+    /// For example, if `offset` is `4` and `data` is eight bytes long, this
+    /// call will write `data` to bytes `4..12` of push constant storage.
+    ///
+    /// # Stage matching
+    ///
+    /// Every byte in the affected range of push constant storage must be
+    /// accessible to exactly the same set of pipeline stages, which must match
+    /// `stages`. If there are two bytes of storage that are accessible by
+    /// different sets of pipeline stages - say, one is accessible by fragment
+    /// shaders, and the other is accessible by both fragment shaders and vertex
+    /// shaders - then no single `set_push_constants` call may affect both of
+    /// them; to write both, you must make multiple calls, each with the
+    /// appropriate `stages` value.
+    ///
+    /// Which pipeline stages may access a given byte is determined by the
+    /// pipeline's [`PushConstant`] global variable and (if it is a struct) its
+    /// members' offsets.
+    ///
+    /// For example, suppose you have twelve bytes of push constant storage,
+    /// where bytes `0..8` are accessed by the vertex shader, and bytes `4..12`
+    /// are accessed by the fragment shader. This means there are three byte
+    /// ranges each accessed by a different set of stages:
+    ///
+    /// - Bytes `0..4` are accessed only by the vertex shader.
+    ///
+    /// - Bytes `4..8` are accessed by both the vertex shader and the fragment shader.
+    ///
+    /// - Bytes `8..12` are accessed only by the fragment shader.
+    ///
+    /// To write all twelve bytes requires three `set_push_constants` calls, one
+    /// for each range, each passing the matching `stages` mask.
+    ///
+    /// [`PushConstant`]: https://docs.rs/naga/latest/naga/enum.StorageClass.html#variant.PushConstant
+    pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
+        DynContext::render_pass_set_push_constants(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            stages,
+            offset,
+            data,
+        );
+    }
+}
+
+/// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Issue a timestamp command at this point in the queue. The
+    /// timestamp will be written to the specified query set, at the specified index.
+    ///
+    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::render_pass_write_timestamp(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        )
+    }
+}
+
+impl<'encoder> RenderPass<'encoder> {
+    /// Start an occlusion query on this render pass. It can be ended with
+    /// `end_occlusion_query`. Occlusion queries may not be nested.
+    pub fn begin_occlusion_query(&mut self, query_index: u32) {
+        DynContext::render_pass_begin_occlusion_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            query_index,
+        );
+    }
+
+    /// End the occlusion query on this render pass. It can be started with
+    /// `begin_occlusion_query`. Occlusion queries may not be nested.
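+    ///
+    /// A brief sketch bracketing a single draw with an occlusion query (assumes the
+    /// pass was begun with an `occlusion_query_set` in its [`RenderPassDescriptor`]):
+    ///
+    /// ```no_run
+    /// # let mut render_pass: wgpu::RenderPass = todo!();
+    /// // The query at index 0 counts samples passing depth/stencil tests for this draw.
+    /// render_pass.begin_occlusion_query(0);
+    /// render_pass.draw(0..3, 0..1);
+    /// render_pass.end_occlusion_query();
+    /// ```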
+    pub fn end_occlusion_query(&mut self) {
+        DynContext::render_pass_end_occlusion_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+}
+
+/// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Start a pipeline statistics query on this render pass. It can be ended with
+    /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::render_pass_begin_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        );
+    }
+
+    /// End the pipeline statistics query on this render pass. It can be started with
+    /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn end_pipeline_statistics_query(&mut self) {
+        DynContext::render_pass_end_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+}
+
+/// Operation to perform to the output attachment at the start of a render pass.
+///
+/// Corresponds to [WebGPU `GPULoadOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpuloadop),
+/// plus the corresponding clearValue.
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum LoadOp<V> {
+    /// Loads the specified value for this attachment into the render pass.
+    ///
+    /// On some GPU hardware (primarily mobile), "clear" is significantly cheaper
+    /// because it avoids loading data from main memory into tile-local memory.
+    ///
+    /// On other GPU hardware, there isn’t a significant difference.
+    ///
+    /// As a result, it is recommended to use "clear" rather than "load" in cases
+    /// where the initial value doesn’t matter
+    /// (e.g. the render target will be cleared using a skybox).
+    Clear(V),
+    /// Loads the existing value for this attachment into the render pass.
+    Load,
+}
+
+impl<V: Default> Default for LoadOp<V> {
+    fn default() -> Self {
+        Self::Clear(Default::default())
+    }
+}
+
+/// Operation to perform to the output attachment at the end of a render pass.
+///
+/// Corresponds to [WebGPU `GPUStoreOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpustoreop).
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum StoreOp {
+    /// Stores the resulting value of the render pass for this attachment.
+    #[default]
+    Store,
+    /// Discards the resulting value of the render pass for this attachment.
+    ///
+    /// The attachment will be treated as uninitialized afterwards.
+    /// (If only one of the Depth and Stencil texture-aspects is set to `Discard`,
+    /// the other texture-aspect will be preserved.)
+    ///
+    /// This can be significantly faster on tile-based render hardware.
+    ///
+    /// Prefer this if the attachment is not read by subsequent passes.
+    Discard,
+}
+
+/// Pair of load and store operations for an attachment aspect.
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// separate `loadOp` and `storeOp` fields are used instead.
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct Operations<V> {
+    /// How data should be read through this attachment.
+    pub load: LoadOp<V>,
+    /// Whether data will be written to through this attachment.
+    ///
+    /// Note that resolve textures (if specified) are always written to,
+    /// regardless of this setting.
+    pub store: StoreOp,
+}
+
+impl<V: Default> Default for Operations<V> {
+    #[inline]
+    fn default() -> Self {
+        Self {
+            load: LoadOp::<V>::default(),
+            store: StoreOp::default(),
+        }
+    }
+}
+
+/// Describes the timestamp writes of a render pass.
+///
+/// For use with [`RenderPassDescriptor`].
+/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
+///
+/// Corresponds to [WebGPU `GPURenderPassTimestampWrite`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrites).
+#[derive(Clone, Debug)]
+pub struct RenderPassTimestampWrites<'a> {
+    /// The query set to write to.
+    pub query_set: &'a QuerySet,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassTimestampWrites<'_>: Send, Sync);
+
+/// Describes a color attachment to a [`RenderPass`].
+///
+/// For use with [`RenderPassDescriptor`].
+///
+/// Corresponds to [WebGPU `GPURenderPassColorAttachment`](
+/// https://gpuweb.github.io/gpuweb/#color-attachments).
+#[derive(Clone, Debug)]
+pub struct RenderPassColorAttachment<'tex> {
+    /// The view to use as an attachment.
+    pub view: &'tex TextureView,
+    /// The view that will receive the resolved output if multisampling is used.
+    ///
+    /// If set, it is always written to, regardless of how [`Self::ops`] is configured.
+    pub resolve_target: Option<&'tex TextureView>,
+    /// What operations will be performed on this color attachment.
+    pub ops: Operations<Color>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassColorAttachment<'_>: Send, Sync);
+
+/// Describes a depth/stencil attachment to a [`RenderPass`].
+///
+/// For use with [`RenderPassDescriptor`].
+///
+/// Corresponds to [WebGPU `GPURenderPassDepthStencilAttachment`](
+/// https://gpuweb.github.io/gpuweb/#depth-stencil-attachments).
+#[derive(Clone, Debug)]
+pub struct RenderPassDepthStencilAttachment<'tex> {
+    /// The view to use as an attachment.
+    pub view: &'tex TextureView,
+    /// What operations will be performed on the depth part of the attachment.
+    pub depth_ops: Option<Operations<f32>>,
+    /// What operations will be performed on the stencil part of the attachment.
+    pub stencil_ops: Option<Operations<u32>>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassDepthStencilAttachment<'_>: Send, Sync);
+
+/// Describes the attachments of a render pass.
+///
+/// For use with [`CommandEncoder::begin_render_pass`].
+///
+/// Corresponds to [WebGPU `GPURenderPassDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpassdescriptor).
+#[derive(Clone, Debug, Default)]
+pub struct RenderPassDescriptor<'a> {
+    /// Debug label of the render pass. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The color attachments of the render pass.
+    pub color_attachments: &'a [Option<RenderPassColorAttachment<'a>>],
+    /// The depth and stencil attachment of the render pass, if any.
+    pub depth_stencil_attachment: Option<RenderPassDepthStencilAttachment<'a>>,
+    /// Defines which timestamp values will be written for this pass, and where to write them to.
+    ///
+    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
+    pub timestamp_writes: Option<RenderPassTimestampWrites<'a>>,
+    /// Defines where the occlusion query results will be stored for this pass.
+    pub occlusion_query_set: Option<&'a QuerySet>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/render_pipeline.rs b/wgpu/src/api/render_pipeline.rs
new file mode 100644
index 0000000000..7e74127167
--- /dev/null
+++ b/wgpu/src/api/render_pipeline.rs
@@ -0,0 +1,151 @@
+use std::{num::NonZeroU32, sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a rendering (graphics) pipeline.
+///
+/// A `RenderPipeline` object represents a graphics pipeline and its stages, bindings, vertex
+/// buffers and targets. It can be created with [`Device::create_render_pipeline`].
+///
+/// Corresponds to [WebGPU `GPURenderPipeline`](https://gpuweb.github.io/gpuweb/#render-pipeline).
+#[derive(Debug)]
+pub struct RenderPipeline {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPipeline: Send, Sync);
+
+impl Drop for RenderPipeline {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .render_pipeline_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+impl RenderPipeline {
+    /// Returns a globally-unique identifier for this `RenderPipeline`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Get an object representing the bind group layout at a given index.
+    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
+        let context = Arc::clone(&self.context);
+        let (id, data) =
+            self.context
+                .render_pipeline_get_bind_group_layout(&self.id, self.data.as_ref(), index);
+        BindGroupLayout { context, id, data }
+    }
+}
+
+/// Describes how the vertex buffer is interpreted.
+///
+/// For use in [`VertexState`].
+///
+/// Corresponds to [WebGPU `GPUVertexBufferLayout`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexbufferlayout).
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct VertexBufferLayout<'a> {
+    /// The stride, in bytes, between elements of this buffer.
+    pub array_stride: BufferAddress,
+    /// How often this vertex buffer is "stepped" forward.
+    pub step_mode: VertexStepMode,
+    /// The list of attributes which comprise a single vertex.
+    pub attributes: &'a [VertexAttribute],
+}
+static_assertions::assert_impl_all!(VertexBufferLayout<'_>: Send, Sync);
+
+/// Describes the vertex processing in a render pipeline.
+///
+/// For use in [`RenderPipelineDescriptor`].
+///
+/// Corresponds to [WebGPU `GPUVertexState`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexstate).
+#[derive(Clone, Debug)]
+pub struct VertexState<'a> {
+    /// The compiled shader module for this stage.
+    pub module: &'a ShaderModule,
+    /// The name of the entry point in the compiled shader to use.
+    ///
+    /// If [`Some`], there must be a vertex-stage shader entry point with this name in `module`.
+    /// Otherwise, expect exactly one vertex-stage entry point in `module`, which will be
+    /// selected.
+    // NOTE: keep phrasing in sync. with `ComputePipelineDescriptor::entry_point`
+    // NOTE: keep phrasing in sync. with `FragmentState::entry_point`
+    pub entry_point: Option<&'a str>,
+    /// Advanced options for when this pipeline is compiled.
+    ///
+    /// This implements `Default`, and for most users can be set to `Default::default()`.
+    pub compilation_options: PipelineCompilationOptions<'a>,
+    /// The format of any vertex buffers used with this pipeline.
+    pub buffers: &'a [VertexBufferLayout<'a>],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(VertexState<'_>: Send, Sync);
+
+/// Describes the fragment processing in a render pipeline.
+///
+/// For use in [`RenderPipelineDescriptor`].
+///
+/// Corresponds to [WebGPU `GPUFragmentState`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpufragmentstate).
+#[derive(Clone, Debug)]
+pub struct FragmentState<'a> {
+    /// The compiled shader module for this stage.
+    pub module: &'a ShaderModule,
+    /// The name of the entry point in the compiled shader to use.
+    ///
+    /// If [`Some`], there must be a `@fragment` shader entry point with this name in `module`.
+    /// Otherwise, expect exactly one fragment-stage entry point in `module`, which will be
+    /// selected.
+    // NOTE: keep phrasing in sync. with `ComputePipelineDescriptor::entry_point`
+    // NOTE: keep phrasing in sync. with `VertexState::entry_point`
+    pub entry_point: Option<&'a str>,
+    /// Advanced options for when this pipeline is compiled.
+    ///
+    /// This implements `Default`, and for most users can be set to `Default::default()`.
+    pub compilation_options: PipelineCompilationOptions<'a>,
+    /// The color state of the render targets.
+    pub targets: &'a [Option<ColorTargetState>],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(FragmentState<'_>: Send, Sync);
+
+/// Describes a render (graphics) pipeline.
+///
+/// For use with [`Device::create_render_pipeline`].
+///
+/// Corresponds to [WebGPU `GPURenderPipelineDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpipelinedescriptor).
+#[derive(Clone, Debug)]
+pub struct RenderPipelineDescriptor<'a> {
+    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The layout of bind groups for this pipeline.
+    pub layout: Option<&'a PipelineLayout>,
+    /// The compiled vertex stage, its entry point, and the input buffers layout.
+    pub vertex: VertexState<'a>,
+    /// The properties of the pipeline at the primitive assembly and rasterization level.
+    pub primitive: PrimitiveState,
+    /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
+    pub depth_stencil: Option<DepthStencilState>,
+    /// The multi-sampling properties of the pipeline.
+    pub multisample: MultisampleState,
+    /// The compiled fragment stage, its entry point, and the color targets.
+    pub fragment: Option<FragmentState<'a>>,
+    /// If the pipeline will be used with a multiview render pass, this indicates how many array
+    /// layers the attachments will have.
+    pub multiview: Option<NonZeroU32>,
+    /// The pipeline cache to use when creating this pipeline.
+    pub cache: Option<&'a PipelineCache>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPipelineDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/sampler.rs b/wgpu/src/api/sampler.rs
new file mode 100644
index 0000000000..63267ded5d
--- /dev/null
+++ b/wgpu/src/api/sampler.rs
@@ -0,0 +1,94 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a sampler.
+///
+/// A `Sampler` object defines how a pipeline will sample from a [`TextureView`].
Samplers define +/// image filters (including anisotropy) and address (wrapping) modes, among other things. See +/// the documentation for [`SamplerDescriptor`] for more information. +/// +/// It can be created with [`Device::create_sampler`]. +/// +/// Corresponds to [WebGPU `GPUSampler`](https://gpuweb.github.io/gpuweb/#sampler-interface). +#[derive(Debug)] +pub struct Sampler { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(Sampler: Send, Sync); + +impl Sampler { + /// Returns a globally-unique identifier for this `Sampler`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id { + Id::new(self.id) + } +} + +impl Drop for Sampler { + fn drop(&mut self) { + if !thread::panicking() { + self.context.sampler_drop(&self.id, self.data.as_ref()); + } + } +} + +/// Describes a [`Sampler`]. +/// +/// For use with [`Device::create_sampler`]. +/// +/// Corresponds to [WebGPU `GPUSamplerDescriptor`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gpusamplerdescriptor). +#[derive(Clone, Debug, PartialEq)] +pub struct SamplerDescriptor<'a> { + /// Debug label of the sampler. This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// How to deal with out of bounds accesses in the u (i.e. x) direction + pub address_mode_u: AddressMode, + /// How to deal with out of bounds accesses in the v (i.e. y) direction + pub address_mode_v: AddressMode, + /// How to deal with out of bounds accesses in the w (i.e. z) direction + pub address_mode_w: AddressMode, + /// How to filter the texture when it needs to be magnified (made larger) + pub mag_filter: FilterMode, + /// How to filter the texture when it needs to be minified (made smaller) + pub min_filter: FilterMode, + /// How to filter between mip map levels + pub mipmap_filter: FilterMode, + /// Minimum level of detail (i.e. mip level) to use + pub lod_min_clamp: f32, + /// Maximum level of detail (i.e. mip level) to use + pub lod_max_clamp: f32, + /// If this is enabled, this is a comparison sampler using the given comparison function. + pub compare: Option, + /// Must be at least 1. If this is not 1, all filter modes must be linear. + pub anisotropy_clamp: u16, + /// Border color to use when address_mode is [`AddressMode::ClampToBorder`] + pub border_color: Option, +} +static_assertions::assert_impl_all!(SamplerDescriptor<'_>: Send, Sync); + +impl Default for SamplerDescriptor<'_> { + fn default() -> Self { + Self { + label: None, + address_mode_u: Default::default(), + address_mode_v: Default::default(), + address_mode_w: Default::default(), + mag_filter: Default::default(), + min_filter: Default::default(), + mipmap_filter: Default::default(), + lod_min_clamp: 0.0, + lod_max_clamp: 32.0, + compare: None, + anisotropy_clamp: 1, + border_color: None, + } + } +} diff --git a/wgpu/src/api/shader_module.rs b/wgpu/src/api/shader_module.rs new file mode 100644 index 0000000000..d81562e932 --- /dev/null +++ b/wgpu/src/api/shader_module.rs @@ -0,0 +1,249 @@ +use std::{borrow::Cow, future::Future, marker::PhantomData, sync::Arc, thread}; + +use crate::context::ObjectId; +use crate::*; + +/// Handle to a compiled shader module. +/// +/// A `ShaderModule` represents a compiled shader module on the GPU. 
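// A usage sketch for the sampler API above: only the fields that differ from the
// `Default` implementation need to be spelled out. `device` is assumed to exist.
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
    label: Some("linear clamping sampler"),
    address_mode_u: wgpu::AddressMode::ClampToEdge,
    address_mode_v: wgpu::AddressMode::ClampToEdge,
    mag_filter: wgpu::FilterMode::Linear,
    min_filter: wgpu::FilterMode::Linear,
    ..Default::default()
});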
It can be created by passing +/// source code to [`Device::create_shader_module`] or valid SPIR-V binary to +/// [`Device::create_shader_module_spirv`]. Shader modules are used to define programmable stages +/// of a pipeline. +/// +/// Corresponds to [WebGPU `GPUShaderModule`](https://gpuweb.github.io/gpuweb/#shader-module). +#[derive(Debug)] +pub struct ShaderModule { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(ShaderModule: Send, Sync); + +impl Drop for ShaderModule { + fn drop(&mut self) { + if !thread::panicking() { + self.context + .shader_module_drop(&self.id, self.data.as_ref()); + } + } +} + +impl ShaderModule { + /// Returns a globally-unique identifier for this `ShaderModule`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id { + Id::new(self.id) + } + + /// Get the compilation info for the shader module. + pub fn get_compilation_info(&self) -> impl Future + WasmNotSend { + self.context + .shader_get_compilation_info(&self.id, self.data.as_ref()) + } +} + +/// Compilation information for a shader module. +/// +/// Corresponds to [WebGPU `GPUCompilationInfo`](https://gpuweb.github.io/gpuweb/#gpucompilationinfo). +/// The source locations use bytes, and index a UTF-8 encoded string. +#[derive(Debug, Clone)] +pub struct CompilationInfo { + /// The messages from the shader compilation process. + pub messages: Vec, +} + +/// A single message from the shader compilation process. +/// +/// Roughly corresponds to [`GPUCompilationMessage`](https://www.w3.org/TR/webgpu/#gpucompilationmessage), +/// except that the location uses UTF-8 for all positions. +#[derive(Debug, Clone)] +pub struct CompilationMessage { + /// The text of the message. + pub message: String, + /// The type of the message. + pub message_type: CompilationMessageType, + /// Where in the source code the message points at. + pub location: Option, +} + +/// The type of a compilation message. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CompilationMessageType { + /// An error message. + Error, + /// A warning message. + Warning, + /// An informational message. + Info, +} + +/// A human-readable representation for a span, tailored for text source. +/// +/// Roughly corresponds to the positional members of [`GPUCompilationMessage`][gcm] from +/// the WebGPU specification, except +/// - `offset` and `length` are in bytes (UTF-8 code units), instead of UTF-16 code units. +/// - `line_position` is in bytes (UTF-8 code units), and is usually not directly intended for humans. +/// +/// [gcm]: https://www.w3.org/TR/webgpu/#gpucompilationmessage +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct SourceLocation { + /// 1-based line number. + pub line_number: u32, + /// 1-based column in code units (in bytes) of the start of the span. + /// Remember to convert accordingly when displaying to the user. + pub line_position: u32, + /// 0-based Offset in code units (in bytes) of the start of the span. + pub offset: u32, + /// Length in code units (in bytes) of the span. 
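// A sketch of consuming `get_compilation_info` and the byte-based `SourceLocation`
// above (the struct's final `length` field continues below). Assumes an async
// context, and that `module` was built from the WGSL string `source`.
let info = module.get_compilation_info().await;
for msg in &info.messages {
    if let Some(loc) = msg.location {
        // `offset`/`length` index UTF-8 bytes, so the source can be sliced directly.
        let span = &source[loc.offset as usize..(loc.offset + loc.length) as usize];
        eprintln!(
            "{:?} at line {}: {} ({span:?})",
            msg.message_type, loc.line_number, msg.message
        );
    }
}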
+ pub length: u32, +} + +#[cfg(all(feature = "wgsl", wgpu_core))] +impl From> + for CompilationInfo +{ + fn from(value: crate::naga::error::ShaderError) -> Self { + CompilationInfo { + messages: vec![CompilationMessage { + message: value.to_string(), + message_type: CompilationMessageType::Error, + location: value.inner.location(&value.source).map(Into::into), + }], + } + } +} +#[cfg(feature = "glsl")] +impl From> for CompilationInfo { + fn from(value: naga::error::ShaderError) -> Self { + let messages = value + .inner + .errors + .into_iter() + .map(|err| CompilationMessage { + message: err.to_string(), + message_type: CompilationMessageType::Error, + location: err.location(&value.source).map(Into::into), + }) + .collect(); + CompilationInfo { messages } + } +} + +#[cfg(feature = "spirv")] +impl From> for CompilationInfo { + fn from(value: naga::error::ShaderError) -> Self { + CompilationInfo { + messages: vec![CompilationMessage { + message: value.to_string(), + message_type: CompilationMessageType::Error, + location: None, + }], + } + } +} + +#[cfg(any(wgpu_core, naga))] +impl + From< + crate::naga::error::ShaderError>, + > for CompilationInfo +{ + fn from( + value: crate::naga::error::ShaderError< + crate::naga::WithSpan, + >, + ) -> Self { + CompilationInfo { + messages: vec![CompilationMessage { + message: value.to_string(), + message_type: CompilationMessageType::Error, + location: value.inner.location(&value.source).map(Into::into), + }], + } + } +} + +#[cfg(any(wgpu_core, naga))] +impl From for SourceLocation { + fn from(value: crate::naga::SourceLocation) -> Self { + SourceLocation { + length: value.length, + offset: value.offset, + line_number: value.line_number, + line_position: value.line_position, + } + } +} + +/// Source of a shader module. +/// +/// The source will be parsed and validated. +/// +/// Any necessary shader translation (e.g. from WGSL to SPIR-V or vice versa) +/// will be done internally by wgpu. +/// +/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification, +/// only WGSL source code strings are accepted. +#[cfg_attr(feature = "naga-ir", allow(clippy::large_enum_variant))] +#[derive(Clone, Debug)] +#[non_exhaustive] +pub enum ShaderSource<'a> { + /// SPIR-V module represented as a slice of words. + /// + /// See also: [`util::make_spirv`], [`include_spirv`] + #[cfg(feature = "spirv")] + SpirV(Cow<'a, [u32]>), + /// GLSL module as a string slice. + /// + /// Note: GLSL is not yet fully supported and must be a specific ShaderStage. + #[cfg(feature = "glsl")] + Glsl { + /// The source code of the shader. + shader: Cow<'a, str>, + /// The shader stage that the shader targets. For example, `naga::ShaderStage::Vertex` + stage: naga::ShaderStage, + /// Defines to unlock configured shader features. + defines: naga::FastHashMap, + }, + /// WGSL module as a string slice. + #[cfg(feature = "wgsl")] + Wgsl(Cow<'a, str>), + /// Naga module. + #[cfg(feature = "naga-ir")] + Naga(Cow<'static, naga::Module>), + /// Dummy variant because `Naga` doesn't have a lifetime and without enough active features it + /// could be the last one active. + #[doc(hidden)] + Dummy(PhantomData<&'a ()>), +} +static_assertions::assert_impl_all!(ShaderSource<'_>: Send, Sync); + +/// Descriptor for use with [`Device::create_shader_module`]. +/// +/// Corresponds to [WebGPU `GPUShaderModuleDescriptor`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gpushadermoduledescriptor). 
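// A minimal sketch of `ShaderSource` with the descriptor defined just below;
// `device` is assumed to exist and the WGSL is illustrative.
let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
    label: Some("example shader"),
    source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(
        "@vertex fn vs_main() -> @builtin(position) vec4<f32> { return vec4<f32>(0.0); }",
    )),
});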
+#[derive(Clone, Debug)] +pub struct ShaderModuleDescriptor<'a> { + /// Debug label of the shader module. This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Source code for the shader. + pub source: ShaderSource<'a>, +} +static_assertions::assert_impl_all!(ShaderModuleDescriptor<'_>: Send, Sync); + +/// Descriptor for a shader module given by SPIR-V binary, for use with +/// [`Device::create_shader_module_spirv`]. +/// +/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification, +/// only WGSL source code strings are accepted. +#[derive(Debug)] +pub struct ShaderModuleDescriptorSpirV<'a> { + /// Debug label of the shader module. This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Binary SPIR-V data, in 4-byte words. + pub source: Cow<'a, [u32]>, +} +static_assertions::assert_impl_all!(ShaderModuleDescriptorSpirV<'_>: Send, Sync); diff --git a/wgpu/src/api/surface.rs b/wgpu/src/api/surface.rs new file mode 100644 index 0000000000..9c7e056aaf --- /dev/null +++ b/wgpu/src/api/surface.rs @@ -0,0 +1,425 @@ +use std::{error, fmt, sync::Arc, thread}; + +use parking_lot::Mutex; +use raw_window_handle::{HasDisplayHandle, HasWindowHandle}; + +use crate::context::{DynContext, ObjectId}; +use crate::*; + +/// Describes a [`Surface`]. +/// +/// For use with [`Surface::configure`]. +/// +/// Corresponds to [WebGPU `GPUCanvasConfiguration`]( +/// https://gpuweb.github.io/gpuweb/#canvas-configuration). +pub type SurfaceConfiguration = wgt::SurfaceConfiguration>; +static_assertions::assert_impl_all!(SurfaceConfiguration: Send, Sync); + +/// Handle to a presentable surface. +/// +/// A `Surface` represents a platform-specific surface (e.g. a window) onto which rendered images may +/// be presented. A `Surface` may be created with the function [`Instance::create_surface`]. +/// +/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification, +/// [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context) +/// serves a similar role. +pub struct Surface<'window> { + pub(crate) context: Arc, + + /// Optionally, keep the source of the handle used for the surface alive. + /// + /// This is useful for platforms where the surface is created from a window and the surface + /// would become invalid when the window is dropped. + pub(crate) _handle_source: Option>, + + /// Wgpu-core surface id. + pub(crate) id: ObjectId, + + /// Additional surface data returned by [`DynContext::instance_create_surface`]. + pub(crate) surface_data: Box, + + // Stores the latest `SurfaceConfiguration` that was set using `Surface::configure`. + // It is required to set the attributes of the `SurfaceTexture` in the + // `Surface::get_current_texture` method. + // Because the `Surface::configure` method operates on an immutable reference this type has to + // be wrapped in a mutex and since the configuration is only supplied after the surface has + // been created is is additionally wrapped in an option. + pub(crate) config: Mutex>, +} + +impl Surface<'_> { + /// Returns a globally-unique identifier for this `Surface`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id> { + Id::new(self.id) + } + + /// Returns the capabilities of the surface when used with the given adapter. 
+ /// + /// Returns specified values (see [`SurfaceCapabilities`]) if surface is incompatible with the adapter. + pub fn get_capabilities(&self, adapter: &Adapter) -> SurfaceCapabilities { + DynContext::surface_get_capabilities( + &*self.context, + &self.id, + self.surface_data.as_ref(), + &adapter.id, + adapter.data.as_ref(), + ) + } + + /// Return a default `SurfaceConfiguration` from width and height to use for the [`Surface`] with this adapter. + /// + /// Returns None if the surface isn't supported by this adapter + pub fn get_default_config( + &self, + adapter: &Adapter, + width: u32, + height: u32, + ) -> Option { + let caps = self.get_capabilities(adapter); + Some(SurfaceConfiguration { + usage: wgt::TextureUsages::RENDER_ATTACHMENT, + format: *caps.formats.first()?, + width, + height, + desired_maximum_frame_latency: 2, + present_mode: *caps.present_modes.first()?, + alpha_mode: wgt::CompositeAlphaMode::Auto, + view_formats: vec![], + }) + } + + /// Initializes [`Surface`] for presentation. + /// + /// # Panics + /// + /// - A old [`SurfaceTexture`] is still alive referencing an old surface. + /// - Texture format requested is unsupported on the surface. + /// - `config.width` or `config.height` is zero. + pub fn configure(&self, device: &Device, config: &SurfaceConfiguration) { + DynContext::surface_configure( + &*self.context, + &self.id, + self.surface_data.as_ref(), + &device.id, + device.data.as_ref(), + config, + ); + + let mut conf = self.config.lock(); + *conf = Some(config.clone()); + } + + /// Returns the next texture to be presented by the swapchain for drawing. + /// + /// In order to present the [`SurfaceTexture`] returned by this method, + /// first a [`Queue::submit`] needs to be done with some work rendering to this texture. + /// Then [`SurfaceTexture::present`] needs to be called. + /// + /// If a SurfaceTexture referencing this surface is alive when the swapchain is recreated, + /// recreating the swapchain will panic. + pub fn get_current_texture(&self) -> Result { + let (texture_id, texture_data, status, detail) = DynContext::surface_get_current_texture( + &*self.context, + &self.id, + self.surface_data.as_ref(), + ); + + let suboptimal = match status { + SurfaceStatus::Good => false, + SurfaceStatus::Suboptimal => true, + SurfaceStatus::Timeout => return Err(SurfaceError::Timeout), + SurfaceStatus::Outdated => return Err(SurfaceError::Outdated), + SurfaceStatus::Lost => return Err(SurfaceError::Lost), + }; + + let guard = self.config.lock(); + let config = guard + .as_ref() + .expect("This surface has not been configured yet."); + + let descriptor = TextureDescriptor { + label: None, + size: Extent3d { + width: config.width, + height: config.height, + depth_or_array_layers: 1, + }, + format: config.format, + usage: config.usage, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + view_formats: &[], + }; + + texture_id + .zip(texture_data) + .map(|(id, data)| SurfaceTexture { + texture: Texture { + context: Arc::clone(&self.context), + id, + data, + owned: false, + descriptor, + }, + suboptimal, + presented: false, + detail, + }) + .ok_or(SurfaceError::Lost) + } + + /// Returns the inner hal Surface using a callback. 
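// A sketch of the typical first-time setup built from the methods above;
// `surface`, `adapter`, `device`, and the window's `size` are assumed from
// surface creation.
let config = surface
    .get_default_config(&adapter, size.width, size.height)
    .expect("surface is not supported by this adapter");
surface.configure(&device, &config);
let frame = surface.get_current_texture().expect("failed to acquire frame");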
The hal surface will be `None` if the + /// backend type argument does not match with this wgpu Surface + /// + /// # Safety + /// + /// - The raw handle obtained from the hal Surface must not be manually destroyed + #[cfg(wgpu_core)] + pub unsafe fn as_hal) -> R, R>( + &mut self, + hal_surface_callback: F, + ) -> Option { + self.context + .as_any() + .downcast_ref::() + .map(|ctx| unsafe { + ctx.surface_as_hal::( + self.surface_data.downcast_ref().unwrap(), + hal_surface_callback, + ) + }) + } +} + +// This custom implementation is required because [`Surface::_surface`] doesn't +// require [`Debug`](fmt::Debug), which we should not require from the user. +impl<'window> fmt::Debug for Surface<'window> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Surface") + .field("context", &self.context) + .field( + "_handle_source", + &if self._handle_source.is_some() { + "Some" + } else { + "None" + }, + ) + .field("id", &self.id) + .field("data", &self.surface_data) + .field("config", &self.config) + .finish() + } +} + +#[cfg(send_sync)] +static_assertions::assert_impl_all!(Surface<'_>: Send, Sync); + +impl Drop for Surface<'_> { + fn drop(&mut self) { + if !thread::panicking() { + self.context + .surface_drop(&self.id, self.surface_data.as_ref()) + } + } +} + +/// Super trait for window handles as used in [`SurfaceTarget`]. +pub trait WindowHandle: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {} + +impl WindowHandle for T where T: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {} + +/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with safe surface creation. +/// +/// This is either a window or an actual web canvas depending on the platform and +/// enabled features. +/// Refer to the individual variants for more information. +/// +/// See also [`SurfaceTargetUnsafe`] for unsafe variants. +#[non_exhaustive] +pub enum SurfaceTarget<'window> { + /// Window handle producer. + /// + /// If the specified display and window handle are not supported by any of the backends, then the surface + /// will not be supported by any adapters. + /// + /// # Errors + /// + /// - On WebGL2: surface creation returns an error if the browser does not support WebGL2, + /// or declines to provide GPU access (such as due to a resource shortage). + /// + /// # Panics + /// + /// - On macOS/Metal: will panic if not called on the main thread. + /// - On web: will panic if the `raw_window_handle` does not properly refer to a + /// canvas element. + Window(Box), + + /// Surface from a `web_sys::HtmlCanvasElement`. + /// + /// The `canvas` argument must be a valid `` element to + /// create a surface upon. + /// + /// # Errors + /// + /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2, + /// or declines to provide GPU access (such as due to a resource shortage). + #[cfg(any(webgpu, webgl))] + Canvas(web_sys::HtmlCanvasElement), + + /// Surface from a `web_sys::OffscreenCanvas`. + /// + /// The `canvas` argument must be a valid `OffscreenCanvas` object + /// to create a surface upon. + /// + /// # Errors + /// + /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2, + /// or declines to provide GPU access (such as due to a resource shortage). 
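// A sketch of safe surface creation via `SurfaceTarget::Window`; `instance` is
// assumed, and `window` is any owned handle implementing the `WindowHandle`
// super trait above (for example an `Arc`-wrapped winit window).
let surface = instance
    .create_surface(wgpu::SurfaceTarget::Window(Box::new(window)))
    .expect("failed to create surface");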
+ #[cfg(any(webgpu, webgl))] + OffscreenCanvas(web_sys::OffscreenCanvas), +} + +impl<'a, T> From for SurfaceTarget<'a> +where + T: WindowHandle + 'a, +{ + fn from(window: T) -> Self { + Self::Window(Box::new(window)) + } +} + +/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with unsafe surface creation. +/// +/// This is either a window or an actual web canvas depending on the platform and +/// enabled features. +/// Refer to the individual variants for more information. +/// +/// See also [`SurfaceTarget`] for safe variants. +#[non_exhaustive] +pub enum SurfaceTargetUnsafe { + /// Raw window & display handle. + /// + /// If the specified display and window handle are not supported by any of the backends, then the surface + /// will not be supported by any adapters. + /// + /// # Safety + /// + /// - `raw_window_handle` & `raw_display_handle` must be valid objects to create a surface upon. + /// - `raw_window_handle` & `raw_display_handle` must remain valid until after the returned + /// [`Surface`] is dropped. + RawHandle { + /// Raw display handle, underlying display must outlive the surface created from this. + raw_display_handle: raw_window_handle::RawDisplayHandle, + + /// Raw display handle, underlying window must outlive the surface created from this. + raw_window_handle: raw_window_handle::RawWindowHandle, + }, + + /// Surface from `CoreAnimationLayer`. + /// + /// # Safety + /// + /// - layer must be a valid object to create a surface upon. + #[cfg(metal)] + CoreAnimationLayer(*mut std::ffi::c_void), + + /// Surface from `IDCompositionVisual`. + /// + /// # Safety + /// + /// - visual must be a valid IDCompositionVisual to create a surface upon. + #[cfg(dx12)] + CompositionVisual(*mut std::ffi::c_void), + + /// Surface from DX12 `SurfaceHandle`. + /// + /// # Safety + /// + /// - surface_handle must be a valid SurfaceHandle to create a surface upon. + #[cfg(dx12)] + SurfaceHandle(*mut std::ffi::c_void), + + /// Surface from DX12 `SwapChainPanel`. + /// + /// # Safety + /// + /// - visual must be a valid SwapChainPanel to create a surface upon. + #[cfg(dx12)] + SwapChainPanel(*mut std::ffi::c_void), +} + +impl SurfaceTargetUnsafe { + /// Creates a [`SurfaceTargetUnsafe::RawHandle`] from a window. + /// + /// # Safety + /// + /// - `window` must outlive the resulting surface target + /// (and subsequently the surface created for this target). + pub unsafe fn from_window(window: &T) -> Result + where + T: HasDisplayHandle + HasWindowHandle, + { + Ok(Self::RawHandle { + raw_display_handle: window.display_handle()?.as_raw(), + raw_window_handle: window.window_handle()?.as_raw(), + }) + } +} + +/// [`Instance::create_surface()`] or a related function failed. +#[derive(Clone, Debug)] +#[non_exhaustive] +pub struct CreateSurfaceError { + pub(crate) inner: CreateSurfaceErrorKind, +} +#[derive(Clone, Debug)] +pub(crate) enum CreateSurfaceErrorKind { + /// Error from [`wgpu_hal`]. + #[cfg(wgpu_core)] + Hal(wgc::instance::CreateSurfaceError), + + /// Error from WebGPU surface creation. + #[allow(dead_code)] // may be unused depending on target and features + Web(String), + + /// Error when trying to get a [`DisplayHandle`] or a [`WindowHandle`] from + /// `raw_window_handle`. 
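// A sketch of the unsafe path via `from_window` above, inside a function
// returning `Result<_, Box<dyn std::error::Error>>`; the caller must guarantee
// that `window` outlives the surface.
let surface = unsafe {
    let target = wgpu::SurfaceTargetUnsafe::from_window(&window)?;
    instance.create_surface_unsafe(target)?
};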
+ RawHandle(raw_window_handle::HandleError), +} +static_assertions::assert_impl_all!(CreateSurfaceError: Send, Sync); + +impl fmt::Display for CreateSurfaceError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.inner { + #[cfg(wgpu_core)] + CreateSurfaceErrorKind::Hal(e) => e.fmt(f), + CreateSurfaceErrorKind::Web(e) => e.fmt(f), + CreateSurfaceErrorKind::RawHandle(e) => e.fmt(f), + } + } +} + +impl error::Error for CreateSurfaceError { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match &self.inner { + #[cfg(wgpu_core)] + CreateSurfaceErrorKind::Hal(e) => e.source(), + CreateSurfaceErrorKind::Web(_) => None, + CreateSurfaceErrorKind::RawHandle(e) => e.source(), + } + } +} + +#[cfg(wgpu_core)] +impl From for CreateSurfaceError { + fn from(e: wgc::instance::CreateSurfaceError) -> Self { + Self { + inner: CreateSurfaceErrorKind::Hal(e), + } + } +} diff --git a/wgpu/src/api/surface_texture.rs b/wgpu/src/api/surface_texture.rs new file mode 100644 index 0000000000..417ad56169 --- /dev/null +++ b/wgpu/src/api/surface_texture.rs @@ -0,0 +1,84 @@ +use std::{error, fmt, thread}; + +use crate::context::DynContext; +use crate::*; + +/// Surface texture that can be rendered to. +/// Result of a successful call to [`Surface::get_current_texture`]. +/// +/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification, +/// the [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context) provides +/// a texture without any additional information. +#[derive(Debug)] +pub struct SurfaceTexture { + /// Accessible view of the frame. + pub texture: Texture, + /// `true` if the acquired buffer can still be used for rendering, + /// but should be recreated for maximum performance. + pub suboptimal: bool, + pub(crate) presented: bool, + pub(crate) detail: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync); + +impl SurfaceTexture { + /// Schedule this texture to be presented on the owning surface. + /// + /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`]. + /// + /// # Platform dependent behavior + /// + /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface + /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter + /// or synchronize other double buffered state, then these operations should be done before the call to `present`. + pub fn present(mut self) { + self.presented = true; + DynContext::surface_present( + &*self.texture.context, + // This call to as_ref is essential because we want the DynContext implementation to see the inner + // value of the Box (T::SurfaceOutputDetail), not the Box itself. + self.detail.as_ref(), + ); + } +} + +impl Drop for SurfaceTexture { + fn drop(&mut self) { + if !self.presented && !thread::panicking() { + DynContext::surface_texture_discard( + &*self.texture.context, + // This call to as_ref is essential because we want the DynContext implementation to see the inner + // value of the Box (T::SurfaceOutputDetail), not the Box itself. + self.detail.as_ref(), + ); + } + } +} + +/// Result of an unsuccessful call to [`Surface::get_current_texture`]. +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum SurfaceError { + /// A timeout was encountered while trying to acquire the next frame. + Timeout, + /// The underlying surface has changed, and therefore the swap chain must be updated. 
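// A sketch of the submit-then-present ordering required by `present` above;
// `device`, `queue`, and an acquired `frame: SurfaceTexture` are assumed.
let view = frame.texture.create_view(&wgpu::TextureViewDescriptor::default());
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
{
    // Record a pass that clears the frame; real draws would go here.
    let _pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
        label: None,
        color_attachments: &[Some(wgpu::RenderPassColorAttachment {
            view: &view,
            resolve_target: None,
            ops: wgpu::Operations {
                load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
                store: wgpu::StoreOp::Store,
            },
        })],
        depth_stencil_attachment: None,
        timestamp_writes: None,
        occlusion_query_set: None,
    });
}
queue.submit(Some(encoder.finish()));
frame.present(); // only after the rendering work has been submitted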
+ Outdated, + /// The swap chain has been lost and needs to be recreated. + Lost, + /// There is no more memory left to allocate a new frame. + OutOfMemory, +} +static_assertions::assert_impl_all!(SurfaceError: Send, Sync); + +impl fmt::Display for SurfaceError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", match self { + Self::Timeout => "A timeout was encountered while trying to acquire the next frame", + Self::Outdated => "The underlying surface has changed, and therefore the swap chain must be updated", + Self::Lost => "The swap chain has been lost and needs to be recreated", + Self::OutOfMemory => "There is no more memory left to allocate a new frame", + }) + } +} + +impl error::Error for SurfaceError {} diff --git a/wgpu/src/api/texture.rs b/wgpu/src/api/texture.rs new file mode 100644 index 0000000000..98295b9396 --- /dev/null +++ b/wgpu/src/api/texture.rs @@ -0,0 +1,160 @@ +use std::{sync::Arc, thread}; + +use crate::context::{DynContext, ObjectId}; +use crate::*; + +/// Handle to a texture on the GPU. +/// +/// It can be created with [`Device::create_texture`]. +/// +/// Corresponds to [WebGPU `GPUTexture`](https://gpuweb.github.io/gpuweb/#texture-interface). +#[derive(Debug)] +pub struct Texture { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, + pub(crate) owned: bool, + pub(crate) descriptor: TextureDescriptor<'static>, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(Texture: Send, Sync); + +impl Texture { + /// Returns a globally-unique identifier for this `Texture`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id { + Id::new(self.id) + } + + /// Returns the inner hal Texture using a callback. The hal texture will be `None` if the + /// backend type argument does not match with this wgpu Texture + /// + /// # Safety + /// + /// - The raw handle obtained from the hal Texture must not be manually destroyed + #[cfg(wgpu_core)] + pub unsafe fn as_hal) -> R, R>( + &self, + hal_texture_callback: F, + ) -> R { + let texture = self.data.as_ref().downcast_ref().unwrap(); + + if let Some(ctx) = self + .context + .as_any() + .downcast_ref::() + { + unsafe { ctx.texture_as_hal::(texture, hal_texture_callback) } + } else { + hal_texture_callback(None) + } + } + + /// Creates a view of this texture. + pub fn create_view(&self, desc: &TextureViewDescriptor<'_>) -> TextureView { + let (id, data) = + DynContext::texture_create_view(&*self.context, &self.id, self.data.as_ref(), desc); + TextureView { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Destroy the associated native resources as soon as possible. + pub fn destroy(&self) { + DynContext::texture_destroy(&*self.context, &self.id, self.data.as_ref()); + } + + /// Make an `ImageCopyTexture` representing the whole texture. + pub fn as_image_copy(&self) -> ImageCopyTexture<'_> { + ImageCopyTexture { + texture: self, + mip_level: 0, + origin: Origin3d::ZERO, + aspect: TextureAspect::All, + } + } + + /// Returns the size of this `Texture`. + /// + /// This is always equal to the `size` that was specified when creating the texture. + pub fn size(&self) -> Extent3d { + self.descriptor.size + } + + /// Returns the width of this `Texture`. + /// + /// This is always equal to the `size.width` that was specified when creating the texture. 
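// A common render-loop reaction to the `SurfaceError` variants above, as a
// sketch; `surface`, `device`, and the last-used `config` are assumed.
match surface.get_current_texture() {
    Ok(frame) => { /* render to `frame` and present it */ }
    Err(wgpu::SurfaceError::Timeout) => {} // skip this frame
    Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
        surface.configure(&device, &config); // recreate the swap chain
    }
    Err(wgpu::SurfaceError::OutOfMemory) => panic!("out of GPU memory"),
}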
+ pub fn width(&self) -> u32 { + self.descriptor.size.width + } + + /// Returns the height of this `Texture`. + /// + /// This is always equal to the `size.height` that was specified when creating the texture. + pub fn height(&self) -> u32 { + self.descriptor.size.height + } + + /// Returns the depth or layer count of this `Texture`. + /// + /// This is always equal to the `size.depth_or_array_layers` that was specified when creating the texture. + pub fn depth_or_array_layers(&self) -> u32 { + self.descriptor.size.depth_or_array_layers + } + + /// Returns the mip_level_count of this `Texture`. + /// + /// This is always equal to the `mip_level_count` that was specified when creating the texture. + pub fn mip_level_count(&self) -> u32 { + self.descriptor.mip_level_count + } + + /// Returns the sample_count of this `Texture`. + /// + /// This is always equal to the `sample_count` that was specified when creating the texture. + pub fn sample_count(&self) -> u32 { + self.descriptor.sample_count + } + + /// Returns the dimension of this `Texture`. + /// + /// This is always equal to the `dimension` that was specified when creating the texture. + pub fn dimension(&self) -> TextureDimension { + self.descriptor.dimension + } + + /// Returns the format of this `Texture`. + /// + /// This is always equal to the `format` that was specified when creating the texture. + pub fn format(&self) -> TextureFormat { + self.descriptor.format + } + + /// Returns the allowed usages of this `Texture`. + /// + /// This is always equal to the `usage` that was specified when creating the texture. + pub fn usage(&self) -> TextureUsages { + self.descriptor.usage + } +} + +impl Drop for Texture { + fn drop(&mut self) { + if self.owned && !thread::panicking() { + self.context.texture_drop(&self.id, self.data.as_ref()); + } + } +} + +/// Describes a [`Texture`]. +/// +/// For use with [`Device::create_texture`]. +/// +/// Corresponds to [WebGPU `GPUTextureDescriptor`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gputexturedescriptor). +pub type TextureDescriptor<'a> = wgt::TextureDescriptor, &'a [TextureFormat]>; +static_assertions::assert_impl_all!(TextureDescriptor<'_>: Send, Sync); diff --git a/wgpu/src/api/texture_view.rs b/wgpu/src/api/texture_view.rs new file mode 100644 index 0000000000..b6e60a3c60 --- /dev/null +++ b/wgpu/src/api/texture_view.rs @@ -0,0 +1,98 @@ +use std::{sync::Arc, thread}; + +use crate::context::ObjectId; +use crate::*; + +/// Handle to a texture view. +/// +/// A `TextureView` object describes a texture and associated metadata needed by a +/// [`RenderPipeline`] or [`BindGroup`]. +/// +/// Corresponds to [WebGPU `GPUTextureView`](https://gpuweb.github.io/gpuweb/#gputextureview). +#[derive(Debug)] +pub struct TextureView { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(TextureView: Send, Sync); + +impl TextureView { + /// Returns a globally-unique identifier for this `TextureView`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id { + Id::new(self.id) + } + + /// Returns the inner hal TextureView using a callback. 
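// A sketch combining `TextureDescriptor`, `as_image_copy`, and the size
// accessors above to upload one RGBA8 image; `device`, `queue`, and a tightly
// packed `pixels: &[u8]` are assumed.
let texture = device.create_texture(&wgpu::TextureDescriptor {
    label: Some("color map"),
    size: wgpu::Extent3d { width: 256, height: 256, depth_or_array_layers: 1 },
    mip_level_count: 1,
    sample_count: 1,
    dimension: wgpu::TextureDimension::D2,
    format: wgpu::TextureFormat::Rgba8UnormSrgb,
    usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
    view_formats: &[],
});
queue.write_texture(
    texture.as_image_copy(),
    pixels,
    wgpu::ImageDataLayout {
        offset: 0,
        bytes_per_row: Some(4 * texture.width()),
        rows_per_image: None,
    },
    texture.size(),
);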
The hal texture will be `None` if the + /// backend type argument does not match with this wgpu Texture + /// + /// # Safety + /// + /// - The raw handle obtained from the hal TextureView must not be manually destroyed + #[cfg(wgpu_core)] + pub unsafe fn as_hal) -> R, R>( + &self, + hal_texture_view_callback: F, + ) -> R { + use wgc::id::TextureViewId; + + let texture_view_id = TextureViewId::from(self.id); + + if let Some(ctx) = self + .context + .as_any() + .downcast_ref::() + { + unsafe { + ctx.texture_view_as_hal::(texture_view_id, hal_texture_view_callback) + } + } else { + hal_texture_view_callback(None) + } + } +} + +impl Drop for TextureView { + fn drop(&mut self) { + if !thread::panicking() { + self.context.texture_view_drop(&self.id, self.data.as_ref()); + } + } +} + +/// Describes a [`TextureView`]. +/// +/// For use with [`Texture::create_view`]. +/// +/// Corresponds to [WebGPU `GPUTextureViewDescriptor`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gputextureviewdescriptor). +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct TextureViewDescriptor<'a> { + /// Debug label of the texture view. This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Format of the texture view. Either must be the same as the texture format or in the list + /// of `view_formats` in the texture's descriptor. + pub format: Option, + /// The dimension of the texture view. For 1D textures, this must be `D1`. For 2D textures it must be one of + /// `D2`, `D2Array`, `Cube`, and `CubeArray`. For 3D textures it must be `D3` + pub dimension: Option, + /// Aspect of the texture. Color textures must be [`TextureAspect::All`]. + pub aspect: TextureAspect, + /// Base mip level. + pub base_mip_level: u32, + /// Mip level count. + /// If `Some(count)`, `base_mip_level + count` must be less or equal to underlying texture mip count. + /// If `None`, considered to include the rest of the mipmap levels, but at least 1 in total. + pub mip_level_count: Option, + /// Base array layer. + pub base_array_layer: u32, + /// Layer count. + /// If `Some(count)`, `base_array_layer + count` must be less or equal to the underlying array count. + /// If `None`, considered to include the rest of the array layers, but at least 1 in total. 
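// A sketch for the descriptor below (its final `array_layer_count` field
// follows): a view of a single mip level and layer of an assumed array `texture`.
let view = texture.create_view(&wgpu::TextureViewDescriptor {
    label: Some("mip 2, layer 0"),
    dimension: Some(wgpu::TextureViewDimension::D2),
    base_mip_level: 2,
    mip_level_count: Some(1),
    base_array_layer: 0,
    array_layer_count: Some(1),
    ..Default::default()
});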
+ pub array_layer_count: Option, +} +static_assertions::assert_impl_all!(TextureViewDescriptor<'_>: Send, Sync); diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index b61ad431f6..8568d2f550 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -261,7 +261,7 @@ fn map_texture_format(texture_format: wgt::TextureFormat) -> webgpu_sys::GpuText unimplemented!("Current version of web_sys is missing {texture_format:?}") } TextureFormat::Rgb10a2Unorm => tf::Rgb10a2unorm, - TextureFormat::Rg11b10Float => tf::Rg11b10ufloat, + TextureFormat::Rg11b10UFloat => tf::Rg11b10ufloat, // 64-bit formats TextureFormat::Rg32Uint => tf::Rg32uint, TextureFormat::Rg32Sint => tf::Rg32sint, @@ -726,7 +726,7 @@ fn map_map_mode(mode: crate::MapMode) -> u32 { } } -const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 11] = [ +const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 12] = [ //TODO: update the name ( wgt::Features::DEPTH_CLIP_CONTROL, @@ -740,6 +740,10 @@ const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 11] = [ wgt::Features::TEXTURE_COMPRESSION_BC, webgpu_sys::GpuFeatureName::TextureCompressionBc, ), + ( + wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D, + webgpu_sys::GpuFeatureName::TextureCompressionBcSliced3d, + ), ( wgt::Features::TEXTURE_COMPRESSION_ETC2, webgpu_sys::GpuFeatureName::TextureCompressionEtc2, @@ -1495,15 +1499,11 @@ impl crate::context::Context for ContextWebGpu { ) } - fn surface_present(&self, _texture: &Self::TextureId, _detail: &Self::SurfaceOutputDetail) { + fn surface_present(&self, _detail: &Self::SurfaceOutputDetail) { // Swapchain is presented automatically } - fn surface_texture_discard( - &self, - _texture: &Self::TextureId, - _detail: &Self::SurfaceOutputDetail, - ) { + fn surface_texture_discard(&self, _detail: &Self::SurfaceOutputDetail) { // Can't really discard this on the Web } @@ -1887,7 +1887,9 @@ impl crate::context::Context for ContextWebGpu { &mapped_vertex_state, desc.vertex.compilation_options.constants, ); - mapped_vertex_state.entry_point(desc.vertex.entry_point); + if let Some(ep) = desc.vertex.entry_point { + mapped_vertex_state.entry_point(ep); + } let buffers = desc .vertex @@ -1964,7 +1966,9 @@ impl crate::context::Context for ContextWebGpu { let mut mapped_fragment_desc = webgpu_sys::GpuFragmentState::new(&module.0.module, &targets); insert_constants_map(&mapped_vertex_state, frag.compilation_options.constants); - mapped_fragment_desc.entry_point(frag.entry_point); + if let Some(ep) = frag.entry_point { + mapped_fragment_desc.entry_point(ep); + } mapped_desc.fragment(&mapped_fragment_desc); } @@ -1991,7 +1995,9 @@ impl crate::context::Context for ContextWebGpu { let mut mapped_compute_stage = webgpu_sys::GpuProgrammableStage::new(&shader_module.0.module); insert_constants_map(&mapped_compute_stage, desc.compilation_options.constants); - mapped_compute_stage.entry_point(desc.entry_point); + if let Some(ep) = desc.entry_point { + mapped_compute_stage.entry_point(ep); + } let auto_layout = wasm_bindgen::JsValue::from(webgpu_sys::GpuAutoLayoutMode::Auto); let mut mapped_desc = webgpu_sys::GpuComputePipelineDescriptor::new( &match desc.layout { @@ -2993,6 +2999,14 @@ impl crate::context::Context for ContextWebGpu { Default::default() } + fn device_generate_allocator_report( + &self, + _device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> Option { + None + } + fn pipeline_cache_get_data( &self, _: &Self::PipelineCacheId, diff --git 
a/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs b/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs index ed39a14c51..ef2119a88b 100644 --- a/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs +++ b/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs @@ -21,6 +21,7 @@ pub enum GpuFeatureName { DepthClipControl = "depth-clip-control", Depth32floatStencil8 = "depth32float-stencil8", TextureCompressionBc = "texture-compression-bc", + TextureCompressionBcSliced3d = "texture-compression-bc-sliced-3d", TextureCompressionEtc2 = "texture-compression-etc2", TextureCompressionAstc = "texture-compression-astc", TimestampQuery = "timestamp-query", diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index e9d5b51f50..4d8b820f70 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -25,8 +25,8 @@ use std::{ sync::Arc, }; use wgc::{ - command::bundle_ffi::*, device::DeviceLostClosure, gfx_select, id::CommandEncoderId, - id::TextureViewId, pipeline::CreateShaderModuleError, + command::bundle_ffi::*, device::DeviceLostClosure, id::CommandEncoderId, id::TextureViewId, + pipeline::CreateShaderModuleError, }; use wgt::WasmNotSendSync; @@ -72,7 +72,7 @@ impl ContextWgpuCore { &self, hal_adapter: hal::ExposedAdapter, ) -> wgc::id::AdapterId { - unsafe { self.0.create_adapter_from_hal(hal_adapter, None) } + unsafe { self.0.create_adapter_from_hal(hal_adapter.into(), None) } } pub unsafe fn adapter_as_hal< @@ -105,12 +105,15 @@ impl ContextWgpuCore { desc: &crate::DeviceDescriptor<'_>, trace_dir: Option<&std::path::Path>, ) -> Result<(Device, Queue), crate::RequestDeviceError> { + if trace_dir.is_some() { + log::error!("Feature 'trace' has been removed temporarily, see https://github.com/gfx-rs/wgpu/issues/5974"); + } let (device_id, queue_id, error) = unsafe { self.0.create_device_from_hal( *adapter, - hal_device, + hal_device.into(), &desc.map_label(|l| l.map(Borrowed)), - trace_dir, + None, None, None, ) @@ -140,7 +143,7 @@ impl ContextWgpuCore { let descriptor = desc.map_label_and_view_formats(|l| l.map(Borrowed), |v| v.to_vec()); let (id, error) = unsafe { self.0 - .create_texture_from_hal::(hal_texture, device.id, &descriptor, None) + .create_texture_from_hal(Box::new(hal_texture), device.id, &descriptor, None) }; if let Some(cause) = error { self.handle_error( @@ -486,13 +489,13 @@ impl Queue { #[derive(Debug)] pub struct ComputePass { - pass: Box, + pass: wgc::command::ComputePass, error_sink: ErrorSink, } #[derive(Debug)] pub struct RenderPass { - pass: Box, + pass: wgc::command::RenderPass, error_sink: ErrorSink, } @@ -559,7 +562,7 @@ impl crate::Context for ContextWgpuCore { type SurfaceId = wgc::id::SurfaceId; type SurfaceData = Surface; type SurfaceOutputDetail = SurfaceOutputDetail; - type SubmissionIndexData = wgc::device::queue::WrappedSubmissionIndex; + type SubmissionIndexData = wgc::SubmissionIndex; type RequestAdapterFuture = Ready>; @@ -655,13 +658,16 @@ impl crate::Context for ContextWgpuCore { desc: &crate::DeviceDescriptor<'_>, trace_dir: Option<&std::path::Path>, ) -> Self::RequestDeviceFuture { - let (device_id, queue_id, error) = wgc::gfx_select!(*adapter => self.0.adapter_request_device( + if trace_dir.is_some() { + log::error!("Feature 'trace' has been removed temporarily, see https://github.com/gfx-rs/wgpu/issues/5974"); + } + let (device_id, queue_id, error) = self.0.adapter_request_device( *adapter, &desc.map_label(|l| l.map(Borrowed)), - trace_dir, None, - None - )); + None, + None, + ); if let 
Some(err) = error { return ready(Err(err.into())); } @@ -675,7 +681,7 @@ impl crate::Context for ContextWgpuCore { id: queue_id, error_sink, }; - ready(Ok((device_id, device, device_id.into_queue_id(), queue))) + ready(Ok((device_id, device, queue_id, queue))) } fn instance_poll_all_devices(&self, force_wait: bool) -> bool { @@ -692,7 +698,7 @@ impl crate::Context for ContextWgpuCore { surface: &Self::SurfaceId, _surface_data: &Self::SurfaceData, ) -> bool { - match wgc::gfx_select!(adapter => self.0.adapter_is_surface_supported(*adapter, *surface)) { + match self.0.adapter_is_surface_supported(*adapter, *surface) { Ok(result) => result, Err(err) => self.handle_error_fatal(err, "Adapter::is_surface_supported"), } @@ -703,7 +709,7 @@ impl crate::Context for ContextWgpuCore { adapter: &Self::AdapterId, _adapter_data: &Self::AdapterData, ) -> Features { - match wgc::gfx_select!(*adapter => self.0.adapter_features(*adapter)) { + match self.0.adapter_features(*adapter) { Ok(features) => features, Err(err) => self.handle_error_fatal(err, "Adapter::features"), } @@ -714,7 +720,7 @@ impl crate::Context for ContextWgpuCore { adapter: &Self::AdapterId, _adapter_data: &Self::AdapterData, ) -> Limits { - match wgc::gfx_select!(*adapter => self.0.adapter_limits(*adapter)) { + match self.0.adapter_limits(*adapter) { Ok(limits) => limits, Err(err) => self.handle_error_fatal(err, "Adapter::limits"), } @@ -725,7 +731,7 @@ impl crate::Context for ContextWgpuCore { adapter: &Self::AdapterId, _adapter_data: &Self::AdapterData, ) -> DownlevelCapabilities { - match wgc::gfx_select!(*adapter => self.0.adapter_downlevel_capabilities(*adapter)) { + match self.0.adapter_downlevel_capabilities(*adapter) { Ok(downlevel) => downlevel, Err(err) => self.handle_error_fatal(err, "Adapter::downlevel_properties"), } @@ -736,7 +742,7 @@ impl crate::Context for ContextWgpuCore { adapter: &wgc::id::AdapterId, _adapter_data: &Self::AdapterData, ) -> AdapterInfo { - match wgc::gfx_select!(*adapter => self.0.adapter_get_info(*adapter)) { + match self.0.adapter_get_info(*adapter) { Ok(info) => info, Err(err) => self.handle_error_fatal(err, "Adapter::get_info"), } @@ -748,8 +754,7 @@ impl crate::Context for ContextWgpuCore { _adapter_data: &Self::AdapterData, format: wgt::TextureFormat, ) -> wgt::TextureFormatFeatures { - match wgc::gfx_select!(*adapter => self.0.adapter_get_texture_format_features(*adapter, format)) - { + match self.0.adapter_get_texture_format_features(*adapter, format) { Ok(info) => info, Err(err) => self.handle_error_fatal(err, "Adapter::get_texture_format_features"), } @@ -760,7 +765,7 @@ impl crate::Context for ContextWgpuCore { adapter: &Self::AdapterId, _adapter_data: &Self::AdapterData, ) -> wgt::PresentationTimestamp { - match wgc::gfx_select!(*adapter => self.0.adapter_get_presentation_timestamp(*adapter)) { + match self.0.adapter_get_presentation_timestamp(*adapter) { Ok(timestamp) => timestamp, Err(err) => self.handle_error_fatal(err, "Adapter::correlate_presentation_timestamp"), } @@ -773,7 +778,7 @@ impl crate::Context for ContextWgpuCore { adapter: &Self::AdapterId, _adapter_data: &Self::AdapterData, ) -> wgt::SurfaceCapabilities { - match wgc::gfx_select!(adapter => self.0.surface_get_capabilities(*surface, *adapter)) { + match self.0.surface_get_capabilities(*surface, *adapter) { Ok(caps) => caps, Err(wgc::instance::GetSurfaceSupportError::Unsupported) => { wgt::SurfaceCapabilities::default() @@ -790,7 +795,7 @@ impl crate::Context for ContextWgpuCore { _device_data: &Self::DeviceData, config: 
&crate::SurfaceConfiguration, ) { - let error = wgc::gfx_select!(device => self.0.surface_configure(*surface, *device, config)); + let error = self.0.surface_configure(*surface, *device, config); if let Some(e) = error { self.handle_error_fatal(e, "Surface::configure"); } else { @@ -801,20 +806,14 @@ impl crate::Context for ContextWgpuCore { fn surface_get_current_texture( &self, surface: &Self::SurfaceId, - surface_data: &Self::SurfaceData, + _surface_data: &Self::SurfaceData, ) -> ( Option, Option, SurfaceStatus, Self::SurfaceOutputDetail, ) { - let device_id = surface_data - .configured_device - .lock() - .expect("Surface was not configured?"); - match wgc::gfx_select!( - device_id => self.0.surface_get_current_texture(*surface, None) - ) { + match self.0.surface_get_current_texture(*surface, None) { Ok(wgc::present::SurfaceOutput { status, texture_id }) => { let (id, data) = { ( @@ -839,19 +838,15 @@ impl crate::Context for ContextWgpuCore { } } - fn surface_present(&self, texture: &Self::TextureId, detail: &Self::SurfaceOutputDetail) { - match wgc::gfx_select!(texture => self.0.surface_present(detail.surface_id)) { + fn surface_present(&self, detail: &Self::SurfaceOutputDetail) { + match self.0.surface_present(detail.surface_id) { Ok(_status) => (), Err(err) => self.handle_error_fatal(err, "Surface::present"), } } - fn surface_texture_discard( - &self, - texture: &Self::TextureId, - detail: &Self::SurfaceOutputDetail, - ) { - match wgc::gfx_select!(texture => self.0.surface_texture_discard(detail.surface_id)) { + fn surface_texture_discard(&self, detail: &Self::SurfaceOutputDetail) { + match self.0.surface_texture_discard(detail.surface_id) { Ok(_status) => (), Err(err) => self.handle_error_fatal(err, "Surface::discard_texture"), } @@ -862,14 +857,14 @@ impl crate::Context for ContextWgpuCore { device: &Self::DeviceId, _device_data: &Self::DeviceData, ) -> Features { - match wgc::gfx_select!(device => self.0.device_features(*device)) { + match self.0.device_features(*device) { Ok(features) => features, Err(err) => self.handle_error_fatal(err, "Device::features"), } } fn device_limits(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) -> Limits { - match wgc::gfx_select!(device => self.0.device_limits(*device)) { + match self.0.device_limits(*device) { Ok(limits) => limits, Err(err) => self.handle_error_fatal(err, "Device::limits"), } @@ -880,7 +875,7 @@ impl crate::Context for ContextWgpuCore { device: &Self::DeviceId, _device_data: &Self::DeviceData, ) -> DownlevelCapabilities { - match wgc::gfx_select!(device => self.0.device_downlevel_properties(*device)) { + match self.0.device_downlevel_properties(*device) { Ok(limits) => limits, Err(err) => self.handle_error_fatal(err, "Device::downlevel_properties"), } @@ -932,9 +927,9 @@ impl crate::Context for ContextWgpuCore { ShaderSource::Naga(module) => wgc::pipeline::ShaderModuleSource::Naga(module), ShaderSource::Dummy(_) => panic!("found `ShaderSource::Dummy`"), }; - let (id, error) = wgc::gfx_select!( - device => self.0.device_create_shader_module(*device, &descriptor, source, None) - ); + let (id, error) = self + .0 + .device_create_shader_module(*device, &descriptor, source, None); let compilation_info = match error { Some(cause) => { self.handle_error( @@ -963,9 +958,14 @@ impl crate::Context for ContextWgpuCore { // runtime checks shader_bound_checks: unsafe { wgt::ShaderBoundChecks::unchecked() }, }; - let (id, error) = wgc::gfx_select!( - device => self.0.device_create_shader_module_spirv(*device, &descriptor, 
Borrowed(&desc.source), None) - ); + let (id, error) = unsafe { + self.0.device_create_shader_module_spirv( + *device, + &descriptor, + Borrowed(&desc.source), + None, + ) + }; let compilation_info = match error { Some(cause) => { self.handle_error( @@ -991,9 +991,9 @@ impl crate::Context for ContextWgpuCore { label: desc.label.map(Borrowed), entries: Borrowed(desc.entries), }; - let (id, error) = wgc::gfx_select!( - device => self.0.device_create_bind_group_layout(*device, &descriptor, None) - ); + let (id, error) = self + .0 + .device_create_bind_group_layout(*device, &descriptor, None); if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -1105,11 +1105,7 @@ impl crate::Context for ContextWgpuCore { entries: Borrowed(&entries), }; - let (id, error) = wgc::gfx_select!(device => self.0.device_create_bind_group( - *device, - &descriptor, - None - )); + let (id, error) = self.0.device_create_bind_group(*device, &descriptor, None); if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -1146,11 +1142,9 @@ impl crate::Context for ContextWgpuCore { push_constant_ranges: Borrowed(desc.push_constant_ranges), }; - let (id, error) = wgc::gfx_select!(device => self.0.device_create_pipeline_layout( - *device, - &descriptor, - None - )); + let (id, error) = self + .0 + .device_create_pipeline_layout(*device, &descriptor, None); if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -1180,29 +1174,18 @@ impl crate::Context for ContextWgpuCore { }) .collect(); - let implicit_pipeline_ids = match desc.layout { - Some(_) => None, - None => Some(wgc::device::ImplicitPipelineIds { - root_id: None, - group_ids: &[None; wgc::MAX_BIND_GROUPS], - }), - }; let descriptor = pipe::RenderPipelineDescriptor { label: desc.label.map(Borrowed), layout: desc.layout.map(|l| l.id.into()), vertex: pipe::VertexState { stage: pipe::ProgrammableStageDescriptor { module: desc.vertex.module.id.into(), - entry_point: Some(Borrowed(desc.vertex.entry_point)), + entry_point: desc.vertex.entry_point.map(Borrowed), constants: Borrowed(desc.vertex.compilation_options.constants), zero_initialize_workgroup_memory: desc .vertex .compilation_options .zero_initialize_workgroup_memory, - vertex_pulling_transform: desc - .vertex - .compilation_options - .vertex_pulling_transform, }, buffers: Borrowed(&vertex_buffers), }, @@ -1212,12 +1195,11 @@ impl crate::Context for ContextWgpuCore { fragment: desc.fragment.as_ref().map(|frag| pipe::FragmentState { stage: pipe::ProgrammableStageDescriptor { module: frag.module.id.into(), - entry_point: Some(Borrowed(frag.entry_point)), + entry_point: frag.entry_point.map(Borrowed), constants: Borrowed(frag.compilation_options.constants), zero_initialize_workgroup_memory: frag .compilation_options .zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }, targets: Borrowed(frag.targets), }), @@ -1225,12 +1207,9 @@ impl crate::Context for ContextWgpuCore { cache: desc.cache.map(|c| c.id.into()), }; - let (id, error) = wgc::gfx_select!(device => self.0.device_create_render_pipeline( - *device, - &descriptor, - None, - implicit_pipeline_ids - )); + let (id, error) = self + .0 + .device_create_render_pipeline(*device, &descriptor, None, None); if let Some(cause) = error { if let wgc::pipeline::CreateRenderPipelineError::Internal { stage, ref error } = cause { log::error!("Shader translation error for stage {:?}: {}", stage, error); @@ -1253,34 +1232,23 @@ impl crate::Context for ContextWgpuCore { ) -> (Self::ComputePipelineId, 
Self::ComputePipelineData) { use wgc::pipeline as pipe; - let implicit_pipeline_ids = match desc.layout { - Some(_) => None, - None => Some(wgc::device::ImplicitPipelineIds { - root_id: None, - group_ids: &[None; wgc::MAX_BIND_GROUPS], - }), - }; let descriptor = pipe::ComputePipelineDescriptor { label: desc.label.map(Borrowed), layout: desc.layout.map(|l| l.id.into()), stage: pipe::ProgrammableStageDescriptor { module: desc.module.id.into(), - entry_point: Some(Borrowed(desc.entry_point)), + entry_point: desc.entry_point.map(Borrowed), constants: Borrowed(desc.compilation_options.constants), zero_initialize_workgroup_memory: desc .compilation_options .zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }, cache: desc.cache.map(|c| c.id.into()), }; - let (id, error) = wgc::gfx_select!(device => self.0.device_create_compute_pipeline( - *device, - &descriptor, - None, - implicit_pipeline_ids - )); + let (id, error) = self + .0 + .device_create_compute_pipeline(*device, &descriptor, None, None); if let Some(cause) = error { if let wgc::pipeline::CreateComputePipelineError::Internal(ref error) = cause { log::error!( @@ -1313,11 +1281,10 @@ impl crate::Context for ContextWgpuCore { data: desc.data.map(Borrowed), fallback: desc.fallback, }; - let (id, error) = wgc::gfx_select!(device => self.0.device_create_pipeline_cache( - *device, - &descriptor, - None - )); + let (id, error) = unsafe { + self.0 + .device_create_pipeline_cache(*device, &descriptor, None) + }; if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -1335,11 +1302,9 @@ impl crate::Context for ContextWgpuCore { device_data: &Self::DeviceData, desc: &crate::BufferDescriptor<'_>, ) -> (Self::BufferId, Self::BufferData) { - let (id, error) = wgc::gfx_select!(device => self.0.device_create_buffer( - *device, - &desc.map_label(|l| l.map(Borrowed)), - None - )); + let (id, error) = + self.0 + .device_create_buffer(*device, &desc.map_label(|l| l.map(Borrowed)), None); if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -1362,11 +1327,7 @@ impl crate::Context for ContextWgpuCore { desc: &TextureDescriptor<'_>, ) -> (Self::TextureId, Self::TextureData) { let wgt_desc = desc.map_label_and_view_formats(|l| l.map(Borrowed), |v| v.to_vec()); - let (id, error) = wgc::gfx_select!(device => self.0.device_create_texture( - *device, - &wgt_desc, - None - )); + let (id, error) = self.0.device_create_texture(*device, &wgt_desc, None); if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -1406,11 +1367,7 @@ impl crate::Context for ContextWgpuCore { border_color: desc.border_color, }; - let (id, error) = wgc::gfx_select!(device => self.0.device_create_sampler( - *device, - &descriptor, - None - )); + let (id, error) = self.0.device_create_sampler(*device, &descriptor, None); if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -1427,11 +1384,9 @@ impl crate::Context for ContextWgpuCore { device_data: &Self::DeviceData, desc: &wgt::QuerySetDescriptor>, ) -> (Self::QuerySetId, Self::QuerySetData) { - let (id, error) = wgc::gfx_select!(device => self.0.device_create_query_set( - *device, - &desc.map_label(|l| l.map(Borrowed)), - None - )); + let (id, error) = + self.0 + .device_create_query_set(*device, &desc.map_label(|l| l.map(Borrowed)), None); if let Some(cause) = error { self.handle_error_nolabel(&device_data.error_sink, cause, "Device::create_query_set"); } @@ -1443,11 +1398,11 @@ impl crate::Context for ContextWgpuCore { device_data: 
&Self::DeviceData, desc: &CommandEncoderDescriptor<'_>, ) -> (Self::CommandEncoderId, Self::CommandEncoderData) { - let (id, error) = wgc::gfx_select!(device => self.0.device_create_command_encoder( + let (id, error) = self.0.device_create_command_encoder( *device, &desc.map_label(|l| l.map(Borrowed)), - None - )); + None, + ); if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -1484,7 +1439,7 @@ impl crate::Context for ContextWgpuCore { } #[doc(hidden)] fn device_make_invalid(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) { - wgc::gfx_select!(device => self.0.device_make_invalid(*device)); + self.0.device_make_invalid(*device); } #[cfg_attr(not(any(native, Emscripten)), allow(unused))] fn device_drop(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) { @@ -1492,13 +1447,13 @@ impl crate::Context for ContextWgpuCore { { // Call device_poll, but don't check for errors. We have to use its // return value, but we just drop it. - let _ = wgc::gfx_select!(device => self.0.device_poll(*device, wgt::Maintain::wait())); - wgc::gfx_select!(device => self.0.device_drop(*device)); + let _ = self.0.device_poll(*device, wgt::Maintain::wait()); + self.0.device_drop(*device); } } #[cfg_attr(target_arch = "wasm32", allow(unused))] fn queue_drop(&self, queue: &Self::QueueId, _device_data: &Self::QueueData) { - wgc::gfx_select!(queue => self.0.queue_drop(*queue)); + self.0.queue_drop(*queue); } fn device_set_device_lost_callback( &self, @@ -1507,10 +1462,11 @@ impl crate::Context for ContextWgpuCore { device_lost_callback: crate::context::DeviceLostCallback, ) { let device_lost_closure = DeviceLostClosure::from_rust(device_lost_callback); - wgc::gfx_select!(device => self.0.device_set_device_lost_closure(*device, device_lost_closure)); + self.0 + .device_set_device_lost_closure(*device, device_lost_closure); } fn device_destroy(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) { - wgc::gfx_select!(device => self.0.device_destroy(*device)); + self.0.device_destroy(*device); } fn device_mark_lost( &self, @@ -1520,7 +1476,7 @@ impl crate::Context for ContextWgpuCore { ) { // We do not provide a reason to device_lose, because all reasons other than // destroyed (which this is not) are "unknown". 
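The hunks above and below all make the same mechanical change, so a toy sketch may help reviewers see what moved: `gfx_select!` picked a backend at every call site, while the new `wgpu-hal` dynamic dispatch erases the backend once, behind the global. Everything below (`Api`, `Backend`, `Global`) is a hypothetical illustration, not wgpu's real types.

```rust
trait Api {
    fn create_buffer(&self) -> u32;
}

struct Vulkan;
struct Metal;

impl Api for Vulkan {
    fn create_buffer(&self) -> u32 { 1 }
}
impl Api for Metal {
    fn create_buffer(&self) -> u32 { 2 }
}

// Old shape: a `gfx_select!`-style match at every call site, monomorphizing
// each entry point once per enabled backend.
enum Backend {
    Vulkan(Vulkan),
    Metal(Metal),
}

fn create_buffer_select(backend: &Backend) -> u32 {
    match backend {
        Backend::Vulkan(v) => v.create_buffer(),
        Backend::Metal(m) => m.create_buffer(),
    }
}

// New shape: the backend is erased once behind a trait object, so the public
// entry point is a plain method that compiles exactly once.
struct Global {
    api: Box<dyn Api>,
}

impl Global {
    fn create_buffer(&self) -> u32 {
        self.api.create_buffer()
    }
}

fn main() {
    assert_eq!(create_buffer_select(&Backend::Metal(Metal)), 2);
    let global = Global { api: Box::new(Vulkan) };
    assert_eq!(global.create_buffer(), 1);
}
```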
- wgc::gfx_select!(device => self.0.device_mark_lost(*device, message)); + self.0.device_mark_lost(*device, message); } fn device_poll( &self, @@ -1529,10 +1485,7 @@ impl crate::Context for ContextWgpuCore { maintain: crate::Maintain, ) -> wgt::MaintainResult { let maintain_inner = maintain.map_index(|i| *i.0.as_ref().downcast_ref().unwrap()); - match wgc::gfx_select!(device => self.0.device_poll( - *device, - maintain_inner - )) { + match self.0.device_poll(*device, maintain_inner) { Ok(done) => match done { true => wgt::MaintainResult::SubmissionQueueEmpty, false => wgt::MaintainResult::Ok, @@ -1592,8 +1545,12 @@ impl crate::Context for ContextWgpuCore { ))), }; - match wgc::gfx_select!(buffer => self.0.buffer_map_async(*buffer, range.start, Some(range.end-range.start), operation)) - { + match self.0.buffer_map_async( + *buffer, + range.start, + Some(range.end - range.start), + operation, + ) { Ok(()) => (), Err(cause) => { self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async") @@ -1607,11 +1564,10 @@ impl crate::Context for ContextWgpuCore { sub_range: Range<wgt::BufferAddress>, ) -> Box<dyn crate::context::BufferMappedRange> { let size = sub_range.end - sub_range.start; - match wgc::gfx_select!(buffer => self.0.buffer_get_mapped_range( - *buffer, - sub_range.start, - Some(size) - )) { + match self + .0 + .buffer_get_mapped_range(*buffer, sub_range.start, Some(size)) + { Ok((ptr, size)) => Box::new(BufferMappedRange { ptr, size: size as usize, @@ -1621,7 +1577,7 @@ impl crate::Context for ContextWgpuCore { } fn buffer_unmap(&self, buffer: &Self::BufferId, buffer_data: &Self::BufferData) { - match wgc::gfx_select!(buffer => self.0.buffer_unmap(*buffer)) { + match self.0.buffer_unmap(*buffer) { Ok(()) => (), Err(cause) => { self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::buffer_unmap") @@ -1655,9 +1611,7 @@ impl crate::Context for ContextWgpuCore { array_layer_count: desc.array_layer_count, }, }; - let (id, error) = wgc::gfx_select!( - texture => self.0.texture_create_view(*texture, &descriptor, None) - ); + let (id, error) = self.0.texture_create_view(*texture, &descriptor, None); if let Some(cause) = error { self.handle_error( &texture_data.error_sink, @@ -1674,25 +1628,25 @@ impl crate::Context for ContextWgpuCore { } fn adapter_drop(&self, adapter: &Self::AdapterId, _adapter_data: &Self::AdapterData) { - wgc::gfx_select!(*adapter => self.0.adapter_drop(*adapter)) + self.0.adapter_drop(*adapter) } fn buffer_destroy(&self, buffer: &Self::BufferId, _buffer_data: &Self::BufferData) { // Per spec, no error to report. Even calling destroy multiple times is valid. - let _ = wgc::gfx_select!(buffer => self.0.buffer_destroy(*buffer)); + let _ = self.0.buffer_destroy(*buffer); } fn buffer_drop(&self, buffer: &Self::BufferId, _buffer_data: &Self::BufferData) { - wgc::gfx_select!(buffer => self.0.buffer_drop(*buffer, false)) + self.0.buffer_drop(*buffer) } fn texture_destroy(&self, texture: &Self::TextureId, _texture_data: &Self::TextureData) { // Per spec, no error to report. Even calling destroy multiple times is valid.
- let _ = wgc::gfx_select!(texture => self.0.texture_destroy(*texture)); + let _ = self.0.texture_destroy(*texture); } fn texture_drop(&self, texture: &Self::TextureId, _texture_data: &Self::TextureData) { - wgc::gfx_select!(texture => self.0.texture_drop(*texture, false)) + self.0.texture_drop(*texture) } fn texture_view_drop( @@ -1700,15 +1654,15 @@ impl crate::Context for ContextWgpuCore { texture_view: &Self::TextureViewId, __texture_view_data: &Self::TextureViewData, ) { - let _ = wgc::gfx_select!(*texture_view => self.0.texture_view_drop(*texture_view, false)); + let _ = self.0.texture_view_drop(*texture_view); } fn sampler_drop(&self, sampler: &Self::SamplerId, _sampler_data: &Self::SamplerData) { - wgc::gfx_select!(*sampler => self.0.sampler_drop(*sampler)) + self.0.sampler_drop(*sampler) } fn query_set_drop(&self, query_set: &Self::QuerySetId, _query_set_data: &Self::QuerySetData) { - wgc::gfx_select!(*query_set => self.0.query_set_drop(*query_set)) + self.0.query_set_drop(*query_set) } fn bind_group_drop( @@ -1716,7 +1670,7 @@ impl crate::Context for ContextWgpuCore { bind_group: &Self::BindGroupId, _bind_group_data: &Self::BindGroupData, ) { - wgc::gfx_select!(*bind_group => self.0.bind_group_drop(*bind_group)) + self.0.bind_group_drop(*bind_group) } fn bind_group_layout_drop( @@ -1724,7 +1678,7 @@ impl crate::Context for ContextWgpuCore { bind_group_layout: &Self::BindGroupLayoutId, _bind_group_layout_data: &Self::BindGroupLayoutData, ) { - wgc::gfx_select!(*bind_group_layout => self.0.bind_group_layout_drop(*bind_group_layout)) + self.0.bind_group_layout_drop(*bind_group_layout) } fn pipeline_layout_drop( @@ -1732,14 +1686,14 @@ impl crate::Context for ContextWgpuCore { pipeline_layout: &Self::PipelineLayoutId, _pipeline_layout_data: &Self::PipelineLayoutData, ) { - wgc::gfx_select!(*pipeline_layout => self.0.pipeline_layout_drop(*pipeline_layout)) + self.0.pipeline_layout_drop(*pipeline_layout) } fn shader_module_drop( &self, shader_module: &Self::ShaderModuleId, _shader_module_data: &Self::ShaderModuleData, ) { - wgc::gfx_select!(*shader_module => self.0.shader_module_drop(*shader_module)) + self.0.shader_module_drop(*shader_module) } fn command_encoder_drop( &self, @@ -1747,7 +1701,7 @@ impl crate::Context for ContextWgpuCore { command_encoder_data: &Self::CommandEncoderData, ) { if command_encoder_data.open { - wgc::gfx_select!(command_encoder => self.0.command_encoder_drop(*command_encoder)) + self.0.command_encoder_drop(*command_encoder) } } @@ -1756,7 +1710,7 @@ impl crate::Context for ContextWgpuCore { command_buffer: &Self::CommandBufferId, _command_buffer_data: &Self::CommandBufferData, ) { - wgc::gfx_select!(*command_buffer => self.0.command_buffer_drop(*command_buffer)) + self.0.command_buffer_drop(*command_buffer) } fn render_bundle_drop( @@ -1764,7 +1718,7 @@ impl crate::Context for ContextWgpuCore { render_bundle: &Self::RenderBundleId, _render_bundle_data: &Self::RenderBundleData, ) { - wgc::gfx_select!(*render_bundle => self.0.render_bundle_drop(*render_bundle)) + self.0.render_bundle_drop(*render_bundle) } fn compute_pipeline_drop( @@ -1772,7 +1726,7 @@ impl crate::Context for ContextWgpuCore { pipeline: &Self::ComputePipelineId, _pipeline_data: &Self::ComputePipelineData, ) { - wgc::gfx_select!(*pipeline => self.0.compute_pipeline_drop(*pipeline)) + self.0.compute_pipeline_drop(*pipeline) } fn render_pipeline_drop( @@ -1780,7 +1734,7 @@ impl crate::Context for ContextWgpuCore { pipeline: &Self::RenderPipelineId, _pipeline_data: &Self::RenderPipelineData, ) { 
- wgc::gfx_select!(*pipeline => self.0.render_pipeline_drop(*pipeline)) + self.0.render_pipeline_drop(*pipeline) } fn pipeline_cache_drop( @@ -1788,7 +1742,7 @@ impl crate::Context for ContextWgpuCore { cache: &Self::PipelineCacheId, _cache_data: &Self::PipelineCacheData, ) { - wgc::gfx_select!(*cache => self.0.pipeline_cache_drop(*cache)) + self.0.pipeline_cache_drop(*cache) } fn compute_pipeline_get_bind_group_layout( @@ -1797,7 +1751,9 @@ impl crate::Context for ContextWgpuCore { _pipeline_data: &Self::ComputePipelineData, index: u32, ) -> (Self::BindGroupLayoutId, Self::BindGroupLayoutData) { - let (id, error) = wgc::gfx_select!(*pipeline => self.0.compute_pipeline_get_bind_group_layout(*pipeline, index, None)); + let (id, error) = self + .0 + .compute_pipeline_get_bind_group_layout(*pipeline, index, None); if let Some(err) = error { panic!("Error reflecting bind group {index}: {err}"); } @@ -1810,7 +1766,9 @@ impl crate::Context for ContextWgpuCore { _pipeline_data: &Self::RenderPipelineData, index: u32, ) -> (Self::BindGroupLayoutId, Self::BindGroupLayoutData) { - let (id, error) = wgc::gfx_select!(*pipeline => self.0.render_pipeline_get_bind_group_layout(*pipeline, index, None)); + let (id, error) = self + .0 + .render_pipeline_get_bind_group_layout(*pipeline, index, None); if let Some(err) = error { panic!("Error reflecting bind group {index}: {err}"); } @@ -1829,14 +1787,14 @@ impl crate::Context for ContextWgpuCore { destination_offset: wgt::BufferAddress, copy_size: wgt::BufferAddress, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_buffer_to_buffer( + if let Err(cause) = self.0.command_encoder_copy_buffer_to_buffer( *encoder, *source, source_offset, *destination, destination_offset, - copy_size - )) { + copy_size, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -1853,12 +1811,12 @@ impl crate::Context for ContextWgpuCore { destination: crate::ImageCopyTexture<'_>, copy_size: wgt::Extent3d, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_buffer_to_texture( + if let Err(cause) = self.0.command_encoder_copy_buffer_to_texture( *encoder, &map_buffer_copy_view(source), &map_texture_copy_view(destination), - &copy_size - )) { + &copy_size, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -1875,12 +1833,12 @@ impl crate::Context for ContextWgpuCore { destination: crate::ImageCopyBuffer<'_>, copy_size: wgt::Extent3d, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_texture_to_buffer( + if let Err(cause) = self.0.command_encoder_copy_texture_to_buffer( *encoder, &map_texture_copy_view(source), &map_buffer_copy_view(destination), - &copy_size - )) { + &copy_size, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -1897,12 +1855,12 @@ impl crate::Context for ContextWgpuCore { destination: crate::ImageCopyTexture<'_>, copy_size: wgt::Extent3d, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_texture_to_texture( + if let Err(cause) = self.0.command_encoder_copy_texture_to_texture( *encoder, &map_texture_copy_view(source), &map_texture_copy_view(destination), - &copy_size - )) { + &copy_size, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -1926,10 +1884,13 @@ impl crate::Context for ContextWgpuCore { end_of_pass_write_index: tw.end_of_pass_write_index, }); - let (pass, err) = gfx_select!(encoder => self.0.command_encoder_create_compute_pass_dyn(*encoder, &wgc::command::ComputePassDescriptor { - label:
desc.label.map(Borrowed), - timestamp_writes: timestamp_writes.as_ref(), - })); + let (pass, err) = self.0.command_encoder_create_compute_pass( + *encoder, + &wgc::command::ComputePassDescriptor { + label: desc.label.map(Borrowed), + timestamp_writes: timestamp_writes.as_ref(), + }, + ); if let Some(cause) = err { self.handle_error( @@ -1955,15 +1916,6 @@ impl crate::Context for ContextWgpuCore { encoder_data: &Self::CommandEncoderData, desc: &crate::RenderPassDescriptor<'_>, ) -> (Self::RenderPassId, Self::RenderPassData) { - if desc.color_attachments.len() > wgc::MAX_COLOR_ATTACHMENTS { - self.handle_error_fatal( - wgc::command::ColorAttachmentError::TooMany { - given: desc.color_attachments.len(), - limit: wgc::MAX_COLOR_ATTACHMENTS, - }, - "CommandEncoder::begin_render_pass", - ); - } let colors = desc .color_attachments .iter() @@ -1975,7 +1927,7 @@ impl crate::Context for ContextWgpuCore { channel: map_pass_channel(Some(&at.ops)), }) }) - .collect::>(); + .collect::>(); let depth_stencil = desc.depth_stencil_attachment.as_ref().map(|dsa| { wgc::command::RenderPassDepthStencilAttachment { @@ -1994,20 +1946,25 @@ impl crate::Context for ContextWgpuCore { end_of_pass_write_index: tw.end_of_pass_write_index, }); - let (pass, err) = gfx_select!(encoder => self.0.command_encoder_create_render_pass_dyn(*encoder, &wgc::command::RenderPassDescriptor { - label: desc.label.map(Borrowed), - timestamp_writes: timestamp_writes.as_ref(), - color_attachments: std::borrow::Cow::Borrowed(&colors), - depth_stencil_attachment: depth_stencil.as_ref(), - occlusion_query_set: desc.occlusion_query_set.map(|query_set| query_set.id.into()), - })); + let (pass, err) = self.0.command_encoder_create_render_pass( + *encoder, + &wgc::command::RenderPassDescriptor { + label: desc.label.map(Borrowed), + timestamp_writes: timestamp_writes.as_ref(), + color_attachments: std::borrow::Cow::Borrowed(&colors), + depth_stencil_attachment: depth_stencil.as_ref(), + occlusion_query_set: desc + .occlusion_query_set + .map(|query_set| query_set.id.into()), + }, + ); if let Some(cause) = err { self.handle_error( &encoder_data.error_sink, cause, desc.label, - "CommandEncoder::begin_compute_pass", + "CommandEncoder::begin_render_pass", ); } @@ -2027,8 +1984,7 @@ impl crate::Context for ContextWgpuCore { ) -> (Self::CommandBufferId, Self::CommandBufferData) { let descriptor = wgt::CommandBufferDescriptor::default(); encoder_data.open = false; // prevent the drop - let (id, error) = - wgc::gfx_select!(encoder => self.0.command_encoder_finish(encoder, &descriptor)); + let (id, error) = self.0.command_encoder_finish(encoder, &descriptor); if let Some(cause) = error { self.handle_error_nolabel(&encoder_data.error_sink, cause, "a CommandEncoder"); } @@ -2042,11 +1998,10 @@ impl crate::Context for ContextWgpuCore { texture: &crate::Texture, subresource_range: &wgt::ImageSubresourceRange, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_clear_texture( - *encoder, - texture.id.into(), - subresource_range - )) { + if let Err(cause) = + self.0 + .command_encoder_clear_texture(*encoder, texture.id.into(), subresource_range) + { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2063,11 +2018,10 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, size: Option, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_clear_buffer( - *encoder, - buffer.id.into(), - offset, size - )) { + if let Err(cause) = + self.0 + .command_encoder_clear_buffer(*encoder, 
buffer.id.into(), offset, size) + { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2082,9 +2036,7 @@ impl crate::Context for ContextWgpuCore { encoder_data: &Self::CommandEncoderData, label: &str, ) { - if let Err(cause) = - wgc::gfx_select!(encoder => self.0.command_encoder_insert_debug_marker(*encoder, label)) - { + if let Err(cause) = self.0.command_encoder_insert_debug_marker(*encoder, label) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2099,9 +2051,7 @@ impl crate::Context for ContextWgpuCore { encoder_data: &Self::CommandEncoderData, label: &str, ) { - if let Err(cause) = - wgc::gfx_select!(encoder => self.0.command_encoder_push_debug_group(*encoder, label)) - { + if let Err(cause) = self.0.command_encoder_push_debug_group(*encoder, label) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2115,9 +2065,7 @@ impl crate::Context for ContextWgpuCore { encoder: &Self::CommandEncoderId, encoder_data: &Self::CommandEncoderData, ) { - if let Err(cause) = - wgc::gfx_select!(encoder => self.0.command_encoder_pop_debug_group(*encoder)) - { + if let Err(cause) = self.0.command_encoder_pop_debug_group(*encoder) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2134,11 +2082,10 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_write_timestamp( - *encoder, - *query_set, - query_index - )) { + if let Err(cause) = + self.0 + .command_encoder_write_timestamp(*encoder, *query_set, query_index) + { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2159,14 +2106,14 @@ impl crate::Context for ContextWgpuCore { _destination_data: &Self::BufferData, destination_offset: wgt::BufferAddress, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_resolve_query_set( + if let Err(cause) = self.0.command_encoder_resolve_query_set( *encoder, *query_set, first_query, query_count, *destination, - destination_offset - )) { + destination_offset, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2181,11 +2128,11 @@ impl crate::Context for ContextWgpuCore { encoder_data: Self::RenderBundleEncoderData, desc: &crate::RenderBundleDescriptor<'_>, ) -> (Self::RenderBundleId, Self::RenderBundleData) { - let (id, error) = wgc::gfx_select!(encoder_data.parent() => self.0.render_bundle_encoder_finish( + let (id, error) = self.0.render_bundle_encoder_finish( encoder_data, &desc.map_label(|l| l.map(Borrowed)), - None - )); + None, + ); if let Some(err) = error { self.handle_error_fatal(err, "RenderBundleEncoder::finish"); } @@ -2201,9 +2148,7 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, data: &[u8], ) { - match wgc::gfx_select!( - *queue => self.0.queue_write_buffer(*queue, *buffer, offset, data) - ) { + match self.0.queue_write_buffer(*queue, *buffer, offset, data) { Ok(()) => (), Err(err) => { self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_buffer") @@ -2220,9 +2165,10 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, size: wgt::BufferSize, ) -> Option<()> { - match wgc::gfx_select!( - *queue => self.0.queue_validate_write_buffer(*queue, *buffer, offset, size.get()) - ) { + match self + .0 + .queue_validate_write_buffer(*queue, *buffer, offset, size) + { Ok(()) => Some(()), Err(err) => { self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_buffer_with"); @@ -2237,9 +2183,7 @@ impl 
crate::Context for ContextWgpuCore { queue_data: &Self::QueueData, size: wgt::BufferSize, ) -> Option> { - match wgc::gfx_select!( - *queue => self.0.queue_create_staging_buffer(*queue, size, None) - ) { + match self.0.queue_create_staging_buffer(*queue, size, None) { Ok((buffer_id, ptr)) => Some(Box::new(QueueWriteBuffer { buffer_id, mapping: BufferMappedRange { @@ -2267,9 +2211,10 @@ impl crate::Context for ContextWgpuCore { .as_any() .downcast_ref::() .unwrap(); - match wgc::gfx_select!( - *queue => self.0.queue_write_staging_buffer(*queue, *buffer, offset, staging_buffer.buffer_id) - ) { + match self + .0 + .queue_write_staging_buffer(*queue, *buffer, offset, staging_buffer.buffer_id) + { Ok(()) => (), Err(err) => { self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_buffer_with"); @@ -2286,13 +2231,13 @@ impl crate::Context for ContextWgpuCore { data_layout: wgt::ImageDataLayout, size: wgt::Extent3d, ) { - match wgc::gfx_select!(*queue => self.0.queue_write_texture( + match self.0.queue_write_texture( *queue, &map_texture_copy_view(texture), data, &data_layout, - &size - )) { + &size, + ) { Ok(()) => (), Err(err) => { self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_texture") @@ -2309,12 +2254,12 @@ impl crate::Context for ContextWgpuCore { dest: crate::ImageCopyTextureTagged<'_>, size: wgt::Extent3d, ) { - match wgc::gfx_select!(*queue => self.0.queue_copy_external_image_to_texture( + match self.0.queue_copy_external_image_to_texture( *queue, source, map_texture_tagged_copy_view(dest), - size - )) { + size, + ) { Ok(()) => (), Err(err) => self.handle_error_nolabel( &queue_data.error_sink, @@ -2334,14 +2279,13 @@ impl crate::Context for ContextWgpuCore { .map(|(i, _)| i) .collect::>(); - let index = match wgc::gfx_select!(*queue => self.0.queue_submit(*queue, &temp_command_buffers)) - { + let index = match self.0.queue_submit(*queue, &temp_command_buffers) { Ok(index) => index, Err(err) => self.handle_error_fatal(err, "Queue::submit"), }; for cmdbuf in &temp_command_buffers { - wgc::gfx_select!(*queue => self.0.command_buffer_drop(*cmdbuf)); + self.0.command_buffer_drop(*cmdbuf); } index @@ -2352,9 +2296,7 @@ impl crate::Context for ContextWgpuCore { queue: &Self::QueueId, _queue_data: &Self::QueueData, ) -> f32 { - let res = wgc::gfx_select!(queue => self.0.queue_get_timestamp_period( - *queue - )); + let res = self.0.queue_get_timestamp_period(*queue); match res { Ok(v) => v, Err(cause) => { @@ -2371,18 +2313,18 @@ impl crate::Context for ContextWgpuCore { ) { let closure = wgc::device::queue::SubmittedWorkDoneClosure::from_rust(callback); - let res = wgc::gfx_select!(queue => self.0.queue_on_submitted_work_done(*queue, closure)); + let res = self.0.queue_on_submitted_work_done(*queue, closure); if let Err(cause) = res { self.handle_error_fatal(cause, "Queue::on_submitted_work_done"); } } fn device_start_capture(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) { - wgc::gfx_select!(device => self.0.device_start_capture(*device)); + self.0.device_start_capture(*device); } fn device_stop_capture(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) { - wgc::gfx_select!(device => self.0.device_stop_capture(*device)); + self.0.device_stop_capture(*device); } fn device_get_internal_counters( @@ -2390,7 +2332,15 @@ impl crate::Context for ContextWgpuCore { device: &Self::DeviceId, _device_data: &Self::DeviceData, ) -> wgt::InternalCounters { - wgc::gfx_select!(device => self.0.device_get_internal_counters(*device)) + 
self.0.device_get_internal_counters(*device) + } + + fn device_generate_allocator_report( + &self, + device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> Option { + self.0.device_generate_allocator_report(*device) } fn pipeline_cache_get_data( @@ -2399,7 +2349,7 @@ impl crate::Context for ContextWgpuCore { // TODO: Used for error handling? _cache_data: &Self::PipelineCacheData, ) -> Option> { - wgc::gfx_select!(cache => self.0.pipeline_cache_get_data(*cache)) + self.0.pipeline_cache_get_data(*cache) } fn compute_pass_set_pipeline( @@ -2409,7 +2359,10 @@ impl crate::Context for ContextWgpuCore { pipeline: &Self::ComputePipelineId, _pipeline_data: &Self::ComputePipelineData, ) { - if let Err(cause) = pass_data.pass.set_pipeline(&self.0, *pipeline) { + if let Err(cause) = self + .0 + .compute_pass_set_pipeline(&mut pass_data.pass, *pipeline) + { self.handle_error( &pass_data.error_sink, cause, @@ -2428,9 +2381,9 @@ impl crate::Context for ContextWgpuCore { _bind_group_data: &Self::BindGroupData, offsets: &[wgt::DynamicOffset], ) { - if let Err(cause) = pass_data - .pass - .set_bind_group(&self.0, index, *bind_group, offsets) + if let Err(cause) = + self.0 + .compute_pass_set_bind_group(&mut pass_data.pass, index, *bind_group, offsets) { self.handle_error( &pass_data.error_sink, @@ -2448,7 +2401,10 @@ impl crate::Context for ContextWgpuCore { offset: u32, data: &[u8], ) { - if let Err(cause) = pass_data.pass.set_push_constants(&self.0, offset, data) { + if let Err(cause) = + self.0 + .compute_pass_set_push_constants(&mut pass_data.pass, offset, data) + { self.handle_error( &pass_data.error_sink, cause, @@ -2464,7 +2420,10 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::ComputePassData, label: &str, ) { - if let Err(cause) = pass_data.pass.insert_debug_marker(&self.0, label, 0) { + if let Err(cause) = self + .0 + .compute_pass_insert_debug_marker(&mut pass_data.pass, label, 0) + { self.handle_error( &pass_data.error_sink, cause, @@ -2480,7 +2439,10 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::ComputePassData, group_label: &str, ) { - if let Err(cause) = pass_data.pass.push_debug_group(&self.0, group_label, 0) { + if let Err(cause) = + self.0 + .compute_pass_push_debug_group(&mut pass_data.pass, group_label, 0) + { self.handle_error( &pass_data.error_sink, cause, @@ -2495,7 +2457,7 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::ComputePassId, pass_data: &mut Self::ComputePassData, ) { - if let Err(cause) = pass_data.pass.pop_debug_group(&self.0) { + if let Err(cause) = self.0.compute_pass_pop_debug_group(&mut pass_data.pass) { self.handle_error( &pass_data.error_sink, cause, @@ -2513,9 +2475,9 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - if let Err(cause) = pass_data - .pass - .write_timestamp(&self.0, *query_set, query_index) + if let Err(cause) = + self.0 + .compute_pass_write_timestamp(&mut pass_data.pass, *query_set, query_index) { self.handle_error( &pass_data.error_sink, @@ -2534,11 +2496,11 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - if let Err(cause) = - pass_data - .pass - .begin_pipeline_statistics_query(&self.0, *query_set, query_index) - { + if let Err(cause) = self.0.compute_pass_begin_pipeline_statistics_query( + &mut pass_data.pass, + *query_set, + query_index, + ) { self.handle_error( &pass_data.error_sink, cause, @@ -2553,7 +2515,10 @@ impl crate::Context for ContextWgpuCore { _pass: 
&mut Self::ComputePassId, pass_data: &mut Self::ComputePassData, ) { - if let Err(cause) = pass_data.pass.end_pipeline_statistics_query(&self.0) { + if let Err(cause) = self + .0 + .compute_pass_end_pipeline_statistics_query(&mut pass_data.pass) + { self.handle_error( &pass_data.error_sink, cause, @@ -2571,7 +2536,10 @@ impl crate::Context for ContextWgpuCore { y: u32, z: u32, ) { - if let Err(cause) = pass_data.pass.dispatch_workgroups(&self.0, x, y, z) { + if let Err(cause) = self + .0 + .compute_pass_dispatch_workgroups(&mut pass_data.pass, x, y, z) + { self.handle_error( &pass_data.error_sink, cause, @@ -2589,11 +2557,11 @@ impl crate::Context for ContextWgpuCore { _indirect_buffer_data: &Self::BufferData, indirect_offset: wgt::BufferAddress, ) { - if let Err(cause) = - pass_data - .pass - .dispatch_workgroups_indirect(&self.0, *indirect_buffer, indirect_offset) - { + if let Err(cause) = self.0.compute_pass_dispatch_workgroups_indirect( + &mut pass_data.pass, + *indirect_buffer, + indirect_offset, + ) { self.handle_error( &pass_data.error_sink, cause, @@ -2608,7 +2576,7 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::ComputePassId, pass_data: &mut Self::ComputePassData, ) { - if let Err(cause) = pass_data.pass.end(&self.0) { + if let Err(cause) = self.0.compute_pass_end(&mut pass_data.pass) { self.handle_error( &pass_data.error_sink, cause, @@ -2810,7 +2778,10 @@ impl crate::Context for ContextWgpuCore { pipeline: &Self::RenderPipelineId, _pipeline_data: &Self::RenderPipelineData, ) { - if let Err(cause) = pass_data.pass.set_pipeline(&self.0, *pipeline) { + if let Err(cause) = self + .0 + .render_pass_set_pipeline(&mut pass_data.pass, *pipeline) + { self.handle_error( &pass_data.error_sink, cause, @@ -2829,9 +2800,9 @@ impl crate::Context for ContextWgpuCore { _bind_group_data: &Self::BindGroupData, offsets: &[wgt::DynamicOffset], ) { - if let Err(cause) = pass_data - .pass - .set_bind_group(&self.0, index, *bind_group, offsets) + if let Err(cause) = + self.0 + .render_pass_set_bind_group(&mut pass_data.pass, index, *bind_group, offsets) { self.handle_error( &pass_data.error_sink, @@ -2852,11 +2823,13 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, size: Option, ) { - if let Err(cause) = - pass_data - .pass - .set_index_buffer(&self.0, *buffer, index_format, offset, size) - { + if let Err(cause) = self.0.render_pass_set_index_buffer( + &mut pass_data.pass, + *buffer, + index_format, + offset, + size, + ) { self.handle_error( &pass_data.error_sink, cause, @@ -2876,9 +2849,9 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, size: Option, ) { - if let Err(cause) = pass_data - .pass - .set_vertex_buffer(&self.0, slot, *buffer, offset, size) + if let Err(cause) = + self.0 + .render_pass_set_vertex_buffer(&mut pass_data.pass, slot, *buffer, offset, size) { self.handle_error( &pass_data.error_sink, @@ -2897,9 +2870,9 @@ impl crate::Context for ContextWgpuCore { offset: u32, data: &[u8], ) { - if let Err(cause) = pass_data - .pass - .set_push_constants(&self.0, stages, offset, data) + if let Err(cause) = + self.0 + .render_pass_set_push_constants(&mut pass_data.pass, stages, offset, data) { self.handle_error( &pass_data.error_sink, @@ -2917,8 +2890,8 @@ impl crate::Context for ContextWgpuCore { vertices: Range, instances: Range, ) { - if let Err(cause) = pass_data.pass.draw( - &self.0, + if let Err(cause) = self.0.render_pass_draw( + &mut pass_data.pass, vertices.end - vertices.start, instances.end - instances.start, 
vertices.start, @@ -2941,8 +2914,8 @@ impl crate::Context for ContextWgpuCore { base_vertex: i32, instances: Range, ) { - if let Err(cause) = pass_data.pass.draw_indexed( - &self.0, + if let Err(cause) = self.0.render_pass_draw_indexed( + &mut pass_data.pass, indices.end - indices.start, instances.end - instances.start, indices.start, @@ -2966,9 +2939,9 @@ impl crate::Context for ContextWgpuCore { _indirect_buffer_data: &Self::BufferData, indirect_offset: wgt::BufferAddress, ) { - if let Err(cause) = pass_data - .pass - .draw_indirect(&self.0, *indirect_buffer, indirect_offset) + if let Err(cause) = + self.0 + .render_pass_draw_indirect(&mut pass_data.pass, *indirect_buffer, indirect_offset) { self.handle_error( &pass_data.error_sink, @@ -2987,11 +2960,11 @@ impl crate::Context for ContextWgpuCore { _indirect_buffer_data: &Self::BufferData, indirect_offset: wgt::BufferAddress, ) { - if let Err(cause) = - pass_data - .pass - .draw_indexed_indirect(&self.0, *indirect_buffer, indirect_offset) - { + if let Err(cause) = self.0.render_pass_draw_indexed_indirect( + &mut pass_data.pass, + *indirect_buffer, + indirect_offset, + ) { self.handle_error( &pass_data.error_sink, cause, @@ -3010,11 +2983,12 @@ impl crate::Context for ContextWgpuCore { indirect_offset: wgt::BufferAddress, count: u32, ) { - if let Err(cause) = - pass_data - .pass - .multi_draw_indirect(&self.0, *indirect_buffer, indirect_offset, count) - { + if let Err(cause) = self.0.render_pass_multi_draw_indirect( + &mut pass_data.pass, + *indirect_buffer, + indirect_offset, + count, + ) { self.handle_error( &pass_data.error_sink, cause, @@ -3033,8 +3007,8 @@ impl crate::Context for ContextWgpuCore { indirect_offset: wgt::BufferAddress, count: u32, ) { - if let Err(cause) = pass_data.pass.multi_draw_indexed_indirect( - &self.0, + if let Err(cause) = self.0.render_pass_multi_draw_indexed_indirect( + &mut pass_data.pass, *indirect_buffer, indirect_offset, count, @@ -3060,8 +3034,8 @@ impl crate::Context for ContextWgpuCore { count_buffer_offset: wgt::BufferAddress, max_count: u32, ) { - if let Err(cause) = pass_data.pass.multi_draw_indirect_count( - &self.0, + if let Err(cause) = self.0.render_pass_multi_draw_indirect_count( + &mut pass_data.pass, *indirect_buffer, indirect_offset, *count_buffer, @@ -3089,8 +3063,8 @@ impl crate::Context for ContextWgpuCore { count_buffer_offset: wgt::BufferAddress, max_count: u32, ) { - if let Err(cause) = pass_data.pass.multi_draw_indexed_indirect_count( - &self.0, + if let Err(cause) = self.0.render_pass_multi_draw_indexed_indirect_count( + &mut pass_data.pass, *indirect_buffer, indirect_offset, *count_buffer, @@ -3112,7 +3086,10 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, color: wgt::Color, ) { - if let Err(cause) = pass_data.pass.set_blend_constant(&self.0, color) { + if let Err(cause) = self + .0 + .render_pass_set_blend_constant(&mut pass_data.pass, color) + { self.handle_error( &pass_data.error_sink, cause, @@ -3131,9 +3108,9 @@ impl crate::Context for ContextWgpuCore { width: u32, height: u32, ) { - if let Err(cause) = pass_data - .pass - .set_scissor_rect(&self.0, x, y, width, height) + if let Err(cause) = + self.0 + .render_pass_set_scissor_rect(&mut pass_data.pass, x, y, width, height) { self.handle_error( &pass_data.error_sink, @@ -3155,10 +3132,15 @@ impl crate::Context for ContextWgpuCore { min_depth: f32, max_depth: f32, ) { - if let Err(cause) = pass_data - .pass - .set_viewport(&self.0, x, y, width, height, min_depth, max_depth) - { + if let 
Err(cause) = self.0.render_pass_set_viewport( + &mut pass_data.pass, + x, + y, + width, + height, + min_depth, + max_depth, + ) { self.handle_error( &pass_data.error_sink, cause, @@ -3174,7 +3156,10 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, reference: u32, ) { - if let Err(cause) = pass_data.pass.set_stencil_reference(&self.0, reference) { + if let Err(cause) = self + .0 + .render_pass_set_stencil_reference(&mut pass_data.pass, reference) + { self.handle_error( &pass_data.error_sink, cause, @@ -3190,7 +3175,10 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, label: &str, ) { - if let Err(cause) = pass_data.pass.insert_debug_marker(&self.0, label, 0) { + if let Err(cause) = self + .0 + .render_pass_insert_debug_marker(&mut pass_data.pass, label, 0) + { self.handle_error( &pass_data.error_sink, cause, @@ -3206,7 +3194,10 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, group_label: &str, ) { - if let Err(cause) = pass_data.pass.push_debug_group(&self.0, group_label, 0) { + if let Err(cause) = self + .0 + .render_pass_push_debug_group(&mut pass_data.pass, group_label, 0) + { self.handle_error( &pass_data.error_sink, cause, @@ -3221,7 +3212,7 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::RenderPassId, pass_data: &mut Self::RenderPassData, ) { - if let Err(cause) = pass_data.pass.pop_debug_group(&self.0) { + if let Err(cause) = self.0.render_pass_pop_debug_group(&mut pass_data.pass) { self.handle_error( &pass_data.error_sink, cause, @@ -3239,9 +3230,9 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - if let Err(cause) = pass_data - .pass - .write_timestamp(&self.0, *query_set, query_index) + if let Err(cause) = + self.0 + .render_pass_write_timestamp(&mut pass_data.pass, *query_set, query_index) { self.handle_error( &pass_data.error_sink, @@ -3258,7 +3249,10 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, query_index: u32, ) { - if let Err(cause) = pass_data.pass.begin_occlusion_query(&self.0, query_index) { + if let Err(cause) = self + .0 + .render_pass_begin_occlusion_query(&mut pass_data.pass, query_index) + { self.handle_error( &pass_data.error_sink, cause, @@ -3273,7 +3267,7 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::RenderPassId, pass_data: &mut Self::RenderPassData, ) { - if let Err(cause) = pass_data.pass.end_occlusion_query(&self.0) { + if let Err(cause) = self.0.render_pass_end_occlusion_query(&mut pass_data.pass) { self.handle_error( &pass_data.error_sink, cause, @@ -3291,11 +3285,11 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - if let Err(cause) = - pass_data - .pass - .begin_pipeline_statistics_query(&self.0, *query_set, query_index) - { + if let Err(cause) = self.0.render_pass_begin_pipeline_statistics_query( + &mut pass_data.pass, + *query_set, + query_index, + ) { self.handle_error( &pass_data.error_sink, cause, @@ -3310,7 +3304,10 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::RenderPassId, pass_data: &mut Self::RenderPassData, ) { - if let Err(cause) = pass_data.pass.end_pipeline_statistics_query(&self.0) { + if let Err(cause) = self + .0 + .render_pass_end_pipeline_statistics_query(&mut pass_data.pass) + { self.handle_error( &pass_data.error_sink, cause, @@ -3327,9 +3324,9 @@ impl crate::Context for ContextWgpuCore { render_bundles: &mut dyn Iterator, ) { let 
temp_render_bundles = render_bundles.map(|(i, _)| i).collect::>(); - if let Err(cause) = pass_data - .pass - .execute_bundles(&self.0, &temp_render_bundles) + if let Err(cause) = self + .0 + .render_pass_execute_bundles(&mut pass_data.pass, &temp_render_bundles) { self.handle_error( &pass_data.error_sink, @@ -3345,7 +3342,7 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::RenderPassId, pass_data: &mut Self::RenderPassData, ) { - if let Err(cause) = pass_data.pass.end(&self.0) { + if let Err(cause) = self.0.render_pass_end(&mut pass_data.pass) { self.handle_error( &pass_data.error_sink, cause, @@ -3363,12 +3360,8 @@ impl crate::Context for ContextWgpuCore { sizes: wgt::BlasGeometrySizeDescriptors, ) -> (Self::BlasId, Option, Self::BlasData) { let global = &self.0; - let (id, handle, error) = wgc::gfx_select!(device => global.device_create_blas( - *device, - &desc.map_label(|l| l.map(Borrowed)), - sizes, - None, - )); + let (id, handle, error) = + global.device_create_blas(*device, &desc.map_label(|l| l.map(Borrowed)), sizes, None); if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -3393,11 +3386,8 @@ impl crate::Context for ContextWgpuCore { desc: &crate::ray_tracing::CreateTlasDescriptor<'_>, ) -> (Self::TlasId, Self::TlasData) { let global = &self.0; - let (id, error) = wgc::gfx_select!(device => global.device_create_tlas( - *device, - &desc.map_label(|l| l.map(Borrowed)), - None, - )); + let (id, error) = + global.device_create_tlas(*device, &desc.map_label(|l| l.map(Borrowed)), None); if let Some(cause) = error { self.handle_error( &device_data.error_sink, @@ -3459,11 +3449,9 @@ impl crate::Context for ContextWgpuCore { }, ); - if let Err(cause) = wgc::gfx_select!(encoder => global.command_encoder_build_acceleration_structures_unsafe_tlas( - *encoder, - blas, - tlas - )) { + if let Err(cause) = + global.command_encoder_build_acceleration_structures_unsafe_tlas(*encoder, blas, tlas) + { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -3525,11 +3513,9 @@ impl crate::Context for ContextWgpuCore { } }); - if let Err(cause) = wgc::gfx_select!(encoder => global.command_encoder_build_acceleration_structures( - *encoder, - blas, - tlas - )) { + if let Err(cause) = + global.command_encoder_build_acceleration_structures(*encoder, blas, tlas) + { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -3540,22 +3526,22 @@ impl crate::Context for ContextWgpuCore { fn blas_destroy(&self, blas: &Self::BlasId, _blas_data: &Self::BlasData) { let global = &self.0; - let _ = wgc::gfx_select!(blas => global.blas_destroy(*blas)); + let _ = global.blas_destroy(*blas); } fn blas_drop(&self, blas: &Self::BlasId, _blas_data: &Self::BlasData) { let global = &self.0; - wgc::gfx_select!(blas => global.blas_drop(*blas, false)) + global.blas_drop(*blas) } fn tlas_destroy(&self, tlas: &Self::TlasId, _tlas_data: &Self::TlasData) { let global = &self.0; - let _ = wgc::gfx_select!(tlas => global.tlas_destroy(*tlas)); + let _ = global.tlas_destroy(*tlas); } fn tlas_drop(&self, tlas: &Self::TlasId, _tlas_data: &Self::TlasData) { let global = &self.0; - wgc::gfx_select!(tlas => global.tlas_drop(*tlas, false)) + global.tlas_drop(*tlas) } } diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index e62902880f..62417b2455 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -183,12 +183,8 @@ pub trait Context: Debug + WasmNotSendSync + Sized { SurfaceStatus, Self::SurfaceOutputDetail, ); - fn surface_present(&self, texture: &Self::TextureId, detail: 
&Self::SurfaceOutputDetail); - fn surface_texture_discard( - &self, - texture: &Self::TextureId, - detail: &Self::SurfaceOutputDetail, - ); + fn surface_present(&self, detail: &Self::SurfaceOutputDetail); + fn surface_texture_discard(&self, detail: &Self::SurfaceOutputDetail); fn device_features(&self, device: &Self::DeviceId, device_data: &Self::DeviceData) -> Features; fn device_limits(&self, device: &Self::DeviceId, device_data: &Self::DeviceData) -> Limits; @@ -623,6 +619,12 @@ pub trait Context: Debug + WasmNotSendSync + Sized { _device_data: &Self::DeviceData, ) -> wgt::InternalCounters; + fn device_generate_allocator_report( + &self, + device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> Option; + fn pipeline_cache_get_data( &self, cache: &Self::PipelineCacheId, @@ -1272,8 +1274,8 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { SurfaceStatus, Box, ); - fn surface_present(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync); - fn surface_texture_discard(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync); + fn surface_present(&self, detail: &dyn AnyWasmNotSendSync); + fn surface_texture_discard(&self, detail: &dyn AnyWasmNotSendSync); fn device_features(&self, device: &ObjectId, device_data: &crate::Data) -> Features; fn device_limits(&self, device: &ObjectId, device_data: &crate::Data) -> Limits; @@ -1654,6 +1656,12 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { device_data: &crate::Data, ) -> wgt::InternalCounters; + fn generate_allocator_report( + &self, + device: &ObjectId, + device_data: &crate::Data, + ) -> Option; + fn pipeline_cache_get_data( &self, cache: &ObjectId, @@ -2260,14 +2268,12 @@ where ) } - fn surface_present(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync) { - let texture = ::from(*texture); - Context::surface_present(self, &texture, detail.downcast_ref().unwrap()) + fn surface_present(&self, detail: &dyn AnyWasmNotSendSync) { + Context::surface_present(self, detail.downcast_ref().unwrap()) } - fn surface_texture_discard(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync) { - let texture = ::from(*texture); - Context::surface_texture_discard(self, &texture, detail.downcast_ref().unwrap()) + fn surface_texture_discard(&self, detail: &dyn AnyWasmNotSendSync) { + Context::surface_texture_discard(self, detail.downcast_ref().unwrap()) } fn device_features(&self, device: &ObjectId, device_data: &crate::Data) -> Features { @@ -3169,6 +3175,16 @@ where Context::device_get_internal_counters(self, &device, device_data) } + fn generate_allocator_report( + &self, + device: &ObjectId, + device_data: &crate::Data, + ) -> Option { + let device = ::from(*device); + let device_data = downcast_ref(device_data); + Context::device_generate_allocator_report(self, &device, device_data) + } + fn pipeline_cache_get_data( &self, cache: &ObjectId, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 8a42ea524a..edbdcf17f1 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -18,11 +18,18 @@ #![doc(html_logo_url = "https://raw.githubusercontent.com/gfx-rs/wgpu/trunk/logo.png")] #![warn(missing_docs, rust_2018_idioms, unsafe_op_in_unsafe_fn)] +// +// +// Modules +// +// + +mod api; mod backend; mod context; -pub mod util; -#[macro_use] mod macros; +mod send_sync; +pub mod util; /// Module to add ray tracing support to wgpu. /// It adds support for acceleration structures and ray queries. @@ -38,47 +45,59 @@ mod macros; /// For more details see the examples (starting with ray-). 
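The `device_generate_allocator_report`/`generate_allocator_report` additions above thread one method through the statically typed `Context` trait, the object-safe `DynContext` mirror, and the blanket impl. A minimal standalone sketch of that layering pattern, with toy traits and `Option<String>` standing in for the real report type:

```rust
use std::any::Any;

// Statically typed side: each concrete context picks its own data types.
trait Context: Sized {
    type DeviceData: 'static;
    fn device_generate_allocator_report(&self, device_data: &Self::DeviceData) -> Option<String>;
}

// Object-safe mirror, usable as `dyn DynContext` with type-erased data.
trait DynContext {
    fn generate_allocator_report(&self, device_data: &dyn Any) -> Option<String>;
}

// Blanket impl: downcast the erased data, then forward to the typed trait,
// mirroring the `downcast_ref` call in the real blanket impl above.
impl<T: Context> DynContext for T {
    fn generate_allocator_report(&self, device_data: &dyn Any) -> Option<String> {
        let device_data = device_data
            .downcast_ref::<T::DeviceData>()
            .expect("invalid device data for this context");
        Context::device_generate_allocator_report(self, device_data)
    }
}
```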
pub mod ray_tracing; -use std::{ - any::Any, - borrow::Cow, - cmp::Ordering, - collections::HashMap, - error, fmt, - future::Future, - marker::PhantomData, - num::{NonZeroU32, NonZeroU64}, - ops::{Bound, Deref, DerefMut, Range, RangeBounds}, - sync::Arc, - thread, -}; +// +// +// Private re-exports +// +// -#[allow(unused_imports)] // Unused if all backends are disabled. +#[allow(unused_imports)] // WebGPU needs this use context::Context; +use send_sync::*; + +type C = dyn context::DynContext; +#[cfg(send_sync)] +type Data = dyn std::any::Any + Send + Sync; +#[cfg(not(send_sync))] +type Data = dyn std::any::Any; -use context::{DeviceRequest, DynContext, ObjectId}; -use parking_lot::Mutex; +// +// +// Public re-exports +// +// -use raw_window_handle::{HasDisplayHandle, HasWindowHandle}; +pub use api::*; pub use wgt::{ AdapterInfo, AddressMode, AstcBlock, AstcChannel, Backend, Backends, BindGroupLayoutEntry, BindingType, BlendComponent, BlendFactor, BlendOperation, BlendState, BufferAddress, BufferBindingType, BufferSize, BufferUsages, Color, ColorTargetState, ColorWrites, - CommandBufferDescriptor, CompareFunction, CompositeAlphaMode, DepthBiasState, + CommandBufferDescriptor, CompareFunction, CompositeAlphaMode, CoreCounters, DepthBiasState, DepthStencilState, DeviceLostReason, DeviceType, DownlevelCapabilities, DownlevelFlags, Dx12Compiler, DynamicOffset, Extent3d, Face, Features, FilterMode, FrontFace, - Gles3MinorVersion, ImageDataLayout, ImageSubresourceRange, IndexFormat, InstanceDescriptor, - InstanceFlags, Limits, MaintainResult, MemoryHints, MultisampleState, Origin2d, Origin3d, - PipelineStatisticsTypes, PolygonMode, PowerPreference, PredefinedColorSpace, PresentMode, - PresentationTimestamp, PrimitiveState, PrimitiveTopology, PushConstantRange, QueryType, - RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor, ShaderLocation, ShaderModel, - ShaderStages, StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, - SurfaceCapabilities, SurfaceStatus, TextureAspect, TextureDimension, TextureFormat, - TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType, TextureUsages, - TextureViewDimension, VertexAttribute, VertexFormat, VertexStepMode, WasmNotSend, - WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT, COPY_BYTES_PER_ROW_ALIGNMENT, + Gles3MinorVersion, HalCounters, ImageDataLayout, ImageSubresourceRange, IndexFormat, + InstanceDescriptor, InstanceFlags, InternalCounters, Limits, MaintainResult, MemoryHints, + MultisampleState, Origin2d, Origin3d, PipelineStatisticsTypes, PolygonMode, PowerPreference, + PredefinedColorSpace, PresentMode, PresentationTimestamp, PrimitiveState, PrimitiveTopology, + PushConstantRange, QueryType, RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor, + ShaderLocation, ShaderModel, ShaderStages, StencilFaceState, StencilOperation, StencilState, + StorageTextureAccess, SurfaceCapabilities, SurfaceStatus, TextureAspect, TextureDimension, + TextureFormat, TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType, + TextureUsages, TextureViewDimension, VertexAttribute, VertexFormat, VertexStepMode, + WasmNotSend, WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT, COPY_BYTES_PER_ROW_ALIGNMENT, MAP_ALIGNMENT, PUSH_CONSTANT_ALIGNMENT, QUERY_RESOLVE_BUFFER_ALIGNMENT, QUERY_SET_MAX_QUERIES, QUERY_SIZE, VERTEX_STRIDE_ALIGNMENT, }; +// wasm-only types, we try to keep as many types non-platform +// specific, but these need to depend on web-sys. 
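The reorganized re-export block is behavior-preserving: downstream code keeps addressing everything through the crate root. A trivial consumer, for illustration only:

```rust
// Both types come from wgpu-types but are reachable as `wgpu::*`
// thanks to the flat `pub use wgt::{...}` list above.
fn baseline() -> (wgpu::Limits, wgpu::Features) {
    (wgpu::Limits::downlevel_defaults(), wgpu::Features::empty())
}
```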
+#[cfg(any(webgpu, webgl))] +pub use wgt::{ExternalImageSource, ImageCopyExternalImage}; + +// +// +// Re-exports of dependencies +// +// /// Re-export of our `wgpu-core` dependency. /// @@ -112,5937 +131,3 @@ pub use raw_window_handle as rwh; /// #[cfg(any(webgl, webgpu))] pub use web_sys; - -// wasm-only types, we try to keep as many types non-platform -// specific, but these need to depend on web-sys. -#[cfg(any(webgpu, webgl))] -pub use wgt::{ExternalImageSource, ImageCopyExternalImage}; - -/// Filter for error scopes. -#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)] -pub enum ErrorFilter { - /// Catch only out-of-memory errors. - OutOfMemory, - /// Catch only validation errors. - Validation, - /// Catch only internal errors. - Internal, -} -static_assertions::assert_impl_all!(ErrorFilter: Send, Sync); - -type C = dyn DynContext; -#[cfg(send_sync)] -type Data = dyn Any + Send + Sync; -#[cfg(not(send_sync))] -type Data = dyn Any; - -/// Context for all other wgpu objects. Instance of wgpu. -/// -/// This is the first thing you create when using wgpu. -/// Its primary use is to create [`Adapter`]s and [`Surface`]s. -/// -/// Does not have to be kept alive. -/// -/// Corresponds to [WebGPU `GPU`](https://gpuweb.github.io/gpuweb/#gpu-interface). -#[derive(Debug)] -pub struct Instance { - context: Arc<C>, -} -#[cfg(send_sync)] -static_assertions::assert_impl_all!(Instance: Send, Sync); - -/// Handle to a physical graphics and/or compute device. -/// -/// Adapters can be used to open a connection to the corresponding [`Device`] -/// on the host system by using [`Adapter::request_device`]. -/// -/// Does not have to be kept alive. -/// -/// Corresponds to [WebGPU `GPUAdapter`](https://gpuweb.github.io/gpuweb/#gpu-adapter). -#[derive(Debug)] -pub struct Adapter { - context: Arc<C>, - id: ObjectId, - data: Box<Data>, -} -#[cfg(send_sync)] -static_assertions::assert_impl_all!(Adapter: Send, Sync); - -impl Drop for Adapter { - fn drop(&mut self) { - if !thread::panicking() { - self.context.adapter_drop(&self.id, self.data.as_ref()) - } - } -} - -/// Open connection to a graphics and/or compute device. -/// -/// Responsible for the creation of most rendering and compute resources. -/// These are then used in commands, which are submitted to a [`Queue`]. -/// -/// A device may be requested from an adapter with [`Adapter::request_device`]. -/// -/// Corresponds to [WebGPU `GPUDevice`](https://gpuweb.github.io/gpuweb/#gpu-device). -#[derive(Debug)] -pub struct Device { - context: Arc<C>, - id: ObjectId, - data: Box<Data>, -} -#[cfg(send_sync)] -static_assertions::assert_impl_all!(Device: Send, Sync); - -/// Identifier for a particular call to [`Queue::submit`]. Can be used -/// as part of an argument to [`Device::poll`] to block for a particular -/// submission to finish. -/// -/// This type is unique to the Rust API of `wgpu`. -/// There is no analogue in the WebGPU specification. -#[derive(Debug, Clone)] -pub struct SubmissionIndex(Arc<crate::Data>); -#[cfg(send_sync)] -static_assertions::assert_impl_all!(SubmissionIndex: Send, Sync); - -/// The mapped portion of a buffer, if any, and its outstanding views. -/// -/// This ensures that views fall within the mapped range and don't overlap, and -/// also takes care of turning `Option<BufferSize>` sizes into actual buffer -/// offsets. -#[derive(Debug)] -struct MapContext { - /// The overall size of the buffer. - /// - /// This is just a convenient copy of [`Buffer::size`]. - total_size: BufferAddress, - - /// The range of the buffer that is mapped.
- /// - /// This is `0..0` if the buffer is not mapped. This becomes non-empty when - /// the buffer is mapped at creation time, and when you call `map_async` on - /// some [`BufferSlice`] (so technically, it indicates the portion that is - /// *or has been requested to be* mapped.) - /// - /// All [`BufferView`]s and [`BufferViewMut`]s must fall within this range. - initial_range: Range<BufferAddress>, - - /// The ranges covered by all outstanding [`BufferView`]s and - /// [`BufferViewMut`]s. These are non-overlapping, and are all contained - /// within `initial_range`. - sub_ranges: Vec<Range<BufferAddress>>, -} - -impl MapContext { - fn new(total_size: BufferAddress) -> Self { - Self { - total_size, - initial_range: 0..0, - sub_ranges: Vec::new(), - } - } - - /// Record that the buffer is no longer mapped. - fn reset(&mut self) { - self.initial_range = 0..0; - - assert!( - self.sub_ranges.is_empty(), - "You cannot unmap a buffer that still has accessible mapped views" - ); - } - - /// Record that the `size` bytes of the buffer at `offset` are now viewed. - /// - /// Return the byte offset within the buffer of the end of the viewed range. - /// - /// # Panics - /// - /// This panics if the given range overlaps with any existing range. - fn add(&mut self, offset: BufferAddress, size: Option<BufferSize>) -> BufferAddress { - let end = match size { - Some(s) => offset + s.get(), - None => self.initial_range.end, - }; - assert!(self.initial_range.start <= offset && end <= self.initial_range.end); - // This check is essential for avoiding undefined behavior: it is the - // only thing that ensures that `&mut` references to the buffer's - // contents don't alias anything else. - for sub in self.sub_ranges.iter() { - assert!( - end <= sub.start || offset >= sub.end, - "Intersecting map range with {sub:?}" - ); - } - self.sub_ranges.push(offset..end); - end - } - - /// Record that the `size` bytes of the buffer at `offset` are no longer viewed. - /// - /// # Panics - /// - /// This panics if the given range does not exactly match one previously - /// passed to [`add`]. - /// - /// [`add`]: MapContext::add - fn remove(&mut self, offset: BufferAddress, size: Option<BufferSize>) { - let end = match size { - Some(s) => offset + s.get(), - None => self.initial_range.end, - }; - - let index = self - .sub_ranges - .iter() - .position(|r| *r == (offset..end)) - .expect("unable to remove range from map context"); - self.sub_ranges.swap_remove(index); - } -} - -/// Handle to a GPU-accessible buffer. -/// -/// Created with [`Device::create_buffer`] or -/// [`DeviceExt::create_buffer_init`](util::DeviceExt::create_buffer_init). -/// -/// Corresponds to [WebGPU `GPUBuffer`](https://gpuweb.github.io/gpuweb/#buffer-interface). -/// -/// # Mapping buffers -/// -/// If a `Buffer` is created with the appropriate [`usage`], it can be *mapped*: -/// you can make its contents accessible to the CPU as an ordinary `&[u8]` or -/// `&mut [u8]` slice of bytes. Buffers created with the -/// [`mapped_at_creation`][mac] flag set are also mapped initially. -/// -/// Depending on the hardware, the buffer could be memory shared between CPU and -/// GPU, so that the CPU has direct access to the same bytes the GPU will -/// consult; or it may be ordinary CPU memory, whose contents the system must -/// copy to/from the GPU as needed. This crate's API is designed to work the -/// same way in either case: at any given time, a buffer is either mapped and -/// available to the CPU, or unmapped and ready for use by the GPU, but never -/// both. This makes it impossible for either side to observe changes by the -/// other immediately, and any necessary transfers can be carried out when the -/// buffer transitions from one state to the other. -/// -/// There are two ways to map a buffer: -/// -/// - If [`BufferDescriptor::mapped_at_creation`] is `true`, then the entire -/// buffer is mapped when it is created. This is the easiest way to initialize -/// a new buffer. You can set `mapped_at_creation` on any kind of buffer, -/// regardless of its [`usage`] flags. -/// -/// - If the buffer's [`usage`] includes the [`MAP_READ`] or [`MAP_WRITE`] -/// flags, then you can call `buffer.slice(range).map_async(mode, callback)` -/// to map the portion of `buffer` given by `range`. This waits for the GPU to -/// finish using the buffer, and invokes `callback` as soon as the buffer is -/// safe for the CPU to access. -/// -/// Once a buffer is mapped: -/// -/// - You can call `buffer.slice(range).get_mapped_range()` to obtain a -/// [`BufferView`], which dereferences to a `&[u8]` that you can use to read -/// the buffer's contents. -/// -/// - Or, you can call `buffer.slice(range).get_mapped_range_mut()` to obtain a -/// [`BufferViewMut`], which dereferences to a `&mut [u8]` that you can use to -/// read and write the buffer's contents. -/// -/// The given `range` must fall within the mapped portion of the buffer. If you -/// attempt to access overlapping ranges, even for shared access only, these -/// methods panic. -/// -/// For example: -/// -/// ```no_run -/// # let buffer: wgpu::Buffer = todo!(); -/// let slice = buffer.slice(10..20); -/// slice.map_async(wgpu::MapMode::Read, |result| { -/// match result { -/// Ok(()) => { -/// let view = slice.get_mapped_range(); -/// // read data from `view`, which dereferences to `&[u8]` -/// } -/// Err(e) => { -/// // handle mapping error -/// } -/// } -/// }); -/// ``` -/// -/// This example calls `Buffer::slice` to obtain a [`BufferSlice`] referring to -/// the second ten bytes of `buffer`. (To obtain access to the entire buffer, -/// you could call `buffer.slice(..)`.) The code then calls `map_async` to wait -/// for the buffer to be available, and finally calls `get_mapped_range` on the -/// slice to actually get at the bytes. -/// -/// If using `map_async` directly is awkward, you may find it more convenient to -/// use [`Queue::write_buffer`] and [`util::DownloadBuffer::read_buffer`]. -/// However, those each have their own tradeoffs; the asynchronous nature of GPU -/// execution makes it hard to avoid friction altogether. -/// -/// While a buffer is mapped, you must not submit any commands to the GPU that -/// access it. You may record command buffers that use the buffer, but you must -/// not submit such command buffers. -/// -/// When you are done using the buffer on the CPU, you must call -/// [`Buffer::unmap`] to make it available for use by the GPU again. All -/// [`BufferView`] and [`BufferViewMut`] views referring to the buffer must be -/// dropped before you unmap it; otherwise, [`Buffer::unmap`] will panic. -/// -/// ## Mapping buffers on the web -/// -/// When compiled to WebAssembly and running in a browser content process, -/// `wgpu` implements its API in terms of the browser's WebGPU implementation. -/// In this context, `wgpu` is further isolated from the GPU: -/// -/// - Depending on the browser's WebGPU implementation, mapping and unmapping -/// buffers probably entails copies between WebAssembly linear memory and the -/// graphics driver's buffers.
-///
-/// - All modern web browsers isolate web content in its own sandboxed process,
-///   which can only interact with the GPU via interprocess communication (IPC).
-///   Although most browsers' IPC systems use shared memory for large data
-///   transfers, there will still probably need to be copies into and out of the
-///   shared memory buffers.
-///
-/// All of these copies contribute to the cost of buffer mapping in this
-/// configuration.
-///
-/// [`usage`]: BufferDescriptor::usage
-/// [mac]: BufferDescriptor::mapped_at_creation
-/// [`MAP_READ`]: BufferUsages::MAP_READ
-/// [`MAP_WRITE`]: BufferUsages::MAP_WRITE
-#[derive(Debug)]
-pub struct Buffer {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-    map_context: Mutex<MapContext>,
-    size: wgt::BufferAddress,
-    usage: BufferUsages,
-    // Todo: missing map_state https://www.w3.org/TR/webgpu/#dom-gpubuffer-mapstate
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Buffer: Send, Sync);
-
-/// A slice of a [`Buffer`], to be mapped, used for vertex or index data, or the like.
-///
-/// You can create a `BufferSlice` by calling [`Buffer::slice`]:
-///
-/// ```no_run
-/// # let buffer: wgpu::Buffer = todo!();
-/// let slice = buffer.slice(10..20);
-/// ```
-///
-/// This returns a slice referring to the second ten bytes of `buffer`. To get a
-/// slice of the entire `Buffer`:
-///
-/// ```no_run
-/// # let buffer: wgpu::Buffer = todo!();
-/// let whole_buffer_slice = buffer.slice(..);
-/// ```
-///
-/// A [`BufferSlice`] is nothing more than a reference to the `Buffer` and a
-/// starting and ending position. To access the slice's contents on the CPU, you
-/// must first [map] the buffer, and then call [`BufferSlice::get_mapped_range`]
-/// or [`BufferSlice::get_mapped_range_mut`] to obtain a view of the slice's
-/// contents, which dereferences to a `&[u8]` or `&mut [u8]`.
-///
-/// You can also pass buffer slices to methods like
-/// [`RenderPass::set_vertex_buffer`] and [`RenderPass::set_index_buffer`] to
-/// indicate which data a draw call should consume.
-///
-/// The `BufferSlice` type is unique to the Rust API of `wgpu`. In the WebGPU
-/// specification, an offset and size are specified as arguments to each call
-/// working with the [`Buffer`], instead.
-///
-/// [map]: Buffer#mapping-buffers
-#[derive(Copy, Clone, Debug)]
-pub struct BufferSlice<'a> {
-    buffer: &'a Buffer,
-    offset: BufferAddress,
-    size: Option<BufferSize>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BufferSlice<'_>: Send, Sync);
-
-/// Handle to a texture on the GPU.
-///
-/// It can be created with [`Device::create_texture`].
-///
-/// Corresponds to [WebGPU `GPUTexture`](https://gpuweb.github.io/gpuweb/#texture-interface).
-#[derive(Debug)]
-pub struct Texture {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-    owned: bool,
-    descriptor: TextureDescriptor<'static>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Texture: Send, Sync);
-
-/// Handle to a texture view.
-///
-/// A `TextureView` object describes a texture and associated metadata needed by a
-/// [`RenderPipeline`] or [`BindGroup`].
-///
-/// Corresponds to [WebGPU `GPUTextureView`](https://gpuweb.github.io/gpuweb/#gputextureview).
-#[derive(Debug)]
-pub struct TextureView {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(TextureView: Send, Sync);
-
-/// Handle to a sampler.
-///
-/// A `Sampler` object defines how a pipeline will sample from a [`TextureView`].
-/// Samplers define image filters (including anisotropy) and address (wrapping) modes, among other things. See
-/// the documentation for [`SamplerDescriptor`] for more information.
-///
-/// It can be created with [`Device::create_sampler`].
-///
-/// Corresponds to [WebGPU `GPUSampler`](https://gpuweb.github.io/gpuweb/#sampler-interface).
-#[derive(Debug)]
-pub struct Sampler {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Sampler: Send, Sync);
-
-impl Drop for Sampler {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.sampler_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Describes a [`Surface`].
-///
-/// For use with [`Surface::configure`].
-///
-/// Corresponds to [WebGPU `GPUCanvasConfiguration`](
-/// https://gpuweb.github.io/gpuweb/#canvas-configuration).
-pub type SurfaceConfiguration = wgt::SurfaceConfiguration<Vec<TextureFormat>>;
-static_assertions::assert_impl_all!(SurfaceConfiguration: Send, Sync);
-
-/// Handle to a presentable surface.
-///
-/// A `Surface` represents a platform-specific surface (e.g. a window) onto which rendered images may
-/// be presented. A `Surface` may be created with the function [`Instance::create_surface`].
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context)
-/// serves a similar role.
-pub struct Surface<'window> {
-    context: Arc<C>,
-
-    /// Optionally, keep the source of the handle used for the surface alive.
-    ///
-    /// This is useful for platforms where the surface is created from a window and the surface
-    /// would become invalid when the window is dropped.
-    _handle_source: Option<Box<dyn WindowHandle + 'window>>,
-
-    /// Wgpu-core surface id.
-    id: ObjectId,
-
-    /// Additional surface data returned by [`DynContext::instance_create_surface`].
-    surface_data: Box<Data>,
-
-    // Stores the latest `SurfaceConfiguration` that was set using `Surface::configure`.
-    // It is required to set the attributes of the `SurfaceTexture` in the
-    // `Surface::get_current_texture` method.
-    // Because the `Surface::configure` method operates on an immutable reference, this type has to
-    // be wrapped in a mutex, and since the configuration is only supplied after the surface has
-    // been created, it is additionally wrapped in an option.
-    config: Mutex<Option<SurfaceConfiguration>>,
-}
-
-// This custom implementation is required because [`Surface::_handle_source`] doesn't
-// require [`Debug`](fmt::Debug), which we should not require from the user.
-impl<'window> fmt::Debug for Surface<'window> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("Surface")
-            .field("context", &self.context)
-            .field(
-                "_handle_source",
-                &if self._handle_source.is_some() {
-                    "Some"
-                } else {
-                    "None"
-                },
-            )
-            .field("id", &self.id)
-            .field("data", &self.surface_data)
-            .field("config", &self.config)
-            .finish()
-    }
-}
-
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Surface<'_>: Send, Sync);
-
-impl Drop for Surface<'_> {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .surface_drop(&self.id, self.surface_data.as_ref())
-        }
-    }
-}
-
-/// Super trait for window handles as used in [`SurfaceTarget`].
-pub trait WindowHandle: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
-
-impl<T> WindowHandle for T where T: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
-
-/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with safe surface creation.
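-///
-/// Anything implementing the [`WindowHandle`] trait converts into a
-/// `SurfaceTarget` automatically, so it can be passed straight to
-/// [`Instance::create_surface`]. A minimal sketch (the `window` value is
-/// assumed to come from a windowing library):
-///
-/// ```no_run
-/// # fn example<W: wgpu::WindowHandle + 'static>(window: W) {
-/// let instance = wgpu::Instance::default();
-/// // `window` converts into `SurfaceTarget::Window` via the `From` impl below.
-/// let surface = instance.create_surface(window).unwrap();
-/// # }
-/// ```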
-///
-/// This is either a window or an actual web canvas depending on the platform and
-/// enabled features.
-/// Refer to the individual variants for more information.
-///
-/// See also [`SurfaceTargetUnsafe`] for unsafe variants.
-#[non_exhaustive]
-pub enum SurfaceTarget<'window> {
-    /// Window handle producer.
-    ///
-    /// If the specified display and window handle are not supported by any of the backends, then the surface
-    /// will not be supported by any adapters.
-    ///
-    /// # Errors
-    ///
-    /// - On WebGL2: surface creation returns an error if the browser does not support WebGL2,
-    ///   or declines to provide GPU access (such as due to a resource shortage).
-    ///
-    /// # Panics
-    ///
-    /// - On macOS/Metal: will panic if not called on the main thread.
-    /// - On web: will panic if the `raw_window_handle` does not properly refer to a
-    ///   canvas element.
-    Window(Box<dyn WindowHandle + 'window>),
-
-    /// Surface from a `web_sys::HtmlCanvasElement`.
-    ///
-    /// The `canvas` argument must be a valid `<canvas>` element to
-    /// create a surface upon.
-    ///
-    /// # Errors
-    ///
-    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
-    ///   or declines to provide GPU access (such as due to a resource shortage).
-    #[cfg(any(webgpu, webgl))]
-    Canvas(web_sys::HtmlCanvasElement),
-
-    /// Surface from a `web_sys::OffscreenCanvas`.
-    ///
-    /// The `canvas` argument must be a valid `OffscreenCanvas` object
-    /// to create a surface upon.
-    ///
-    /// # Errors
-    ///
-    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
-    ///   or declines to provide GPU access (such as due to a resource shortage).
-    #[cfg(any(webgpu, webgl))]
-    OffscreenCanvas(web_sys::OffscreenCanvas),
-}
-
-impl<'a, T> From<T> for SurfaceTarget<'a>
-where
-    T: WindowHandle + 'a,
-{
-    fn from(window: T) -> Self {
-        Self::Window(Box::new(window))
-    }
-}
-
-/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with unsafe surface creation.
-///
-/// This is either a window or an actual web canvas depending on the platform and
-/// enabled features.
-/// Refer to the individual variants for more information.
-///
-/// See also [`SurfaceTarget`] for safe variants.
-#[non_exhaustive]
-pub enum SurfaceTargetUnsafe {
-    /// Raw window & display handle.
-    ///
-    /// If the specified display and window handle are not supported by any of the backends, then the surface
-    /// will not be supported by any adapters.
-    ///
-    /// # Safety
-    ///
-    /// - `raw_window_handle` & `raw_display_handle` must be valid objects to create a surface upon.
-    /// - `raw_window_handle` & `raw_display_handle` must remain valid until after the returned
-    ///   [`Surface`] is dropped.
-    RawHandle {
-        /// Raw display handle; the underlying display must outlive the surface created from this.
-        raw_display_handle: raw_window_handle::RawDisplayHandle,
-
-        /// Raw window handle; the underlying window must outlive the surface created from this.
-        raw_window_handle: raw_window_handle::RawWindowHandle,
-    },
-
-    /// Surface from `CoreAnimationLayer`.
-    ///
-    /// # Safety
-    ///
-    /// - `layer` must be a valid object to create a surface upon.
-    #[cfg(metal)]
-    CoreAnimationLayer(*mut std::ffi::c_void),
-
-    /// Surface from `IDCompositionVisual`.
-    ///
-    /// # Safety
-    ///
-    /// - `visual` must be a valid `IDCompositionVisual` to create a surface upon.
-    #[cfg(dx12)]
-    CompositionVisual(*mut std::ffi::c_void),
-
-    /// Surface from DX12 `SurfaceHandle`.
-    ///
-    /// # Safety
-    ///
-    /// - `surface_handle` must be a valid `SurfaceHandle` to create a surface upon.
-    #[cfg(dx12)]
-    SurfaceHandle(*mut std::ffi::c_void),
-
-    /// Surface from DX12 `SwapChainPanel`.
-    ///
-    /// # Safety
-    ///
-    /// - `visual` must be a valid `SwapChainPanel` to create a surface upon.
-    #[cfg(dx12)]
-    SwapChainPanel(*mut std::ffi::c_void),
-}
-
-impl SurfaceTargetUnsafe {
-    /// Creates a [`SurfaceTargetUnsafe::RawHandle`] from a window.
-    ///
-    /// # Safety
-    ///
-    /// - `window` must outlive the resulting surface target
-    ///   (and subsequently the surface created for this target).
-    pub unsafe fn from_window<T>(window: &T) -> Result<Self, raw_window_handle::HandleError>
-    where
-        T: HasDisplayHandle + HasWindowHandle,
-    {
-        Ok(Self::RawHandle {
-            raw_display_handle: window.display_handle()?.as_raw(),
-            raw_window_handle: window.window_handle()?.as_raw(),
-        })
-    }
-}
-
-/// Handle to a binding group layout.
-///
-/// A `BindGroupLayout` is a handle to the GPU-side layout of a binding group. It can be used to
-/// create a [`BindGroupDescriptor`] object, which in turn can be used to create a [`BindGroup`]
-/// object with [`Device::create_bind_group`]. A series of `BindGroupLayout`s can also be used to
-/// create a [`PipelineLayoutDescriptor`], which can be used to create a [`PipelineLayout`].
-///
-/// It can be created with [`Device::create_bind_group_layout`].
-///
-/// Corresponds to [WebGPU `GPUBindGroupLayout`](
-/// https://gpuweb.github.io/gpuweb/#gpubindgrouplayout).
-#[derive(Debug)]
-pub struct BindGroupLayout {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroupLayout: Send, Sync);
-
-impl Drop for BindGroupLayout {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .bind_group_layout_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a binding group.
-///
-/// A `BindGroup` represents the set of resources bound to the bindings described by a
-/// [`BindGroupLayout`]. It can be created with [`Device::create_bind_group`]. A `BindGroup` can
-/// be bound to a particular [`RenderPass`] with [`RenderPass::set_bind_group`], or to a
-/// [`ComputePass`] with [`ComputePass::set_bind_group`].
-///
-/// Corresponds to [WebGPU `GPUBindGroup`](https://gpuweb.github.io/gpuweb/#gpubindgroup).
-#[derive(Debug)]
-pub struct BindGroup {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroup: Send, Sync);
-
-impl Drop for BindGroup {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.bind_group_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a compiled shader module.
-///
-/// A `ShaderModule` represents a compiled shader module on the GPU. It can be created by passing
-/// source code to [`Device::create_shader_module`] or valid SPIR-V binary to
-/// [`Device::create_shader_module_spirv`]. Shader modules are used to define programmable stages
-/// of a pipeline.
-///
-/// Corresponds to [WebGPU `GPUShaderModule`](https://gpuweb.github.io/gpuweb/#shader-module).
-#[derive(Debug)]
-pub struct ShaderModule {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ShaderModule: Send, Sync);
-
-impl Drop for ShaderModule {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .shader_module_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl ShaderModule {
-    /// Get the compilation info for the shader module.
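-    ///
-    /// A minimal sketch of checking a shader for problems, assuming the `wgsl`
-    /// feature (enabled by default) and some async executor to drive the future:
-    ///
-    /// ```no_run
-    /// # async fn example(device: wgpu::Device) {
-    /// let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
-    ///     label: Some("example shader"),
-    ///     source: wgpu::ShaderSource::Wgsl("@compute @workgroup_size(1) fn main() {}".into()),
-    /// });
-    /// for message in module.get_compilation_info().await.messages {
-    ///     eprintln!("{:?}: {}", message.message_type, message.message);
-    /// }
-    /// # }
-    /// ```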
-    pub fn get_compilation_info(&self) -> impl Future<Output = CompilationInfo> + WasmNotSend {
-        self.context
-            .shader_get_compilation_info(&self.id, self.data.as_ref())
-    }
-}
-
-/// Compilation information for a shader module.
-///
-/// Corresponds to [WebGPU `GPUCompilationInfo`](https://gpuweb.github.io/gpuweb/#gpucompilationinfo).
-/// The source locations use bytes, and index a UTF-8 encoded string.
-#[derive(Debug, Clone)]
-pub struct CompilationInfo {
-    /// The messages from the shader compilation process.
-    pub messages: Vec<CompilationMessage>,
-}
-
-/// A single message from the shader compilation process.
-///
-/// Roughly corresponds to [`GPUCompilationMessage`](https://www.w3.org/TR/webgpu/#gpucompilationmessage),
-/// except that the location uses UTF-8 for all positions.
-#[derive(Debug, Clone)]
-pub struct CompilationMessage {
-    /// The text of the message.
-    pub message: String,
-    /// The type of the message.
-    pub message_type: CompilationMessageType,
-    /// Where in the source code the message points at.
-    pub location: Option<SourceLocation>,
-}
-
-/// The type of a compilation message.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum CompilationMessageType {
-    /// An error message.
-    Error,
-    /// A warning message.
-    Warning,
-    /// An informational message.
-    Info,
-}
-
-/// A human-readable representation for a span, tailored for text source.
-///
-/// Roughly corresponds to the positional members of [`GPUCompilationMessage`][gcm] from
-/// the WebGPU specification, except that
-/// - `offset` and `length` are in bytes (UTF-8 code units), instead of UTF-16 code units.
-/// - `line_position` is in bytes (UTF-8 code units), and is usually not directly intended for humans.
-///
-/// [gcm]: https://www.w3.org/TR/webgpu/#gpucompilationmessage
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub struct SourceLocation {
-    /// 1-based line number.
-    pub line_number: u32,
-    /// 1-based column in code units (in bytes) of the start of the span.
-    /// Remember to convert accordingly when displaying to the user.
-    pub line_position: u32,
-    /// 0-based offset in code units (in bytes) of the start of the span.
-    pub offset: u32,
-    /// Length in code units (in bytes) of the span.
-    pub length: u32,
-}
-
-#[cfg(all(feature = "wgsl", wgpu_core))]
-impl From<naga::error::ShaderError<naga::front::wgsl::ParseError>> for CompilationInfo {
-    fn from(value: naga::error::ShaderError<naga::front::wgsl::ParseError>) -> Self {
-        CompilationInfo {
-            messages: vec![CompilationMessage {
-                message: value.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: value.inner.location(&value.source).map(Into::into),
-            }],
-        }
-    }
-}
-#[cfg(feature = "glsl")]
-impl From<naga::error::ShaderError<naga::front::glsl::ParseErrors>> for CompilationInfo {
-    fn from(value: naga::error::ShaderError<naga::front::glsl::ParseErrors>) -> Self {
-        let messages = value
-            .inner
-            .errors
-            .into_iter()
-            .map(|err| CompilationMessage {
-                message: err.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: err.location(&value.source).map(Into::into),
-            })
-            .collect();
-        CompilationInfo { messages }
-    }
-}
-
-#[cfg(feature = "spirv")]
-impl From<naga::error::ShaderError<naga::front::spv::Error>> for CompilationInfo {
-    fn from(value: naga::error::ShaderError<naga::front::spv::Error>) -> Self {
-        CompilationInfo {
-            messages: vec![CompilationMessage {
-                message: value.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: None,
-            }],
-        }
-    }
-}
-
-#[cfg(any(wgpu_core, naga))]
-impl From<naga::error::ShaderError<naga::WithSpan<naga::valid::ValidationError>>>
-    for CompilationInfo
-{
-    fn from(value: naga::error::ShaderError<naga::WithSpan<naga::valid::ValidationError>>) -> Self {
-        CompilationInfo {
-            messages: vec![CompilationMessage {
-                message: value.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: value.inner.location(&value.source).map(Into::into),
-            }],
-        }
-    }
-}
-
-#[cfg(any(wgpu_core, naga))]
-impl From<naga::SourceLocation> for SourceLocation {
-    fn from(value: naga::SourceLocation) -> Self {
-        SourceLocation {
-            length: value.length,
-            offset: value.offset,
-            line_number: value.line_number,
-            line_position: value.line_position,
-        }
-    }
-}
-
-/// Source of a shader module.
-///
-/// The source will be parsed and validated.
-///
-/// Any necessary shader translation (e.g. from WGSL to SPIR-V or vice versa)
-/// will be done internally by wgpu.
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// only WGSL source code strings are accepted.
-#[cfg_attr(feature = "naga-ir", allow(clippy::large_enum_variant))]
-#[derive(Clone, Debug)]
-#[non_exhaustive]
-pub enum ShaderSource<'a> {
-    /// SPIR-V module represented as a slice of words.
-    ///
-    /// See also: [`util::make_spirv`], [`include_spirv`]
-    #[cfg(feature = "spirv")]
-    SpirV(Cow<'a, [u32]>),
-    /// GLSL module as a string slice.
-    ///
-    /// Note: GLSL is not yet fully supported and must be a specific ShaderStage.
-    #[cfg(feature = "glsl")]
-    Glsl {
-        /// The source code of the shader.
-        shader: Cow<'a, str>,
-        /// The shader stage that the shader targets. For example, `naga::ShaderStage::Vertex`
-        stage: naga::ShaderStage,
-        /// Defines to unlock configured shader features.
-        defines: naga::FastHashMap<String, String>,
-    },
-    /// WGSL module as a string slice.
-    #[cfg(feature = "wgsl")]
-    Wgsl(Cow<'a, str>),
-    /// Naga module.
-    #[cfg(feature = "naga-ir")]
-    Naga(Cow<'static, naga::Module>),
-    /// Dummy variant because `Naga` doesn't have a lifetime and without enough active features it
-    /// could be the last one active.
-    #[doc(hidden)]
-    Dummy(PhantomData<&'a ()>),
-}
-static_assertions::assert_impl_all!(ShaderSource<'_>: Send, Sync);
-
-/// Descriptor for use with [`Device::create_shader_module`].
-///
-/// Corresponds to [WebGPU `GPUShaderModuleDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpushadermoduledescriptor).
-#[derive(Clone, Debug)]
-pub struct ShaderModuleDescriptor<'a> {
-    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Source code for the shader.
-    pub source: ShaderSource<'a>,
-}
-static_assertions::assert_impl_all!(ShaderModuleDescriptor<'_>: Send, Sync);
-
-/// Descriptor for a shader module given by SPIR-V binary, for use with
-/// [`Device::create_shader_module_spirv`].
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// only WGSL source code strings are accepted.
-#[derive(Debug)]
-pub struct ShaderModuleDescriptorSpirV<'a> {
-    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Binary SPIR-V data, in 4-byte words.
-    pub source: Cow<'a, [u32]>,
-}
-static_assertions::assert_impl_all!(ShaderModuleDescriptorSpirV<'_>: Send, Sync);
-
-/// Handle to a pipeline layout.
-///
-/// A `PipelineLayout` object describes the available binding groups of a pipeline.
-/// It can be created with [`Device::create_pipeline_layout`].
-///
-/// Corresponds to [WebGPU `GPUPipelineLayout`](https://gpuweb.github.io/gpuweb/#gpupipelinelayout).
-#[derive(Debug)]
-pub struct PipelineLayout {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineLayout: Send, Sync);
-
-impl Drop for PipelineLayout {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .pipeline_layout_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a rendering (graphics) pipeline.
-///
-/// A `RenderPipeline` object represents a graphics pipeline and its stages, bindings, vertex
-/// buffers and targets. It can be created with [`Device::create_render_pipeline`].
-///
-/// Corresponds to [WebGPU `GPURenderPipeline`](https://gpuweb.github.io/gpuweb/#render-pipeline).
-#[derive(Debug)]
-pub struct RenderPipeline {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPipeline: Send, Sync);
-
-impl Drop for RenderPipeline {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .render_pipeline_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl RenderPipeline {
-    /// Get an object representing the bind group layout at a given index.
-    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
-        let context = Arc::clone(&self.context);
-        let (id, data) =
-            self.context
-                .render_pipeline_get_bind_group_layout(&self.id, self.data.as_ref(), index);
-        BindGroupLayout { context, id, data }
-    }
-}
-
-/// Handle to a compute pipeline.
-///
-/// A `ComputePipeline` object represents a compute pipeline and its single shader stage.
-/// It can be created with [`Device::create_compute_pipeline`].
-///
-/// Corresponds to [WebGPU `GPUComputePipeline`](https://gpuweb.github.io/gpuweb/#compute-pipeline).
-#[derive(Debug)]
-pub struct ComputePipeline {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePipeline: Send, Sync);
-
-impl Drop for ComputePipeline {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .compute_pipeline_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl ComputePipeline {
-    /// Get an object representing the bind group layout at a given index.
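-    ///
-    /// With an implicit pipeline layout (`layout: None` at pipeline creation),
-    /// this is the usual way to obtain a layout compatible with the pipeline.
-    /// A short sketch (the pipeline and its resources are assumed to exist):
-    ///
-    /// ```no_run
-    /// # let device: wgpu::Device = todo!();
-    /// # let pipeline: wgpu::ComputePipeline = todo!();
-    /// let layout = pipeline.get_bind_group_layout(0);
-    /// let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
-    ///     label: None,
-    ///     layout: &layout,
-    ///     entries: &[], // bindings matching `@group(0)` in the shader go here
-    /// });
-    /// ```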
-    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
-        let context = Arc::clone(&self.context);
-        let (id, data) = self.context.compute_pipeline_get_bind_group_layout(
-            &self.id,
-            self.data.as_ref(),
-            index,
-        );
-        BindGroupLayout { context, id, data }
-    }
-}
-
-/// Handle to a pipeline cache, which is used to accelerate
-/// creating [`RenderPipeline`]s and [`ComputePipeline`]s
-/// in subsequent executions.
-///
-/// This reuse is only applicable for the same or similar devices.
-/// See [`util::pipeline_cache_key`] for some details.
-///
-/// # Background
-///
-/// In most GPU drivers, shader code must be converted into machine code
-/// which can be executed on the GPU.
-/// Generating this machine code can require a lot of computation.
-/// Pipeline caches allow this computation to be reused between executions
-/// of the program.
-/// This can be very useful for reducing program startup time.
-///
-/// Note that most desktop GPU drivers will manage their own caches,
-/// meaning that little advantage can be gained from this on those platforms.
-/// However, on some platforms, especially Android, drivers leave this to the
-/// application to implement.
-///
-/// Unfortunately, drivers do not expose whether they manage their own caches.
-/// Some reasonable policies for applications to use are:
-/// - Manage their own pipeline cache on all platforms
-/// - Only manage pipeline caches on Android
-///
-/// # Usage
-///
-/// It is valid to use this resource when creating multiple pipelines, in
-/// which case it will likely cache each of those pipelines.
-/// It is also valid to create a new cache for each pipeline.
-///
-/// This resource is most useful when the data produced from it (using
-/// [`PipelineCache::get_data`]) is persisted.
-/// Care should be taken that pipeline caches are only used for the same device,
-/// as pipeline caches from other devices, even compatible ones, are unlikely to
-/// provide any advantage. `util::pipeline_cache_key` can be used as a
-/// file/directory name to help ensure that.
-///
-/// It is recommended to store pipeline caches atomically. If persisting to disk,
-/// this can usually be achieved by creating a temporary file, then moving/[renaming]
-/// the temporary file over the existing cache.
-///
-/// # Storage Usage
-///
-/// There is not currently an API available to reduce the size of a cache.
-/// This is due to limitations in the underlying graphics APIs used.
-/// This is especially impactful if your application is updated, leaving
-/// previous caches no longer in use.
-///
-/// One option to work around this is to regenerate the cache.
-/// That is, create the pipelines which your program runs using
-/// the stored cache data, then recreate the *same* pipelines
-/// using a new cache, which your application then stores.
-///
-/// # Implementations
-///
-/// This resource currently only works on the following backends:
-/// - Vulkan
-///
-/// This type is unique to the Rust API of `wgpu`.
-///
-/// [renaming]: std::fs::rename
-#[derive(Debug)]
-pub struct PipelineCache {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineCache: Send, Sync);
-
-impl PipelineCache {
-    /// Get the data associated with this pipeline cache.
-    /// The data format is an implementation detail of `wgpu`.
-    /// The only defined operation on this data is setting it as the `data` field
-    /// on [`PipelineCacheDescriptor`], which is then passed to [`Device::create_pipeline_cache`].
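-    ///
-    /// For instance, a sketch of persisting the cache to disk, following the
-    /// atomic-rename advice above (the file names are illustrative, not part
-    /// of `wgpu`):
-    ///
-    /// ```no_run
-    /// # fn example(cache: wgpu::PipelineCache) -> std::io::Result<()> {
-    /// if let Some(data) = cache.get_data() {
-    ///     // Write a temporary file, then rename it over the previous cache.
-    ///     std::fs::write("pipeline_cache.tmp", &data)?;
-    ///     std::fs::rename("pipeline_cache.tmp", "pipeline_cache.bin")?;
-    /// }
-    /// # Ok(())
-    /// # }
-    /// ```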
-    ///
-    /// This function is unique to the Rust API of `wgpu`.
-    pub fn get_data(&self) -> Option<Vec<u8>> {
-        self.context
-            .pipeline_cache_get_data(&self.id, self.data.as_ref())
-    }
-}
-
-impl Drop for PipelineCache {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .pipeline_cache_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a command buffer on the GPU.
-///
-/// A `CommandBuffer` represents a complete sequence of commands that may be submitted to a command
-/// queue with [`Queue::submit`]. A `CommandBuffer` is obtained by recording a series of commands to
-/// a [`CommandEncoder`] and then calling [`CommandEncoder::finish`].
-///
-/// Corresponds to [WebGPU `GPUCommandBuffer`](https://gpuweb.github.io/gpuweb/#command-buffer).
-#[derive(Debug)]
-pub struct CommandBuffer {
-    context: Arc<C>,
-    id: Option<ObjectId>,
-    data: Option<Box<Data>>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(CommandBuffer: Send, Sync);
-
-impl Drop for CommandBuffer {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            if let Some(id) = self.id.take() {
-                self.context
-                    .command_buffer_drop(&id, self.data.take().unwrap().as_ref());
-            }
-        }
-    }
-}
-
-/// Encodes a series of GPU operations.
-///
-/// A command encoder can record [`RenderPass`]es, [`ComputePass`]es,
-/// and transfer operations between driver-managed resources like [`Buffer`]s and [`Texture`]s.
-///
-/// When finished recording, call [`CommandEncoder::finish`] to obtain a [`CommandBuffer`] which may
-/// be submitted for execution.
-///
-/// Corresponds to [WebGPU `GPUCommandEncoder`](https://gpuweb.github.io/gpuweb/#command-encoder).
-#[derive(Debug)]
-pub struct CommandEncoder {
-    context: Arc<C>,
-    id: Option<ObjectId>,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(CommandEncoder: Send, Sync);
-
-impl Drop for CommandEncoder {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            if let Some(id) = self.id.take() {
-                self.context.command_encoder_drop(&id, self.data.as_ref());
-            }
-        }
-    }
-}
-
-/// In-progress recording of a render pass: a list of render commands in a [`CommandEncoder`].
-///
-/// It can be created with [`CommandEncoder::begin_render_pass()`], whose [`RenderPassDescriptor`]
-/// specifies the attachments (textures) that will be rendered to.
-///
-/// Most of the methods on `RenderPass` serve one of two purposes, identifiable by their names:
-///
-/// * `draw_*()`: Drawing (that is, encoding a render command, which, when executed by the GPU, will
-///   rasterize something and execute shaders).
-/// * `set_*()`: Setting part of the [render state](https://gpuweb.github.io/gpuweb/#renderstate)
-///   for future drawing commands.
-///
-/// A render pass may contain any number of drawing commands, and before/between each command the
-/// render state may be updated however you wish; each drawing command will be executed using the
-/// render state that has been set when the `draw_*()` function is called.
-///
-/// Corresponds to [WebGPU `GPURenderPassEncoder`](
-/// https://gpuweb.github.io/gpuweb/#render-pass-encoder).
-#[derive(Debug)]
-pub struct RenderPass<'encoder> {
-    /// The inner data of the render pass, separated out so it's easy to replace the lifetime with 'static if desired.
-    inner: RenderPassInner,
-
-    /// This lifetime is used to protect the [`CommandEncoder`] from being used
-    /// while the pass is alive.
-    encoder_guard: PhantomData<&'encoder ()>,
-}
-
-#[derive(Debug)]
-struct RenderPassInner {
-    id: ObjectId,
-    data: Box<Data>,
-    context: Arc<C>,
-}
-
-/// In-progress recording of a compute pass.
-///
-/// It can be created with [`CommandEncoder::begin_compute_pass`].
-///
-/// Corresponds to [WebGPU `GPUComputePassEncoder`](
-/// https://gpuweb.github.io/gpuweb/#compute-pass-encoder).
-#[derive(Debug)]
-pub struct ComputePass<'encoder> {
-    /// The inner data of the compute pass, separated out so it's easy to replace the lifetime with 'static if desired.
-    inner: ComputePassInner,
-
-    /// This lifetime is used to protect the [`CommandEncoder`] from being used
-    /// while the pass is alive.
-    encoder_guard: PhantomData<&'encoder ()>,
-}
-
-#[derive(Debug)]
-struct ComputePassInner {
-    id: ObjectId,
-    data: Box<Data>,
-    context: Arc<C>,
-}
-
-/// Encodes a series of GPU operations into a reusable "render bundle".
-///
-/// It only supports a handful of render commands, but it makes them reusable.
-/// It can be created with [`Device::create_render_bundle_encoder`].
-/// The resulting bundle can be executed in a [`RenderPass`] using [`RenderPass::execute_bundles`].
-///
-/// Executing a [`RenderBundle`] is often more efficient than issuing the underlying commands
-/// manually.
-///
-/// Corresponds to [WebGPU `GPURenderBundleEncoder`](
-/// https://gpuweb.github.io/gpuweb/#gpurenderbundleencoder).
-#[derive(Debug)]
-pub struct RenderBundleEncoder<'a> {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-    parent: &'a Device,
-    /// This type should be !Send !Sync, because it represents an allocation on this thread's
-    /// command buffer.
-    _p: PhantomData<*const u8>,
-}
-static_assertions::assert_not_impl_any!(RenderBundleEncoder<'_>: Send, Sync);
-
-/// Pre-prepared reusable bundle of GPU operations.
-///
-/// It only supports a handful of render commands, but it makes them reusable. Executing a
-/// [`RenderBundle`] is often more efficient than issuing the underlying commands manually.
-///
-/// It can be created by use of a [`RenderBundleEncoder`], and executed in a [`RenderPass`]
-/// using [`RenderPass::execute_bundles`].
-///
-/// Corresponds to [WebGPU `GPURenderBundle`](https://gpuweb.github.io/gpuweb/#render-bundle).
-#[derive(Debug)]
-pub struct RenderBundle {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderBundle: Send, Sync);
-
-impl Drop for RenderBundle {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .render_bundle_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a query set.
-///
-/// It can be created with [`Device::create_query_set`].
-///
-/// Corresponds to [WebGPU `GPUQuerySet`](https://gpuweb.github.io/gpuweb/#queryset).
-#[derive(Debug)]
-pub struct QuerySet {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(QuerySet: Send, Sync);
-
-impl Drop for QuerySet {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.query_set_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a command queue on a device.
-///
-/// A `Queue` executes recorded [`CommandBuffer`] objects and provides convenience methods
-/// for writing to [buffers](Queue::write_buffer) and [textures](Queue::write_texture).
-/// It can be created along with a [`Device`] by calling [`Adapter::request_device`].
-///
-/// Corresponds to [WebGPU `GPUQueue`](https://gpuweb.github.io/gpuweb/#gpu-queue).
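-///
-/// A sketch of a typical submission (recording details omitted; `device` and
-/// `queue` are assumed to exist):
-///
-/// ```no_run
-/// # let device: wgpu::Device = todo!();
-/// # let queue: wgpu::Queue = todo!();
-/// let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
-/// // ... record render/compute passes and copies into `encoder` here ...
-/// queue.submit([encoder.finish()]);
-/// ```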
-#[derive(Debug)]
-pub struct Queue {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Queue: Send, Sync);
-
-impl Drop for Queue {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.queue_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Resource that can be bound to a pipeline.
-///
-/// Corresponds to [WebGPU `GPUBindingResource`](
-/// https://gpuweb.github.io/gpuweb/#typedefdef-gpubindingresource).
-#[non_exhaustive]
-#[derive(Clone, Debug)]
-pub enum BindingResource<'a> {
-    /// Binding is backed by a buffer.
-    ///
-    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
-    /// with [`BindGroupLayoutEntry::count`] set to None.
-    Buffer(BufferBinding<'a>),
-    /// Binding is backed by an array of buffers.
-    ///
-    /// [`Features::BUFFER_BINDING_ARRAY`] must be supported to use this feature.
-    ///
-    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
-    /// with [`BindGroupLayoutEntry::count`] set to Some.
-    BufferArray(&'a [BufferBinding<'a>]),
-    /// Binding is a sampler.
-    ///
-    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set to None.
-    Sampler(&'a Sampler),
-    /// Binding is backed by an array of samplers.
-    ///
-    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
-    ///
-    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set
-    /// to Some.
-    SamplerArray(&'a [&'a Sampler]),
-    /// Binding is backed by a texture.
-    ///
-    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
-    /// [`BindGroupLayoutEntry::count`] set to None.
-    TextureView(&'a TextureView),
-    /// Binding is backed by an array of textures.
-    ///
-    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
-    ///
-    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
-    /// [`BindGroupLayoutEntry::count`] set to Some.
-    TextureViewArray(&'a [&'a TextureView]),
-    /// Todo
-    AccelerationStructure(&'a crate::ray_tracing::Tlas),
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindingResource<'_>: Send, Sync);
-
-/// Describes the segment of a buffer to bind.
-///
-/// Corresponds to [WebGPU `GPUBufferBinding`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferbinding).
-#[derive(Clone, Debug)]
-pub struct BufferBinding<'a> {
-    /// The buffer to bind.
-    pub buffer: &'a Buffer,
-
-    /// Base offset of the buffer, in bytes.
-    ///
-    /// If the [`has_dynamic_offset`] field of this buffer's layout entry is
-    /// `true`, the offset here will be added to the dynamic offset passed to
-    /// [`RenderPass::set_bind_group`] or [`ComputePass::set_bind_group`].
-    ///
-    /// If the buffer was created with [`BufferUsages::UNIFORM`], then this
-    /// offset must be a multiple of
-    /// [`Limits::min_uniform_buffer_offset_alignment`].
-    ///
-    /// If the buffer was created with [`BufferUsages::STORAGE`], then this
-    /// offset must be a multiple of
-    /// [`Limits::min_storage_buffer_offset_alignment`].
-    ///
-    /// [`has_dynamic_offset`]: BindingType::Buffer::has_dynamic_offset
-    pub offset: BufferAddress,
-
-    /// Size of the binding in bytes, or `None` for using the rest of the buffer.
-    pub size: Option<BufferSize>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BufferBinding<'_>: Send, Sync);
-
-/// Operation to perform to the output attachment at the start of a render pass.
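-///
-/// For example, a color attachment that clears to black at the start of the
-/// pass and stores the result (a sketch; the texture view is assumed to exist):
-///
-/// ```no_run
-/// # let view: wgpu::TextureView = todo!();
-/// let attachment = wgpu::RenderPassColorAttachment {
-///     view: &view,
-///     resolve_target: None,
-///     ops: wgpu::Operations {
-///         load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
-///         store: wgpu::StoreOp::Store,
-///     },
-/// };
-/// ```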
-///
-/// Corresponds to [WebGPU `GPULoadOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpuloadop),
-/// plus the corresponding clearValue.
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub enum LoadOp<V> {
-    /// Loads the specified value for this attachment into the render pass.
-    ///
-    /// On some GPU hardware (primarily mobile), "clear" is significantly cheaper
-    /// because it avoids loading data from main memory into tile-local memory.
-    ///
-    /// On other GPU hardware, there isn’t a significant difference.
-    ///
-    /// As a result, it is recommended to use "clear" rather than "load" in cases
-    /// where the initial value doesn’t matter
-    /// (e.g. the render target will be cleared using a skybox).
-    Clear(V),
-    /// Loads the existing value for this attachment into the render pass.
-    Load,
-}
-
-impl<V: Default> Default for LoadOp<V> {
-    fn default() -> Self {
-        Self::Clear(Default::default())
-    }
-}
-
-/// Operation to perform to the output attachment at the end of a render pass.
-///
-/// Corresponds to [WebGPU `GPUStoreOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpustoreop).
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub enum StoreOp {
-    /// Stores the resulting value of the render pass for this attachment.
-    #[default]
-    Store,
-    /// Discards the resulting value of the render pass for this attachment.
-    ///
-    /// The attachment will be treated as uninitialized afterwards.
-    /// (If only one of the depth and stencil texture-aspects is set to `Discard`,
-    /// the other texture-aspect will be preserved.)
-    ///
-    /// This can be significantly faster on tile-based render hardware.
-    ///
-    /// Prefer this if the attachment is not read by subsequent passes.
-    Discard,
-}
-
-/// Pair of load and store operations for an attachment aspect.
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// separate `loadOp` and `storeOp` fields are used instead.
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct Operations<V> {
-    /// How data should be read through this attachment.
-    pub load: LoadOp<V>,
-    /// Whether data will be written to through this attachment.
-    ///
-    /// Note that resolve textures (if specified) are always written to,
-    /// regardless of this setting.
-    pub store: StoreOp,
-}
-
-impl<V: Default> Default for Operations<V> {
-    #[inline]
-    fn default() -> Self {
-        Self {
-            load: LoadOp::<V>::default(),
-            store: StoreOp::default(),
-        }
-    }
-}
-
-/// Describes the timestamp writes of a render pass.
-///
-/// For use with [`RenderPassDescriptor`].
-/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
-///
-/// Corresponds to [WebGPU `GPURenderPassTimestampWrites`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrites).
-#[derive(Clone, Debug)]
-pub struct RenderPassTimestampWrites<'a> {
-    /// The query set to write to.
-    pub query_set: &'a QuerySet,
-    /// The index of the query set at which a start timestamp of this pass is written, if any.
-    pub beginning_of_pass_write_index: Option<u32>,
-    /// The index of the query set at which an end timestamp of this pass is written, if any.
-    pub end_of_pass_write_index: Option<u32>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassTimestampWrites<'_>: Send, Sync);
-
-/// Describes a color attachment to a [`RenderPass`].
-///
-/// For use with [`RenderPassDescriptor`].
-///
-/// Corresponds to [WebGPU `GPURenderPassColorAttachment`](
-/// https://gpuweb.github.io/gpuweb/#color-attachments).
-#[derive(Clone, Debug)]
-pub struct RenderPassColorAttachment<'tex> {
-    /// The view to use as an attachment.
-    pub view: &'tex TextureView,
-    /// The view that will receive the resolved output if multisampling is used.
-    ///
-    /// If set, it is always written to, regardless of how [`Self::ops`] is configured.
-    pub resolve_target: Option<&'tex TextureView>,
-    /// What operations will be performed on this color attachment.
-    pub ops: Operations<Color>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassColorAttachment<'_>: Send, Sync);
-
-/// Describes a depth/stencil attachment to a [`RenderPass`].
-///
-/// For use with [`RenderPassDescriptor`].
-///
-/// Corresponds to [WebGPU `GPURenderPassDepthStencilAttachment`](
-/// https://gpuweb.github.io/gpuweb/#depth-stencil-attachments).
-#[derive(Clone, Debug)]
-pub struct RenderPassDepthStencilAttachment<'tex> {
-    /// The view to use as an attachment.
-    pub view: &'tex TextureView,
-    /// What operations will be performed on the depth part of the attachment.
-    pub depth_ops: Option<Operations<f32>>,
-    /// What operations will be performed on the stencil part of the attachment.
-    pub stencil_ops: Option<Operations<u32>>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassDepthStencilAttachment<'_>: Send, Sync);
-
-// The underlying types are also exported so that documentation shows up for them
-
-/// Object debugging label.
-pub type Label<'a> = Option<&'a str>;
-pub use wgt::RequestAdapterOptions as RequestAdapterOptionsBase;
-/// Additional information required when requesting an adapter.
-///
-/// For use with [`Instance::request_adapter`].
-///
-/// Corresponds to [WebGPU `GPURequestAdapterOptions`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurequestadapteroptions).
-pub type RequestAdapterOptions<'a, 'b> = RequestAdapterOptionsBase<&'a Surface<'b>>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RequestAdapterOptions<'_, '_>: Send, Sync);
-/// Describes a [`Device`].
-///
-/// For use with [`Adapter::request_device`].
-///
-/// Corresponds to [WebGPU `GPUDeviceDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpudevicedescriptor).
-pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(DeviceDescriptor<'_>: Send, Sync);
-/// Describes a [`Buffer`].
-///
-/// For use with [`Device::create_buffer`].
-///
-/// Corresponds to [WebGPU `GPUBufferDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferdescriptor).
-pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(BufferDescriptor<'_>: Send, Sync);
-/// Describes a [`CommandEncoder`].
-///
-/// For use with [`Device::create_command_encoder`].
-///
-/// Corresponds to [WebGPU `GPUCommandEncoderDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucommandencoderdescriptor).
-pub type CommandEncoderDescriptor<'a> = wgt::CommandEncoderDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(CommandEncoderDescriptor<'_>: Send, Sync);
-/// Describes a [`RenderBundle`].
-///
-/// For use with [`RenderBundleEncoder::finish`].
-///
-/// Corresponds to [WebGPU `GPURenderBundleDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundledescriptor).
-pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(RenderBundleDescriptor<'_>: Send, Sync);
-/// Describes a [`Texture`].
-///
-/// For use with [`Device::create_texture`].
-///
-/// Corresponds to [WebGPU `GPUTextureDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gputexturedescriptor).
-pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>, &'a [TextureFormat]>;
-static_assertions::assert_impl_all!(TextureDescriptor<'_>: Send, Sync);
-/// Describes a [`QuerySet`].
-///
-/// For use with [`Device::create_query_set`].
-///
-/// Corresponds to [WebGPU `GPUQuerySetDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuquerysetdescriptor).
-pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(QuerySetDescriptor<'_>: Send, Sync);
-pub use wgt::Maintain as MaintainBase;
-/// Passed to [`Device::poll`] to control how and if it should block.
-pub type Maintain = wgt::Maintain<SubmissionIndex>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Maintain: Send, Sync);
-
-/// Describes a [`TextureView`].
-///
-/// For use with [`Texture::create_view`].
-///
-/// Corresponds to [WebGPU `GPUTextureViewDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gputextureviewdescriptor).
-#[derive(Clone, Debug, Default, Eq, PartialEq)]
-pub struct TextureViewDescriptor<'a> {
-    /// Debug label of the texture view. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Format of the texture view. Either must be the same as the texture format or in the list
-    /// of `view_formats` in the texture's descriptor.
-    pub format: Option<TextureFormat>,
-    /// The dimension of the texture view. For 1D textures, this must be `D1`. For 2D textures it must be one of
-    /// `D2`, `D2Array`, `Cube`, or `CubeArray`. For 3D textures it must be `D3`.
-    pub dimension: Option<TextureViewDimension>,
-    /// Aspect of the texture. Color textures must be [`TextureAspect::All`].
-    pub aspect: TextureAspect,
-    /// Base mip level.
-    pub base_mip_level: u32,
-    /// Mip level count.
-    /// If `Some(count)`, `base_mip_level + count` must be less than or equal to the underlying texture mip count.
-    /// If `None`, considered to include the rest of the mipmap levels, but at least 1 in total.
-    pub mip_level_count: Option<u32>,
-    /// Base array layer.
-    pub base_array_layer: u32,
-    /// Layer count.
-    /// If `Some(count)`, `base_array_layer + count` must be less than or equal to the underlying array count.
-    /// If `None`, considered to include the rest of the array layers, but at least 1 in total.
-    pub array_layer_count: Option<u32>,
-}
-static_assertions::assert_impl_all!(TextureViewDescriptor<'_>: Send, Sync);
-
-/// Describes a [`PipelineLayout`].
-///
-/// For use with [`Device::create_pipeline_layout`].
-///
-/// Corresponds to [WebGPU `GPUPipelineLayoutDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpupipelinelayoutdescriptor).
-#[derive(Clone, Debug, Default)]
-pub struct PipelineLayoutDescriptor<'a> {
-    /// Debug label of the pipeline layout. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Bind groups that this pipeline uses. The first entry will provide all the bindings for
-    /// "set = 0", the second entry will provide all the bindings for "set = 1", etc.
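-    ///
-    /// For example, with the two (assumed) layouts below, resources declared
-    /// with `@group(0)` in WGSL resolve against `per_frame_layout`, and
-    /// `@group(1)` against `per_object_layout` (a sketch):
-    ///
-    /// ```no_run
-    /// # let device: wgpu::Device = todo!();
-    /// # let per_frame_layout: wgpu::BindGroupLayout = todo!();
-    /// # let per_object_layout: wgpu::BindGroupLayout = todo!();
-    /// let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
-    ///     label: None,
-    ///     bind_group_layouts: &[&per_frame_layout, &per_object_layout],
-    ///     push_constant_ranges: &[],
-    /// });
-    /// ```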
-    pub bind_group_layouts: &'a [&'a BindGroupLayout],
-    /// Set of push constant ranges this pipeline uses. Each shader stage that uses push constants
-    /// must define the range in push constant memory that corresponds to its single `layout(push_constant)`
-    /// uniform block.
-    ///
-    /// If this array is non-empty, [`Features::PUSH_CONSTANTS`] must be enabled.
-    pub push_constant_ranges: &'a [PushConstantRange],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineLayoutDescriptor<'_>: Send, Sync);
-
-/// Describes a [`Sampler`].
-///
-/// For use with [`Device::create_sampler`].
-///
-/// Corresponds to [WebGPU `GPUSamplerDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpusamplerdescriptor).
-#[derive(Clone, Debug, PartialEq)]
-pub struct SamplerDescriptor<'a> {
-    /// Debug label of the sampler. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// How to deal with out of bounds accesses in the u (i.e. x) direction.
-    pub address_mode_u: AddressMode,
-    /// How to deal with out of bounds accesses in the v (i.e. y) direction.
-    pub address_mode_v: AddressMode,
-    /// How to deal with out of bounds accesses in the w (i.e. z) direction.
-    pub address_mode_w: AddressMode,
-    /// How to filter the texture when it needs to be magnified (made larger).
-    pub mag_filter: FilterMode,
-    /// How to filter the texture when it needs to be minified (made smaller).
-    pub min_filter: FilterMode,
-    /// How to filter between mip map levels.
-    pub mipmap_filter: FilterMode,
-    /// Minimum level of detail (i.e. mip level) to use.
-    pub lod_min_clamp: f32,
-    /// Maximum level of detail (i.e. mip level) to use.
-    pub lod_max_clamp: f32,
-    /// If this is enabled, this is a comparison sampler using the given comparison function.
-    pub compare: Option<CompareFunction>,
-    /// Must be at least 1. If this is not 1, all filter modes must be linear.
-    pub anisotropy_clamp: u16,
-    /// Border color to use when `address_mode` is [`AddressMode::ClampToBorder`].
-    pub border_color: Option<SamplerBorderColor>,
-}
-static_assertions::assert_impl_all!(SamplerDescriptor<'_>: Send, Sync);
-
-impl Default for SamplerDescriptor<'_> {
-    fn default() -> Self {
-        Self {
-            label: None,
-            address_mode_u: Default::default(),
-            address_mode_v: Default::default(),
-            address_mode_w: Default::default(),
-            mag_filter: Default::default(),
-            min_filter: Default::default(),
-            mipmap_filter: Default::default(),
-            lod_min_clamp: 0.0,
-            lod_max_clamp: 32.0,
-            compare: None,
-            anisotropy_clamp: 1,
-            border_color: None,
-        }
-    }
-}
-
-/// An element of a [`BindGroupDescriptor`], consisting of a bindable resource
-/// and the slot to bind it to.
-///
-/// Corresponds to [WebGPU `GPUBindGroupEntry`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupentry).
-#[derive(Clone, Debug)]
-pub struct BindGroupEntry<'a> {
-    /// Slot for which binding provides resource. Corresponds to an entry of the same
-    /// binding index in the [`BindGroupLayoutDescriptor`].
-    pub binding: u32,
-    /// Resource to attach to the binding.
-    pub resource: BindingResource<'a>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroupEntry<'_>: Send, Sync);
-
-/// Describes a group of bindings and the resources to be bound.
-///
-/// For use with [`Device::create_bind_group`].
-///
-/// Corresponds to [WebGPU `GPUBindGroupDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupdescriptor).
-#[derive(Clone, Debug)]
-pub struct BindGroupDescriptor<'a> {
-    /// Debug label of the bind group. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The [`BindGroupLayout`] that corresponds to this bind group.
-    pub layout: &'a BindGroupLayout,
-    /// The resources to bind to this bind group.
-    pub entries: &'a [BindGroupEntry<'a>],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroupDescriptor<'_>: Send, Sync);
-
-/// Describes the attachments of a render pass.
-///
-/// For use with [`CommandEncoder::begin_render_pass`].
-///
-/// Corresponds to [WebGPU `GPURenderPassDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpassdescriptor).
-#[derive(Clone, Debug, Default)]
-pub struct RenderPassDescriptor<'a> {
-    /// Debug label of the render pass. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The color attachments of the render pass.
-    pub color_attachments: &'a [Option<RenderPassColorAttachment<'a>>],
-    /// The depth and stencil attachment of the render pass, if any.
-    pub depth_stencil_attachment: Option<RenderPassDepthStencilAttachment<'a>>,
-    /// Defines which timestamp values will be written for this pass, and where to write them to.
-    ///
-    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
-    pub timestamp_writes: Option<RenderPassTimestampWrites<'a>>,
-    /// Defines where the occlusion query results will be stored for this pass.
-    pub occlusion_query_set: Option<&'a QuerySet>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassDescriptor<'_>: Send, Sync);
-
-/// Describes how the vertex buffer is interpreted.
-///
-/// For use in [`VertexState`].
-///
-/// Corresponds to [WebGPU `GPUVertexBufferLayout`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexbufferlayout).
-#[derive(Clone, Debug, Hash, Eq, PartialEq)]
-pub struct VertexBufferLayout<'a> {
-    /// The stride, in bytes, between elements of this buffer.
-    pub array_stride: BufferAddress,
-    /// How often this vertex buffer is "stepped" forward.
-    pub step_mode: VertexStepMode,
-    /// The list of attributes which comprise a single vertex.
-    pub attributes: &'a [VertexAttribute],
-}
-static_assertions::assert_impl_all!(VertexBufferLayout<'_>: Send, Sync);
-
-/// Describes the vertex processing in a render pipeline.
-///
-/// For use in [`RenderPipelineDescriptor`].
-///
-/// Corresponds to [WebGPU `GPUVertexState`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexstate).
-#[derive(Clone, Debug)]
-pub struct VertexState<'a> {
-    /// The compiled shader module for this stage.
-    pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// in the shader.
-    pub entry_point: &'a str,
-    /// Advanced options for when this pipeline is compiled.
-    ///
-    /// This implements `Default`, and for most users can be set to `Default::default()`.
-    pub compilation_options: PipelineCompilationOptions<'a>,
-    /// The format of any vertex buffers used with this pipeline.
-    pub buffers: &'a [VertexBufferLayout<'a>],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(VertexState<'_>: Send, Sync);
-
-/// Describes the fragment processing in a render pipeline.
-///
-/// For use in [`RenderPipelineDescriptor`].
-///
-/// Corresponds to [WebGPU `GPUFragmentState`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpufragmentstate).
-#[derive(Clone, Debug)]
-pub struct FragmentState<'a> {
-    /// The compiled shader module for this stage.
-    pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// in the shader.
-    pub entry_point: &'a str,
-    /// Advanced options for when this pipeline is compiled.
-    ///
-    /// This implements `Default`, and for most users can be set to `Default::default()`.
-    pub compilation_options: PipelineCompilationOptions<'a>,
-    /// The color state of the render targets.
-    pub targets: &'a [Option<ColorTargetState>],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(FragmentState<'_>: Send, Sync);
-
-/// Describes a render (graphics) pipeline.
-///
-/// For use with [`Device::create_render_pipeline`].
-///
-/// Corresponds to [WebGPU `GPURenderPipelineDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpipelinedescriptor).
-#[derive(Clone, Debug)]
-pub struct RenderPipelineDescriptor<'a> {
-    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The layout of bind groups for this pipeline.
-    pub layout: Option<&'a PipelineLayout>,
-    /// The compiled vertex stage, its entry point, and the input buffers layout.
-    pub vertex: VertexState<'a>,
-    /// The properties of the pipeline at the primitive assembly and rasterization level.
-    pub primitive: PrimitiveState,
-    /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
-    pub depth_stencil: Option<DepthStencilState>,
-    /// The multi-sampling properties of the pipeline.
-    pub multisample: MultisampleState,
-    /// The compiled fragment stage, its entry point, and the color targets.
-    pub fragment: Option<FragmentState<'a>>,
-    /// If the pipeline will be used with a multiview render pass, this indicates how many array
-    /// layers the attachments will have.
-    pub multiview: Option<NonZeroU32>,
-    /// The pipeline cache to use when creating this pipeline.
-    pub cache: Option<&'a PipelineCache>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPipelineDescriptor<'_>: Send, Sync);
-
-/// Describes the timestamp writes of a compute pass.
-///
-/// For use with [`ComputePassDescriptor`].
-/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
-///
-/// Corresponds to [WebGPU `GPUComputePassTimestampWrites`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrites).
-#[derive(Clone, Debug)]
-pub struct ComputePassTimestampWrites<'a> {
-    /// The query set to write to.
-    pub query_set: &'a QuerySet,
-    /// The index of the query set at which a start timestamp of this pass is written, if any.
-    pub beginning_of_pass_write_index: Option<u32>,
-    /// The index of the query set at which an end timestamp of this pass is written, if any.
-    pub end_of_pass_write_index: Option<u32>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePassTimestampWrites<'_>: Send, Sync);
-
-/// Describes the attachments of a compute pass.
-///
-/// For use with [`CommandEncoder::begin_compute_pass`].
-///
-/// Corresponds to [WebGPU `GPUComputePassDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor).
-#[derive(Clone, Default, Debug)]
-pub struct ComputePassDescriptor<'a> {
-    /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Defines which timestamp values will be written for this pass, and where to write them to.
-    ///
-    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
-
-/// Describes the timestamp writes of a compute pass.
-///
-/// For use with [`ComputePassDescriptor`].
-/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
-///
-/// Corresponds to [WebGPU `GPUComputePassTimestampWrites`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrites).
-#[derive(Clone, Debug)]
-pub struct ComputePassTimestampWrites<'a> {
-    /// The query set to write to.
-    pub query_set: &'a QuerySet,
-    /// The index of the query set at which a start timestamp of this pass is written, if any.
-    pub beginning_of_pass_write_index: Option<u32>,
-    /// The index of the query set at which an end timestamp of this pass is written, if any.
-    pub end_of_pass_write_index: Option<u32>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePassTimestampWrites<'_>: Send, Sync);
-
-/// Describes the attachments of a compute pass.
-///
-/// For use with [`CommandEncoder::begin_compute_pass`].
-///
-/// Corresponds to [WebGPU `GPUComputePassDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor).
-#[derive(Clone, Default, Debug)]
-pub struct ComputePassDescriptor<'a> {
-    /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Defines which timestamp values will be written for this pass, and where to write them to.
-    ///
-    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
-    pub timestamp_writes: Option<ComputePassTimestampWrites<'a>>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePassDescriptor<'_>: Send, Sync);
-
-#[derive(Clone, Debug)]
-/// Advanced options for use when a pipeline is compiled
-///
-/// This implements `Default`, and for most users can be set to `Default::default()`
-pub struct PipelineCompilationOptions<'a> {
-    /// Specifies the values of pipeline-overridable constants in the shader module.
-    ///
-    /// If an `@id` attribute was specified on the declaration,
-    /// the key must be the pipeline constant ID as a decimal ASCII number; if not,
-    /// the key must be the constant's identifier name.
-    ///
-    /// The value may represent any of WGSL's concrete scalar types.
-    pub constants: &'a HashMap<String, f64>,
-    /// Whether workgroup scoped memory will be initialized with zero values for this stage.
-    ///
-    /// This is required by the WebGPU spec, but may have overhead which can be avoided
-    /// for cross-platform applications
-    pub zero_initialize_workgroup_memory: bool,
-    /// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
-    pub vertex_pulling_transform: bool,
-}
-
-impl<'a> Default for PipelineCompilationOptions<'a> {
-    fn default() -> Self {
-        // HashMap doesn't have a const constructor, due to the use of RandomState
-        // This does introduce some synchronisation costs, but these should be minor,
-        // and might be cheaper than the alternative of getting new random state
-        static DEFAULT_CONSTANTS: std::sync::OnceLock<HashMap<String, f64>> =
-            std::sync::OnceLock::new();
-        let constants = DEFAULT_CONSTANTS.get_or_init(Default::default);
-        Self {
-            constants,
-            zero_initialize_workgroup_memory: true,
-            vertex_pulling_transform: false,
-        }
-    }
-}
-
-/// Describes a compute pipeline.
-///
-/// For use with [`Device::create_compute_pipeline`].
-///
-/// Corresponds to [WebGPU `GPUComputePipelineDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepipelinedescriptor).
-#[derive(Clone, Debug)]
-pub struct ComputePipelineDescriptor<'a> {
-    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The layout of bind groups for this pipeline.
-    pub layout: Option<&'a PipelineLayout>,
-    /// The compiled shader module for this stage.
-    pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// and no return value in the shader.
-    pub entry_point: &'a str,
-    /// Advanced options for when this pipeline is compiled
-    ///
-    /// This implements `Default`, and for most users can be set to `Default::default()`
-    pub compilation_options: PipelineCompilationOptions<'a>,
-    /// The pipeline cache to use when creating this pipeline.
-    pub cache: Option<&'a PipelineCache>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync);
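// Sketch of overriding a pipeline constant through `PipelineCompilationOptions`
// (assumes a WGSL module declaring `override gain: f32;` — the constant name is
// hypothetical). The caller builds the map, e.g. by inserting ("gain", 2.0):
fn example_compilation_options(
    constants: &std::collections::HashMap<String, f64>,
) -> wgpu::PipelineCompilationOptions<'_> {
    wgpu::PipelineCompilationOptions {
        constants,
        // keep the defaults for workgroup zero-init and vertex pulling
        ..Default::default()
    }
}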
-
-/// Describes a pipeline cache, which allows reusing compilation work
-/// between program runs.
-///
-/// For use with [`Device::create_pipeline_cache`]
-///
-/// This type is unique to the Rust API of `wgpu`.
-#[derive(Clone, Debug)]
-pub struct PipelineCacheDescriptor<'a> {
-    /// Debug label of the pipeline cache. This might show up in some logs from `wgpu`
-    pub label: Label<'a>,
-    /// The data used to initialise the cache
-    ///
-    /// # Safety
-    ///
-    /// This data must have been provided from a previous call to
-    /// [`PipelineCache::get_data`], if not `None`
-    pub data: Option<&'a [u8]>,
-    /// Whether to create a cache without data when the provided data
-    /// is invalid.
-    ///
-    /// It is recommended to set this to `true`.
-    pub fallback: bool,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineCacheDescriptor<'_>: Send, Sync);
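// Sketch of restoring a cache (assumes `data` was read back from disk and
// originally came from `PipelineCache::get_data`):
fn example_load_cache(device: &wgpu::Device, data: Option<&[u8]>) -> wgpu::PipelineCache {
    unsafe {
        device.create_pipeline_cache(&wgpu::PipelineCacheDescriptor {
            label: Some("disk cache"),
            data,
            // Fall back to an empty cache instead of failing on stale data.
            fallback: true,
        })
    }
}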
-
-pub use wgt::ImageCopyBuffer as ImageCopyBufferBase;
-/// View of a buffer which can be used to copy to/from a texture.
-///
-/// Corresponds to [WebGPU `GPUImageCopyBuffer`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopybuffer).
-pub type ImageCopyBuffer<'a> = ImageCopyBufferBase<&'a Buffer>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ImageCopyBuffer<'_>: Send, Sync);
-
-pub use wgt::ImageCopyTexture as ImageCopyTextureBase;
-/// View of a texture which can be used to copy to/from a buffer/texture.
-///
-/// Corresponds to [WebGPU `GPUImageCopyTexture`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexture).
-pub type ImageCopyTexture<'a> = ImageCopyTextureBase<&'a Texture>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ImageCopyTexture<'_>: Send, Sync);
-
-pub use wgt::ImageCopyTextureTagged as ImageCopyTextureTaggedBase;
-/// View of a texture which can be used to copy to a texture, including
-/// color space and alpha premultiplication information.
-///
-/// Corresponds to [WebGPU `GPUImageCopyTextureTagged`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexturetagged).
-pub type ImageCopyTextureTagged<'a> = ImageCopyTextureTaggedBase<&'a Texture>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ImageCopyTextureTagged<'_>: Send, Sync);
-
-/// Describes a [`BindGroupLayout`].
-///
-/// For use with [`Device::create_bind_group_layout`].
-///
-/// Corresponds to [WebGPU `GPUBindGroupLayoutDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgrouplayoutdescriptor).
-#[derive(Clone, Debug)]
-pub struct BindGroupLayoutDescriptor<'a> {
-    /// Debug label of the bind group layout. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-
-    /// Array of entries in this BindGroupLayout
-    pub entries: &'a [BindGroupLayoutEntry],
-}
-static_assertions::assert_impl_all!(BindGroupLayoutDescriptor<'_>: Send, Sync);
-
-/// Describes a [`RenderBundleEncoder`].
-///
-/// For use with [`Device::create_render_bundle_encoder`].
-///
-/// Corresponds to [WebGPU `GPURenderBundleEncoderDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundleencoderdescriptor).
-#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
-pub struct RenderBundleEncoderDescriptor<'a> {
-    /// Debug label of the render bundle encoder. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The formats of the color attachments that this render bundle is capable of rendering to. This
-    /// must match the formats of the color attachments in the render pass this render bundle is executed in.
-    pub color_formats: &'a [Option<TextureFormat>],
-    /// Information about the depth attachment that this render bundle is capable of rendering to. This
-    /// must match the format of the depth attachments in the render pass this render bundle is executed in.
-    pub depth_stencil: Option<RenderBundleDepthStencil>,
-    /// The sample count this render bundle is capable of rendering to. This must match the pipelines and
-    /// the render passes it is used in.
-    pub sample_count: u32,
-    /// Whether this render bundle will render to multiple array layers in the attachments at the same time.
-    pub multiview: Option<NonZeroU32>,
-}
-static_assertions::assert_impl_all!(RenderBundleEncoderDescriptor<'_>: Send, Sync);
-
-/// Surface texture that can be rendered to.
-/// Result of a successful call to [`Surface::get_current_texture`].
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// the [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context) provides
-/// a texture without any additional information.
-#[derive(Debug)]
-pub struct SurfaceTexture {
-    /// Accessible view of the frame.
-    pub texture: Texture,
-    /// `true` if the acquired buffer can still be used for rendering,
-    /// but should be recreated for maximum performance.
-    pub suboptimal: bool,
-    presented: bool,
-    detail: Box<dyn AnyWasmNotSendSync>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync);
-
-/// Result of an unsuccessful call to [`Surface::get_current_texture`].
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub enum SurfaceError {
-    /// A timeout was encountered while trying to acquire the next frame.
-    Timeout,
-    /// The underlying surface has changed, and therefore the swap chain must be updated.
-    Outdated,
-    /// The swap chain has been lost and needs to be recreated.
-    Lost,
-    /// There is no more memory left to allocate a new frame.
-    OutOfMemory,
-}
-static_assertions::assert_impl_all!(SurfaceError: Send, Sync);
-
-impl fmt::Display for SurfaceError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", match self {
-            Self::Timeout => "A timeout was encountered while trying to acquire the next frame",
-            Self::Outdated => "The underlying surface has changed, and therefore the swap chain must be updated",
-            Self::Lost => "The swap chain has been lost and needs to be recreated",
-            Self::OutOfMemory => "There is no more memory left to allocate a new frame",
-        })
-    }
-}
-
-impl error::Error for SurfaceError {}
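// Sketch of a typical per-frame response to each `SurfaceError` variant
// (assumes `surface`, `device`, and the last-used `config` are in scope):
fn example_acquire(
    surface: &wgpu::Surface<'_>,
    device: &wgpu::Device,
    config: &wgpu::SurfaceConfiguration,
) -> Option<wgpu::SurfaceTexture> {
    match surface.get_current_texture() {
        Ok(frame) => Some(frame),
        Err(wgpu::SurfaceError::Timeout) => None, // skip this frame
        Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
            surface.configure(device, config); // recreate the swap chain
            None
        }
        Err(wgpu::SurfaceError::OutOfMemory) => panic!("out of memory acquiring a frame"),
    }
}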
-
-impl Default for Instance {
-    /// Creates a new instance of wgpu with default options.
-    ///
-    /// Backends are set to `Backends::all()`, and FXC is chosen as the `dx12_shader_compiler`.
-    ///
-    /// # Panics
-    ///
-    /// If no backend feature for the active target platform is enabled,
-    /// this method will panic, see [`Instance::enabled_backend_features()`].
-    fn default() -> Self {
-        Self::new(InstanceDescriptor::default())
-    }
-}
-
-impl Instance {
-    /// Returns which backends can be picked for the current build configuration.
-    ///
-    /// The returned set depends on a combination of target platform and enabled features.
-    /// This does *not* do any runtime checks and is exclusively based on compile time information.
-    ///
-    /// `InstanceDescriptor::backends` does not need to be a subset of this,
-    /// but any backend that is not in this set will not be picked.
-    ///
-    /// TODO: Right now it's otherwise not possible yet to opt-out of all features on some platforms.
-    /// See
-    /// * Windows/Linux/Android: always enables Vulkan and GLES with no way to opt out
-    pub const fn enabled_backend_features() -> Backends {
-        let mut backends = Backends::empty();
-
-        if cfg!(native) {
-            if cfg!(metal) {
-                backends = backends.union(Backends::METAL);
-            }
-            if cfg!(dx12) {
-                backends = backends.union(Backends::DX12);
-            }
-
-            // Windows, Android, Linux currently always enable Vulkan and OpenGL.
-            // See
-            if cfg!(target_os = "windows") || cfg!(unix) {
-                backends = backends.union(Backends::VULKAN).union(Backends::GL);
-            }
-
-            // Vulkan on Mac/iOS is only available through vulkan-portability.
-            if (cfg!(target_os = "ios") || cfg!(target_os = "macos"))
-                && cfg!(feature = "vulkan-portability")
-            {
-                backends = backends.union(Backends::VULKAN);
-            }
-
-            // GL on Mac is only available through angle.
-            if cfg!(target_os = "macos") && cfg!(feature = "angle") {
-                backends = backends.union(Backends::GL);
-            }
-        } else {
-            if cfg!(webgpu) {
-                backends = backends.union(Backends::BROWSER_WEBGPU);
-            }
-            if cfg!(webgl) {
-                backends = backends.union(Backends::GL);
-            }
-        }
-
-        backends
-    }
-
-    /// Create a new instance of wgpu.
-    ///
-    /// # Arguments
-    ///
-    /// - `instance_desc` - Has fields for which [backends][Backends] wgpu will choose
-    ///   during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use.
-    ///
-    /// [`Backends::BROWSER_WEBGPU`] takes a special role:
-    /// If it is set and WebGPU support is detected, this instance will *only* be able to create
-    /// WebGPU adapters. If you instead want to force use of WebGL, either
-    /// disable the `webgpu` compile-time feature or do not add the [`Backends::BROWSER_WEBGPU`]
-    /// flag to the `instance_desc`'s `backends` field.
-    /// If it is set and WebGPU support is *not* detected, the instance will use wgpu-core
-    /// to create adapters. Meaning that if the `webgl` feature is enabled, it is able to create
-    /// a WebGL adapter.
-    ///
-    /// # Panics
-    ///
-    /// If no backend feature for the active target platform is enabled,
-    /// this method will panic, see [`Instance::enabled_backend_features()`].
-    #[allow(unreachable_code)]
-    pub fn new(_instance_desc: InstanceDescriptor) -> Self {
-        if Self::enabled_backend_features().is_empty() {
-            panic!(
-                "No wgpu backend feature that is implemented for the target platform was enabled. \
-                 See `wgpu::Instance::enabled_backend_features()` for more information."
-            );
-        }
-
-        #[cfg(webgpu)]
-        {
-            let is_only_available_backend = !cfg!(wgpu_core);
-            let requested_webgpu = _instance_desc.backends.contains(Backends::BROWSER_WEBGPU);
-            let support_webgpu =
-                crate::backend::get_browser_gpu_property().map_or(false, |gpu| !gpu.is_undefined());
-
-            if is_only_available_backend || (requested_webgpu && support_webgpu) {
-                return Self {
-                    context: Arc::from(crate::backend::ContextWebGpu::init(_instance_desc)),
-                };
-            }
-        }
-
-        #[cfg(wgpu_core)]
-        {
-            return Self {
-                context: Arc::from(crate::backend::ContextWgpuCore::init(_instance_desc)),
-            };
-        }
-
-        unreachable!(
-            "Earlier check of `enabled_backend_features` should have prevented getting here!"
-        );
-    }
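// Sketch: construct an instance restricted to the backends that were actually
// compiled in, which avoids requesting backends `new` could never pick:
fn example_instance() -> wgpu::Instance {
    let compiled_in = wgpu::Instance::enabled_backend_features();
    wgpu::Instance::new(wgpu::InstanceDescriptor {
        backends: compiled_in,
        ..Default::default()
    })
}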
-
-    /// Create a new instance of wgpu from a wgpu-hal instance.
-    ///
-    /// # Arguments
-    ///
-    /// - `hal_instance` - wgpu-hal instance.
-    ///
-    /// # Safety
-    ///
-    /// Refer to the creation of wgpu-hal Instance for every backend.
-    #[cfg(wgpu_core)]
-    pub unsafe fn from_hal<A: wgc::hal_api::HalApi>(hal_instance: A::Instance) -> Self {
-        Self {
-            context: Arc::new(unsafe {
-                crate::backend::ContextWgpuCore::from_hal_instance::<A>(hal_instance)
-            }),
-        }
-    }
-
-    /// Return a reference to a specific backend instance, if available.
-    ///
-    /// If this `Instance` has a wgpu-hal [`Instance`] for backend
-    /// `A`, return a reference to it. Otherwise, return `None`.
-    ///
-    /// # Safety
-    ///
-    /// - The raw instance handle returned must not be manually destroyed.
-    ///
-    /// [`Instance`]: hal::Api::Instance
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi>(&self) -> Option<&A::Instance> {
-        self.context
-            .as_any()
-            // If we don't have a wgpu-core instance, we don't have a hal instance either.
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .and_then(|ctx| unsafe { ctx.instance_as_hal::<A>() })
-    }
-
-    /// Create a new instance of wgpu from a wgpu-core instance.
-    ///
-    /// # Arguments
-    ///
-    /// - `core_instance` - wgpu-core instance.
-    ///
-    /// # Safety
-    ///
-    /// Refer to the creation of wgpu-core Instance.
-    #[cfg(wgpu_core)]
-    pub unsafe fn from_core(core_instance: wgc::instance::Instance) -> Self {
-        Self {
-            context: Arc::new(unsafe {
-                crate::backend::ContextWgpuCore::from_core_instance(core_instance)
-            }),
-        }
-    }
-
-    /// Retrieves all available [`Adapter`]s that match the given [`Backends`].
-    ///
-    /// # Arguments
-    ///
-    /// - `backends` - Backends from which to enumerate adapters.
-    #[cfg(native)]
-    pub fn enumerate_adapters(&self, backends: Backends) -> Vec<Adapter> {
-        let context = Arc::clone(&self.context);
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| {
-                ctx.enumerate_adapters(backends)
-                    .into_iter()
-                    .map(move |id| crate::Adapter {
-                        context: Arc::clone(&context),
-                        id: ObjectId::from(id),
-                        data: Box::new(()),
-                    })
-                    .collect()
-            })
-            .unwrap()
-    }
-
-    /// Retrieves an [`Adapter`] which matches the given [`RequestAdapterOptions`].
-    ///
-    /// Some options are "soft", so treated as non-mandatory. Others are "hard".
-    ///
-    /// If no adapters are found that satisfy all the "hard" options, `None` is returned.
-    ///
-    /// A `compatible_surface` is required when targeting WebGL2.
-    pub fn request_adapter(
-        &self,
-        options: &RequestAdapterOptions<'_, '_>,
-    ) -> impl Future<Output = Option<Adapter>> + WasmNotSend {
-        let context = Arc::clone(&self.context);
-        let adapter = self.context.instance_request_adapter(options);
-        async move {
-            adapter
-                .await
-                .map(|(id, data)| Adapter { context, id, data })
-        }
-    }
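// Sketch of a "hard" adapter request (assumes the `pollster` crate to block on
// the returned future; on the web this would be `.await`ed instead):
fn example_adapter(
    instance: &wgpu::Instance,
    surface: &wgpu::Surface<'_>,
) -> Option<wgpu::Adapter> {
    pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions {
        power_preference: wgpu::PowerPreference::HighPerformance,
        force_fallback_adapter: false,
        compatible_surface: Some(surface), // required when targeting WebGL2
    }))
}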
-
-    /// Converts a wgpu-hal `ExposedAdapter` to a wgpu [`Adapter`].
-    ///
-    /// # Safety
-    ///
-    /// `hal_adapter` must be created from this instance internal handle.
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_adapter_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_adapter: hal::ExposedAdapter<A>,
-    ) -> Adapter {
-        let context = Arc::clone(&self.context);
-        let id = unsafe {
-            context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                .unwrap()
-                .create_adapter_from_hal(hal_adapter)
-                .into()
-        };
-        Adapter {
-            context,
-            id,
-            data: Box::new(()),
-        }
-    }
-
-    /// Creates a new surface targeting a given window/canvas/surface/etc.
-    ///
-    /// Internally, this creates surfaces for all backends that are enabled for this instance.
-    ///
-    /// See [`SurfaceTarget`] for what targets are supported.
-    /// See [`Instance::create_surface_unsafe`] for surface creation with unsafe target variants.
-    ///
-    /// Most commonly used are window handles (or providers of window handles)
-    /// which can be passed directly as they're automatically converted to [`SurfaceTarget`].
-    pub fn create_surface<'window>(
-        &self,
-        target: impl Into<SurfaceTarget<'window>>,
-    ) -> Result<Surface<'window>, CreateSurfaceError> {
-        // The handle source (i.e. the window) that we may take ownership of,
-        // so that the surface can outlast the window.
-        let handle_source;
-
-        let target = target.into();
-        let mut surface = match target {
-            SurfaceTarget::Window(window) => unsafe {
-                let surface = self.create_surface_unsafe(
-                    SurfaceTargetUnsafe::from_window(&window).map_err(|e| CreateSurfaceError {
-                        inner: CreateSurfaceErrorKind::RawHandle(e),
-                    })?,
-                );
-                handle_source = Some(window);
-
-                surface
-            }?,
-
-            #[cfg(any(webgpu, webgl))]
-            SurfaceTarget::Canvas(canvas) => {
-                handle_source = None;
-
-                let value: &wasm_bindgen::JsValue = &canvas;
-                let obj = std::ptr::NonNull::from(value).cast();
-                let raw_window_handle = raw_window_handle::WebCanvasWindowHandle::new(obj).into();
-                let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into();
-
-                // Note that we need to call this while we still have `value` around.
-                // This is safe without storing the canvas in `handle_source` since the surface will create a copy internally.
-                unsafe {
-                    self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle {
-                        raw_display_handle,
-                        raw_window_handle,
-                    })
-                }?
-            }
-
-            #[cfg(any(webgpu, webgl))]
-            SurfaceTarget::OffscreenCanvas(canvas) => {
-                handle_source = None;
-
-                let value: &wasm_bindgen::JsValue = &canvas;
-                let obj = std::ptr::NonNull::from(value).cast();
-                let raw_window_handle =
-                    raw_window_handle::WebOffscreenCanvasWindowHandle::new(obj).into();
-                let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into();
-
-                // Note that we need to call this while we still have `value` around.
-                // This is safe without storing the canvas in `handle_source` since the surface will create a copy internally.
-                unsafe {
-                    self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle {
-                        raw_display_handle,
-                        raw_window_handle,
-                    })
-                }?
-            }
-        };
-
-        surface._handle_source = handle_source;
-
-        Ok(surface)
-    }
-
-    /// Creates a new surface targeting a given window/canvas/surface/etc. using an unsafe target.
-    ///
-    /// Internally, this creates surfaces for all backends that are enabled for this instance.
-    ///
-    /// See [`SurfaceTargetUnsafe`] for what targets are supported.
-    /// See [`Instance::create_surface`] for surface creation with safe target variants.
-    ///
-    /// # Safety
-    ///
-    /// - See respective [`SurfaceTargetUnsafe`] variants for safety requirements.
-    pub unsafe fn create_surface_unsafe<'window>(
-        &self,
-        target: SurfaceTargetUnsafe,
-    ) -> Result<Surface<'window>, CreateSurfaceError> {
-        let (id, data) = unsafe { self.context.instance_create_surface(target) }?;
-
-        Ok(Surface {
-            context: Arc::clone(&self.context),
-            _handle_source: None,
-            id,
-            surface_data: data,
-            config: Mutex::new(None),
-        })
-    }
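// Sketch of the common windowed path (assumes the `winit` crate): passing an
// `Arc`'d window lets the surface take ownership, so it can outlast the
// caller's handle, matching the `SurfaceTarget::Window` arm above.
fn example_surface(
    instance: &wgpu::Instance,
    window: std::sync::Arc<winit::window::Window>,
) -> Result<wgpu::Surface<'static>, wgpu::CreateSurfaceError> {
    instance.create_surface(window)
}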
-
-    /// Polls all devices.
-    ///
-    /// If `force_wait` is true and this is not running on the web, then this
-    /// function will block until all in-flight buffers have been mapped and
-    /// all submitted commands have finished execution.
-    ///
-    /// Return `true` if all devices' queues are empty, or `false` if there are
-    /// queue submissions still in flight. (Note that, unless access to all
-    /// [`Queue`s] associated with this [`Instance`] is coordinated somehow,
-    /// this information could be out of date by the time the caller receives
-    /// it. `Queue`s can be shared between threads, and other threads could
-    /// submit new work at any time.)
-    ///
-    /// On the web, this is a no-op. `Device`s are automatically polled.
-    ///
-    /// [`Queue`s]: Queue
-    pub fn poll_all(&self, force_wait: bool) -> bool {
-        self.context.instance_poll_all_devices(force_wait)
-    }
-
-    /// Generates memory report.
-    ///
-    /// Returns `None` if the feature is not supported by the backend,
-    /// which happens only when WebGPU is pre-selected by the instance creation.
-    #[cfg(wgpu_core)]
-    pub fn generate_report(&self) -> Option<wgc::global::GlobalReport> {
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| ctx.generate_report())
-    }
-}
-
-impl Adapter {
-    /// Requests a connection to a physical device, creating a logical device.
-    ///
-    /// Returns the [`Device`] together with a [`Queue`] that executes command buffers.
-    ///
-    /// [Per the WebGPU specification], an [`Adapter`] may only be used once to create a device.
-    /// If another device is wanted, call [`Instance::request_adapter()`] again to get a fresh
-    /// [`Adapter`].
-    /// However, `wgpu` does not currently enforce this restriction.
-    ///
-    /// # Arguments
-    ///
-    /// - `desc` - Description of the features and limits requested from the given device.
-    /// - `trace_path` - Can be used for API call tracing, if that feature is
-    ///   enabled in `wgpu-core`.
-    ///
-    /// # Panics
-    ///
-    /// - `request_device()` was already called on this `Adapter`.
-    /// - Features specified by `desc` are not supported by this adapter.
-    /// - Unsafe features were requested but not enabled when requesting the adapter.
-    /// - Limits requested exceed the values provided by the adapter.
-    /// - Adapter does not support all features wgpu requires to safely operate.
-    ///
-    /// [Per the WebGPU specification]: https://www.w3.org/TR/webgpu/#dom-gpuadapter-requestdevice
-    pub fn request_device(
-        &self,
-        desc: &DeviceDescriptor<'_>,
-        trace_path: Option<&std::path::Path>,
-    ) -> impl Future<Output = Result<(Device, Queue), RequestDeviceError>> + WasmNotSend {
-        let context = Arc::clone(&self.context);
-        let device = DynContext::adapter_request_device(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-            trace_path,
-        );
-        async move {
-            device.await.map(
-                |DeviceRequest {
-                     device_id,
-                     device_data,
-                     queue_id,
-                     queue_data,
-                 }| {
-                    (
-                        Device {
-                            context: Arc::clone(&context),
-                            id: device_id,
-                            data: device_data,
-                        },
-                        Queue {
-                            context,
-                            id: queue_id,
-                            data: queue_data,
-                        },
-                    )
-                },
-            )
-        }
-    }
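// Sketch of opening a device with default features/limits (assumes `pollster`;
// error handling is collapsed to an `expect` for brevity):
fn example_device(adapter: &wgpu::Adapter) -> (wgpu::Device, wgpu::Queue) {
    pollster::block_on(adapter.request_device(&wgpu::DeviceDescriptor::default(), None))
        .expect("adapter rejected the device request")
}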
-
-    /// Create a wgpu [`Device`] and [`Queue`] from a wgpu-hal `OpenDevice`
-    ///
-    /// # Safety
-    ///
-    /// - `hal_device` must be created from this adapter internal handle.
-    /// - `desc.features` must be a subset of `hal_device` features.
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_device_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_device: hal::OpenDevice<A>,
-        desc: &DeviceDescriptor<'_>,
-        trace_path: Option<&std::path::Path>,
-    ) -> Result<(Device, Queue), RequestDeviceError> {
-        let context = Arc::clone(&self.context);
-        unsafe {
-            self.context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                // Part of the safety requirements is that the device was generated from the same adapter.
-                // Therefore, unwrap is fine here since only WgpuCoreContext based adapters have the ability to create hal devices.
-                .unwrap()
-                .create_device_from_hal(&self.id.into(), hal_device, desc, trace_path)
-        }
-        .map(|(device, queue)| {
-            (
-                Device {
-                    context: Arc::clone(&context),
-                    id: device.id().into(),
-                    data: Box::new(device),
-                },
-                Queue {
-                    context,
-                    id: queue.id().into(),
-                    data: Box::new(queue),
-                },
-            )
-        })
-    }
-
-    /// Apply a callback to this `Adapter`'s underlying backend adapter.
-    ///
-    /// If this `Adapter` is implemented by the backend API given by `A` (Vulkan,
-    /// Dx12, etc.), then apply `hal_adapter_callback` to `Some(&adapter)`, where
-    /// `adapter` is the underlying backend adapter type, [`A::Adapter`].
-    ///
-    /// If this `Adapter` uses a different backend, apply `hal_adapter_callback`
-    /// to `None`.
-    ///
-    /// The adapter is locked for reading while `hal_adapter_callback` runs. If
-    /// the callback attempts to perform any `wgpu` operations that require
-    /// write access to the adapter, deadlock will occur. The locks are
-    /// automatically released when the callback returns.
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle passed to the callback must not be manually destroyed.
-    ///
-    /// [`A::Adapter`]: hal::Api::Adapter
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Adapter>) -> R, R>(
-        &self,
-        hal_adapter_callback: F,
-    ) -> R {
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe { ctx.adapter_as_hal::<A, F, R>(self.id.into(), hal_adapter_callback) }
-        } else {
-            hal_adapter_callback(None)
-        }
-    }
-
-    /// Returns whether this adapter may present to the passed surface.
-    pub fn is_surface_supported(&self, surface: &Surface<'_>) -> bool {
-        DynContext::adapter_is_surface_supported(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            &surface.id,
-            surface.surface_data.as_ref(),
-        )
-    }
-
-    /// The features which can be used to create devices on this adapter.
-    pub fn features(&self) -> Features {
-        DynContext::adapter_features(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// The best limits which can be used to create devices on this adapter.
-    pub fn limits(&self) -> Limits {
-        DynContext::adapter_limits(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Get info about the adapter itself.
-    pub fn get_info(&self) -> AdapterInfo {
-        DynContext::adapter_get_info(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Get the downlevel capabilities of the adapter.
-    pub fn get_downlevel_capabilities(&self) -> DownlevelCapabilities {
-        DynContext::adapter_downlevel_capabilities(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Returns the features supported for a given texture format by this adapter.
-    ///
-    /// Note that the WebGPU spec further restricts the available usages/features.
-    /// To disable these restrictions on a device, request the [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] feature.
-    pub fn get_texture_format_features(&self, format: TextureFormat) -> TextureFormatFeatures {
-        DynContext::adapter_get_texture_format_features(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            format,
-        )
-    }
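// Sketch: intersect the features you would like with what the adapter offers,
// so a later `request_device` call cannot panic on unsupported features:
fn example_optional_features(adapter: &wgpu::Adapter) -> wgpu::Features {
    let wanted = wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::PIPELINE_CACHE;
    adapter.features() & wanted
}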
-
-    /// Generates a timestamp using the clock used by the presentation engine.
-    ///
-    /// When comparing completely opaque timestamp systems, we need a way of generating timestamps that signal
-    /// the exact same time. You can do this by calling your own timestamp function immediately after a call to
-    /// this function. This should result in timestamps that are 0.5 to 5 microseconds apart. There are locks
-    /// that must be taken during the call, so don't call your timestamp function before this one.
-    ///
-    /// ```no_run
-    /// # let adapter: wgpu::Adapter = panic!();
-    /// # let some_code = || wgpu::PresentationTimestamp::INVALID_TIMESTAMP;
-    /// use std::time::{Duration, Instant};
-    /// let presentation = adapter.get_presentation_timestamp();
-    /// let instant = Instant::now();
-    ///
-    /// // We can now turn a new presentation timestamp into an Instant.
-    /// let some_pres_timestamp = some_code();
-    /// let duration = Duration::from_nanos((some_pres_timestamp.0 - presentation.0) as u64);
-    /// let new_instant: Instant = instant + duration;
-    /// ```
-    ///
-    /// [Instant]: std::time::Instant
-    pub fn get_presentation_timestamp(&self) -> PresentationTimestamp {
-        DynContext::adapter_get_presentation_timestamp(&*self.context, &self.id, self.data.as_ref())
-    }
-}
-
-impl Device {
-    /// Check for resource cleanups and mapping callbacks. Will block if [`Maintain::Wait`] is passed.
-    ///
-    /// Return `true` if the queue is empty, or `false` if there are more queue
-    /// submissions still in flight. (Note that, unless access to the [`Queue`] is
-    /// coordinated somehow, this information could be out of date by the time
-    /// the caller receives it. `Queue`s can be shared between threads, so
-    /// other threads could submit new work at any time.)
-    ///
-    /// When running on WebGPU, this is a no-op. `Device`s are automatically polled.
-    pub fn poll(&self, maintain: Maintain) -> MaintainResult {
-        DynContext::device_poll(&*self.context, &self.id, self.data.as_ref(), maintain)
-    }
-
-    /// The features which can be used on this device.
-    ///
-    /// No additional features can be used, even if the underlying adapter can support them.
-    pub fn features(&self) -> Features {
-        DynContext::device_features(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// The limits which can be used on this device.
-    ///
-    /// No better limits can be used, even if the underlying adapter can support them.
-    pub fn limits(&self) -> Limits {
-        DynContext::device_limits(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Creates a shader module from either SPIR-V or WGSL source code.
-    ///
-    /// <div class="warning">
-    // NOTE: Keep this in sync with `naga::front::wgsl::parse_str`!
-    // NOTE: Keep this in sync with `wgpu_core::Global::device_create_shader_module`!
-    ///
-    /// This function may consume a lot of stack space. Compiler-enforced limits for parsing
-    /// recursion exist; if shader compilation runs into them, it will return an error gracefully.
-    /// However, on some build profiles and platforms, the default stack size for a thread may be
-    /// exceeded before this limit is reached during parsing. Callers should ensure that there is
-    /// enough stack space for this, particularly if calls to this method are exposed to user
-    /// input.
-    ///
-    /// </div>
-    pub fn create_shader_module(&self, desc: ShaderModuleDescriptor<'_>) -> ShaderModule {
-        let (id, data) = DynContext::device_create_shader_module(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-            wgt::ShaderBoundChecks::new(),
-        );
-        ShaderModule {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
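// Sketch of the typical WGSL path (the shader source here is a stand-in):
fn example_shader(device: &wgpu::Device) -> wgpu::ShaderModule {
    device.create_shader_module(wgpu::ShaderModuleDescriptor {
        label: Some("example shader"),
        source: wgpu::ShaderSource::Wgsl(
            "@vertex fn vs_main() -> @builtin(position) vec4<f32> {
                 return vec4<f32>(0.0, 0.0, 0.0, 1.0);
             }"
            .into(),
        ),
    })
}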
-
-    /// Creates a shader module from either SPIR-V or WGSL source code without runtime checks.
-    ///
-    /// # Safety
-    /// In contrast with [`create_shader_module`](Self::create_shader_module) this function
-    /// creates a shader module without runtime checks which allows shaders to perform
-    /// operations which can lead to undefined behavior like indexing out of bounds, so it is
-    /// the caller's responsibility to pass a shader which doesn't perform any of these
-    /// operations.
-    ///
-    /// This has no effect on web.
-    pub unsafe fn create_shader_module_unchecked(
-        &self,
-        desc: ShaderModuleDescriptor<'_>,
-    ) -> ShaderModule {
-        let (id, data) = DynContext::device_create_shader_module(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-            unsafe { wgt::ShaderBoundChecks::unchecked() },
-        );
-        ShaderModule {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a shader module from SPIR-V binary directly.
-    ///
-    /// # Safety
-    ///
-    /// This function passes binary data to the backend as-is and can potentially result in a
-    /// driver crash or bogus behaviour. No attempt is made to ensure that data is valid SPIR-V.
-    ///
-    /// See also [`include_spirv_raw!`] and [`util::make_spirv_raw`].
-    pub unsafe fn create_shader_module_spirv(
-        &self,
-        desc: &ShaderModuleDescriptorSpirV<'_>,
-    ) -> ShaderModule {
-        let (id, data) = unsafe {
-            DynContext::device_create_shader_module_spirv(
-                &*self.context,
-                &self.id,
-                self.data.as_ref(),
-                desc,
-            )
-        };
-        ShaderModule {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates an empty [`CommandEncoder`].
-    pub fn create_command_encoder(&self, desc: &CommandEncoderDescriptor<'_>) -> CommandEncoder {
-        let (id, data) = DynContext::device_create_command_encoder(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        CommandEncoder {
-            context: Arc::clone(&self.context),
-            id: Some(id),
-            data,
-        }
-    }
-
-    /// Creates an empty [`RenderBundleEncoder`].
-    pub fn create_render_bundle_encoder(
-        &self,
-        desc: &RenderBundleEncoderDescriptor<'_>,
-    ) -> RenderBundleEncoder<'_> {
-        let (id, data) = DynContext::device_create_render_bundle_encoder(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        RenderBundleEncoder {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-            parent: self,
-            _p: Default::default(),
-        }
-    }
-
-    /// Creates a new [`BindGroup`].
-    pub fn create_bind_group(&self, desc: &BindGroupDescriptor<'_>) -> BindGroup {
-        let (id, data) = DynContext::device_create_bind_group(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        BindGroup {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`BindGroupLayout`].
-    pub fn create_bind_group_layout(
-        &self,
-        desc: &BindGroupLayoutDescriptor<'_>,
-    ) -> BindGroupLayout {
-        let (id, data) = DynContext::device_create_bind_group_layout(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        BindGroupLayout {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`PipelineLayout`].
-    pub fn create_pipeline_layout(&self, desc: &PipelineLayoutDescriptor<'_>) -> PipelineLayout {
-        let (id, data) = DynContext::device_create_pipeline_layout(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        PipelineLayout {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`RenderPipeline`].
-    pub fn create_render_pipeline(&self, desc: &RenderPipelineDescriptor<'_>) -> RenderPipeline {
-        let (id, data) = DynContext::device_create_render_pipeline(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        RenderPipeline {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`ComputePipeline`].
-    pub fn create_compute_pipeline(&self, desc: &ComputePipelineDescriptor<'_>) -> ComputePipeline {
-        let (id, data) = DynContext::device_create_compute_pipeline(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        ComputePipeline {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`Buffer`].
-    pub fn create_buffer(&self, desc: &BufferDescriptor<'_>) -> Buffer {
-        let mut map_context = MapContext::new(desc.size);
-        if desc.mapped_at_creation {
-            map_context.initial_range = 0..desc.size;
-        }
-
-        let (id, data) =
-            DynContext::device_create_buffer(&*self.context, &self.id, self.data.as_ref(), desc);
-
-        Buffer {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-            map_context: Mutex::new(map_context),
-            size: desc.size,
-            usage: desc.usage,
-        }
-    }
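// Sketch of `mapped_at_creation` in use: create a buffer, write the initial
// contents through the mapped view, then unmap (assumes `bytes.len()` is a
// multiple of `COPY_BUFFER_ALIGNMENT`, i.e. of 4):
fn example_upload(device: &wgpu::Device, bytes: &[u8]) -> wgpu::Buffer {
    let buffer = device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("vertices"),
        size: bytes.len() as wgpu::BufferAddress,
        usage: wgpu::BufferUsages::VERTEX,
        mapped_at_creation: true,
    });
    buffer.slice(..).get_mapped_range_mut().copy_from_slice(bytes);
    buffer.unmap();
    buffer
}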
-
-    /// Creates a new [`Texture`].
-    ///
-    /// `desc` specifies the general format of the texture.
-    pub fn create_texture(&self, desc: &TextureDescriptor<'_>) -> Texture {
-        let (id, data) =
-            DynContext::device_create_texture(&*self.context, &self.id, self.data.as_ref(), desc);
-        Texture {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-            owned: true,
-            descriptor: TextureDescriptor {
-                label: None,
-                view_formats: &[],
-                ..desc.clone()
-            },
-        }
-    }
-
-    /// Creates a [`Texture`] from a wgpu-hal Texture.
-    ///
-    /// # Safety
-    ///
-    /// - `hal_texture` must be created from this device internal handle
-    /// - `hal_texture` must be created respecting `desc`
-    /// - `hal_texture` must be initialized
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_texture_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_texture: A::Texture,
-        desc: &TextureDescriptor<'_>,
-    ) -> Texture {
-        let texture = unsafe {
-            self.context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                // Part of the safety requirements is that the texture was generated from the same hal device.
-                // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal textures.
-                .unwrap()
-                .create_texture_from_hal::<A>(
-                    hal_texture,
-                    self.data.as_ref().downcast_ref().unwrap(),
-                    desc,
-                )
-        };
-        Texture {
-            context: Arc::clone(&self.context),
-            id: ObjectId::from(texture.id()),
-            data: Box::new(texture),
-            owned: true,
-            descriptor: TextureDescriptor {
-                label: None,
-                view_formats: &[],
-                ..desc.clone()
-            },
-        }
-    }
-
-    /// Creates a [`Buffer`] from a wgpu-hal Buffer.
-    ///
-    /// # Safety
-    ///
-    /// - `hal_buffer` must be created from this device internal handle
-    /// - `hal_buffer` must be created respecting `desc`
-    /// - `hal_buffer` must be initialized
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_buffer_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_buffer: A::Buffer,
-        desc: &BufferDescriptor<'_>,
-    ) -> Buffer {
-        let mut map_context = MapContext::new(desc.size);
-        if desc.mapped_at_creation {
-            map_context.initial_range = 0..desc.size;
-        }
-
-        let (id, buffer) = unsafe {
-            self.context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                // Part of the safety requirements is that the buffer was generated from the same hal device.
-                // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal buffers.
-                .unwrap()
-                .create_buffer_from_hal::<A>(
-                    hal_buffer,
-                    self.data.as_ref().downcast_ref().unwrap(),
-                    desc,
-                )
-        };
-
-        Buffer {
-            context: Arc::clone(&self.context),
-            id: ObjectId::from(id),
-            data: Box::new(buffer),
-            map_context: Mutex::new(map_context),
-            size: desc.size,
-            usage: desc.usage,
-        }
-    }
-
-    /// Creates a new [`Sampler`].
-    ///
-    /// `desc` specifies the behavior of the sampler.
-    pub fn create_sampler(&self, desc: &SamplerDescriptor<'_>) -> Sampler {
-        let (id, data) =
-            DynContext::device_create_sampler(&*self.context, &self.id, self.data.as_ref(), desc);
-        Sampler {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a new [`QuerySet`].
-    pub fn create_query_set(&self, desc: &QuerySetDescriptor<'_>) -> QuerySet {
-        let (id, data) =
-            DynContext::device_create_query_set(&*self.context, &self.id, self.data.as_ref(), desc);
-        QuerySet {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Set a callback for errors that are not handled in error scopes.
-    pub fn on_uncaptured_error(&self, handler: Box<dyn UncapturedErrorHandler>) {
-        self.context
-            .device_on_uncaptured_error(&self.id, self.data.as_ref(), handler);
-    }
-
-    /// Push an error scope.
-    pub fn push_error_scope(&self, filter: ErrorFilter) {
-        self.context
-            .device_push_error_scope(&self.id, self.data.as_ref(), filter);
-    }
-
-    /// Pop an error scope.
-    pub fn pop_error_scope(&self) -> impl Future<Output = Option<Error>> + WasmNotSend {
-        self.context
-            .device_pop_error_scope(&self.id, self.data.as_ref())
-    }
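// Sketch of an error scope wrapped around a fallible creation (assumes an
// async context; `pop_error_scope` resolves once the scope's errors are known):
async fn example_checked_texture(
    device: &wgpu::Device,
    desc: &wgpu::TextureDescriptor<'_>,
) -> Result<wgpu::Texture, wgpu::Error> {
    device.push_error_scope(wgpu::ErrorFilter::Validation);
    let texture = device.create_texture(desc);
    match device.pop_error_scope().await {
        Some(error) => Err(error),
        None => Ok(texture),
    }
}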
-
-    /// Starts frame capture.
-    pub fn start_capture(&self) {
-        DynContext::device_start_capture(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Stops frame capture.
-    pub fn stop_capture(&self) {
-        DynContext::device_stop_capture(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Query internal counters from the native backend for debugging purposes.
-    ///
-    /// Some backends may not set all counters, or may not set any counter at all.
-    /// The `counters` cargo feature must be enabled for any counter to be set.
-    ///
-    /// If a counter is not set, it contains its default value (zero).
-    pub fn get_internal_counters(&self) -> wgt::InternalCounters {
-        DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Apply a callback to this `Device`'s underlying backend device.
-    ///
-    /// If this `Device` is implemented by the backend API given by `A` (Vulkan,
-    /// Dx12, etc.), then apply `hal_device_callback` to `Some(&device)`, where
-    /// `device` is the underlying backend device type, [`A::Device`].
-    ///
-    /// If this `Device` uses a different backend, apply `hal_device_callback`
-    /// to `None`.
-    ///
-    /// The device is locked for reading while `hal_device_callback` runs. If
-    /// the callback attempts to perform any `wgpu` operations that require
-    /// write access to the device (destroying a buffer, say), deadlock will
-    /// occur. The locks are automatically released when the callback returns.
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle passed to the callback must not be manually destroyed.
-    ///
-    /// [`A::Device`]: hal::Api::Device
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Device>) -> R, R>(
-        &self,
-        hal_device_callback: F,
-    ) -> Option<R> {
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| unsafe {
-                ctx.device_as_hal::<A, F, R>(
-                    self.data.as_ref().downcast_ref().unwrap(),
-                    hal_device_callback,
-                )
-            })
-    }
-
-    /// Destroy this device.
-    pub fn destroy(&self) {
-        DynContext::device_destroy(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Set a DeviceLostCallback on this device.
-    pub fn set_device_lost_callback(
-        &self,
-        callback: impl Fn(DeviceLostReason, String) + Send + 'static,
-    ) {
-        DynContext::device_set_device_lost_callback(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            Box::new(callback),
-        )
-    }
-
-    /// Test-only function to make this device invalid.
-    #[doc(hidden)]
-    pub fn make_invalid(&self) {
-        DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Create a [`PipelineCache`] with initial data
-    ///
-    /// This can be passed to [`Device::create_compute_pipeline`]
-    /// and [`Device::create_render_pipeline`] to either accelerate these
-    /// or add the cache results from those.
-    ///
-    /// # Safety
-    ///
-    /// If the `data` field of `desc` is set, it must have previously been returned from a call
-    /// to [`PipelineCache::get_data`][^saving]. This `data` will only be used if it came
-    /// from an adapter with the same [`util::pipeline_cache_key`].
-    /// This *is* compatible across wgpu versions, as any data format change will
-    /// be accounted for.
-    ///
-    /// It is *not* supported to bring caches from previous direct uses of backend APIs
-    /// into this method.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error value if:
-    /// * the [`PIPELINE_CACHE`](wgt::Features::PIPELINE_CACHE) feature is not enabled
-    /// * this device is invalid; or
-    /// * the device is out of memory
-    ///
-    /// This method also returns an error value if:
-    /// * The `fallback` field on `desc` is false; and
-    /// * the `data` provided would not be used[^data_not_used]
-    ///
-    /// If an error value is used in subsequent calls, default caching will be used.
-    ///
-    /// [^saving]: We do recognise that saving this data to disk means this condition
-    /// is impossible to fully prove. Consider the risks for your own application in this case.
-    ///
-    /// [^data_not_used]: This data may be not used if: the data was produced by a prior
-    /// version of wgpu; or was created for an incompatible adapter, or there was a GPU driver
-    /// update. In some cases, the data might not be used and a real value is returned,
-    /// this is left to the discretion of GPU drivers.
-    pub unsafe fn create_pipeline_cache(
-        &self,
-        desc: &PipelineCacheDescriptor<'_>,
-    ) -> PipelineCache {
-        let (id, data) = unsafe {
-            DynContext::device_create_pipeline_cache(
-                &*self.context,
-                &self.id,
-                self.data.as_ref(),
-                desc,
-            )
-        };
-        PipelineCache {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-}
-
-impl Drop for Device {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.device_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Requesting a device from an [`Adapter`] failed.
-#[derive(Clone, Debug)]
-pub struct RequestDeviceError {
-    inner: RequestDeviceErrorKind,
-}
-#[derive(Clone, Debug)]
-enum RequestDeviceErrorKind {
-    /// Error from [`wgpu_core`].
-    // must match dependency cfg
-    #[cfg(wgpu_core)]
-    Core(wgc::instance::RequestDeviceError),
-
-    /// Error from web API that was called by `wgpu` to request a device.
-    ///
-    /// (This is currently never used by the webgl backend, but it could be.)
-    #[cfg(webgpu)]
-    WebGpu(wasm_bindgen::JsValue),
-}
-
-#[cfg(send_sync)]
-unsafe impl Send for RequestDeviceErrorKind {}
-#[cfg(send_sync)]
-unsafe impl Sync for RequestDeviceErrorKind {}
-
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RequestDeviceError: Send, Sync);
-
-impl fmt::Display for RequestDeviceError {
-    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            RequestDeviceErrorKind::Core(error) => error.fmt(_f),
-            #[cfg(webgpu)]
-            RequestDeviceErrorKind::WebGpu(error_js_value) => {
-                // wasm-bindgen provides a reasonable error stringification via `Debug` impl
-                write!(_f, "{error_js_value:?}")
-            }
-            #[cfg(not(any(webgpu, wgpu_core)))]
-            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
-        }
-    }
-}
-
-impl error::Error for RequestDeviceError {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            RequestDeviceErrorKind::Core(error) => error.source(),
-            #[cfg(webgpu)]
-            RequestDeviceErrorKind::WebGpu(_) => None,
-            #[cfg(not(any(webgpu, wgpu_core)))]
-            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
-        }
-    }
-}
-
-#[cfg(wgpu_core)]
-impl From<wgc::instance::RequestDeviceError> for RequestDeviceError {
-    fn from(error: wgc::instance::RequestDeviceError) -> Self {
-        Self {
-            inner: RequestDeviceErrorKind::Core(error),
-        }
-    }
-}
-
-/// [`Instance::create_surface()`] or a related function failed.
-#[derive(Clone, Debug)]
-#[non_exhaustive]
-pub struct CreateSurfaceError {
-    inner: CreateSurfaceErrorKind,
-}
-#[derive(Clone, Debug)]
-enum CreateSurfaceErrorKind {
-    /// Error from [`wgpu_hal`].
-    #[cfg(wgpu_core)]
-    Hal(wgc::instance::CreateSurfaceError),
-
-    /// Error from WebGPU surface creation.
-    #[allow(dead_code)] // may be unused depending on target and features
-    Web(String),
-
-    /// Error when trying to get a [`DisplayHandle`] or a [`WindowHandle`] from
-    /// `raw_window_handle`.
-    RawHandle(raw_window_handle::HandleError),
-}
-static_assertions::assert_impl_all!(CreateSurfaceError: Send, Sync);
-
-impl fmt::Display for CreateSurfaceError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            CreateSurfaceErrorKind::Hal(e) => e.fmt(f),
-            CreateSurfaceErrorKind::Web(e) => e.fmt(f),
-            CreateSurfaceErrorKind::RawHandle(e) => e.fmt(f),
-        }
-    }
-}
-
-impl error::Error for CreateSurfaceError {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            CreateSurfaceErrorKind::Hal(e) => e.source(),
-            CreateSurfaceErrorKind::Web(_) => None,
-            CreateSurfaceErrorKind::RawHandle(e) => e.source(),
-        }
-    }
-}
-
-#[cfg(wgpu_core)]
-impl From<wgc::instance::CreateSurfaceError> for CreateSurfaceError {
-    fn from(e: wgc::instance::CreateSurfaceError) -> Self {
-        Self {
-            inner: CreateSurfaceErrorKind::Hal(e),
-        }
-    }
-}
-
-/// Error occurred when trying to async map a buffer.
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub struct BufferAsyncError;
-static_assertions::assert_impl_all!(BufferAsyncError: Send, Sync);
-
-impl fmt::Display for BufferAsyncError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "Error occurred when trying to async map a buffer")
-    }
-}
-
-impl error::Error for BufferAsyncError {}
-
-/// Type of buffer mapping.
-#[derive(Debug, Clone, Copy, Eq, PartialEq)]
-pub enum MapMode {
-    /// Map only for reading
-    Read,
-    /// Map only for writing
-    Write,
-}
-static_assertions::assert_impl_all!(MapMode: Send, Sync);
-
-fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
-    bounds: S,
-) -> (BufferAddress, Option<BufferSize>) {
-    let offset = match bounds.start_bound() {
-        Bound::Included(&bound) => bound,
-        Bound::Excluded(&bound) => bound + 1,
-        Bound::Unbounded => 0,
-    };
-    let size = match bounds.end_bound() {
-        Bound::Included(&bound) => Some(bound + 1 - offset),
-        Bound::Excluded(&bound) => Some(bound - offset),
-        Bound::Unbounded => None,
-    }
-    .map(|size| BufferSize::new(size).expect("Buffer slices cannot be empty"));
-
-    (offset, size)
-}
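// Worked examples for `range_to_offset_size` (values are illustrative):
// `4..12` maps to offset 4 with an explicit size of 8, while an unbounded
// range maps to offset 0 and `None`, meaning "the rest of the buffer".
#[test]
fn range_to_offset_size_examples() {
    assert_eq!(range_to_offset_size(4..12), (4, BufferSize::new(8)));
    assert_eq!(range_to_offset_size(..), (0, None));
}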
-
-/// Read only view into a mapped buffer.
-///
-/// To get a `BufferView`, first [map] the buffer, and then
-/// call `buffer.slice(range).get_mapped_range()`.
-///
-/// `BufferView` dereferences to `&[u8]`, so you can use all the usual Rust
-/// slice methods to access the buffer's contents. It also implements
-/// `AsRef<[u8]>`, if that's more convenient.
-///
-/// If you try to create overlapping views of a buffer, mutable or
-/// otherwise, `get_mapped_range` will panic.
-///
-/// [map]: Buffer#mapping-buffers
-#[derive(Debug)]
-pub struct BufferView<'a> {
-    slice: BufferSlice<'a>,
-    data: Box<dyn crate::context::BufferMappedRange>,
-}
-
-/// Write only view into mapped buffer.
-///
-/// To get a `BufferViewMut`, first [map] the buffer, and then
-/// call `buffer.slice(range).get_mapped_range_mut()`.
-///
-/// `BufferViewMut` dereferences to `&mut [u8]`, so you can use all the usual
-/// Rust slice methods to access the buffer's contents. It also implements
-/// `AsMut<[u8]>`, if that's more convenient.
-///
-/// It is possible to read the buffer using this view, but doing so is not
-/// recommended, as it is likely to be slow.
-///
-/// If you try to create overlapping views of a buffer, mutable or
-/// otherwise, `get_mapped_range_mut` will panic.
-///
-/// [map]: Buffer#mapping-buffers
-#[derive(Debug)]
-pub struct BufferViewMut<'a> {
-    slice: BufferSlice<'a>,
-    data: Box<dyn crate::context::BufferMappedRange>,
-    readable: bool,
-}
-
-impl std::ops::Deref for BufferView<'_> {
-    type Target = [u8];
-
-    #[inline]
-    fn deref(&self) -> &[u8] {
-        self.data.slice()
-    }
-}
-
-impl AsRef<[u8]> for BufferView<'_> {
-    #[inline]
-    fn as_ref(&self) -> &[u8] {
-        self.data.slice()
-    }
-}
-
-impl AsMut<[u8]> for BufferViewMut<'_> {
-    #[inline]
-    fn as_mut(&mut self) -> &mut [u8] {
-        self.data.slice_mut()
-    }
-}
-
-impl Deref for BufferViewMut<'_> {
-    type Target = [u8];
-
-    fn deref(&self) -> &Self::Target {
-        if !self.readable {
-            log::warn!("Reading from a BufferViewMut is slow and not recommended.");
-        }
-
-        self.data.slice()
-    }
-}
-
-impl DerefMut for BufferViewMut<'_> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        self.data.slice_mut()
-    }
-}
-
-impl Drop for BufferView<'_> {
-    fn drop(&mut self) {
-        self.slice
-            .buffer
-            .map_context
-            .lock()
-            .remove(self.slice.offset, self.slice.size);
-    }
-}
-
-impl Drop for BufferViewMut<'_> {
-    fn drop(&mut self) {
-        self.slice
-            .buffer
-            .map_context
-            .lock()
-            .remove(self.slice.offset, self.slice.size);
-    }
-}
-
-impl Buffer {
-    /// Return the binding view of the entire buffer.
-    pub fn as_entire_binding(&self) -> BindingResource<'_> {
-        BindingResource::Buffer(self.as_entire_buffer_binding())
-    }
-
-    /// Return the binding view of the entire buffer.
-    pub fn as_entire_buffer_binding(&self) -> BufferBinding<'_> {
-        BufferBinding {
-            buffer: self,
-            offset: 0,
-            size: None,
-        }
-    }
-
-    /// Returns the inner hal Buffer using a callback. The hal buffer will be `None` if the
-    /// backend type argument does not match with this wgpu Buffer
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal Buffer must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Buffer>) -> R, R>(
-        &self,
-        hal_buffer_callback: F,
-    ) -> R {
-        let id = self.id;
-
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe { ctx.buffer_as_hal::<A, F, R>(id.into(), hal_buffer_callback) }
-        } else {
-            hal_buffer_callback(None)
-        }
-    }
-
-    /// Use only a portion of this Buffer for a given operation. Choosing a range with no end
-    /// will use the rest of the buffer. Using a totally unbounded range will use the entire buffer.
-    pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
-        let (offset, size) = range_to_offset_size(bounds);
-        BufferSlice {
-            buffer: self,
-            offset,
-            size,
-        }
-    }
-
-    /// Flushes any pending write operations and unmaps the buffer from host memory.
-    pub fn unmap(&self) {
-        self.map_context.lock().reset();
-        DynContext::buffer_unmap(&*self.context, &self.id, self.data.as_ref());
-    }
-
-    /// Destroy the associated native resources as soon as possible.
-    pub fn destroy(&self) {
-        DynContext::buffer_destroy(&*self.context, &self.id, self.data.as_ref());
-    }
-
-    /// Returns the length of the buffer allocation in bytes.
-    ///
-    /// This is always equal to the `size` that was specified when creating the buffer.
-    pub fn size(&self) -> BufferAddress {
-        self.size
-    }
-
-    /// Returns the allowed usages for this `Buffer`.
-    ///
-    /// This is always equal to the `usage` that was specified when creating the buffer.
-    pub fn usage(&self) -> BufferUsages {
-        self.usage
-    }
-}
-
-impl<'a> BufferSlice<'a> {
-    /// Map the buffer. The mapping is ready to use once the callback is called.
-    ///
-    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
-    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
-    ///
-    /// The callback will be called on the thread that first calls the above functions after the GPU work
-    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
-    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
-    /// and used to set flags, send messages, etc.
-    pub fn map_async(
-        &self,
-        mode: MapMode,
-        callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
-    ) {
-        let mut mc = self.buffer.map_context.lock();
-        assert_eq!(
-            mc.initial_range,
-            0..0,
-            "Buffer {:?} is already mapped",
-            self.buffer.id
-        );
-        let end = match self.size {
-            Some(s) => self.offset + s.get(),
-            None => mc.total_size,
-        };
-        mc.initial_range = self.offset..end;
-
-        DynContext::buffer_map_async(
-            &*self.buffer.context,
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            mode,
-            self.offset..end,
-            Box::new(callback),
-        )
-    }
-
-    /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable
-    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic.
-    pub fn get_mapped_range(&self) -> BufferView<'a> {
-        let end = self.buffer.map_context.lock().add(self.offset, self.size);
-        let data = DynContext::buffer_get_mapped_range(
-            &*self.buffer.context,
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            self.offset..end,
-        );
-        BufferView { slice: *self, data }
-    }
-
-    /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable
-    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will fail.
-    ///
-    /// This is useful when targeting WebGPU and you want to pass mapped data directly to js.
-    /// Unlike `get_mapped_range` which unconditionally copies mapped data into the wasm heap,
-    /// this function directly hands you the ArrayBuffer that we mapped the data into in js.
-    ///
-    /// This is only available on WebGPU, on any other backends this will return `None`.
-    #[cfg(webgpu)]
-    pub fn get_mapped_range_as_array_buffer(&self) -> Option<js_sys::ArrayBuffer> {
-        self.buffer
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWebGpu>()
-            .map(|ctx| {
-                let buffer_data = crate::context::downcast_ref(self.buffer.data.as_ref());
-                let end = self.buffer.map_context.lock().add(self.offset, self.size);
-                ctx.buffer_get_mapped_range_as_array_buffer(buffer_data, self.offset..end)
-            })
-    }
-
-    /// Synchronously and immediately map a buffer for writing. If the buffer is not immediately mappable
-    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic.
-    pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> {
-        let end = self.buffer.map_context.lock().add(self.offset, self.size);
-        let data = DynContext::buffer_get_mapped_range(
-            &*self.buffer.context,
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            self.offset..end,
-        );
-        BufferViewMut {
-            slice: *self,
-            data,
-            readable: self.buffer.usage.contains(BufferUsages::MAP_READ),
-        }
-    }
-}
-
-impl Drop for Buffer {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.buffer_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
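// Sketch of the read-back dance this API implies: request the map, drive the
// callback with `Device::poll`, then read (assumes `buffer` has `MAP_READ`
// usage and all GPU work writing it was already submitted):
fn example_read_back(device: &wgpu::Device, buffer: &wgpu::Buffer) -> Vec<u8> {
    let slice = buffer.slice(..);
    let (tx, rx) = std::sync::mpsc::channel();
    slice.map_async(wgpu::MapMode::Read, move |result| {
        tx.send(result).unwrap();
    });
    let _ = device.poll(wgpu::Maintain::Wait); // blocks until the callback ran
    rx.recv().unwrap().expect("buffer mapping failed");
    let bytes = slice.get_mapped_range().to_vec();
    buffer.unmap();
    bytes
}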
-
-impl Texture {
-    /// Returns the inner hal Texture using a callback. The hal texture will be `None` if the
-    /// backend type argument does not match with this wgpu Texture
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal Texture must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Texture>) -> R, R>(
-        &self,
-        hal_texture_callback: F,
-    ) -> R {
-        let texture = self.data.as_ref().downcast_ref().unwrap();
-
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe { ctx.texture_as_hal::<A, F, R>(texture, hal_texture_callback) }
-        } else {
-            hal_texture_callback(None)
-        }
-    }
-
-    /// Creates a view of this texture.
-    pub fn create_view(&self, desc: &TextureViewDescriptor<'_>) -> TextureView {
-        let (id, data) =
-            DynContext::texture_create_view(&*self.context, &self.id, self.data.as_ref(), desc);
-        TextureView {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Destroy the associated native resources as soon as possible.
-    pub fn destroy(&self) {
-        DynContext::texture_destroy(&*self.context, &self.id, self.data.as_ref());
-    }
-
-    /// Make an `ImageCopyTexture` representing the whole texture.
-    pub fn as_image_copy(&self) -> ImageCopyTexture<'_> {
-        ImageCopyTexture {
-            texture: self,
-            mip_level: 0,
-            origin: Origin3d::ZERO,
-            aspect: TextureAspect::All,
-        }
-    }
-
-    /// Returns the size of this `Texture`.
-    ///
-    /// This is always equal to the `size` that was specified when creating the texture.
-    pub fn size(&self) -> Extent3d {
-        self.descriptor.size
-    }
-
-    /// Returns the width of this `Texture`.
-    ///
-    /// This is always equal to the `size.width` that was specified when creating the texture.
-    pub fn width(&self) -> u32 {
-        self.descriptor.size.width
-    }
-
-    /// Returns the height of this `Texture`.
-    ///
-    /// This is always equal to the `size.height` that was specified when creating the texture.
-    pub fn height(&self) -> u32 {
-        self.descriptor.size.height
-    }
-
-    /// Returns the depth or layer count of this `Texture`.
-    ///
-    /// This is always equal to the `size.depth_or_array_layers` that was specified when creating the texture.
-    pub fn depth_or_array_layers(&self) -> u32 {
-        self.descriptor.size.depth_or_array_layers
-    }
-
-    /// Returns the mip_level_count of this `Texture`.
-    ///
-    /// This is always equal to the `mip_level_count` that was specified when creating the texture.
-    pub fn mip_level_count(&self) -> u32 {
-        self.descriptor.mip_level_count
-    }
-
-    /// Returns the sample_count of this `Texture`.
-    ///
-    /// This is always equal to the `sample_count` that was specified when creating the texture.
-    pub fn sample_count(&self) -> u32 {
-        self.descriptor.sample_count
-    }
-
-    /// Returns the dimension of this `Texture`.
-    ///
-    /// This is always equal to the `dimension` that was specified when creating the texture.
-    pub fn dimension(&self) -> TextureDimension {
-        self.descriptor.dimension
-    }
-
-    /// Returns the format of this `Texture`.
-    ///
-    /// This is always equal to the `format` that was specified when creating the texture.
-    pub fn format(&self) -> TextureFormat {
-        self.descriptor.format
-    }
-
-    /// Returns the allowed usages of this `Texture`.
-    ///
-    /// This is always equal to the `usage` that was specified when creating the texture.
-     /// Returns the allowed usages of this `Texture`.
-     ///
-     /// This is always equal to the `usage` that was specified when creating the texture.
-     pub fn usage(&self) -> TextureUsages {
-         self.descriptor.usage
-     }
- }
-
- impl Drop for Texture {
-     fn drop(&mut self) {
-         if self.owned && !thread::panicking() {
-             self.context.texture_drop(&self.id, self.data.as_ref());
-         }
-     }
- }
-
- impl Drop for TextureView {
-     fn drop(&mut self) {
-         if !thread::panicking() {
-             self.context.texture_view_drop(&self.id, self.data.as_ref());
-         }
-     }
- }
-
- impl CommandEncoder {
-     /// Finishes recording and returns a [`CommandBuffer`] that can be submitted for execution.
-     pub fn finish(mut self) -> CommandBuffer {
-         let (id, data) = DynContext::command_encoder_finish(
-             &*self.context,
-             self.id.take().unwrap(),
-             self.data.as_mut(),
-         );
-         CommandBuffer {
-             context: Arc::clone(&self.context),
-             id: Some(id),
-             data: Some(data),
-         }
-     }
-
-     /// Begins recording of a render pass.
-     ///
-     /// This function returns a [`RenderPass`] object which records a single render pass.
-     ///
-     /// As long as the returned [`RenderPass`] has not ended,
-     /// any mutating operation on this command encoder causes an error and invalidates it.
-     /// Note that the `'encoder` lifetime relationship protects against this,
-     /// but it is possible to opt out of it by calling [`RenderPass::forget_lifetime`].
-     /// This can be useful for runtime handling of the encoder->pass
-     /// dependency, e.g., when pass and encoder are stored in the same data structure.
-     pub fn begin_render_pass<'encoder>(
-         &'encoder mut self,
-         desc: &RenderPassDescriptor<'_>,
-     ) -> RenderPass<'encoder> {
-         let id = self.id.as_ref().unwrap();
-         let (id, data) = DynContext::command_encoder_begin_render_pass(
-             &*self.context,
-             id,
-             self.data.as_ref(),
-             desc,
-         );
-         RenderPass {
-             inner: RenderPassInner {
-                 id,
-                 data,
-                 context: self.context.clone(),
-             },
-             encoder_guard: PhantomData,
-         }
-     }
-
-     /// Begins recording of a compute pass.
-     ///
-     /// This function returns a [`ComputePass`] object which records a single compute pass.
-     ///
-     /// As long as the returned [`ComputePass`] has not ended,
-     /// any mutating operation on this command encoder causes an error and invalidates it.
-     /// Note that the `'encoder` lifetime relationship protects against this,
-     /// but it is possible to opt out of it by calling [`ComputePass::forget_lifetime`].
-     /// This can be useful for runtime handling of the encoder->pass
-     /// dependency, e.g., when pass and encoder are stored in the same data structure.
-     pub fn begin_compute_pass<'encoder>(
-         &'encoder mut self,
-         desc: &ComputePassDescriptor<'_>,
-     ) -> ComputePass<'encoder> {
-         let id = self.id.as_ref().unwrap();
-         let (id, data) = DynContext::command_encoder_begin_compute_pass(
-             &*self.context,
-             id,
-             self.data.as_ref(),
-             desc,
-         );
-         ComputePass {
-             inner: ComputePassInner {
-                 id,
-                 data,
-                 context: self.context.clone(),
-             },
-             encoder_guard: PhantomData,
-         }
-     }
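The encoder-to-pass-to-submit flow described in these docs looks like the following minimal sketch (not part of the diff); `device`, `queue`, and `view` are assumed to exist, and the pass is dropped (ended) before `finish()` is called, satisfying the `'encoder` borrow.

```rust
fn render_frame(device: &wgpu::Device, queue: &wgpu::Queue, view: &wgpu::TextureView) {
    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("frame") });
    {
        let _pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
            label: None,
            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                view,
                resolve_target: None,
                ops: wgpu::Operations {
                    load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
                    store: wgpu::StoreOp::Store,
                },
            })],
            depth_stencil_attachment: None,
            timestamp_writes: None,
            occlusion_query_set: None,
        });
        // Record draws here; the pass ends when `_pass` is dropped.
    }
    queue.submit([encoder.finish()]);
}
```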
-     /// Copies data from one buffer to another.
-     ///
-     /// # Panics
-     ///
-     /// - Buffer offsets or copy size are not a multiple of [`COPY_BUFFER_ALIGNMENT`].
-     /// - Copy would overrun buffer.
-     /// - Copy is within the same buffer.
-     pub fn copy_buffer_to_buffer(
-         &mut self,
-         source: &Buffer,
-         source_offset: BufferAddress,
-         destination: &Buffer,
-         destination_offset: BufferAddress,
-         copy_size: BufferAddress,
-     ) {
-         DynContext::command_encoder_copy_buffer_to_buffer(
-             &*self.context,
-             self.id.as_ref().unwrap(),
-             self.data.as_ref(),
-             &source.id,
-             source.data.as_ref(),
-             source_offset,
-             &destination.id,
-             destination.data.as_ref(),
-             destination_offset,
-             copy_size,
-         );
-     }
-
-     /// Copies data from a buffer to a texture.
-     pub fn copy_buffer_to_texture(
-         &mut self,
-         source: ImageCopyBuffer<'_>,
-         destination: ImageCopyTexture<'_>,
-         copy_size: Extent3d,
-     ) {
-         DynContext::command_encoder_copy_buffer_to_texture(
-             &*self.context,
-             self.id.as_ref().unwrap(),
-             self.data.as_ref(),
-             source,
-             destination,
-             copy_size,
-         );
-     }
-
-     /// Copies data from a texture to a buffer.
-     pub fn copy_texture_to_buffer(
-         &mut self,
-         source: ImageCopyTexture<'_>,
-         destination: ImageCopyBuffer<'_>,
-         copy_size: Extent3d,
-     ) {
-         DynContext::command_encoder_copy_texture_to_buffer(
-             &*self.context,
-             self.id.as_ref().unwrap(),
-             self.data.as_ref(),
-             source,
-             destination,
-             copy_size,
-         );
-     }
-
-     /// Copies data from one texture to another.
-     ///
-     /// # Panics
-     ///
-     /// - Textures are not the same type.
-     /// - If a depth texture, or a multisampled texture, the entire texture must be copied.
-     /// - Copy would overrun either texture.
-     pub fn copy_texture_to_texture(
-         &mut self,
-         source: ImageCopyTexture<'_>,
-         destination: ImageCopyTexture<'_>,
-         copy_size: Extent3d,
-     ) {
-         DynContext::command_encoder_copy_texture_to_texture(
-             &*self.context,
-             self.id.as_ref().unwrap(),
-             self.data.as_ref(),
-             source,
-             destination,
-             copy_size,
-         );
-     }
-
-     /// Clears a texture to zero.
-     ///
-     /// Note that unlike with `clear_buffer`, `COPY_DST` usage is not required.
-     ///
-     /// # Implementation notes
-     ///
-     /// - Implemented either via buffer copies and render/depth target clears; the path depends on the texture usages.
-     /// - Behaves like texture zero init, but is performed immediately (clearing is *not* delayed via marking it as uninitialized).
-     ///
-     /// # Panics
-     ///
-     /// - `CLEAR_TEXTURE` extension not enabled.
-     /// - Range is out of bounds.
-     pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) {
-         DynContext::command_encoder_clear_texture(
-             &*self.context,
-             self.id.as_ref().unwrap(),
-             self.data.as_ref(),
-             texture,
-             subresource_range,
-         );
-     }
-
-     /// Clears a buffer to zero.
-     ///
-     /// # Panics
-     ///
-     /// - Buffer does not have `COPY_DST` usage.
-     /// - Range is out of bounds.
-     pub fn clear_buffer(
-         &mut self,
-         buffer: &Buffer,
-         offset: BufferAddress,
-         size: Option<BufferAddress>,
-     ) {
-         DynContext::command_encoder_clear_buffer(
-             &*self.context,
-             self.id.as_ref().unwrap(),
-             self.data.as_ref(),
-             buffer,
-             offset,
-             size,
-         );
-     }
-
-     /// Inserts a debug marker.
-     pub fn insert_debug_marker(&mut self, label: &str) {
-         let id = self.id.as_ref().unwrap();
-         DynContext::command_encoder_insert_debug_marker(
-             &*self.context,
-             id,
-             self.data.as_ref(),
-             label,
-         );
-     }
-
-     /// Starts recording commands and groups them into a debug marker group.
-     pub fn push_debug_group(&mut self, label: &str) {
-         let id = self.id.as_ref().unwrap();
-         DynContext::command_encoder_push_debug_group(&*self.context, id, self.data.as_ref(), label);
-     }
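To illustrate the alignment rules above, a minimal sketch (not part of the diff): offsets and sizes are multiples of `COPY_BUFFER_ALIGNMENT` (4), and `src`/`dst` are assumed to be distinct buffers with `COPY_SRC`/`COPY_DST` usage.

```rust
fn copy_and_clear(encoder: &mut wgpu::CommandEncoder, src: &wgpu::Buffer, dst: &wgpu::Buffer) {
    // Copy the first 256 bytes, then zero everything after the copied range.
    encoder.copy_buffer_to_buffer(src, 0, dst, 0, 256);
    encoder.clear_buffer(dst, 256, None);
}
```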
-     /// Stops command recording and closes the debug marker group.
-     pub fn pop_debug_group(&mut self) {
-         let id = self.id.as_ref().unwrap();
-         DynContext::command_encoder_pop_debug_group(&*self.context, id, self.data.as_ref());
-     }
-
-     /// Resolves a query set, writing the results into the supplied destination buffer.
-     ///
-     /// Occlusion and timestamp queries are 8 bytes each (see [`crate::QUERY_SIZE`]). For pipeline statistics queries,
-     /// see [`PipelineStatisticsTypes`] for more information.
-     pub fn resolve_query_set(
-         &mut self,
-         query_set: &QuerySet,
-         query_range: Range<u32>,
-         destination: &Buffer,
-         destination_offset: BufferAddress,
-     ) {
-         DynContext::command_encoder_resolve_query_set(
-             &*self.context,
-             self.id.as_ref().unwrap(),
-             self.data.as_ref(),
-             &query_set.id,
-             query_set.data.as_ref(),
-             query_range.start,
-             query_range.end - query_range.start,
-             &destination.id,
-             destination.data.as_ref(),
-             destination_offset,
-         )
-     }
-
-     /// Returns the inner hal CommandEncoder using a callback. The hal command encoder will be `None` if the
-     /// backend type argument does not match this wgpu `CommandEncoder`.
-     ///
-     /// This method will start the wgpu_core level command recording.
-     ///
-     /// # Safety
-     ///
-     /// - The raw handle obtained from the hal CommandEncoder must not be manually destroyed
-     #[cfg(wgpu_core)]
-     pub unsafe fn as_hal_mut<
-         A: wgc::hal_api::HalApi,
-         F: FnOnce(Option<&mut A::CommandEncoder>) -> R,
-         R,
-     >(
-         &mut self,
-         hal_command_encoder_callback: F,
-     ) -> Option<R> {
-         use core::id::CommandEncoderId;
-
-         self.context
-             .as_any()
-             .downcast_ref::<crate::backend::ContextWgpuCore>()
-             .map(|ctx| unsafe {
-                 ctx.command_encoder_as_hal_mut::<A, F, R>(
-                     CommandEncoderId::from(self.id.unwrap()),
-                     hal_command_encoder_callback,
-                 )
-             })
-     }
- }
-
- /// [`Features::TIMESTAMP_QUERY_INSIDE_ENCODERS`] must be enabled on the device in order to call these functions.
- impl CommandEncoder {
-     /// Issues a timestamp command at this point in the queue.
-     /// The timestamp will be written to the specified query set, at the specified index.
-     ///
-     /// Must be multiplied by [`Queue::get_timestamp_period`] to get
-     /// the value in nanoseconds. Absolute values have no meaning,
-     /// but timestamps can be subtracted to get the time it takes
-     /// for a string of operations to complete.
-     ///
-     /// Attention: Since commands within a command recorder may be reordered,
-     /// there is no strict guarantee that timestamps are taken after all commands
-     /// recorded so far and before all commands recorded afterwards.
-     /// This may depend both on the backend and the driver.
-     pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
-         DynContext::command_encoder_write_timestamp(
-             &*self.context,
-             self.id.as_ref().unwrap(),
-             self.data.as_mut(),
-             &query_set.id,
-             query_set.data.as_ref(),
-             query_index,
-         )
-     }
- }
-
- impl<'encoder> RenderPass<'encoder> {
-     /// Drops the lifetime relationship to the parent command encoder, making usage of
-     /// the encoder while this pass is recorded a run-time error instead.
-     ///
-     /// Attention: As long as the render pass has not been ended, any mutating operation on the parent
-     /// command encoder will cause a run-time error and invalidate it!
-     /// By default, the lifetime constraint prevents this, but it can be useful
-     /// to handle this at run time, such as when storing the pass and encoder in the same
-     /// data structure.
-     ///
-     /// This operation has no effect on pass recording.
-     /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
-     /// regardless of the lifetime constraint or its absence.
-     pub fn forget_lifetime(self) -> RenderPass<'static> {
-         RenderPass {
-             inner: self.inner,
-             encoder_guard: PhantomData,
-         }
-     }
-
-     /// Sets the active bind group for a given bind group index. The bind group layout
-     /// in the active pipeline when any `draw_*()` method is called must match the layout of
-     /// this bind group.
-     ///
-     /// If the bind group has dynamic offsets, provide them in binding order.
-     /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
-     /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
-     ///
-     /// Subsequent draw calls’ shader executions will be able to access data in these bind groups.
-     pub fn set_bind_group(
-         &mut self,
-         index: u32,
-         bind_group: &BindGroup,
-         offsets: &[DynamicOffset],
-     ) {
-         DynContext::render_pass_set_bind_group(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             index,
-             &bind_group.id,
-             bind_group.data.as_ref(),
-             offsets,
-         )
-     }
-
-     /// Sets the active render pipeline.
-     ///
-     /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
-     pub fn set_pipeline(&mut self, pipeline: &RenderPipeline) {
-         DynContext::render_pass_set_pipeline(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &pipeline.id,
-             pipeline.data.as_ref(),
-         )
-     }
-
-     /// Sets the blend color as used by some of the blending modes.
-     ///
-     /// Subsequent blending tests will test against this value.
-     /// If this method has not been called, the blend constant defaults to [`Color::TRANSPARENT`]
-     /// (all components zero).
-     pub fn set_blend_constant(&mut self, color: Color) {
-         DynContext::render_pass_set_blend_constant(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             color,
-         )
-     }
-
-     /// Sets the active index buffer.
-     ///
-     /// Subsequent calls to [`draw_indexed`](RenderPass::draw_indexed) on this [`RenderPass`] will
-     /// use `buffer` as the source index buffer.
-     pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'_>, index_format: IndexFormat) {
-         DynContext::render_pass_set_index_buffer(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &buffer_slice.buffer.id,
-             buffer_slice.buffer.data.as_ref(),
-             index_format,
-             buffer_slice.offset,
-             buffer_slice.size,
-         )
-     }
-
-     /// Assigns a vertex buffer to a slot.
-     ///
-     /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
-     /// [`RenderPass`] will use `buffer` as one of the source vertex buffers.
-     ///
-     /// The `slot` refers to the index of the matching descriptor in
-     /// [`VertexState::buffers`].
-     ///
-     /// [`draw`]: RenderPass::draw
-     /// [`draw_indexed`]: RenderPass::draw_indexed
-     pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'_>) {
-         DynContext::render_pass_set_vertex_buffer(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             slot,
-             &buffer_slice.buffer.id,
-             buffer_slice.buffer.data.as_ref(),
-             buffer_slice.offset,
-             buffer_slice.size,
-         )
-     }
-     /// Sets the scissor rectangle used during the rasterization stage, after transformation into
-     /// [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
-     ///
-     /// Subsequent draw calls will discard any fragments which fall outside the scissor rectangle.
-     /// If this method has not been called, the scissor rectangle defaults to the entire bounds of
-     /// the render targets.
-     ///
-     /// The function of the scissor rectangle resembles [`set_viewport()`](Self::set_viewport),
-     /// but it does not affect the coordinate system, only which fragments are discarded.
-     pub fn set_scissor_rect(&mut self, x: u32, y: u32, width: u32, height: u32) {
-         DynContext::render_pass_set_scissor_rect(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             x,
-             y,
-             width,
-             height,
-         );
-     }
-
-     /// Sets the viewport used during the rasterization stage to linearly map
-     /// from [normalized device coordinates](https://www.w3.org/TR/webgpu/#ndc) to [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
-     ///
-     /// Subsequent draw calls will only draw within this region.
-     /// If this method has not been called, the viewport defaults to the entire bounds of the render
-     /// targets.
-     pub fn set_viewport(&mut self, x: f32, y: f32, w: f32, h: f32, min_depth: f32, max_depth: f32) {
-         DynContext::render_pass_set_viewport(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             x,
-             y,
-             w,
-             h,
-             min_depth,
-             max_depth,
-         );
-     }
-
-     /// Sets the stencil reference.
-     ///
-     /// Subsequent stencil tests will test against this value.
-     /// If this method has not been called, the stencil reference value defaults to `0`.
-     pub fn set_stencil_reference(&mut self, reference: u32) {
-         DynContext::render_pass_set_stencil_reference(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             reference,
-         );
-     }
-
-     /// Inserts a debug marker.
-     pub fn insert_debug_marker(&mut self, label: &str) {
-         DynContext::render_pass_insert_debug_marker(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             label,
-         );
-     }
-
-     /// Starts recording commands and groups them into a debug marker group.
-     pub fn push_debug_group(&mut self, label: &str) {
-         DynContext::render_pass_push_debug_group(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             label,
-         );
-     }
-
-     /// Stops command recording and closes the debug marker group.
-     pub fn pop_debug_group(&mut self) {
-         DynContext::render_pass_pop_debug_group(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-         );
-     }
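The state-then-draw pattern documented above can be summarized in a minimal sketch (not part of the diff); `pipeline`, `bind_group`, and `vertices` are assumed to be created elsewhere.

```rust
fn record_draw(
    pass: &mut wgpu::RenderPass<'_>,
    pipeline: &wgpu::RenderPipeline,
    bind_group: &wgpu::BindGroup,
    vertices: &wgpu::Buffer,
) {
    // Every `set_*` call configures state consumed by the draws that follow it.
    pass.set_pipeline(pipeline);
    pass.set_bind_group(0, bind_group, &[]);
    pass.set_vertex_buffer(0, vertices.slice(..));
    pass.set_scissor_rect(0, 0, 640, 480); // discard fragments outside 640x480
    pass.draw(0..3, 0..1); // one triangle, one instance
}
```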
-     /// Draws primitives from the active vertex buffer(s).
-     ///
-     /// The active vertex buffer(s) can be set with [`RenderPass::set_vertex_buffer`].
-     /// Does not use an index buffer. If you need one, see [`RenderPass::draw_indexed`].
-     ///
-     /// Panics if the `vertices` range is outside the bounds of the currently set vertex buffers.
-     ///
-     /// - `vertices`: the range of vertices to draw.
-     /// - `instances`: the range of instances to draw; use `0..1` if instance buffers are not used.
-     ///
-     /// A sketch of how this is used internally:
-     /// ```rust ignore
-     /// for instance_id in instance_range {
-     ///     for vertex_id in vertex_range {
-     ///         let vertex = vertex[vertex_id];
-     ///         vertex_shader(vertex, vertex_id, instance_id);
-     ///     }
-     /// }
-     /// ```
-     ///
-     /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-     /// It is not affected by changes to the state that are performed after it is called.
-     pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
-         DynContext::render_pass_draw(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             vertices,
-             instances,
-         )
-     }
-
-     /// Draws indexed primitives using the active index buffer and the active vertex buffers.
-     ///
-     /// The active index buffer can be set with [`RenderPass::set_index_buffer`].
-     /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-     ///
-     /// Panics if the `indices` range is outside the bounds of the currently set index buffer.
-     ///
-     /// - `indices`: the range of indices to draw.
-     /// - `base_vertex`: value added to each index value before indexing into the vertex buffers.
-     /// - `instances`: the range of instances to draw; use `0..1` if instance buffers are not used.
-     ///
-     /// A sketch of how this is used internally:
-     /// ```rust ignore
-     /// for instance_id in instance_range {
-     ///     for index_index in index_range {
-     ///         let vertex_id = index_buffer[index_index];
-     ///         let adjusted_vertex_id = vertex_id + base_vertex;
-     ///         let vertex = vertex[adjusted_vertex_id];
-     ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
-     ///     }
-     /// }
-     /// ```
-     ///
-     /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-     /// It is not affected by changes to the state that are performed after it is called.
-     pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
-         DynContext::render_pass_draw_indexed(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             indices,
-             base_vertex,
-             instances,
-         );
-     }
-
-     /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
-     ///
-     /// This is like calling [`RenderPass::draw`] but the contents of the call are specified in the `indirect_buffer`.
-     /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
-     pub fn draw_indirect(&mut self, indirect_buffer: &Buffer, indirect_offset: BufferAddress) {
-         DynContext::render_pass_draw_indirect(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &indirect_buffer.id,
-             indirect_buffer.data.as_ref(),
-             indirect_offset,
-         );
-     }
-     /// Draws indexed primitives using the active index buffer and the active vertex buffers,
-     /// based on the contents of the `indirect_buffer`.
-     ///
-     /// This is like calling [`RenderPass::draw_indexed`] but the contents of the call are specified in the `indirect_buffer`.
-     /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
-     ///
-     /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
-     /// these and issue an error.
-     /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
-     ///   [`DrawIndexedIndirect::first_instance`](crate::util::DrawIndexedIndirectArgs::first_instance) will be ignored.
-     /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
-     ///   any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
-     ///
-     /// See details on the individual flags for more information.
-     pub fn draw_indexed_indirect(
-         &mut self,
-         indirect_buffer: &Buffer,
-         indirect_offset: BufferAddress,
-     ) {
-         DynContext::render_pass_draw_indexed_indirect(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &indirect_buffer.id,
-             indirect_buffer.data.as_ref(),
-             indirect_offset,
-         );
-     }
-
-     /// Executes a [render bundle][RenderBundle], which is a set of pre-recorded commands
-     /// that can be run together.
-     ///
-     /// Commands in the bundle do not inherit this render pass's current render state, and after the
-     /// bundle has executed, the state is **cleared** (reset to defaults, not the previous state).
-     pub fn execute_bundles<'a, I: IntoIterator<Item = &'a RenderBundle>>(
-         &mut self,
-         render_bundles: I,
-     ) {
-         let mut render_bundles = render_bundles
-             .into_iter()
-             .map(|rb| (&rb.id, rb.data.as_ref()));
-
-         DynContext::render_pass_execute_bundles(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &mut render_bundles,
-         )
-     }
- }
-
- /// [`Features::MULTI_DRAW_INDIRECT`] must be enabled on the device in order to call these functions.
- impl<'encoder> RenderPass<'encoder> {
-     /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
-     /// `count` draw calls are issued.
-     ///
-     /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-     ///
-     /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
-     /// These draw structures are expected to be tightly packed.
-     ///
-     /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-     /// It is not affected by changes to the state that are performed after it is called.
-     pub fn multi_draw_indirect(
-         &mut self,
-         indirect_buffer: &Buffer,
-         indirect_offset: BufferAddress,
-         count: u32,
-     ) {
-         DynContext::render_pass_multi_draw_indirect(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &indirect_buffer.id,
-             indirect_buffer.data.as_ref(),
-             indirect_offset,
-             count,
-         );
-     }
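The `DrawIndirectArgs` layout the docs above point to can be built on the CPU with `wgpu::util::DrawIndirectArgs`; a minimal sketch (not part of the diff), assuming `indirect` has `INDIRECT` usage and already holds these bytes:

```rust
fn record_indirect(pass: &mut wgpu::RenderPass<'_>, indirect: &wgpu::Buffer) {
    let args = wgpu::util::DrawIndirectArgs {
        vertex_count: 3,
        instance_count: 1,
        first_vertex: 0,
        first_instance: 0,
    };
    let _bytes: &[u8] = args.as_bytes(); // what you would upload into `indirect`
    pass.draw_indirect(indirect, 0);     // offset 0 into the argument buffer
}
```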
-     /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
-     /// based on the contents of the `indirect_buffer`. `count` draw calls are issued.
-     ///
-     /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
-     /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-     ///
-     /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
-     /// These draw structures are expected to be tightly packed.
-     ///
-     /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-     /// It is not affected by changes to the state that are performed after it is called.
-     pub fn multi_draw_indexed_indirect(
-         &mut self,
-         indirect_buffer: &Buffer,
-         indirect_offset: BufferAddress,
-         count: u32,
-     ) {
-         DynContext::render_pass_multi_draw_indexed_indirect(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &indirect_buffer.id,
-             indirect_buffer.data.as_ref(),
-             indirect_offset,
-             count,
-         );
-     }
- }
-
- /// [`Features::MULTI_DRAW_INDIRECT_COUNT`] must be enabled on the device in order to call these functions.
- impl<'encoder> RenderPass<'encoder> {
-     /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
-     /// The count buffer is read to determine how many draws to issue.
-     ///
-     /// The indirect buffer must be long enough to account for `max_count` draws, however only `count`
-     /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
-     ///
-     /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-     ///
-     /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
-     /// These draw structures are expected to be tightly packed.
-     ///
-     /// The structure expected in `count_buffer` is the following:
-     ///
-     /// ```rust
-     /// #[repr(C)]
-     /// struct DrawIndirectCount {
-     ///     count: u32, // Number of draw calls to issue.
-     /// }
-     /// ```
-     ///
-     /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-     /// It is not affected by changes to the state that are performed after it is called.
-     pub fn multi_draw_indirect_count(
-         &mut self,
-         indirect_buffer: &Buffer,
-         indirect_offset: BufferAddress,
-         count_buffer: &Buffer,
-         count_offset: BufferAddress,
-         max_count: u32,
-     ) {
-         DynContext::render_pass_multi_draw_indirect_count(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &indirect_buffer.id,
-             indirect_buffer.data.as_ref(),
-             indirect_offset,
-             &count_buffer.id,
-             count_buffer.data.as_ref(),
-             count_offset,
-             max_count,
-         );
-     }
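A minimal sketch of the count-limited multi-draw described above (not part of the diff): the GPU reads the draw count (a single `u32`) from `count_buf` and clamps it to `max_count`. Requires `Features::MULTI_DRAW_INDIRECT_COUNT`; both buffers are assumed valid.

```rust
fn record_multi_draw(pass: &mut wgpu::RenderPass<'_>, indirect: &wgpu::Buffer, count_buf: &wgpu::Buffer) {
    // `indirect` must hold at least `max_count` tightly packed argument structs.
    let max_count = 64;
    pass.multi_draw_indirect_count(indirect, 0, count_buf, 0, max_count);
}
```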
-     /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
-     /// based on the contents of the `indirect_buffer`. The count buffer is read to determine how many draws to issue.
-     ///
-     /// The indirect buffer must be long enough to account for `max_count` draws, however only `count`
-     /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
-     ///
-     /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
-     /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
-     ///
-     /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
-     /// These draw structures are expected to be tightly packed.
-     ///
-     /// The structure expected in `count_buffer` is the following:
-     ///
-     /// ```rust
-     /// #[repr(C)]
-     /// struct DrawIndexedIndirectCount {
-     ///     count: u32, // Number of draw calls to issue.
-     /// }
-     /// ```
-     ///
-     /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
-     /// It is not affected by changes to the state that are performed after it is called.
-     pub fn multi_draw_indexed_indirect_count(
-         &mut self,
-         indirect_buffer: &Buffer,
-         indirect_offset: BufferAddress,
-         count_buffer: &Buffer,
-         count_offset: BufferAddress,
-         max_count: u32,
-     ) {
-         DynContext::render_pass_multi_draw_indexed_indirect_count(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &indirect_buffer.id,
-             indirect_buffer.data.as_ref(),
-             indirect_offset,
-             &count_buffer.id,
-             count_buffer.data.as_ref(),
-             count_offset,
-             max_count,
-         );
-     }
- }
-
- /// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
- impl<'encoder> RenderPass<'encoder> {
-     /// Sets push constant data for subsequent draw calls.
-     ///
-     /// Write the bytes in `data` at offset `offset` within push constant
-     /// storage, all of which are accessible by all the pipeline stages in
-     /// `stages`, and no others. Both `offset` and the length of `data` must be
-     /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
-     ///
-     /// For example, if `offset` is `4` and `data` is eight bytes long, this
-     /// call will write `data` to bytes `4..12` of push constant storage.
-     ///
-     /// # Stage matching
-     ///
-     /// Every byte in the affected range of push constant storage must be
-     /// accessible to exactly the same set of pipeline stages, which must match
-     /// `stages`. If there are two bytes of storage that are accessible by
-     /// different sets of pipeline stages - say, one is accessible by fragment
-     /// shaders, and the other is accessible by both fragment shaders and vertex
-     /// shaders - then no single `set_push_constants` call may affect both of
-     /// them; to write both, you must make multiple calls, each with the
-     /// appropriate `stages` value.
-     ///
-     /// Which pipeline stages may access a given byte is determined by the
-     /// pipeline's [`PushConstant`] global variable and (if it is a struct) its
-     /// members' offsets.
-     ///
-     /// For example, suppose you have twelve bytes of push constant storage,
-     /// where bytes `0..8` are accessed by the vertex shader, and bytes `4..12`
-     /// are accessed by the fragment shader. This means there are three byte
-     /// ranges each accessed by a different set of stages:
-     ///
-     /// - Bytes `0..4` are accessed only by the vertex shader.
-     ///
-     /// - Bytes `4..8` are accessed by both the vertex shader and the fragment shader.
-     ///
-     /// - Bytes `8..12` are accessed only by the fragment shader.
-     ///
-     /// To write all twelve bytes requires three `set_push_constants` calls, one
-     /// for each range, each passing the matching `stages` mask.
-     ///
-     /// [`PushConstant`]: https://docs.rs/naga/latest/naga/enum.StorageClass.html#variant.PushConstant
-     pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
-         DynContext::render_pass_set_push_constants(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             stages,
-             offset,
-             data,
-         );
-     }
- }
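Spelled out for the example layout above (vertex stage sees bytes `0..8`, fragment stage sees `4..12`), the stage-matching rule means three calls, one per distinct stage set; a minimal sketch (not part of the diff), requiring `Features::PUSH_CONSTANTS`:

```rust
fn upload_push_constants(pass: &mut wgpu::RenderPass<'_>) {
    pass.set_push_constants(wgpu::ShaderStages::VERTEX, 0, &[0; 4]);
    pass.set_push_constants(wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, 4, &[0; 4]);
    pass.set_push_constants(wgpu::ShaderStages::FRAGMENT, 8, &[0; 4]);
}
```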
- /// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
- impl<'encoder> RenderPass<'encoder> {
-     /// Issues a timestamp command at this point in the queue. The
-     /// timestamp will be written to the specified query set, at the specified index.
-     ///
-     /// Must be multiplied by [`Queue::get_timestamp_period`] to get
-     /// the value in nanoseconds. Absolute values have no meaning,
-     /// but timestamps can be subtracted to get the time it takes
-     /// for a string of operations to complete.
-     pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
-         DynContext::render_pass_write_timestamp(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &query_set.id,
-             query_set.data.as_ref(),
-             query_index,
-         )
-     }
- }
-
- impl<'encoder> RenderPass<'encoder> {
-     /// Starts an occlusion query on this render pass. It can be ended with
-     /// `end_occlusion_query`. Occlusion queries may not be nested.
-     pub fn begin_occlusion_query(&mut self, query_index: u32) {
-         DynContext::render_pass_begin_occlusion_query(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             query_index,
-         );
-     }
-
-     /// Ends the occlusion query on this render pass. It can be started with
-     /// `begin_occlusion_query`. Occlusion queries may not be nested.
-     pub fn end_occlusion_query(&mut self) {
-         DynContext::render_pass_end_occlusion_query(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-         );
-     }
- }
-
- /// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
- impl<'encoder> RenderPass<'encoder> {
-     /// Starts a pipeline statistics query on this render pass. It can be ended with
-     /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
-     pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
-         DynContext::render_pass_begin_pipeline_statistics_query(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &query_set.id,
-             query_set.data.as_ref(),
-             query_index,
-         );
-     }
-
-     /// Ends the pipeline statistics query on this render pass. It can be started with
-     /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
-     pub fn end_pipeline_statistics_query(&mut self) {
-         DynContext::render_pass_end_pipeline_statistics_query(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-         );
-     }
- }
-
- impl Drop for RenderPassInner {
-     fn drop(&mut self) {
-         if !thread::panicking() {
-             self.context
-                 .render_pass_end(&mut self.id, self.data.as_mut());
-         }
-     }
- }
-
- impl<'encoder> ComputePass<'encoder> {
-     /// Drops the lifetime relationship to the parent command encoder, making usage of
-     /// the encoder while this pass is recorded a run-time error instead.
-     ///
-     /// Attention: As long as the compute pass has not been ended, any mutating operation on the parent
-     /// command encoder will cause a run-time error and invalidate it!
-     /// By default, the lifetime constraint prevents this, but it can be useful
-     /// to handle this at run time, such as when storing the pass and encoder in the same
-     /// data structure.
-     ///
-     /// This operation has no effect on pass recording.
-     /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
-     /// regardless of the lifetime constraint or its absence.
-     pub fn forget_lifetime(self) -> ComputePass<'static> {
-         ComputePass {
-             inner: self.inner,
-             encoder_guard: PhantomData,
-         }
-     }
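The `forget_lifetime` escape hatch documented above is what makes the "pass and encoder in the same data structure" case possible; a minimal sketch (not part of the diff, names hypothetical). Ending the pass (dropping it) before calling `encoder.finish()` remains the caller's responsibility.

```rust
struct Recorder {
    encoder: wgpu::CommandEncoder,
    pass: wgpu::ComputePass<'static>, // lifetime erased via forget_lifetime
}

fn make_recorder(device: &wgpu::Device) -> Recorder {
    let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
    let pass = encoder
        .begin_compute_pass(&wgpu::ComputePassDescriptor {
            label: None,
            timestamp_writes: None,
        })
        .forget_lifetime();
    Recorder { encoder, pass }
}
```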
-     /// Sets the active bind group for a given bind group index. The bind group layout
-     /// in the active pipeline when the `dispatch()` function is called must match the layout of this bind group.
-     ///
-     /// If the bind group has dynamic offsets, provide them in the binding order.
-     /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
-     /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
-     pub fn set_bind_group(
-         &mut self,
-         index: u32,
-         bind_group: &BindGroup,
-         offsets: &[DynamicOffset],
-     ) {
-         DynContext::compute_pass_set_bind_group(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             index,
-             &bind_group.id,
-             bind_group.data.as_ref(),
-             offsets,
-         );
-     }
-
-     /// Sets the active compute pipeline.
-     pub fn set_pipeline(&mut self, pipeline: &ComputePipeline) {
-         DynContext::compute_pass_set_pipeline(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &pipeline.id,
-             pipeline.data.as_ref(),
-         );
-     }
-
-     /// Inserts a debug marker.
-     pub fn insert_debug_marker(&mut self, label: &str) {
-         DynContext::compute_pass_insert_debug_marker(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             label,
-         );
-     }
-
-     /// Starts recording commands and groups them into a debug marker group.
-     pub fn push_debug_group(&mut self, label: &str) {
-         DynContext::compute_pass_push_debug_group(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             label,
-         );
-     }
-
-     /// Stops command recording and closes the debug marker group.
-     pub fn pop_debug_group(&mut self) {
-         DynContext::compute_pass_pop_debug_group(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-         );
-     }
-
-     /// Dispatches compute work operations.
-     ///
-     /// `x`, `y` and `z` denote the number of work groups to dispatch in each dimension.
-     pub fn dispatch_workgroups(&mut self, x: u32, y: u32, z: u32) {
-         DynContext::compute_pass_dispatch_workgroups(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             x,
-             y,
-             z,
-         );
-     }
-
-     /// Dispatches compute work operations, based on the contents of the `indirect_buffer`.
-     ///
-     /// The structure expected in `indirect_buffer` must conform to [`DispatchIndirectArgs`](crate::util::DispatchIndirectArgs).
-     pub fn dispatch_workgroups_indirect(
-         &mut self,
-         indirect_buffer: &Buffer,
-         indirect_offset: BufferAddress,
-     ) {
-         DynContext::compute_pass_dispatch_workgroups_indirect(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &indirect_buffer.id,
-             indirect_buffer.data.as_ref(),
-             indirect_offset,
-         );
-     }
- }
-
- /// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
- impl<'encoder> ComputePass<'encoder> {
-     /// Sets push constant data for subsequent dispatch calls.
-     ///
-     /// Write the bytes in `data` at offset `offset` within push constant
-     /// storage. Both `offset` and the length of `data` must be
-     /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
-     ///
-     /// For example, if `offset` is `4` and `data` is eight bytes long, this
-     /// call will write `data` to bytes `4..12` of push constant storage.
-     pub fn set_push_constants(&mut self, offset: u32, data: &[u8]) {
-         DynContext::compute_pass_set_push_constants(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             offset,
-             data,
-         );
-     }
- }
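Tying the compute-pass methods above together, a minimal sketch (not part of the diff): pipeline plus bind group, then a workgroup grid sized for `n` items under an assumed workgroup size of 64.

```rust
fn record_compute(
    pass: &mut wgpu::ComputePass<'_>,
    pipeline: &wgpu::ComputePipeline,
    bind_group: &wgpu::BindGroup,
    n: u32,
) {
    pass.set_pipeline(pipeline);
    pass.set_bind_group(0, bind_group, &[]);
    // One workgroup per 64 items; hypothetical @workgroup_size(64) in the shader.
    pass.dispatch_workgroups(n.div_ceil(64), 1, 1);
}
```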
- /// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
- impl<'encoder> ComputePass<'encoder> {
-     /// Issues a timestamp command at this point in the queue. The timestamp will be written to the specified query set, at the specified index.
-     ///
-     /// Must be multiplied by [`Queue::get_timestamp_period`] to get
-     /// the value in nanoseconds. Absolute values have no meaning,
-     /// but timestamps can be subtracted to get the time it takes
-     /// for a string of operations to complete.
-     pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
-         DynContext::compute_pass_write_timestamp(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &query_set.id,
-             query_set.data.as_ref(),
-             query_index,
-         )
-     }
- }
-
- /// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
- impl<'encoder> ComputePass<'encoder> {
-     /// Starts a pipeline statistics query on this compute pass. It can be ended with
-     /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
-     pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
-         DynContext::compute_pass_begin_pipeline_statistics_query(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-             &query_set.id,
-             query_set.data.as_ref(),
-             query_index,
-         );
-     }
-
-     /// Ends the pipeline statistics query on this compute pass. It can be started with
-     /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
-     pub fn end_pipeline_statistics_query(&mut self) {
-         DynContext::compute_pass_end_pipeline_statistics_query(
-             &*self.inner.context,
-             &mut self.inner.id,
-             self.inner.data.as_mut(),
-         );
-     }
- }
-
- impl Drop for ComputePassInner {
-     fn drop(&mut self) {
-         if !thread::panicking() {
-             self.context
-                 .compute_pass_end(&mut self.id, self.data.as_mut());
-         }
-     }
- }
-
- impl<'a> RenderBundleEncoder<'a> {
-     /// Finishes recording and returns a [`RenderBundle`] that can be executed in other render passes.
-     pub fn finish(self, desc: &RenderBundleDescriptor<'_>) -> RenderBundle {
-         let (id, data) =
-             DynContext::render_bundle_encoder_finish(&*self.context, self.id, self.data, desc);
-         RenderBundle {
-             context: Arc::clone(&self.context),
-             id,
-             data,
-         }
-     }
-
-     /// Sets the active bind group for a given bind group index. The bind group layout
-     /// in the active pipeline when any `draw()` function is called must match the layout of this bind group.
-     ///
-     /// If the bind group has dynamic offsets, provide them in the binding order.
-     pub fn set_bind_group(
-         &mut self,
-         index: u32,
-         bind_group: &'a BindGroup,
-         offsets: &[DynamicOffset],
-     ) {
-         DynContext::render_bundle_encoder_set_bind_group(
-             &*self.parent.context,
-             &mut self.id,
-             self.data.as_mut(),
-             index,
-             &bind_group.id,
-             bind_group.data.as_ref(),
-             offsets,
-         )
-     }
-
-     /// Sets the active render pipeline.
-     ///
-     /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
-     pub fn set_pipeline(&mut self, pipeline: &'a RenderPipeline) {
-         DynContext::render_bundle_encoder_set_pipeline(
-             &*self.parent.context,
-             &mut self.id,
-             self.data.as_mut(),
-             &pipeline.id,
-             pipeline.data.as_ref(),
-         )
-     }
-
-     /// Sets the active index buffer.
-     ///
-     /// Subsequent calls to [`draw_indexed`](RenderBundleEncoder::draw_indexed) on this [`RenderBundleEncoder`] will
-     /// use `buffer` as the source index buffer.
-     pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'a>, index_format: IndexFormat) {
-         DynContext::render_bundle_encoder_set_index_buffer(
-             &*self.parent.context,
-             &mut self.id,
-             self.data.as_mut(),
-             &buffer_slice.buffer.id,
-             buffer_slice.buffer.data.as_ref(),
-             index_format,
-             buffer_slice.offset,
-             buffer_slice.size,
-         )
-     }
-     /// Assigns a vertex buffer to a slot.
-     ///
-     /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
-     /// [`RenderBundleEncoder`] will use `buffer` as one of the source vertex buffers.
-     ///
-     /// The `slot` refers to the index of the matching descriptor in
-     /// [`VertexState::buffers`].
-     ///
-     /// [`draw`]: RenderBundleEncoder::draw
-     /// [`draw_indexed`]: RenderBundleEncoder::draw_indexed
-     pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'a>) {
-         DynContext::render_bundle_encoder_set_vertex_buffer(
-             &*self.parent.context,
-             &mut self.id,
-             self.data.as_mut(),
-             slot,
-             &buffer_slice.buffer.id,
-             buffer_slice.buffer.data.as_ref(),
-             buffer_slice.offset,
-             buffer_slice.size,
-         )
-     }
-
-     /// Draws primitives from the active vertex buffer(s).
-     ///
-     /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
-     /// Does not use an index buffer. If you need one, see [`RenderBundleEncoder::draw_indexed`].
-     ///
-     /// Panics if the `vertices` range is outside the bounds of the currently set vertex buffers.
-     ///
-     /// - `vertices`: the range of vertices to draw.
-     /// - `instances`: the range of instances to draw; use `0..1` if instance buffers are not used.
-     ///
-     /// A sketch of how this is used internally:
-     /// ```rust ignore
-     /// for instance_id in instance_range {
-     ///     for vertex_id in vertex_range {
-     ///         let vertex = vertex[vertex_id];
-     ///         vertex_shader(vertex, vertex_id, instance_id);
-     ///     }
-     /// }
-     /// ```
-     pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
-         DynContext::render_bundle_encoder_draw(
-             &*self.parent.context,
-             &mut self.id,
-             self.data.as_mut(),
-             vertices,
-             instances,
-         )
-     }
-
-     /// Draws indexed primitives using the active index buffer and the active vertex buffer(s).
-     ///
-     /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`].
-     /// The active vertex buffer(s) can be set with [`RenderBundleEncoder::set_vertex_buffer`].
-     ///
-     /// Panics if the `indices` range is outside the bounds of the currently set index buffer.
-     ///
-     /// - `indices`: the range of indices to draw.
-     /// - `base_vertex`: value added to each index value before indexing into the vertex buffers.
-     /// - `instances`: the range of instances to draw; use `0..1` if instance buffers are not used.
-     ///
-     /// A sketch of how this is used internally:
-     /// ```rust ignore
-     /// for instance_id in instance_range {
-     ///     for index_index in index_range {
-     ///         let vertex_id = index_buffer[index_index];
-     ///         let adjusted_vertex_id = vertex_id + base_vertex;
-     ///         let vertex = vertex[adjusted_vertex_id];
-     ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
-     ///     }
-     /// }
-     /// ```
-     pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
-         DynContext::render_bundle_encoder_draw_indexed(
-             &*self.parent.context,
-             &mut self.id,
-             self.data.as_mut(),
-             indices,
-             base_vertex,
-             instances,
-         );
-     }
-
-     /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
-     ///
-     /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
-     ///
-     /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
-     pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: BufferAddress) {
-         DynContext::render_bundle_encoder_draw_indirect(
-             &*self.parent.context,
-             &mut self.id,
-             self.data.as_mut(),
-             &indirect_buffer.id,
-             indirect_buffer.data.as_ref(),
-             indirect_offset,
-         );
-     }
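Recording into a bundle mirrors recording into a pass; a minimal sketch (not part of the diff), assuming `pipeline` and `vertices` match the formats declared in the descriptor:

```rust
fn make_bundle<'a>(
    device: &'a wgpu::Device,
    pipeline: &'a wgpu::RenderPipeline,
    vertices: &'a wgpu::Buffer,
) -> wgpu::RenderBundle {
    let mut enc = device.create_render_bundle_encoder(&wgpu::RenderBundleEncoderDescriptor {
        label: None,
        color_formats: &[Some(wgpu::TextureFormat::Bgra8UnormSrgb)],
        depth_stencil: None,
        sample_count: 1,
        multiview: None,
    });
    enc.set_pipeline(pipeline);
    enc.set_vertex_buffer(0, vertices.slice(..));
    enc.draw(0..3, 0..1);
    // The bundle can later be replayed with `RenderPass::execute_bundles`.
    enc.finish(&wgpu::RenderBundleDescriptor { label: None })
}
```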
-     /// Draws indexed primitives using the active index buffer and the active vertex buffers,
-     /// based on the contents of the `indirect_buffer`.
-     ///
-     /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`], while the active
-     /// vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
-     ///
-     /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
-     pub fn draw_indexed_indirect(
-         &mut self,
-         indirect_buffer: &'a Buffer,
-         indirect_offset: BufferAddress,
-     ) {
-         DynContext::render_bundle_encoder_draw_indexed_indirect(
-             &*self.parent.context,
-             &mut self.id,
-             self.data.as_mut(),
-             &indirect_buffer.id,
-             indirect_buffer.data.as_ref(),
-             indirect_offset,
-         );
-     }
- }
-
- /// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
- impl<'a> RenderBundleEncoder<'a> {
-     /// Sets push constant data.
-     ///
-     /// Offset is measured in bytes, but must be a multiple of [`PUSH_CONSTANT_ALIGNMENT`].
-     ///
-     /// Data size must be a multiple of 4 and must have an alignment of 4.
-     /// For example, with an offset of 4 and an array of `[u8; 8]`, that will write to the range
-     /// of 4..12.
-     ///
-     /// For each byte in the range of push constant data written, the union of the stages of all push constant
-     /// ranges that covers that byte must be exactly `stages`. There's no good way of explaining this simply,
-     /// so here are some examples:
-     ///
-     /// ```text
-     /// For the given ranges:
-     /// - 0..4 Vertex
-     /// - 4..8 Fragment
-     /// ```
-     ///
-     /// You would need to upload this in two `set_push_constants` calls. First for the `Vertex` range, second for the `Fragment` range.
-     ///
-     /// ```text
-     /// For the given ranges:
-     /// - 0..8 Vertex
-     /// - 4..12 Fragment
-     /// ```
-     ///
-     /// You would need to upload this in three `set_push_constants` calls. First for the `Vertex` only range 0..4, second
-     /// for the `Vertex | Fragment` range 4..8, third for the `Fragment` range 8..12.
-     pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
-         DynContext::render_bundle_encoder_set_push_constants(
-             &*self.parent.context,
-             &mut self.id,
-             self.data.as_mut(),
-             stages,
-             offset,
-             data,
-         );
-     }
- }
-
- /// A write-only view into a staging buffer.
- ///
- /// Reading from this buffer won't yield the contents of the buffer from the
- /// GPU and is likely to be slow. Because of this, although [`AsMut`] is
- /// implemented for this type, [`AsRef`] is not.
- pub struct QueueWriteBufferView<'a> {
-     queue: &'a Queue,
-     buffer: &'a Buffer,
-     offset: BufferAddress,
-     inner: Box<dyn context::QueueWriteBuffer>,
- }
- #[cfg(send_sync)]
- static_assertions::assert_impl_all!(QueueWriteBufferView<'_>: Send, Sync);
-
- impl Deref for QueueWriteBufferView<'_> {
-     type Target = [u8];
-
-     fn deref(&self) -> &Self::Target {
-         log::warn!("Reading from a QueueWriteBufferView won't yield the contents of the buffer and may be slow.");
-         self.inner.slice()
-     }
- }
-
- impl DerefMut for QueueWriteBufferView<'_> {
-     fn deref_mut(&mut self) -> &mut Self::Target {
-         self.inner.slice_mut()
-     }
- }
-
- impl<'a> AsMut<[u8]> for QueueWriteBufferView<'a> {
-     fn as_mut(&mut self) -> &mut [u8] {
-         self.inner.slice_mut()
-     }
- }
-
- impl<'a> Drop for QueueWriteBufferView<'a> {
-     fn drop(&mut self) {
-         DynContext::queue_write_staging_buffer(
-             &*self.queue.context,
-             &self.queue.id,
-             self.queue.data.as_ref(),
-             &self.buffer.id,
-             self.buffer.data.as_ref(),
-             self.offset,
-             &*self.inner,
-         );
-     }
- }
-
- impl Queue {
-     /// Schedules a data write into `buffer` starting at `offset`.
-     ///
-     /// This method fails if `data` overruns the size of `buffer` starting at `offset`.
-     ///
-     /// This does *not* submit the transfer to the GPU immediately. Calls to
-     /// `write_buffer` begin execution only on the next call to
-     /// [`Queue::submit`]. To get a set of scheduled transfers started
-     /// immediately, it's fine to call `submit` with no command buffers at all:
-     ///
-     /// ```no_run
-     /// # let queue: wgpu::Queue = todo!();
-     /// queue.submit([]);
-     /// ```
-     ///
-     /// However, `data` will be immediately copied into staging memory, so the
-     /// caller may discard it any time after this call completes.
-     ///
-     /// If possible, consider using [`Queue::write_buffer_with`] instead. That
-     /// method avoids an intermediate copy and is often able to transfer data
-     /// more efficiently than this one.
-     pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) {
-         DynContext::queue_write_buffer(
-             &*self.context,
-             &self.id,
-             self.data.as_ref(),
-             &buffer.id,
-             buffer.data.as_ref(),
-             offset,
-             data,
-         )
-     }
-
-     /// Writes to a buffer via a directly mapped staging buffer.
-     ///
-     /// Returns a [`QueueWriteBufferView`] which, when dropped, schedules a copy
-     /// of its contents into `buffer` at `offset`. The returned view
-     /// dereferences to a `size`-byte long `&mut [u8]`, in which you should
-     /// store the data you would like written to `buffer`.
-     ///
-     /// This method may perform transfers faster than [`Queue::write_buffer`],
-     /// because the returned [`QueueWriteBufferView`] is actually the staging
-     /// buffer for the write, mapped into the caller's address space. Writing
-     /// your data directly into this staging buffer avoids the temporary
-     /// CPU-side buffer needed by `write_buffer`.
-     ///
-     /// Reading from the returned view is slow, and will not yield the current
-     /// contents of `buffer`.
-     ///
-     /// Note that dropping the [`QueueWriteBufferView`] does *not* submit the
-     /// transfer to the GPU immediately. The transfer begins only on the next
-     /// call to [`Queue::submit`] after the view is dropped. To get a set of
-     /// scheduled transfers started immediately, it's fine to call `submit` with
-     /// no command buffers at all:
-     ///
-     /// ```no_run
-     /// # let queue: wgpu::Queue = todo!();
-     /// queue.submit([]);
-     /// ```
-     ///
-     /// This method fails if `size` is greater than the size of `buffer` starting at `offset`.
-     #[must_use]
-     pub fn write_buffer_with<'a>(
-         &'a self,
-         buffer: &'a Buffer,
-         offset: BufferAddress,
-         size: BufferSize,
-     ) -> Option<QueueWriteBufferView<'a>> {
-         profiling::scope!("Queue::write_buffer_with");
-         DynContext::queue_validate_write_buffer(
-             &*self.context,
-             &self.id,
-             self.data.as_ref(),
-             &buffer.id,
-             buffer.data.as_ref(),
-             offset,
-             size,
-         )?;
-         let staging_buffer = DynContext::queue_create_staging_buffer(
-             &*self.context,
-             &self.id,
-             self.data.as_ref(),
-             size,
-         )?;
-         Some(QueueWriteBufferView {
-             queue: self,
-             buffer,
-             offset,
-             inner: staging_buffer,
-         })
-     }
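The staging-view path described above avoids the temporary CPU-side copy; a minimal sketch (not part of the diff), assuming `buffer` has `COPY_DST` usage and `bytes` is non-empty:

```rust
fn upload(queue: &wgpu::Queue, buffer: &wgpu::Buffer, bytes: &[u8]) {
    let size = wgpu::BufferSize::new(bytes.len() as u64).expect("non-empty upload");
    if let Some(mut view) = queue.write_buffer_with(buffer, 0, size) {
        // Write-only: don't read from `view`; the copy is scheduled when it drops.
        view.copy_from_slice(bytes);
    }
    queue.submit([]); // optional: kick off scheduled transfers now
}
```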
-     /// Schedules a write of some data into a texture.
-     ///
-     /// * `data` contains the texels to be written, which must be in
-     ///   [the same format as the texture](TextureFormat).
-     /// * `data_layout` describes the memory layout of `data`, which does not necessarily
-     ///   have to have tightly packed rows.
-     /// * `texture` specifies the texture to write into, and the location within the
-     ///   texture (coordinate offset, mip level) that will be overwritten.
-     /// * `size` is the size, in texels, of the region to be written.
-     ///
-     /// This method fails if `size` overruns the size of `texture`, or if `data` is too short.
-     ///
-     /// This does *not* submit the transfer to the GPU immediately. Calls to
-     /// `write_texture` begin execution only on the next call to
-     /// [`Queue::submit`]. To get a set of scheduled transfers started
-     /// immediately, it's fine to call `submit` with no command buffers at all:
-     ///
-     /// ```no_run
-     /// # let queue: wgpu::Queue = todo!();
-     /// queue.submit([]);
-     /// ```
-     ///
-     /// However, `data` will be immediately copied into staging memory, so the
-     /// caller may discard it any time after this call completes.
-     pub fn write_texture(
-         &self,
-         texture: ImageCopyTexture<'_>,
-         data: &[u8],
-         data_layout: ImageDataLayout,
-         size: Extent3d,
-     ) {
-         DynContext::queue_write_texture(
-             &*self.context,
-             &self.id,
-             self.data.as_ref(),
-             texture,
-             data,
-             data_layout,
-             size,
-         )
-     }
-
-     /// Schedules a copy of data from `image` into `texture`.
-     #[cfg(any(webgpu, webgl))]
-     pub fn copy_external_image_to_texture(
-         &self,
-         source: &wgt::ImageCopyExternalImage,
-         dest: ImageCopyTextureTagged<'_>,
-         size: Extent3d,
-     ) {
-         DynContext::queue_copy_external_image_to_texture(
-             &*self.context,
-             &self.id,
-             self.data.as_ref(),
-             source,
-             dest,
-             size,
-         )
-     }
-
-     /// Submits a series of finished command buffers for execution.
-     pub fn submit<I: IntoIterator<Item = CommandBuffer>>(
-         &self,
-         command_buffers: I,
-     ) -> SubmissionIndex {
-         let mut command_buffers = command_buffers
-             .into_iter()
-             .map(|mut comb| (comb.id.take().unwrap(), comb.data.take().unwrap()));
-
-         let data = DynContext::queue_submit(
-             &*self.context,
-             &self.id,
-             self.data.as_ref(),
-             &mut command_buffers,
-         );
-
-         SubmissionIndex(data)
-     }
-
-     /// Returns the number of nanoseconds each tick of a timestamp query represents.
-     ///
-     /// Returns zero if timestamp queries are unsupported.
-     ///
-     /// Timestamp values are represented as nanoseconds on WebGPU
-     /// (see <https://gpuweb.github.io/gpuweb/#timestamp>), so this is always 1.0 on the web;
-     /// on wgpu-core a manual conversion is required.
-     pub fn get_timestamp_period(&self) -> f32 {
-         DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref())
-     }
-
-     /// Registers a callback that is invoked when the previous call to submit finishes running on the GPU.
-     /// This callback being called implies that all mapped buffer callbacks which were registered before
-     /// this call will have been called.
-     ///
-     /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
-     /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
-     ///
-     /// The callback will be called on the thread that first calls one of the above functions after the GPU work
-     /// has completed. There are no restrictions on the code you can run in the callback; however, on native the
-     /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
-     /// and using them to set flags, send messages, etc.
-     pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) {
-         DynContext::queue_on_submitted_work_done(
-             &*self.context,
-             &self.id,
-             self.data.as_ref(),
-             Box::new(callback),
-         )
-     }
- }
-
- impl SurfaceTexture {
-     /// Schedules this texture to be presented on the owning surface.
-     ///
-     /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`].
-     ///
-     /// # Platform dependent behavior
-     ///
-     /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface
-     /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter
-     /// or synchronize other double buffered state, then these operations should be done before the call to `present`.
-     pub fn present(mut self) {
-         self.presented = true;
-         DynContext::surface_present(
-             &*self.texture.context,
-             &self.texture.id,
-             // This call to as_ref is essential because we want the DynContext implementation to see the inner
-             // value of the Box (T::SurfaceOutputDetail), not the Box itself.
-             self.detail.as_ref(),
-         );
-     }
- }
-
- impl Drop for SurfaceTexture {
-     fn drop(&mut self) {
-         if !self.presented && !thread::panicking() {
-             DynContext::surface_texture_discard(
-                 &*self.texture.context,
-                 &self.texture.id,
-                 // This call to as_ref is essential because we want the DynContext implementation to see the inner
-                 // value of the Box (T::SurfaceOutputDetail), not the Box itself.
-                 self.detail.as_ref(),
-             );
-         }
-     }
- }
-
- impl Surface<'_> {
-     /// Returns the capabilities of the surface when used with the given adapter.
-     ///
-     /// Returns specified values (see [`SurfaceCapabilities`]) if the surface is incompatible with the adapter.
-     pub fn get_capabilities(&self, adapter: &Adapter) -> SurfaceCapabilities {
-         DynContext::surface_get_capabilities(
-             &*self.context,
-             &self.id,
-             self.surface_data.as_ref(),
-             &adapter.id,
-             adapter.data.as_ref(),
-         )
-     }
-
-     /// Returns a default `SurfaceConfiguration` from width and height to use for the [`Surface`] with this adapter.
-     ///
-     /// Returns `None` if the surface isn't supported by this adapter.
-     pub fn get_default_config(
-         &self,
-         adapter: &Adapter,
-         width: u32,
-         height: u32,
-     ) -> Option<SurfaceConfiguration> {
-         let caps = self.get_capabilities(adapter);
-         Some(SurfaceConfiguration {
-             usage: wgt::TextureUsages::RENDER_ATTACHMENT,
-             format: *caps.formats.first()?,
-             width,
-             height,
-             desired_maximum_frame_latency: 2,
-             present_mode: *caps.present_modes.first()?,
-             alpha_mode: wgt::CompositeAlphaMode::Auto,
-             view_formats: vec![],
-         })
-     }
-
-     /// Initializes the [`Surface`] for presentation.
-     ///
-     /// # Panics
-     ///
-     /// - An old [`SurfaceTexture`] is still alive referencing an old surface.
-     /// - Texture format requested is unsupported on the surface.
-     /// - `config.width` or `config.height` is zero.
-     pub fn configure(&self, device: &Device, config: &SurfaceConfiguration) {
-         DynContext::surface_configure(
-             &*self.context,
-             &self.id,
-             self.surface_data.as_ref(),
-             &device.id,
-             device.data.as_ref(),
-             config,
-         );
-
-         let mut conf = self.config.lock();
-         *conf = Some(config.clone());
-     }
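The acquire/draw/present cycle these docs describe typically looks like the following minimal sketch (not part of the diff); `config` is the `SurfaceConfiguration` last passed to `configure`, and the rendering itself is elided.

```rust
fn frame(surface: &wgpu::Surface<'_>, device: &wgpu::Device, queue: &wgpu::Queue, config: &wgpu::SurfaceConfiguration) {
    let texture = match surface.get_current_texture() {
        Ok(t) => t,
        Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
            surface.configure(device, config); // recreate the swapchain, retry next frame
            return;
        }
        Err(e) => panic!("surface error: {e:?}"),
    };
    // ... record and submit work targeting `texture.texture` here ...
    queue.submit([]);
    texture.present(); // must come after the submit that renders to it
}
```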
-
-    /// Returns the next texture to be presented by the swapchain for drawing.
-    ///
-    /// In order to present the [`SurfaceTexture`] returned by this method,
-    /// first a [`Queue::submit`] needs to be done with some work rendering to this texture.
-    /// Then [`SurfaceTexture::present`] needs to be called.
-    ///
-    /// If a SurfaceTexture referencing this surface is alive when the swapchain is recreated,
-    /// recreating the swapchain will panic.
-    pub fn get_current_texture(&self) -> Result<SurfaceTexture, SurfaceError> {
-        let (texture_id, texture_data, status, detail) = DynContext::surface_get_current_texture(
-            &*self.context,
-            &self.id,
-            self.surface_data.as_ref(),
-        );
-
-        let suboptimal = match status {
-            SurfaceStatus::Good => false,
-            SurfaceStatus::Suboptimal => true,
-            SurfaceStatus::Timeout => return Err(SurfaceError::Timeout),
-            SurfaceStatus::Outdated => return Err(SurfaceError::Outdated),
-            SurfaceStatus::Lost => return Err(SurfaceError::Lost),
-        };
-
-        let guard = self.config.lock();
-        let config = guard
-            .as_ref()
-            .expect("This surface has not been configured yet.");
-
-        let descriptor = TextureDescriptor {
-            label: None,
-            size: Extent3d {
-                width: config.width,
-                height: config.height,
-                depth_or_array_layers: 1,
-            },
-            format: config.format,
-            usage: config.usage,
-            mip_level_count: 1,
-            sample_count: 1,
-            dimension: TextureDimension::D2,
-            view_formats: &[],
-        };
-
-        texture_id
-            .zip(texture_data)
-            .map(|(id, data)| SurfaceTexture {
-                texture: Texture {
-                    context: Arc::clone(&self.context),
-                    id,
-                    data,
-                    owned: false,
-                    descriptor,
-                },
-                suboptimal,
-                presented: false,
-                detail,
-            })
-            .ok_or(SurfaceError::Lost)
-    }
-
-    /// Returns the inner hal Surface using a callback. The hal surface will be `None` if the
-    /// backend type argument does not match this wgpu `Surface`.
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal Surface must not be manually destroyed.
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Surface>) -> R, R>(
-        &mut self,
-        hal_surface_callback: F,
-    ) -> Option<R> {
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| unsafe {
-                ctx.surface_as_hal::<A, F, R>(
-                    self.surface_data.downcast_ref().unwrap(),
-                    hal_surface_callback,
-                )
-            })
-    }
-}
-
-/// Opaque globally-unique identifier
-#[repr(transparent)]
-pub struct Id<T>(NonZeroU64, PhantomData<*mut T>);
-
-impl<T> Id<T> {
-    /// For testing use only. We provide no guarantees about the actual value of the ids.
-    #[doc(hidden)]
-    pub fn inner(&self) -> u64 {
-        self.0.get()
-    }
-}
-
-// SAFETY: `Id` is a bare `NonZeroU64`, the type parameter is a marker purely to avoid confusing Ids
-// returned for different types, so `Id<T>` can safely implement Send and Sync.
-unsafe impl<T> Send for Id<T> {}
-
-// SAFETY: See the implementation for `Send`.
-unsafe impl<T> Sync for Id<T> {}
-
-impl<T> Clone for Id<T> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-impl<T> Copy for Id<T> {}
-
-impl<T> fmt::Debug for Id<T> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_tuple("Id").field(&self.0).finish()
-    }
-}
-
-impl<T> PartialEq for Id<T> {
-    fn eq(&self, other: &Id<T>) -> bool {
-        self.0 == other.0
-    }
-}
-
-impl<T> Eq for Id<T> {}
-
-impl<T> PartialOrd for Id<T> {
-    fn partial_cmp(&self, other: &Id<T>) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl<T> Ord for Id<T> {
-    fn cmp(&self, other: &Id<T>) -> Ordering {
-        self.0.cmp(&other.0)
-    }
-}
-
-impl<T> std::hash::Hash for Id<T> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.0.hash(state)
-    }
-}
-
-impl Adapter {
-    /// Returns a globally-unique identifier for this `Adapter`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
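Because `Id<T>` is `Copy`, `Eq`, `Hash`, and `Ord`, the `global_id` values returned by these impls work as cheap map keys. A sketch; the label scheme is made up for illustration:

```rust
use std::collections::HashMap;

/// Build a lookup of per-texture labels keyed by each texture's global id.
fn label_textures(textures: &[wgpu::Texture]) -> HashMap<wgpu::Id<wgpu::Texture>, String> {
    textures
        .iter()
        .enumerate()
        .map(|(i, texture)| (texture.global_id(), format!("texture-{i}")))
        .collect()
}
```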
-
-impl Device {
-    /// Returns a globally-unique identifier for this `Device`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Queue {
-    /// Returns a globally-unique identifier for this `Queue`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl ShaderModule {
-    /// Returns a globally-unique identifier for this `ShaderModule`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl BindGroupLayout {
-    /// Returns a globally-unique identifier for this `BindGroupLayout`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl BindGroup {
-    /// Returns a globally-unique identifier for this `BindGroup`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl TextureView {
-    /// Returns a globally-unique identifier for this `TextureView`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-
-    /// Returns the inner hal TextureView using a callback. The hal texture view will be `None` if the
-    /// backend type argument does not match this wgpu `TextureView`.
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal TextureView must not be manually destroyed.
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::TextureView>) -> R, R>(
-        &self,
-        hal_texture_view_callback: F,
-    ) -> R {
-        use core::id::TextureViewId;
-
-        let texture_view_id = TextureViewId::from(self.id);
-
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe {
-                ctx.texture_view_as_hal::<A, F, R>(texture_view_id, hal_texture_view_callback)
-            }
-        } else {
-            hal_texture_view_callback(None)
-        }
-    }
-}
-
-impl Sampler {
-    /// Returns a globally-unique identifier for this `Sampler`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
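`TextureView::as_hal` above is the escape hatch for talking to the backend directly. A hedged sketch of probing which backend owns a view; the `wgpu::hal::api::Vulkan` path assumes the `wgpu-hal` re-export under `cfg(wgpu_core)` and may differ between wgpu versions:

```rust
/// Returns true when `view` is backed by the Vulkan backend.
/// Sketch only: the hal re-export path is an assumption.
fn is_vulkan_backed(view: &wgpu::TextureView) -> bool {
    // The callback receives Some(&hal_view) only when the backend matches.
    unsafe { view.as_hal::<wgpu::hal::api::Vulkan, _, _>(|hal_view| hal_view.is_some()) }
}
```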
-
-impl Buffer {
-    /// Returns a globally-unique identifier for this `Buffer`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Texture {
-    /// Returns a globally-unique identifier for this `Texture`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl QuerySet {
-    /// Returns a globally-unique identifier for this `QuerySet`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl PipelineLayout {
-    /// Returns a globally-unique identifier for this `PipelineLayout`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl RenderPipeline {
-    /// Returns a globally-unique identifier for this `RenderPipeline`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl ComputePipeline {
-    /// Returns a globally-unique identifier for this `ComputePipeline`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl RenderBundle {
-    /// Returns a globally-unique identifier for this `RenderBundle`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Surface<'_> {
-    /// Returns a globally-unique identifier for this `Surface`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Surface<'_>> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-/// Type for the callback of uncaptured error handler
-pub trait UncapturedErrorHandler: Fn(Error) + Send + 'static {}
-impl<T> UncapturedErrorHandler for T where T: Fn(Error) + Send + 'static {}
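The blanket `UncapturedErrorHandler` impl means any suitable closure can be installed via `Device::on_uncaptured_error`. A sketch that routes the `Error` variants (defined next) to stderr instead of the default panic; the logging choice is illustrative:

```rust
/// Install a logging error hook on `device`.
fn install_error_hook(device: &wgpu::Device) {
    device.on_uncaptured_error(Box::new(|error| match error {
        wgpu::Error::OutOfMemory { .. } => eprintln!("wgpu: out of memory"),
        wgpu::Error::Validation { description, .. } => {
            eprintln!("wgpu validation error: {description}")
        }
        wgpu::Error::Internal { description, .. } => {
            eprintln!("wgpu internal error: {description}")
        }
    }));
}
```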
-
-/// Error type
-#[derive(Debug)]
-pub enum Error {
-    /// Out of memory error
-    OutOfMemory {
-        /// Lower level source of the error.
-        #[cfg(send_sync)]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + Send + Sync + 'static>,
-        /// Lower level source of the error.
-        #[cfg(not(send_sync))]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + 'static>,
-    },
-    /// Validation error, signifying a bug in code or data
-    Validation {
-        /// Lower level source of the error.
-        #[cfg(send_sync)]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + Send + Sync + 'static>,
-        /// Lower level source of the error.
-        #[cfg(not(send_sync))]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + 'static>,
-        /// Description of the validation error.
-        description: String,
-    },
-    /// Internal error. Used for signalling any failures not explicitly expected by WebGPU.
-    ///
-    /// These could be due to internal implementation or system limits being reached.
-    Internal {
-        /// Lower level source of the error.
-        #[cfg(send_sync)]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + Send + Sync + 'static>,
-        /// Lower level source of the error.
-        #[cfg(not(send_sync))]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + 'static>,
-        /// Description of the internal GPU error.
-        description: String,
-    },
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Error: Send, Sync);
-
-impl error::Error for Error {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match self {
-            Error::OutOfMemory { source } => Some(source.as_ref()),
-            Error::Validation { source, .. } => Some(source.as_ref()),
-            Error::Internal { source, .. } => Some(source.as_ref()),
-        }
-    }
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Error::OutOfMemory { .. } => f.write_str("Out of Memory"),
-            Error::Validation { description, .. } => f.write_str(description),
-            Error::Internal { description, .. } => f.write_str(description),
-        }
-    }
-}
-
-use send_sync::*;
-
-mod send_sync {
-    use std::any::Any;
-    use std::fmt;
-
-    use wgt::WasmNotSendSync;
-
-    pub trait AnyWasmNotSendSync: Any + WasmNotSendSync {
-        fn upcast_any_ref(&self) -> &dyn Any;
-    }
-    impl<T: Any + WasmNotSendSync> AnyWasmNotSendSync for T {
-        #[inline]
-        fn upcast_any_ref(&self) -> &dyn Any {
-            self
-        }
-    }
-
-    impl dyn AnyWasmNotSendSync + 'static {
-        #[inline]
-        pub fn downcast_ref<T: 'static>(&self) -> Option<&T> {
-            self.upcast_any_ref().downcast_ref::<T>()
-        }
-    }
-
-    impl fmt::Debug for dyn AnyWasmNotSendSync {
-        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            f.debug_struct("Any").finish_non_exhaustive()
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::BufferSize;
-
-    #[test]
-    fn range_to_offset_size_works() {
-        assert_eq!(crate::range_to_offset_size(0..2), (0, BufferSize::new(2)));
-        assert_eq!(crate::range_to_offset_size(2..5), (2, BufferSize::new(3)));
-        assert_eq!(crate::range_to_offset_size(..), (0, None));
-        assert_eq!(crate::range_to_offset_size(21..), (21, None));
-        assert_eq!(crate::range_to_offset_size(0..), (0, None));
-        assert_eq!(crate::range_to_offset_size(..21), (0, BufferSize::new(21)));
-    }
-
-    #[test]
-    #[should_panic]
-    fn range_to_offset_size_panics_for_empty_range() {
-        crate::range_to_offset_size(123..123);
-    }
-
-    #[test]
-    #[should_panic]
-    fn range_to_offset_size_panics_for_unbounded_empty_range() {
-        crate::range_to_offset_size(..0);
-    }
-}
diff --git a/wgpu/src/send_sync.rs b/wgpu/src/send_sync.rs
new file mode 100644
index 0000000000..3842931716
--- /dev/null
+++ b/wgpu/src/send_sync.rs
@@ -0,0 +1,27 @@
+use std::any::Any;
+use std::fmt;
+
+use wgt::WasmNotSendSync;
+
+pub trait AnyWasmNotSendSync: Any + WasmNotSendSync {
+    fn upcast_any_ref(&self) -> &dyn Any;
+}
+impl<T: Any + WasmNotSendSync> AnyWasmNotSendSync for T {
+    #[inline]
+    fn upcast_any_ref(&self) -> &dyn Any {
+        self
+    }
+}
+
+impl dyn AnyWasmNotSendSync + 'static {
+    #[inline]
+    pub fn downcast_ref<T: 'static>(&self) -> Option<&T> {
+        self.upcast_any_ref().downcast_ref::<T>()
+    }
+}
+
+impl fmt::Debug for dyn AnyWasmNotSendSync {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Any").finish_non_exhaustive()
+    }
+}
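The new `send_sync.rs` relies on the classic upcast-then-downcast trick: `dyn AnyWasmNotSendSync` is not itself `dyn Any`, so `upcast_any_ref` first produces a `&dyn Any` that the standard downcast machinery understands. The same pattern, reduced to a self-contained sketch without the wasm bounds:

```rust
use std::any::Any;

trait AnyLike: Any {
    fn upcast_any_ref(&self) -> &dyn Any;
}
impl<T: Any> AnyLike for T {
    fn upcast_any_ref(&self) -> &dyn Any {
        self
    }
}

impl dyn AnyLike + 'static {
    // Delegate to Any::downcast_ref via the upcast.
    fn downcast_ref<T: 'static>(&self) -> Option<&T> {
        self.upcast_any_ref().downcast_ref::<T>()
    }
}

fn main() {
    let value: Box<dyn AnyLike> = Box::new(42u32);
    assert_eq!(value.downcast_ref::<u32>(), Some(&42));
}
```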
diff --git a/wgpu/src/util/mod.rs b/wgpu/src/util/mod.rs
index f52b82a9c1..ff4fb7ecf8 100644
--- a/wgpu/src/util/mod.rs
+++ b/wgpu/src/util/mod.rs
@@ -123,7 +123,7 @@ impl DownloadBuffer {
             return;
         }
 
-        let mapped_range = super::DynContext::buffer_get_mapped_range(
+        let mapped_range = crate::context::DynContext::buffer_get_mapped_range(
             &*download.context,
             &download.id,
             download.data.as_ref(),