diff --git a/.cargo/config.toml b/.cargo/config.toml
deleted file mode 100644
index 95d2a35175..0000000000
--- a/.cargo/config.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[alias]
-xtask = "run --manifest-path xtask/Cargo.toml --"
-
-[build]
-rustflags = [
-"--cfg=web_sys_unstable_apis"
-]
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f4ed15c4a7..981fcd3498 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -93,7 +93,7 @@ jobs:
 
     steps:
       - name: checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Install MSRV toolchain
         run: |
@@ -181,7 +181,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Install wasm-pack
         uses: taiki-e/install-action@v2
@@ -219,7 +219,7 @@ jobs:
 
     steps:
       - name: checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Install cargo-nextest and cargo-llvm-cov
         uses: taiki-e/install-action@v2
@@ -284,6 +284,7 @@ jobs:
           done
 
       - uses: actions/upload-artifact@v3
+        if: always() # We want artifacts even if the tests fail.
         with:
           name: comparison-images
           path: |
@@ -308,7 +309,7 @@ jobs:
 
     steps:
       - name: checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: disable debug
         shell: bash
@@ -335,7 +336,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: run rustfmt
         run: |
@@ -346,7 +347,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Install MSRV toolchain
         run: |
@@ -375,7 +376,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Run `cargo deny check`
         uses: EmbarkStudios/cargo-deny-action@v1
@@ -389,7 +390,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Run `cargo deny check`
         uses: EmbarkStudios/cargo-deny-action@v1
diff --git a/.github/workflows/cts.yml b/.github/workflows/cts.yml
index 70479533cf..e4bb20e7b1 100644
--- a/.github/workflows/cts.yml
+++ b/.github/workflows/cts.yml
@@ -39,7 +39,7 @@ jobs:
 
     steps:
       - name: checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           path: wgpu
 
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 11d8d9e962..396a93ef04 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -16,7 +16,7 @@ jobs:
 
     steps:
       - name: Checkout the code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 81a2a7b407..f0aa086961 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -17,7 +17,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout the code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 72f0c83fcf..db91b89718 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,15 +40,92 @@ Bottom level categories:
 
 ## Unreleased
 
+### Major changes
+
+#### Pass timestamp queries
+
+Addition of `TimestampWrites` to compute and render passes to allow profiling.
+This brings us in line with the spec.
+
+Added new example to demonstrate the various kinds of timestamps.
+
+By @FL33TW00D & @wumpf in [#3636](https://github.com/gfx-rs/wgpu/pull/3636).
+
+#### Occlusion Query Support
+
+The `occlusion_query_set` value defines where the occlusion query results will be stored for this pass.
+
+```diff
+let render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+  // ...
++ occlusion_query_set: None,
+});
+```
+
+By @Valaphee in [#3402](https://github.com/gfx-rs/wgpu/pull/3402)
+
+### Added/New Features
+
+- Add `gles_minor_version` field to `wgpu::InstanceDescriptor`. By @PJB3005 in [#3998](https://github.com/gfx-rs/wgpu/pull/3998)
+
 ### Changes
 
+#### General
+
 - Omit texture store bound checks since they are no-ops if out of bounds on all APIs. By @teoxoy in [#3975](https://github.com/gfx-rs/wgpu/pull/3975)
+- Validate `DownlevelFlags::READ_ONLY_DEPTH_STENCIL`. By @teoxoy in [#4031](https://github.com/gfx-rs/wgpu/pull/4031)
+- Add validation in accordance with WebGPU `setViewport` valid usage for `x`, `y` and `this.[[attachment_size]]`. By @James2022-rgb in [#4058](https://github.com/gfx-rs/wgpu/pull/4058)
+- `wgpu::CreateSurfaceError` now gives details of the failure, but no longer implements `PartialEq`. By @kpreid in [#4066](https://github.com/gfx-rs/wgpu/pull/4066)
+- Make `WGPU_POWER_PREF=none` a valid value. By @fornwall in [#4076](https://github.com/gfx-rs/wgpu/pull/4076)
+
+#### Vulkan
+
+- Rename `wgpu_hal::vulkan::Instance::required_extensions` to `desired_extensions`. By @jimblandy in [#4115](https://github.com/gfx-rs/wgpu/pull/4115)
+
+- Don't bother calling `vkFreeCommandBuffers` when `vkDestroyCommandPool` will take care of that for us. By @jimblandy in [#4059](https://github.com/gfx-rs/wgpu/pull/4059)
+
+
+### Documentation
+- Use WGSL for VertexFormat example types. By @ScanMountGoat in [#4035](https://github.com/gfx-rs/wgpu/pull/4035)
 
 ### Bug Fixes
 
+#### General
+
+- Derive storage bindings via `naga::StorageAccess` instead of `naga::GlobalUse`. By @teoxoy in [#3985](https://github.com/gfx-rs/wgpu/pull/3985).
+- `Queue::on_submitted_work_done` callbacks will now always be called after all previous `BufferSlice::map_async` callbacks, even when there are no active submissions. By @cwfitzgerald in [#4036](https://github.com/gfx-rs/wgpu/pull/4036).
+- Fix `clear` texture views being leaked when `wgpu::SurfaceTexture` is dropped before it is presented. By @rajveermalviya in [#4057](https://github.com/gfx-rs/wgpu/pull/4057).
+
 #### Vulkan
 - Fix enabling `wgpu::Features::PARTIALLY_BOUND_BINDING_ARRAY` not being actually enabled in vulkan backend. By @39ali in[#3772](https://github.com/gfx-rs/wgpu/pull/3772).
 
+- Don't pass `vk::InstanceCreateFlags::ENUMERATE_PORTABILITY_KHR` unless the `VK_KHR_portability_enumeration` extension is available. By @jimblandy in [#4038](https://github.com/gfx-rs/wgpu/pull/4038).
+
+- Enhancement of [#4038], using ash's definition instead of hard-coded c_str. By @hybcloud in [#4044](https://github.com/gfx-rs/wgpu/pull/4044).
+
+- Enable vulkan presentation on (Linux) Intel Mesa >= v21.2. By @flukejones in [#4110](https://github.com/gfx-rs/wgpu/pull/4110)
+
+#### DX12
+
+- DX12 doesn't support `Features::POLYGON_MODE_POINT`. By @teoxoy in [#4032](https://github.com/gfx-rs/wgpu/pull/4032).
+- Set `Features::VERTEX_WRITABLE_STORAGE` based on the right feature level. By @teoxoy in [#4033](https://github.com/gfx-rs/wgpu/pull/4033).
+
+#### Metal
+
+- Ensure that MTLCommandEncoder calls endEncoding before it is deallocated. By @bradwerth in [#4023](https://github.com/gfx-rs/wgpu/pull/4023)
+
+#### WebGPU
+
+- Ensure that limit requests and reporting is done correctly. By @OptimisticPeach in [#4107](https://github.com/gfx-rs/wgpu/pull/4107)
+
+#### Testing
+
+- Skip `test_multithreaded_compute` on MoltenVK. By @jimblandy in [#4096](https://github.com/gfx-rs/wgpu/pull/4096).
+
+### Documentation
+
+- Add an overview of `RenderPass` and how render state works. By @kpreid in [#4055](https://github.com/gfx-rs/wgpu/pull/4055)
+
 ## v0.17.0 (2023-07-20)
 
 This is the first release that featured `wgpu-info` as a binary crate for getting information about what devices wgpu sees in your system. It can dump the information in both human readable format and json.
@@ -59,7 +136,7 @@ This release was fairly minor as breaking changes go.
 
 #### `wgpu` types now `!Send` `!Sync` on wasm
 
-Up until this point, wgpu has made the assumption that threads do not exist on wasm. With the rise of libraries like [`wasm_thread`](https://crates.io/crates/wasm_thread) making it easier and easier to do wasm multithreading this assumption is no longer sound. As all wgpu objects contain references into the JS heap, they cannot leave the thread they started on. 
+Up until this point, wgpu has made the assumption that threads do not exist on wasm. With the rise of libraries like [`wasm_thread`](https://crates.io/crates/wasm_thread) making it easier and easier to do wasm multithreading this assumption is no longer sound. As all wgpu objects contain references into the JS heap, they cannot leave the thread they started on.
 
 As we understand that this change might be very inconvenient for users who don't care about wasm threading, there is a crate feature which re-enables the old behavior: `fragile-send-sync-non-atomic-wasm`. So long as you don't compile your code with `-Ctarget-feature=+atomics`, `Send` and `Sync` will be implemented again on wgpu types on wasm. As the name implies, especially for libraries, this is very fragile, as you don't know if a user will want to compile with atomics (and therefore threads) or not.
 
@@ -101,6 +178,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht
 
 - Empty scissor rects are allowed now, matching the specification. by @PJB3005 in [#3863](https://github.com/gfx-rs/wgpu/pull/3863).
 - Add back components info to `TextureFormat`s. By @teoxoy in [#3843](https://github.com/gfx-rs/wgpu/pull/3843).
+- Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042](https://github.com/gfx-rs/wgpu/pull/4042).
 
 ### Documentation
 
@@ -111,6 +189,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht
 - Spell out which sizes are in bytes. By @jimblandy in [#3773](https://github.com/gfx-rs/wgpu/pull/3773).
 - Validate that `descriptor.usage` is not empty in `create_buffer` by @nical in [#3928](https://github.com/gfx-rs/wgpu/pull/3928)
 - Update `max_bindings_per_bind_group` limit to reflect spec changes by @ErichDonGubler and @nical in [#3943](https://github.com/gfx-rs/wgpu/pull/3943) [#3942](https://github.com/gfx-rs/wgpu/pull/3942)
+- Add better docs for `Limits`, listing the actual limits returned by `downlevel_defaults` and `downlevel_webgl2_defaults` by @JustAnotherCodemonkey in [#3988](https://github.com/gfx-rs/wgpu/pull/3988)
 
 ### Bug Fixes
 
@@ -137,6 +216,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht
 #### DX12
 
 - Disable suballocation on Intel Iris(R) Xe. By @xiaopengli89 in [#3668](https://github.com/gfx-rs/wgpu/pull/3668)
+- Change the `max_buffer_size` limit from `u64::MAX` to `i32::MAX`. By @nical in [#4020](https://github.com/gfx-rs/wgpu/pull/4020)
 
 #### WebGPU
 
diff --git a/Cargo.lock b/Cargo.lock
index 930cee4702..ba1a403628 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -88,9 +88,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.72"
+version = "1.0.75"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854"
+checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
 
 [[package]]
 name = "arrayref"
@@ -159,7 +159,7 @@ checksum = "a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
 ]
 
 [[package]]
@@ -185,9 +185,9 @@ dependencies = [
 
 [[package]]
 name = "base64"
-version = "0.13.1"
+version = "0.21.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
+checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d"
 
 [[package]]
 name = "base64-simd"
@@ -222,9 +222,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
 [[package]]
 name = "bitflags"
-version = "2.3.3"
+version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
+checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
 dependencies = [
  "serde",
 ]
@@ -262,9 +262,9 @@ checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
 
 [[package]]
 name = "bytemuck"
-version = "1.13.1"
+version = "1.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea"
+checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6"
 dependencies = [
  "bytemuck_derive",
 ]
@@ -277,7 +277,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
 ]
 
 [[package]]
@@ -552,7 +552,7 @@ version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e16e44ab292b1dddfdaf7be62cfd8877df52f2f3fde5858d95bab606be259f20"
 dependencies = [
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "libloading 0.8.0",
  "winapi",
 ]
@@ -915,7 +915,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
 ]
 
 [[package]]
@@ -1053,7 +1053,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
 ]
 
 [[package]]
@@ -1214,7 +1214,7 @@ version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171"
 dependencies = [
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "gpu-alloc-types",
 ]
 
@@ -1224,7 +1224,7 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4"
 dependencies = [
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
 ]
 
 [[package]]
@@ -1332,9 +1332,9 @@ checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed"
 
 [[package]]
 name = "image"
-version = "0.24.6"
+version = "0.24.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "527909aa81e20ac3a44803521443a765550f09b5130c2c2fa1ea59c2f8f50a3a"
+checksum = "6f3dfdbdd72063086ff443e297b61695500514b1e41095b6fb9a5ab48a70a711"
 dependencies = [
  "bytemuck",
  "byteorder",
@@ -1352,7 +1352,6 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
  "autocfg",
  "hashbrown 0.12.3",
- "serde",
 ]
 
 [[package]]
@@ -1363,6 +1362,7 @@ checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
 dependencies = [
  "equivalent",
  "hashbrown 0.14.0",
+ "serde",
 ]
 
 [[package]]
@@ -1466,9 +1466,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
 [[package]]
 name = "libc"
-version = "0.2.147"
+version = "0.2.148"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
+checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
 
 [[package]]
 name = "libloading"
@@ -1554,7 +1554,7 @@ version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "623b5e6cefd76e58f774bd3cc0c6f5c7615c58c03a97815245a25c3c9bdee318"
 dependencies = [
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "block",
  "core-graphics-types",
  "foreign-types 0.5.0",
@@ -1603,13 +1603,13 @@ dependencies = [
 [[package]]
 name = "naga"
 version = "0.13.0"
-source = "git+https://github.com/gfx-rs/naga?rev=bac2d82a430fbfcf100ee22b7c3bc12f3d593079#bac2d82a430fbfcf100ee22b7c3bc12f3d593079"
+source = "git+https://github.com/gfx-rs/naga?rev=cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c#cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c"
 dependencies = [
  "bit-set",
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "codespan-reporting",
  "hexf-parse",
- "indexmap 1.9.3",
+ "indexmap 2.0.0",
  "log",
  "num-traits 0.2.16",
  "petgraph",
@@ -1832,7 +1832,7 @@ dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
 ]
 
 [[package]]
@@ -2026,14 +2026,14 @@ checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
 ]
 
 [[package]]
 name = "pin-project-lite"
-version = "0.2.10"
+version = "0.2.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57"
+checksum = "12cc1b0bf1727a77a54b6654e7b5f1af8604923edc8b81885f8ec92f9e3f0a05"
 
 [[package]]
 name = "pin-utils"
@@ -2074,9 +2074,9 @@ dependencies = [
 
 [[package]]
 name = "png"
-version = "0.17.9"
+version = "0.17.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59871cc5b6cce7eaccca5a802b4173377a1c2ba90654246789a8fa2334426d11"
+checksum = "dd75bf2d8dd3702b9707cdbc56a5b9ef42cec752eb8b3bafc01234558442aa64"
 dependencies = [
  "bitflags 1.3.2",
  "crc32fast",
@@ -2127,9 +2127,9 @@ dependencies = [
 
 [[package]]
 name = "profiling"
-version = "1.0.8"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "332cd62e95873ea4f41f3dfd6bbbfc5b52aec892d7e8d534197c4720a0bbbab2"
+checksum = "45f10e75d83c7aec79a6aa46f897075890e156b105eebe51cfa0abce51af025f"
 
 [[package]]
 name = "quote"
@@ -2272,13 +2272,14 @@ checksum = "216080ab382b992234dda86873c18d4c48358f5cfcb70fd693d7f6f2131b628b"
 
 [[package]]
 name = "ron"
-version = "0.8.0"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "300a51053b1cb55c80b7a9fde4120726ddf25ca241a1cbb926626f62fb136bff"
+checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94"
 dependencies = [
  "base64",
- "bitflags 1.3.2",
+ "bitflags 2.4.0",
  "serde",
+ "serde_derive",
 ]
 
 [[package]]
@@ -2317,7 +2318,7 @@ version = "0.38.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5"
 dependencies = [
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "errno",
  "libc",
  "linux-raw-sys",
@@ -2399,9 +2400,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 
 [[package]]
 name = "serde"
-version = "1.0.175"
+version = "1.0.188"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b"
+checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
 dependencies = [
  "serde_derive",
 ]
@@ -2417,20 +2418,20 @@ dependencies = [
 
 [[package]]
 name = "serde_derive"
-version = "1.0.175"
+version = "1.0.188"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4"
+checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.103"
+version = "1.0.107"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b"
+checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65"
 dependencies = [
  "indexmap 2.0.0",
  "itoa",
@@ -2556,12 +2557,12 @@ dependencies = [
 
 [[package]]
 name = "socket2"
-version = "0.4.9"
+version = "0.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662"
+checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877"
 dependencies = [
  "libc",
- "winapi",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -2621,9 +2622,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.26"
+version = "2.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
+checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2641,22 +2642,22 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.44"
+version = "1.0.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
+checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.44"
+version = "1.0.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
+checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
 ]
 
 [[package]]
@@ -2726,11 +2727,10 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.29.1"
+version = "1.32.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da"
+checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9"
 dependencies = [
- "autocfg",
  "backtrace",
  "bytes",
  "libc",
@@ -2752,7 +2752,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
 ]
 
 [[package]]
@@ -2962,7 +2962,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
  "wasm-bindgen-shared",
 ]
 
@@ -2996,7 +2996,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.26",
+ "syn 2.0.28",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -3210,7 +3210,7 @@ version = "0.17.0"
 dependencies = [
  "arrayvec 0.7.4",
  "bit-vec",
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "codespan-reporting",
  "log",
  "naga",
@@ -3269,7 +3269,7 @@ dependencies = [
  "arrayvec 0.7.4",
  "ash",
  "bit-set",
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "block",
  "cfg-if",
  "core-graphics-types",
@@ -3369,7 +3369,7 @@ name = "wgpu-info"
 version = "0.17.0"
 dependencies = [
  "anyhow",
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "env_logger",
  "pico-args",
  "serde",
@@ -3450,7 +3450,7 @@ dependencies = [
 name = "wgpu-test"
 version = "0.17.0"
 dependencies = [
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "bytemuck",
  "cfg-if",
  "console_log",
@@ -3460,6 +3460,7 @@ dependencies = [
  "log",
  "naga",
  "nv-flip",
+ "parking_lot",
  "png",
  "pollster",
  "raw-window-handle 0.5.2",
@@ -3483,11 +3484,29 @@ dependencies = [
  "winit 0.28.6",
 ]
 
+[[package]]
+name = "wgpu-timestamp-queries-example"
+version = "0.17.0"
+dependencies = [
+ "bytemuck",
+ "console_error_panic_hook",
+ "console_log",
+ "env_logger",
+ "futures-intrusive",
+ "log",
+ "pollster",
+ "wasm-bindgen-futures",
+ "wasm-bindgen-test",
+ "wgpu",
+ "wgpu-test",
+ "winit 0.28.6",
+]
+
 [[package]]
 name = "wgpu-types"
 version = "0.17.0"
 dependencies = [
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "js-sys",
  "serde",
  "serde_json",
diff --git a/Cargo.toml b/Cargo.toml
index 96c05cb51f..55c6048b86 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -51,7 +51,7 @@ version = "0.17"
 
 [workspace.dependencies.naga]
 git = "https://github.com/gfx-rs/naga"
-rev = "bac2d82a430fbfcf100ee22b7c3bc12f3d593079"
+rev = "cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c"
 version = "0.13.0"
 
 [workspace.dependencies]
@@ -60,7 +60,7 @@ arrayvec = "0.7"
 async-executor = "1"
 bitflags = "2"
 bit-vec = "0.6"
-bytemuck = { version = "1.13", features = ["derive"] }
+bytemuck = { version = "1.14", features = ["derive"] }
 cfg_aliases = "0.1"
 cfg-if = "1"
 codespan-reporting = "0.11"
@@ -82,14 +82,14 @@ obj = "0.10"
 # parking_lot 0.12 switches from `winapi` to `windows`; permit either
 parking_lot = ">=0.11,<0.13"
 pico-args = { version = "0.5.0", features = ["eq-separator", "short-space-opt", "combined-flags"] }
-png = "0.17.9"
+png = "0.17.10"
 pollster = "0.3"
 profiling = { version = "1", default-features = false }
 raw-window-handle = "0.5"
 renderdoc-sys = "1.0.0"
 ron = "0.8"
 serde = "1"
-serde_json = "1.0.96"
+serde_json = "1.0.107"
 smallvec = "1"
 static_assertions = "1.1.0"
 thiserror = "1"
@@ -141,7 +141,7 @@ deno_url = "0.106.0"
 deno_web = "0.137.0"
 deno_webidl = "0.106.0"
 deno_webgpu = { path = "./deno_webgpu" }
-tokio = "1.28.2"
+tokio = "1.32.0"
 termcolor = "1.2.0"
 
 [patch."https://github.com/gfx-rs/naga"]
diff --git a/README.md b/README.md
index c2b62daf93..44e036a2b2 100644
--- a/README.md
+++ b/README.md
@@ -135,8 +135,9 @@ All testing and example infrastructure shares the same set of environment variab
 
 - `WGPU_ADAPTER_NAME` with a substring of the name of the adapter you want to use (ex. `1080` will match `NVIDIA GeForce 1080ti`).
 - `WGPU_BACKEND` with a comma separated list of the backends you want to use (`vulkan`, `metal`, `dx12`, `dx11`, or `gl`).
-- `WGPU_POWER_PREF` with the power preference to choose when a specific adapter name isn't specified (`high` or `low`)
+- `WGPU_POWER_PREF` with the power preference to choose when a specific adapter name isn't specified (`high`, `low` or `none`)
 - `WGPU_DX12_COMPILER` with the DX12 shader compiler you wish to use (`dxc` or `fxc`, note that `dxc` requires `dxil.dll` and `dxcompiler.dll` to be in the working directory otherwise it will fall back to `fxc`)
+- `WGPU_GLES_MINOR_VERSION` with the minor OpenGL ES 3 version number to request (`0`, `1`, `2` or `automatic`).
 
 When running the CTS, use the variables `DENO_WEBGPU_ADAPTER_NAME`, `DENO_WEBGPU_BACKEND`, `DENO_WEBGPU_POWER_PREFERENCE`.
 
diff --git a/deno_webgpu/command_encoder.rs b/deno_webgpu/command_encoder.rs
index 6c169677a4..4857b0a7a7 100644
--- a/deno_webgpu/command_encoder.rs
+++ b/deno_webgpu/command_encoder.rs
@@ -98,6 +98,7 @@ pub fn op_webgpu_command_encoder_begin_render_pass(
     label: Option<String>,
     color_attachments: Vec<Option<GpuRenderPassColorAttachment>>,
     depth_stencil_attachment: Option<GpuRenderPassDepthStencilAttachment>,
+    occlusion_query_set: Option<ResourceId>,
 ) -> Result<WebGpuResult, AnyError> {
     let command_encoder_resource = state
         .resource_table
@@ -171,10 +172,17 @@ pub fn op_webgpu_command_encoder_begin_render_pass(
             });
     }
 
+    let occlusion_query_set_resource = occlusion_query_set
+        .map(|rid| state.resource_table.get::<super::WebGpuQuerySet>(rid))
+        .transpose()?
+        .map(|query_set| query_set.1);
+
     let descriptor = wgpu_core::command::RenderPassDescriptor {
         label: label.map(Cow::from),
         color_attachments: Cow::from(color_attachments),
         depth_stencil_attachment: processed_depth_stencil_attachment.as_ref(),
+        timestamp_writes: None,
+        occlusion_query_set: occlusion_query_set_resource,
     };
 
     let render_pass = wgpu_core::command::RenderPass::new(command_encoder_resource.1, &descriptor);
@@ -200,6 +208,7 @@ pub fn op_webgpu_command_encoder_begin_compute_pass(
 
     let descriptor = wgpu_core::command::ComputePassDescriptor {
         label: label.map(Cow::from),
+        timestamp_writes: None,
     };
 
     let compute_pass =
diff --git a/deno_webgpu/error.rs b/deno_webgpu/error.rs
index 17ff7c1ef4..a68592adfc 100644
--- a/deno_webgpu/error.rs
+++ b/deno_webgpu/error.rs
@@ -104,7 +104,9 @@ impl From<DeviceError> for WebGpuError {
         match err {
             DeviceError::Lost => WebGpuError::Lost,
             DeviceError::OutOfMemory => WebGpuError::OutOfMemory,
-            DeviceError::Invalid => WebGpuError::Validation(fmt_err(&err)),
+            DeviceError::ResourceCreationFailed | DeviceError::Invalid => {
+                WebGpuError::Validation(fmt_err(&err))
+            }
         }
     }
 }
diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs
index f475723682..92a6a51334 100644
--- a/deno_webgpu/lib.rs
+++ b/deno_webgpu/lib.rs
@@ -406,6 +406,7 @@ pub async fn op_webgpu_request_adapter(
             wgpu_types::InstanceDescriptor {
                 backends,
                 dx12_shader_compiler: wgpu_types::Dx12Compiler::Fxc,
+                gles_minor_version: wgpu_types::Gles3MinorVersion::default(),
             },
         )));
         state.borrow::<Instance>()
diff --git a/deno_webgpu/render_pass.rs b/deno_webgpu/render_pass.rs
index 4b0621fc4a..678990ea3d 100644
--- a/deno_webgpu/render_pass.rs
+++ b/deno_webgpu/render_pass.rs
@@ -114,6 +114,40 @@ pub fn op_webgpu_render_pass_set_stencil_reference(
     Ok(WebGpuResult::empty())
 }
 
+#[op]
+pub fn op_webgpu_render_pass_begin_occlusion_query(
+    state: &mut OpState,
+    render_pass_rid: ResourceId,
+    query_index: u32,
+) -> Result<WebGpuResult, AnyError> {
+    let render_pass_resource = state
+        .resource_table
+        .get::<WebGpuRenderPass>(render_pass_rid)?;
+
+    wgpu_core::command::render_ffi::wgpu_render_pass_begin_occlusion_query(
+        &mut render_pass_resource.0.borrow_mut(),
+        query_index,
+    );
+
+    Ok(WebGpuResult::empty())
+}
+
+#[op]
+pub fn op_webgpu_render_pass_end_occlusion_query(
+    state: &mut OpState,
+    render_pass_rid: ResourceId,
+) -> Result<WebGpuResult, AnyError> {
+    let render_pass_resource = state
+        .resource_table
+        .get::<WebGpuRenderPass>(render_pass_rid)?;
+
+    wgpu_core::command::render_ffi::wgpu_render_pass_end_occlusion_query(
+        &mut render_pass_resource.0.borrow_mut(),
+    );
+
+    Ok(WebGpuResult::empty())
+}
+
 #[op]
 pub fn op_webgpu_render_pass_begin_pipeline_statistics_query(
     state: &mut OpState,
diff --git a/deno_webgpu/webgpu.idl b/deno_webgpu/webgpu.idl
index b27db0c7a7..f2fea59c9f 100644
--- a/deno_webgpu/webgpu.idl
+++ b/deno_webgpu/webgpu.idl
@@ -981,6 +981,7 @@ dictionary GPURenderPassDescriptor
          : GPUObjectDescriptorBase {
     required sequence<GPURenderPassColorAttachment?> colorAttachments;
     GPURenderPassDepthStencilAttachment depthStencilAttachment;
+    GPUQuerySet occlusionQuerySet;
 };
 
 dictionary GPURenderPassColorAttachment {
diff --git a/examples/boids/src/main.rs b/examples/boids/src/main.rs
index 1bb003acb4..e8aa2f71fd 100644
--- a/examples/boids/src/main.rs
+++ b/examples/boids/src/main.rs
@@ -283,6 +283,8 @@ impl wgpu_example::framework::Example for Example {
             label: None,
             color_attachments: &color_attachments,
             depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
         };
 
         // get command encoder
@@ -292,8 +294,10 @@ impl wgpu_example::framework::Example for Example {
         command_encoder.push_debug_group("compute boid movement");
         {
             // compute pass
-            let mut cpass =
-                command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
+            let mut cpass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: None,
+                timestamp_writes: None,
+            });
             cpass.set_pipeline(&self.compute_pipeline);
             cpass.set_bind_group(0, &self.particle_bind_groups[self.frame_num % 2], &[]);
             cpass.dispatch_workgroups(self.work_group_count, 1, 1);
@@ -341,7 +345,7 @@ fn boids() {
             .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS)
             .limits(wgpu::Limits::downlevel_defaults())
             // Lots of validation errors, maybe related to https://github.com/gfx-rs/wgpu/issues/3160
-            .molten_vk_failure(),
+            .expect_fail(wgpu_test::FailureCase::molten_vk()),
         comparisons: &[wgpu_test::ComparisonType::Mean(0.005)],
     });
 }
diff --git a/examples/bunnymark/src/main.rs b/examples/bunnymark/src/main.rs
index 9322131f09..256083eebb 100644
--- a/examples/bunnymark/src/main.rs
+++ b/examples/bunnymark/src/main.rs
@@ -336,6 +336,8 @@ impl wgpu_example::framework::Example for Example {
                     },
                 })],
                 depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
             rpass.set_pipeline(&self.pipeline);
             rpass.set_bind_group(0, &self.global_group, &[]);
diff --git a/examples/capture/src/main.rs b/examples/capture/src/main.rs
index d36c3033ea..b783b3af80 100644
--- a/examples/capture/src/main.rs
+++ b/examples/capture/src/main.rs
@@ -40,6 +40,7 @@ async fn create_red_image_with_dimensions(
     let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
         backends,
         dx12_shader_compiler: wgpu::Dx12Compiler::default(),
+        gles_minor_version: wgpu::Gles3MinorVersion::default(),
     });
     let adapter = instance
         .request_adapter(&wgpu::RequestAdapterOptions::default())
@@ -104,6 +105,8 @@ async fn create_red_image_with_dimensions(
                 },
             })],
             depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
         });
 
         // Copy the data from the texture to the buffer
@@ -146,13 +149,13 @@ async fn create_png(
     //
     // We pass our submission index so we don't need to wait for any other possible submissions.
     device.poll(wgpu::Maintain::WaitForSubmissionIndex(submission_index));
-    // If a file system is available, write the buffer as a PNG
-    let has_file_system_available = cfg!(not(target_arch = "wasm32"));
-    if !has_file_system_available {
-        return;
-    }
 
     if let Some(Ok(())) = receiver.receive().await {
+        // If a file system is available, write the buffer as a PNG
+        let has_file_system_available = cfg!(not(target_arch = "wasm32"));
+        if !has_file_system_available {
+            return;
+        }
         let padded_buffer = buffer_slice.get_mapped_range();
 
         let mut png_encoder = png::Encoder::new(
diff --git a/examples/common/src/framework.rs b/examples/common/src/framework.rs
index 482d970563..875d8544e7 100644
--- a/examples/common/src/framework.rs
+++ b/examples/common/src/framework.rs
@@ -12,9 +12,9 @@ use winit::{
 
 #[allow(dead_code)]
 pub fn cast_slice<T>(data: &[T]) -> &[u8] {
-    use std::{mem::size_of, slice::from_raw_parts};
+    use std::{mem::size_of_val, slice::from_raw_parts};
 
-    unsafe { from_raw_parts(data.as_ptr() as *const u8, data.len() * size_of::<T>()) }
+    unsafe { from_raw_parts(data.as_ptr() as *const u8, size_of_val(data)) }
 }
 
 #[allow(dead_code)]
@@ -155,10 +155,12 @@ async fn setup<E: Example>(title: &str) -> Setup {
 
     let backends = wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all);
     let dx12_shader_compiler = wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default();
+    let gles_minor_version = wgpu::util::gles_minor_version_from_env().unwrap_or_default();
 
     let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
         backends,
         dx12_shader_compiler,
+        gles_minor_version,
     });
     let (size, surface) = unsafe {
         let size = window.inner_size();
@@ -453,7 +455,7 @@ pub fn run<E: Example>(title: &str) {
 
 #[cfg(target_arch = "wasm32")]
 pub fn run<E: Example>(title: &str) {
-    use wasm_bindgen::{prelude::*, JsCast};
+    use wasm_bindgen::prelude::*;
 
     let title = title.to_owned();
     wasm_bindgen_futures::spawn_local(async move {
@@ -623,7 +625,7 @@ pub fn test<E: Example>(mut params: FrameworkRefTest) {
 
             wgpu_test::image::compare_image_output(
                 env!("CARGO_MANIFEST_DIR").to_string() + "/../../" + params.image_path,
-                ctx.adapter_info.backend,
+                &ctx.adapter_info,
                 params.width,
                 params.height,
                 &bytes,
diff --git a/examples/conservative-raster/src/main.rs b/examples/conservative-raster/src/main.rs
index 1dba599591..e5cfb4d775 100644
--- a/examples/conservative-raster/src/main.rs
+++ b/examples/conservative-raster/src/main.rs
@@ -273,6 +273,8 @@ impl wgpu_example::framework::Example for Example {
                     },
                 })],
                 depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
 
             rpass.set_pipeline(&self.pipeline_triangle_conservative);
@@ -292,6 +294,8 @@ impl wgpu_example::framework::Example for Example {
                     },
                 })],
                 depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
 
             rpass.set_pipeline(&self.pipeline_upscale);
diff --git a/examples/cube/src/main.rs b/examples/cube/src/main.rs
index 271bc17ce4..a10dfd0fd0 100644
--- a/examples/cube/src/main.rs
+++ b/examples/cube/src/main.rs
@@ -379,6 +379,8 @@ impl wgpu_example::framework::Example for Example {
                     },
                 })],
                 depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
             rpass.push_debug_group("Prepare data for draw.");
             rpass.set_pipeline(&self.pipeline);
diff --git a/examples/hello-compute/src/main.rs b/examples/hello-compute/src/main.rs
index afdf7744c9..3b102f4e0e 100644
--- a/examples/hello-compute/src/main.rs
+++ b/examples/hello-compute/src/main.rs
@@ -75,8 +75,7 @@ async fn execute_gpu_inner(
     });
 
     // Gets the size in bytes of the buffer.
-    let slice_size = numbers.len() * std::mem::size_of::<u32>();
-    let size = slice_size as wgpu::BufferAddress;
+    let size = std::mem::size_of_val(numbers) as wgpu::BufferAddress;
 
     // Instantiates buffer without data.
     // `usage` of buffer specifies how it can be used:
@@ -132,7 +131,10 @@ async fn execute_gpu_inner(
     let mut encoder =
         device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
     {
-        let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
+        let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+            label: None,
+            timestamp_writes: None,
+        });
         cpass.set_pipeline(&compute_pipeline);
         cpass.set_bind_group(0, &bind_group, &[]);
         cpass.insert_debug_marker("compute collatz iterations");
diff --git a/examples/hello-compute/src/tests.rs b/examples/hello-compute/src/tests.rs
index 462d30d6b7..7f8649f72f 100644
--- a/examples/hello-compute/src/tests.rs
+++ b/examples/hello-compute/src/tests.rs
@@ -1,7 +1,7 @@
 use std::sync::Arc;
 
 use super::*;
-use wgpu_test::{initialize_test, TestParameters};
+use wgpu_test::{initialize_test, FailureCase, TestParameters};
 
 wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);
 
@@ -12,7 +12,8 @@ fn test_compute_1() {
         TestParameters::default()
             .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS)
             .limits(wgpu::Limits::downlevel_defaults())
-            .specific_failure(None, None, Some("V3D"), true),
+            .features(wgpu::Features::TIMESTAMP_QUERY)
+            .skip(FailureCase::adapter("V3D")),
         |ctx| {
             let input = &[1, 2, 3, 4];
 
@@ -33,7 +34,8 @@ fn test_compute_2() {
         TestParameters::default()
             .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS)
             .limits(wgpu::Limits::downlevel_defaults())
-            .specific_failure(None, None, Some("V3D"), true),
+            .features(wgpu::Features::TIMESTAMP_QUERY)
+            .skip(FailureCase::adapter("V3D")),
         |ctx| {
             let input = &[5, 23, 10, 9];
 
@@ -54,7 +56,8 @@ fn test_compute_overflow() {
         TestParameters::default()
             .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS)
             .limits(wgpu::Limits::downlevel_defaults())
-            .specific_failure(None, None, Some("V3D"), true),
+            .features(wgpu::Features::TIMESTAMP_QUERY)
+            .skip(FailureCase::adapter("V3D")),
         |ctx| {
             let input = &[77031, 837799, 8400511, 63728127];
             pollster::block_on(assert_execute_gpu(
@@ -74,16 +77,16 @@ fn test_multithreaded_compute() {
         TestParameters::default()
             .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS)
             .limits(wgpu::Limits::downlevel_defaults())
-            .specific_failure(None, None, Some("V3D"), true)
+            .features(wgpu::Features::TIMESTAMP_QUERY)
+            .skip(FailureCase::adapter("V3D"))
             // https://github.com/gfx-rs/wgpu/issues/3944
-            .specific_failure(
-                Some(wgpu::Backends::VULKAN),
-                None,
-                Some("swiftshader"),
-                true,
-            )
+            .skip(FailureCase::backend_adapter(
+                wgpu::Backends::VULKAN,
+                "swiftshader",
+            ))
             // https://github.com/gfx-rs/wgpu/issues/3250
-            .specific_failure(Some(wgpu::Backends::GL), None, Some("llvmpipe"), true),
+            .skip(FailureCase::backend_adapter(wgpu::Backends::GL, "llvmpipe"))
+            .skip(FailureCase::molten_vk()),
         |ctx| {
             use std::{sync::mpsc, thread, time::Duration};
 
diff --git a/examples/hello-triangle/src/main.rs b/examples/hello-triangle/src/main.rs
index 98abf5b8d5..c5432acd07 100644
--- a/examples/hello-triangle/src/main.rs
+++ b/examples/hello-triangle/src/main.rs
@@ -122,6 +122,8 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
                             },
                         })],
                         depth_stencil_attachment: None,
+                        timestamp_writes: None,
+                        occlusion_query_set: None,
                     });
                     rpass.set_pipeline(&render_pipeline);
                     rpass.draw(0..3, 0..1);
diff --git a/examples/hello-windows/src/main.rs b/examples/hello-windows/src/main.rs
index c6798d865f..f368804c36 100644
--- a/examples/hello-windows/src/main.rs
+++ b/examples/hello-windows/src/main.rs
@@ -135,6 +135,8 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Window, wgpu::Color)>) {
                                 },
                             })],
                             depth_stencil_attachment: None,
+                            timestamp_writes: None,
+                            occlusion_query_set: None,
                         });
                     }
 
diff --git a/examples/mipmap/src/main.rs b/examples/mipmap/src/main.rs
index 48703ab312..a85110ff14 100644
--- a/examples/mipmap/src/main.rs
+++ b/examples/mipmap/src/main.rs
@@ -167,6 +167,8 @@ impl Example {
                     },
                 })],
                 depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
             if let Some(ref query_sets) = query_sets {
                 rpass.write_timestamp(&query_sets.timestamp, timestamp_query_index_base);
@@ -492,6 +494,8 @@ impl wgpu_example::framework::Example for Example {
                     },
                 })],
                 depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
             rpass.set_pipeline(&self.draw_pipeline);
             rpass.set_bind_group(0, &self.bind_group, &[]);
@@ -517,7 +521,7 @@ fn mipmap() {
         height: 768,
         optional_features: wgpu::Features::default(),
         base_test_parameters: wgpu_test::TestParameters::default()
-            .backend_failure(wgpu::Backends::GL),
+            .expect_fail(wgpu_test::FailureCase::backend(wgpu::Backends::GL)),
         comparisons: &[wgpu_test::ComparisonType::Mean(0.02)],
     });
 }
@@ -531,7 +535,7 @@ fn mipmap_query() {
         height: 768,
         optional_features: QUERY_FEATURES,
         base_test_parameters: wgpu_test::TestParameters::default()
-            .backend_failure(wgpu::Backends::GL),
+            .expect_fail(wgpu_test::FailureCase::backend(wgpu::Backends::GL)),
         comparisons: &[wgpu_test::ComparisonType::Mean(0.02)],
     });
 }
diff --git a/examples/msaa-line/src/main.rs b/examples/msaa-line/src/main.rs
index c7f69b6339..aa7a277418 100644
--- a/examples/msaa-line/src/main.rs
+++ b/examples/msaa-line/src/main.rs
@@ -12,6 +12,9 @@ use std::{borrow::Cow, iter};
 use bytemuck::{Pod, Zeroable};
 use wgpu::util::DeviceExt;
 
+#[cfg(test)]
+use wgpu_test::FailureCase;
+
 #[repr(C)]
 #[derive(Clone, Copy, Pod, Zeroable)]
 struct Vertex {
@@ -300,6 +303,8 @@ impl wgpu_example::framework::Example for Example {
                     label: None,
                     color_attachments: &[Some(rpass_color_attachment)],
                     depth_stencil_attachment: None,
+                    timestamp_writes: None,
+                    occlusion_query_set: None,
                 })
                 .execute_bundles(iter::once(&self.bundle));
         }
@@ -324,7 +329,11 @@ fn msaa_line() {
         optional_features: wgpu::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES,
         base_test_parameters: wgpu_test::TestParameters::default()
             // AMD seems to render nothing on DX12 https://github.com/gfx-rs/wgpu/issues/3838
-            .specific_failure(Some(wgpu::Backends::DX12), Some(0x1002), None, false),
+            .expect_fail(FailureCase {
+                backends: Some(wgpu::Backends::DX12),
+                vendor: Some(0x1002),
+                ..FailureCase::default()
+            }),
         // There's a lot of natural variance so we check the weighted median too to differentiate
         // real failures from variance.
         comparisons: &[
diff --git a/examples/shadow/src/main.rs b/examples/shadow/src/main.rs
index 461b04d17a..3f963d0c53 100644
--- a/examples/shadow/src/main.rs
+++ b/examples/shadow/src/main.rs
@@ -777,6 +777,8 @@ impl wgpu_example::framework::Example for Example {
                         }),
                         stencil_ops: None,
                     }),
+                    timestamp_writes: None,
+                    occlusion_query_set: None,
                 });
                 pass.set_pipeline(&self.shadow_pass.pipeline);
                 pass.set_bind_group(0, &self.shadow_pass.bind_group, &[]);
@@ -819,6 +821,8 @@ impl wgpu_example::framework::Example for Example {
                     }),
                     stencil_ops: None,
                 }),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
             pass.set_pipeline(&self.forward_pass.pipeline);
             pass.set_bind_group(0, &self.forward_pass.bind_group, &[]);
@@ -853,9 +857,15 @@ fn shadow() {
         base_test_parameters: wgpu_test::TestParameters::default()
             .downlevel_flags(wgpu::DownlevelFlags::COMPARISON_SAMPLERS)
             // rpi4 on VK doesn't work: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3916
-            .specific_failure(Some(wgpu::Backends::VULKAN), None, Some("V3D"), false)
+            .expect_fail(wgpu_test::FailureCase::backend_adapter(
+                wgpu::Backends::VULKAN,
+                "V3D",
+            ))
             // llvmpipe versions in CI are flaky: https://github.com/gfx-rs/wgpu/issues/2594
-            .specific_failure(Some(wgpu::Backends::VULKAN), None, Some("llvmpipe"), true),
+            .skip(wgpu_test::FailureCase::backend_adapter(
+                wgpu::Backends::VULKAN,
+                "llvmpipe",
+            )),
         comparisons: &[wgpu_test::ComparisonType::Mean(0.02)],
     });
 }
diff --git a/examples/skybox/src/main.rs b/examples/skybox/src/main.rs
index bfb5c10521..d09622f53c 100644
--- a/examples/skybox/src/main.rs
+++ b/examples/skybox/src/main.rs
@@ -439,6 +439,8 @@ impl wgpu_example::framework::Example for Skybox {
                     }),
                     stencil_ops: None,
                 }),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
 
             rpass.set_bind_group(0, &self.bind_group, &[]);
@@ -473,11 +475,8 @@ fn skybox() {
         width: 1024,
         height: 768,
         optional_features: wgpu::Features::default(),
-        base_test_parameters: wgpu_test::TestParameters::default().specific_failure(
-            Some(wgpu::Backends::GL),
-            None,
-            Some("ANGLE"),
-            false,
+        base_test_parameters: wgpu_test::TestParameters::default().expect_fail(
+            wgpu_test::FailureCase::backend_adapter(wgpu::Backends::GL, "ANGLE"),
         ),
         comparisons: &[wgpu_test::ComparisonType::Mean(0.015)],
     });
diff --git a/examples/stencil-triangles/src/main.rs b/examples/stencil-triangles/src/main.rs
index 1499f9ab74..55aad9c9ba 100644
--- a/examples/stencil-triangles/src/main.rs
+++ b/examples/stencil-triangles/src/main.rs
@@ -211,6 +211,8 @@ impl wgpu_example::framework::Example for Triangles {
                         store: true,
                     }),
                 }),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
 
             rpass.set_stencil_reference(1);
diff --git a/examples/texture-arrays/src/main.rs b/examples/texture-arrays/src/main.rs
index b2683c8471..373c2396ae 100644
--- a/examples/texture-arrays/src/main.rs
+++ b/examples/texture-arrays/src/main.rs
@@ -383,6 +383,8 @@ impl wgpu_example::framework::Example for Example {
                 },
             })],
             depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
         });
 
         rpass.set_pipeline(&self.pipeline);
diff --git a/examples/timestamp-queries/Cargo.toml b/examples/timestamp-queries/Cargo.toml
new file mode 100644
index 0000000000..f2d7de3f1e
--- /dev/null
+++ b/examples/timestamp-queries/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+name = "wgpu-timestamp-queries-example"
+version.workspace = true
+license.workspace = true
+edition.workspace = true
+description = "wgpu timestamp query example"
+publish = false
+
+[[bin]]
+name = "timestamp-queries"
+path = "src/main.rs"
+
+[dependencies]
+bytemuck.workspace = true
+env_logger.workspace = true
+futures-intrusive.workspace = true
+pollster.workspace = true
+wgpu.workspace = true
+winit.workspace = true
+
+[target.'cfg(target_arch = "wasm32")'.dependencies]
+console_error_panic_hook.workspace = true
+console_log.workspace = true
+log.workspace = true
+wasm-bindgen-futures.workspace = true
+
+[dev-dependencies]
+wasm-bindgen-test.workspace = true
+wgpu-test.workspace = true
+
diff --git a/examples/timestamp-queries/README.md b/examples/timestamp-queries/README.md
new file mode 100644
index 0000000000..1c95ff9f11
--- /dev/null
+++ b/examples/timestamp-queries/README.md
@@ -0,0 +1,9 @@
+# timestamp-queries
+
+This example shows the different ways of recording GPU timestamp queries with wgpu, depending on what the adapter supports.
+
+## To Run
+
+```
+cargo run --bin timestamp-queries
+```
diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs
new file mode 100644
index 0000000000..3479122c79
--- /dev/null
+++ b/examples/timestamp-queries/src/main.rs
@@ -0,0 +1,484 @@
+//! Sample demonstrating different kinds of gpu timestamp queries.
+//!
+//! Timestamp queries are typically used to profile how long certain operations take on the GPU.
+//! wgpu has several ways of performing gpu timestamp queries:
+//! * `wgpu::CommandEncoder::write_timestamp` writes a timestamp between any commands recorded on an encoder.
+//!     (enabled with wgpu::Features::TIMESTAMP_QUERY)
+//! * passing `wgpu::RenderPassTimestampWrites`/`wgpu::ComputePassTimestampWrites` during render/compute pass creation.
+//!     This writes timestamps for the beginning and end of a given pass.
+//!     (enabled with wgpu::Features::TIMESTAMP_QUERY)
+//! * `wgpu::RenderPass/ComputePass::write_timestamp` writes a timestamp within commands of a render or compute pass.
+//!     Note that some GPU architectures do not support this.
+//!     (native only, enabled with wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
+//!
+//! Any timestamp is written to a `wgpu::QuerySet` which needs to be resolved to a buffer with `wgpu::BufferUsages::QUERY_RESOLVE`.
+//! Since this usage is incompatible with `wgpu::BufferUsages::MAP_READ` we need to copy the resolved timestamps to a separate buffer afterwards.
+//!
+//! The period, i.e. the unit of time, of the timestamps in wgpu is undetermined and needs to be queried with `wgpu::Queue::get_timestamp_period`
+//! in order to get comparable results.
+
+use wgpu::util::DeviceExt;
+
+struct Queries {
+    set: wgpu::QuerySet,
+    resolve_buffer: wgpu::Buffer,
+    destination_buffer: wgpu::Buffer,
+    num_queries: u64,
+    next_unused_query: u32,
+}
+
+struct QueryResults {
+    encoder_timestamps: [u64; 2],
+    render_start_end_timestamps: [u64; 2],
+    render_inside_timestamp: Option<u64>,
+    compute_start_end_timestamps: [u64; 2],
+    compute_inside_timestamp: Option<u64>,
+}
+
+impl QueryResults {
+    // Query slots, in the order `from_raw_results` decodes them:
+    // * encoder timestamp start
+    // * render start
+    // * render end
+    // * render in-between (optional)
+    // * compute start
+    // * compute end
+    // * compute in-between (optional)
+    // * encoder timestamp end
+    const NUM_QUERIES: u64 = 8;
+
+    /// Splits the raw per-slot values into named results; optional slots are read only if enabled.
+    fn from_raw_results(timestamps: Vec<u64>, timestamps_inside_passes: bool) -> Self {
+        assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize);
+        let mut next_slot = 0;
+        let mut get_next_slot = || {
+            let slot = timestamps[next_slot];
+            next_slot += 1;
+            slot
+        };
+        let mut encoder_timestamps = [0, 0];
+        encoder_timestamps[0] = get_next_slot();
+        let render_start_end_timestamps = [get_next_slot(), get_next_slot()];
+        // `then` is lazy: `then_some(get_next_slot())` would consume a slot even when disabled.
+        let render_inside_timestamp = timestamps_inside_passes.then(&mut get_next_slot);
+        let compute_start_end_timestamps = [get_next_slot(), get_next_slot()];
+        let compute_inside_timestamp = timestamps_inside_passes.then(&mut get_next_slot);
+        encoder_timestamps[1] = get_next_slot();
+
+        QueryResults {
+            encoder_timestamps,
+            render_start_end_timestamps,
+            render_inside_timestamp,
+            compute_start_end_timestamps,
+            compute_inside_timestamp,
+        }
+    }
+
+    /// Prints each measured interval in μs (raw ticks scaled by the queue's timestamp period).
+    fn print(&self, queue: &wgpu::Queue) {
+        let period = queue.get_timestamp_period();
+        let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0;
+        println!(
+            "Elapsed time render + compute: {:.2} μs",
+            elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1])
+        );
+        println!(
+            "Elapsed time render pass: {:.2} μs",
+            elapsed_us(
+                self.render_start_end_timestamps[0],
+                self.render_start_end_timestamps[1]
+            )
+        );
+        if let Some(timestamp) = self.render_inside_timestamp {
+            println!(
+                "Elapsed time first triangle: {:.2} μs",
+                elapsed_us(self.render_start_end_timestamps[0], timestamp)
+            );
+        }
+        println!(
+            "Elapsed time compute pass: {:.2} μs",
+            elapsed_us(
+                self.compute_start_end_timestamps[0],
+                self.compute_start_end_timestamps[1]
+            )
+        );
+        if let Some(timestamp) = self.compute_inside_timestamp {
+            println!(
+                "Elapsed time after first dispatch: {:.2} μs",
+                elapsed_us(self.compute_start_end_timestamps[0], timestamp)
+            );
+        }
+    }
+}
+
+impl Queries {
+    fn new(device: &wgpu::Device, num_queries: u64) -> Self {
+        Queries {
+            set: device.create_query_set(&wgpu::QuerySetDescriptor {
+                label: Some("Timestamp query set"),
+                count: num_queries as _,
+                ty: wgpu::QueryType::Timestamp,
+            }),
+            resolve_buffer: device.create_buffer(&wgpu::BufferDescriptor {
+                label: Some("query resolve buffer"),
+                size: std::mem::size_of::<u64>() as u64 * num_queries,
+                usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE,
+                mapped_at_creation: false,
+            }),
+            destination_buffer: device.create_buffer(&wgpu::BufferDescriptor {
+                label: Some("query dest buffer"),
+                size: std::mem::size_of::<u64>() as u64 * num_queries,
+                usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+                mapped_at_creation: false,
+            }),
+            num_queries,
+            next_unused_query: 0,
+        }
+    }
+
+    fn resolve(&self, encoder: &mut wgpu::CommandEncoder) {
+        encoder.resolve_query_set(
+            &self.set,
+            // TODO(https://github.com/gfx-rs/wgpu/issues/3993): Mustn't be larger than the number of valid queries in the set.
+            0..self.next_unused_query,
+            &self.resolve_buffer,
+            0,
+        );
+        encoder.copy_buffer_to_buffer(
+            &self.resolve_buffer,
+            0,
+            &self.destination_buffer,
+            0,
+            self.resolve_buffer.size(),
+        );
+    }
+
+    /// Maps the destination buffer, waits on the device and returns the copied timestamps.
+    fn wait_for_results(&self, device: &wgpu::Device) -> Vec<u64> {
+        self.destination_buffer
+            .slice(..)
+            .map_async(wgpu::MapMode::Read, |_| ());
+        device.poll(wgpu::Maintain::Wait);
+        // Copy the values out inside a nested scope so the mapped view is dropped before `unmap`.
+        let timestamps = {
+            let timestamp_view = self
+                .destination_buffer
+                .slice(..(std::mem::size_of::<u64>() as wgpu::BufferAddress * self.num_queries))
+                .get_mapped_range();
+            bytemuck::cast_slice(&timestamp_view).to_vec()
+        };
+        self.destination_buffer.unmap();
+
+        timestamps
+    }
+}
+
+/// Entry point: sets up wgpu, then records, reads back and prints the timestamp queries.
+async fn run() {
+    // Instantiates instance of wgpu
+    let backends = wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all);
+    let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
+        backends,
+        dx12_shader_compiler: wgpu::Dx12Compiler::default(),
+        gles_minor_version: wgpu::Gles3MinorVersion::default(),
+    });
+    // `request_adapter` instantiates the general connection to the GPU
+    let adapter = instance
+        .request_adapter(&wgpu::RequestAdapterOptions::default())
+        .await
+        .expect("Failed to request adapter.");
+
+    // Check timestamp features.
+    let features = adapter.features()
+        & (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
+    if features.contains(wgpu::Features::TIMESTAMP_QUERY) {
+        println!("Adapter supports timestamp queries.");
+    } else {
+        println!("Adapter does not support timestamp queries, aborting.");
+        return;
+    }
+    let timestamps_inside_passes = features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
+    if timestamps_inside_passes {
+        println!("Adapter supports timestamp queries within passes.");
+    } else {
+        println!("Adapter does not support timestamp queries within passes.");
+    }
+
+    // `request_device` instantiates the feature specific connection to the GPU, defining some parameters,
+    //  `features` being the timestamp-related features the adapter supports.
+    let (device, queue) = adapter
+        .request_device(
+            &wgpu::DeviceDescriptor {
+                label: None,
+                features,
+                limits: wgpu::Limits::downlevel_defaults(),
+            },
+            None,
+        )
+        .await
+        .unwrap();
+    // Record and submit the work, wait for the readback, then decode and print the timings.
+    let queries = submit_render_and_compute_pass_with_queries(&device, &queue);
+    let raw_results = queries.wait_for_results(&device);
+    println!("Raw timestamp buffer contents: {:?}", raw_results);
+    QueryResults::from_raw_results(raw_results, timestamps_inside_passes).print(&queue);
+}
+
+/// Records both passes bracketed by encoder timestamps, resolves the queries and submits the work.
+fn submit_render_and_compute_pass_with_queries(
+    device: &wgpu::Device,
+    queue: &wgpu::Queue,
+) -> Queries {
+    let mut encoder =
+        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+    let mut queries = Queries::new(device, QueryResults::NUM_QUERIES);
+    let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+        label: None,
+        source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!("shader.wgsl"))),
+    });
+    // Encoder-level timestamp recorded before both passes.
+    encoder.write_timestamp(&queries.set, queries.next_unused_query);
+    queries.next_unused_query += 1;
+
+    // Render two triangles and profile it.
+    render_pass(
+        device,
+        &shader,
+        &mut encoder,
+        &queries.set,
+        &mut queries.next_unused_query,
+    );
+
+    // Compute a hash function on a single thread a bunch of times and profile it.
+    compute_pass(
+        device,
+        &shader,
+        &mut encoder,
+        &queries.set,
+        &mut queries.next_unused_query,
+    );
+    // Encoder-level timestamp recorded after both passes.
+    encoder.write_timestamp(&queries.set, queries.next_unused_query);
+    queries.next_unused_query += 1;
+
+    queries.resolve(&mut encoder);
+    queue.submit(Some(encoder.finish()));
+
+    queries
+}
+
+/// Records a compute pass with begin/end timestamps and, if supported, one between its dispatches.
+fn compute_pass(
+    device: &wgpu::Device,
+    module: &wgpu::ShaderModule,
+    encoder: &mut wgpu::CommandEncoder,
+    query_set: &wgpu::QuerySet,
+    next_unused_query: &mut u32,
+) {
+    let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+        label: Some("Storage Buffer"),
+        contents: bytemuck::cast_slice(&[42]),
+        usage: wgpu::BufferUsages::STORAGE,
+    });
+    let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+        label: None,
+        layout: None,
+        module,
+        entry_point: "main_cs",
+    });
+    let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
+    let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
+        label: None,
+        layout: &bind_group_layout,
+        entries: &[wgpu::BindGroupEntry {
+            binding: 0,
+            resource: storage_buffer.as_entire_binding(),
+        }],
+    });
+    let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+        label: None,
+        timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
+            query_set,
+            beginning_of_pass_write_index: Some(*next_unused_query),
+            end_of_pass_write_index: Some(*next_unused_query + 1),
+        }),
+    });
+    *next_unused_query += 2;
+    cpass.set_pipeline(&compute_pipeline);
+    cpass.set_bind_group(0, &bind_group, &[]);
+    cpass.dispatch_workgroups(1, 1, 1);
+    if device
+        .features()
+        .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
+    {
+        cpass.write_timestamp(query_set, *next_unused_query);
+        *next_unused_query += 1;
+    }
+    cpass.dispatch_workgroups(1, 1, 1);
+}
+
+fn render_pass(
+    device: &wgpu::Device,
+    module: &wgpu::ShaderModule,
+    encoder: &mut wgpu::CommandEncoder,
+    query_set: &wgpu::QuerySet,
+    next_unused_query: &mut u32,
+) {
+    let format = wgpu::TextureFormat::Rgba8Unorm;
+
+    let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+        label: None,
+        bind_group_layouts: &[],
+        push_constant_ranges: &[],
+    });
+
+    let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
+        label: None,
+        layout: Some(&pipeline_layout),
+        vertex: wgpu::VertexState {
+            module,
+            entry_point: "vs_main",
+            buffers: &[],
+        },
+        fragment: Some(wgpu::FragmentState {
+            module,
+            entry_point: "fs_main",
+            targets: &[Some(format.into())],
+        }),
+        primitive: wgpu::PrimitiveState::default(),
+        depth_stencil: None,
+        multisample: wgpu::MultisampleState::default(),
+        multiview: None,
+    });
+
+    let render_target = device.create_texture(&wgpu::TextureDescriptor {
+        label: Some("rendertarget"),
+        size: wgpu::Extent3d {
+            width: 512,
+            height: 512,
+            depth_or_array_layers: 1,
+        },
+        mip_level_count: 1,
+        sample_count: 1,
+        dimension: wgpu::TextureDimension::D2,
+        format,
+        usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
+        view_formats: &[format],
+    });
+    let render_target_view = render_target.create_view(&wgpu::TextureViewDescriptor::default());
+
+    let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+        label: None,
+        color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+            view: &render_target_view,
+            resolve_target: None,
+            ops: wgpu::Operations {
+                load: wgpu::LoadOp::Clear(wgpu::Color::GREEN),
+                store: true,
+            },
+        })],
+        depth_stencil_attachment: None,
+        timestamp_writes: Some(wgpu::RenderPassTimestampWrites {
+            query_set,
+            beginning_of_pass_write_index: Some(*next_unused_query),
+            end_of_pass_write_index: Some(*next_unused_query + 1),
+        }),
+        occlusion_query_set: None,
+    });
+    *next_unused_query += 2;
+
+    rpass.set_pipeline(&render_pipeline);
+
+    rpass.draw(0..3, 0..1);
+    if device
+        .features()
+        .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
+    {
+        rpass.write_timestamp(query_set, *next_unused_query);
+        *next_unused_query += 1;
+    }
+
+    rpass.draw(0..3, 0..1);
+}
+
+fn main() {
+    #[cfg(not(target_arch = "wasm32"))]
+    {
+        env_logger::init();
+        pollster::block_on(run());
+    }
+    #[cfg(target_arch = "wasm32")]
+    {
+        std::panic::set_hook(Box::new(console_error_panic_hook::hook));
+        console_log::init().expect("could not initialize logger");
+        wasm_bindgen_futures::spawn_local(run());
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{submit_render_and_compute_pass_with_queries, QueryResults};
+
+    #[test]
+    #[wasm_bindgen_test::wasm_bindgen_test]
+    fn test_timestamps_encoder() {
+        wgpu_test::initialize_test(
+            wgpu_test::TestParameters::default()
+                .limits(wgpu::Limits::downlevel_defaults())
+                .features(wgpu::Features::TIMESTAMP_QUERY),
+            |ctx| {
+                test_timestamps(ctx, false);
+            },
+        );
+    }
+
+    #[test]
+    #[wasm_bindgen_test::wasm_bindgen_test]
+    fn test_timestamps_passes() {
+        wgpu_test::initialize_test(
+            wgpu_test::TestParameters::default()
+                .limits(wgpu::Limits::downlevel_defaults())
+                .features(
+                    wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES,
+                ),
+            |ctx| {
+                test_timestamps(ctx, true);
+            },
+        );
+    }
+
+    fn test_timestamps(ctx: wgpu_test::TestingContext, timestamps_inside_passes: bool) {
+        let queries = submit_render_and_compute_pass_with_queries(&ctx.device, &ctx.queue);
+        let raw_results = queries.wait_for_results(&ctx.device);
+        let QueryResults {
+            encoder_timestamps,
+            render_start_end_timestamps,
+            render_inside_timestamp,
+            compute_start_end_timestamps,
+            compute_inside_timestamp,
+        } = QueryResults::from_raw_results(raw_results, timestamps_inside_passes);
+
+        // Timestamps may wrap around, so we can really only reason about deltas!
+        // Making things worse, deltas are allowed to be zero.
+        let render_delta =
+            render_start_end_timestamps[1].wrapping_sub(render_start_end_timestamps[0]);
+        let compute_delta =
+            compute_start_end_timestamps[1].wrapping_sub(compute_start_end_timestamps[0]);
+
+        // TODO: Metal encoder timestamps aren't implemented yet.
+        if ctx.adapter.get_info().backend != wgpu::Backend::Metal {
+            let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]);
+            assert!(encoder_delta > 0);
+            assert!(encoder_delta >= render_delta + compute_delta);
+        }
+
+        if let Some(render_inside_timestamp) = render_inside_timestamp {
+            assert!(render_inside_timestamp >= render_start_end_timestamps[0]);
+            assert!(render_inside_timestamp <= render_start_end_timestamps[1]);
+        }
+        if let Some(compute_inside_timestamp) = compute_inside_timestamp {
+            assert!(compute_inside_timestamp >= compute_start_end_timestamps[0]);
+            assert!(compute_inside_timestamp <= compute_start_end_timestamps[1]);
+        }
+    }
+}
diff --git a/examples/timestamp-queries/src/shader.wgsl b/examples/timestamp-queries/src/shader.wgsl
new file mode 100644
index 0000000000..c0689b0236
--- /dev/null
+++ b/examples/timestamp-queries/src/shader.wgsl
@@ -0,0 +1,34 @@
+@vertex
+fn vs_main(@builtin(vertex_index) in_vertex_index: u32) -> @builtin(position) vec4<f32> {
+    let x = f32(i32(in_vertex_index) - 1);
+    let y = f32(i32(in_vertex_index & 1u) * 2 - 1);
+    return vec4<f32>(x, y, 0.0, 1.0);
+}
+
+@fragment
+fn fs_main() -> @location(0) vec4<f32> {
+    return vec4<f32>(1.0, 0.0, 0.0, 1.0);
+}
+
+
+@group(0)
+@binding(0)
+var<storage, read_write> buffer: array<u32>; // Used as both input and output for convenience.
+
+fn pcg_hash(input: u32) -> u32 {
+    let state = input * 747796405u + 2891336453u;
+    let word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
+    return (word >> 22u) ^ word;
+}
+
+@compute
+@workgroup_size(1)
+fn main_cs(@builtin(global_invocation_id) global_id: vec3<u32>) {
+    var value = buffer[0];
+
+    for (var i = 0u; i < 128u; i += 1u) {
+        value = pcg_hash(value);
+    }
+
+    buffer[0] = value;
+}
diff --git a/examples/water/src/main.rs b/examples/water/src/main.rs
index ac4098559c..5d5daa1f59 100644
--- a/examples/water/src/main.rs
+++ b/examples/water/src/main.rs
@@ -752,6 +752,8 @@ impl wgpu_example::framework::Example for Example {
                     }),
                     stencil_ops: None,
                 }),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
 
             rpass.execute_bundles([&self.terrain_bundle]);
@@ -777,6 +779,8 @@ impl wgpu_example::framework::Example for Example {
                     }),
                     stencil_ops: None,
                 }),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
             rpass.set_pipeline(&self.terrain_pipeline);
             rpass.set_bind_group(0, &self.terrain_normal_bind_group, &[]);
@@ -801,6 +805,8 @@ impl wgpu_example::framework::Example for Example {
                     depth_ops: None,
                     stencil_ops: None,
                 }),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
 
             rpass.set_pipeline(&self.water_pipeline);
diff --git a/player/src/lib.rs b/player/src/lib.rs
index 0f6eb1d10a..fbfb2697d1 100644
--- a/player/src/lib.rs
+++ b/player/src/lib.rs
@@ -36,7 +36,11 @@ impl<I: Clone + Debug + wgc::id::TypedId> wgc::identity::IdentityHandlerFactory<
         IdentityPassThrough(PhantomData)
     }
 }
-impl wgc::identity::GlobalIdentityHandlerFactory for IdentityPassThroughFactory {}
+impl wgc::identity::GlobalIdentityHandlerFactory for IdentityPassThroughFactory {
+    fn ids_are_generated_in_wgpu() -> bool {
+        false
+    }
+}
 
 pub trait GlobalPlay {
     fn encode_commands<A: wgc::hal_api::HalApi>(
@@ -121,20 +125,31 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 trace::Command::InsertDebugMarker(marker) => self
                     .command_encoder_insert_debug_marker::<A>(encoder, &marker)
                     .unwrap(),
-                trace::Command::RunComputePass { base } => {
-                    self.command_encoder_run_compute_pass_impl::<A>(encoder, base.as_ref())
-                        .unwrap();
+                trace::Command::RunComputePass {
+                    base,
+                    timestamp_writes,
+                } => {
+                    self.command_encoder_run_compute_pass_impl::<A>(
+                        encoder,
+                        base.as_ref(),
+                        timestamp_writes.as_ref(),
+                    )
+                    .unwrap();
                 }
                 trace::Command::RunRenderPass {
                     base,
                     target_colors,
                     target_depth_stencil,
+                    timestamp_writes,
+                    occlusion_query_set_id,
                 } => {
                     self.command_encoder_run_render_pass_impl::<A>(
                         encoder,
                         base.as_ref(),
                         &target_colors,
                         target_depth_stencil.as_ref(),
+                        timestamp_writes.as_ref(),
+                        occlusion_query_set_id,
                     )
                     .unwrap();
                 }
@@ -143,7 +158,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
         let (cmd_buf, error) = self
             .command_encoder_finish::<A>(encoder, &wgt::CommandBufferDescriptor { label: None });
         if let Some(e) = error {
-            panic!("{:?}", e);
+            panic!("{e}");
         }
         cmd_buf
     }
@@ -171,7 +186,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 self.device_maintain_ids::<A>(device).unwrap();
                 let (_, error) = self.device_create_buffer::<A>(device, &desc, id);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::FreeBuffer(id) => {
@@ -184,7 +199,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 self.device_maintain_ids::<A>(device).unwrap();
                 let (_, error) = self.device_create_texture::<A>(device, &desc, id);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::FreeTexture(id) => {
@@ -201,7 +216,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 self.device_maintain_ids::<A>(device).unwrap();
                 let (_, error) = self.texture_create_view::<A>(parent_id, &desc, id);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::DestroyTextureView(id) => {
@@ -211,7 +226,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 self.device_maintain_ids::<A>(device).unwrap();
                 let (_, error) = self.device_create_sampler::<A>(device, &desc, id);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::DestroySampler(id) => {
@@ -227,7 +242,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
             Action::CreateBindGroupLayout(id, desc) => {
                 let (_, error) = self.device_create_bind_group_layout::<A>(device, &desc, id);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::DestroyBindGroupLayout(id) => {
@@ -237,7 +252,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 self.device_maintain_ids::<A>(device).unwrap();
                 let (_, error) = self.device_create_pipeline_layout::<A>(device, &desc, id);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::DestroyPipelineLayout(id) => {
@@ -247,7 +262,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 self.device_maintain_ids::<A>(device).unwrap();
                 let (_, error) = self.device_create_bind_group::<A>(device, &desc, id);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::DestroyBindGroup(id) => {
@@ -257,7 +272,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 log::info!("Creating shader from {}", data);
                 let code = fs::read_to_string(dir.join(&data)).unwrap();
                 let source = if data.ends_with(".wgsl") {
-                    wgc::pipeline::ShaderModuleSource::Wgsl(Cow::Owned(code))
+                    wgc::pipeline::ShaderModuleSource::Wgsl(Cow::Owned(code.clone()))
                 } else if data.ends_with(".ron") {
                     let module = ron::de::from_str(&code).unwrap();
                     wgc::pipeline::ShaderModuleSource::Naga(module)
@@ -266,7 +281,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 };
                 let (_, error) = self.device_create_shader_module::<A>(device, &desc, source, id);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    println!("shader compilation error:\n---{code}\n---\n{e}");
                 }
             }
             Action::DestroyShaderModule(id) => {
@@ -288,7 +303,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 let (_, error) =
                     self.device_create_compute_pipeline::<A>(device, &desc, id, implicit_ids);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::DestroyComputePipeline(id) => {
@@ -310,7 +325,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 let (_, error) =
                     self.device_create_render_pipeline::<A>(device, &desc, id, implicit_ids);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::DestroyRenderPipeline(id) => {
@@ -325,7 +340,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                     id,
                 );
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::DestroyRenderBundle(id) => {
@@ -335,7 +350,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                 self.device_maintain_ids::<A>(device).unwrap();
                 let (_, error) = self.device_create_query_set::<A>(device, &desc, id);
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
             }
             Action::DestroyQuerySet(id) => {
@@ -378,7 +393,7 @@ impl GlobalPlay for wgc::global::Global<IdentityPassThroughFactory> {
                     comb_manager.alloc(device.backend()),
                 );
                 if let Some(e) = error {
-                    panic!("{:?}", e);
+                    panic!("{e}");
                 }
                 let cmdbuf = self.encode_commands::<A>(encoder, commands);
                 self.queue_submit::<A>(device, &[cmdbuf]).unwrap();
diff --git a/player/tests/test.rs b/player/tests/test.rs
index bbaa66cc4e..cd1302777e 100644
--- a/player/tests/test.rs
+++ b/player/tests/test.rs
@@ -185,6 +185,7 @@ impl Corpus {
             wgt::InstanceDescriptor {
                 backends: corpus.backends,
                 dx12_shader_compiler: wgt::Dx12Compiler::Fxc,
+                gles_minor_version: wgt::Gles3MinorVersion::default(),
             },
         );
         for &backend in BACKENDS {
diff --git a/tests/Cargo.toml b/tests/Cargo.toml
index 85dae4959c..5738ed1bdb 100644
--- a/tests/Cargo.toml
+++ b/tests/Cargo.toml
@@ -24,6 +24,7 @@ bytemuck.workspace = true
 cfg-if.workspace = true
 env_logger.workspace = true
 log.workspace = true
+parking_lot.workspace = true
 png.workspace = true
 pollster.workspace = true
 wgpu.workspace = true
diff --git a/tests/src/image.rs b/tests/src/image.rs
index 00aa78f660..e50fd43e7f 100644
--- a/tests/src/image.rs
+++ b/tests/src/image.rs
@@ -150,7 +150,7 @@ impl ComparisonType {
 
 pub fn compare_image_output(
     path: impl AsRef<Path> + AsRef<OsStr>,
-    backend: Backend,
+    adapter_info: &wgt::AdapterInfo,
     width: u32,
     height: u32,
     test_with_alpha: &[u8],
@@ -205,17 +205,18 @@ pub fn compare_image_output(
         }
 
         let file_stem = reference_path.file_stem().unwrap().to_string_lossy();
+        let renderer = format!(
+            "{}-{}-{}",
+            adapter_info.backend.to_str(),
+            sanitize_for_path(&adapter_info.name),
+            sanitize_for_path(&adapter_info.driver)
+        );
         // Determine the paths to write out the various intermediate files
         let actual_path = Path::new(&path).with_file_name(
-            OsString::from_str(&format!("{}-{}-actual.png", file_stem, backend.to_str(),)).unwrap(),
+            OsString::from_str(&format!("{}-{}-actual.png", file_stem, renderer)).unwrap(),
         );
         let difference_path = Path::new(&path).with_file_name(
-            OsString::from_str(&format!(
-                "{}-{}-difference.png",
-                file_stem,
-                backend.to_str(),
-            ))
-            .unwrap(),
+            OsString::from_str(&format!("{}-{}-difference.png", file_stem, renderer,)).unwrap(),
         );
 
         // Convert the error values to a false color reprensentation
@@ -246,10 +247,16 @@ pub fn compare_image_output(
 
     #[cfg(target_arch = "wasm32")]
     {
-        let _ = (path, backend, width, height, test_with_alpha, checks);
+        let _ = (path, adapter_info, width, height, test_with_alpha, checks);
     }
 }
 
+fn sanitize_for_path(s: &str) -> String {
+    s.chars()
+        .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' })
+        .collect()
+}
+
 fn copy_via_compute(
     device: &Device,
     encoder: &mut CommandEncoder,
diff --git a/tests/src/lib.rs b/tests/src/lib.rs
index 949afb48a0..236b353386 100644
--- a/tests/src/lib.rs
+++ b/tests/src/lib.rs
@@ -53,11 +53,195 @@ fn lowest_downlevel_properties() -> DownlevelCapabilities {
     }
 }
 
+/// Conditions under which a test should fail or be skipped.
+///
+/// By passing a `FailureCase` to [`TestParameters::expect_fail`], you can
+/// mark a test as expected to fail under the indicated conditions. By
+/// passing it to [`TestParameters::skip`], you can request that the
+/// test be skipped altogether.
+///
+/// If a field is `None`, then that field does not restrict matches. For
+/// example:
+///
+/// ```
+/// # use wgpu_test::FailureCase;
+/// FailureCase {
+///     backends: Some(wgpu::Backends::DX11 | wgpu::Backends::DX12),
+///     vendor: None,
+///     adapter: Some("RTX"),
+///     driver: None,
+/// }
+/// # ;
+/// ```
+///
+/// This applies to all cards with `"RTX"` in their name on either
+/// Direct3D backend, no matter the vendor ID or driver name.
+///
+/// The strings given here need only appear as a substring in the
+/// corresponding [`AdapterInfo`] fields. The comparison is
+/// case-insensitive.
+///
+/// The default value of `FailureCase` applies to any test case. That
+/// is, there are no criteria to constrain the match.
+///
+/// [`AdapterInfo`]: wgt::AdapterInfo
+#[derive(Default)]
 pub struct FailureCase {
-    backends: Option<wgpu::Backends>,
-    vendor: Option<u32>,
-    adapter: Option<String>,
-    skip: bool,
+    /// Backends expected to fail, or `None` for any backend.
+    ///
+    /// If this is `None`, or if the test is using one of the backends
+    /// in `backends`, then this `FailureCase` applies.
+    pub backends: Option<wgpu::Backends>,
+
+    /// Vendor expected to fail, or `None` for any vendor.
+    ///
+    /// If `Some`, this must match [`AdapterInfo::vendor`], which is
+    /// usually the PCI vendor id. Otherwise, this `FailureCase`
+    /// applies regardless of vendor.
+    ///
+    /// [`AdapterInfo::vendor`]: wgt::AdapterInfo::vendor
+    pub vendor: Option<u32>,
+
+    /// Name of adapter expected to fail, or `None` for any adapter name.
+    ///
+    /// If this is `Some(s)` and `s` is a substring of
+    /// [`AdapterInfo::name`], then this `FailureCase` applies. If
+    /// this is `None`, the adapter name isn't considered.
+    ///
+    /// [`AdapterInfo::name`]: wgt::AdapterInfo::name
+    pub adapter: Option<&'static str>,
+
+    /// Name of driver expected to fail, or `None` for any driver name.
+    ///
+    /// If this is `Some(s)` and `s` is a substring of
+    /// [`AdapterInfo::driver`], then this `FailureCase` applies. If
+    /// this is `None`, the driver name isn't considered.
+    ///
+    /// [`AdapterInfo::driver`]: wgt::AdapterInfo::driver
+    pub driver: Option<&'static str>,
+}
+
+impl FailureCase {
+    /// This case applies to all tests.
+    pub fn always() -> Self {
+        FailureCase::default()
+    }
+
+    /// This case applies to no tests.
+    pub fn never() -> Self {
+        FailureCase {
+            backends: Some(wgpu::Backends::empty()),
+            ..FailureCase::default()
+        }
+    }
+
+    /// Tests running on any of the given backends.
+    pub fn backend(backends: wgpu::Backends) -> Self {
+        FailureCase {
+            backends: Some(backends),
+            ..FailureCase::default()
+        }
+    }
+
+    /// Tests running on `adapter`.
+    ///
+    /// For this case to apply, the `adapter` string must appear as a substring
+    /// of the adapter's [`AdapterInfo::name`]. The comparison is
+    /// case-insensitive.
+    ///
+    /// [`AdapterInfo::name`]: wgt::AdapterInfo::name
+    pub fn adapter(adapter: &'static str) -> Self {
+        FailureCase {
+            adapter: Some(adapter),
+            ..FailureCase::default()
+        }
+    }
+
+    /// Tests running on `backend` and `adapter`.
+    ///
+    /// For this case to apply, the test must be using an adapter for one of the
+    /// given `backend` bits, and `adapter` string must appear as a substring of
+    /// the adapter's [`AdapterInfo::name`]. The string comparison is
+    /// case-insensitive.
+    ///
+    /// [`AdapterInfo::name`]: wgt::AdapterInfo::name
+    pub fn backend_adapter(backends: wgpu::Backends, adapter: &'static str) -> Self {
+        FailureCase {
+            backends: Some(backends),
+            adapter: Some(adapter),
+            ..FailureCase::default()
+        }
+    }
+
+    /// Tests running under WebGL.
+    ///
+    /// Because of wasm's limited ability to recover from errors, we
+    /// usually need to skip the test altogether if it's not
+    /// supported, so this should usually be used with
+    /// [`TestParameters::skip`].
+    pub fn webgl2() -> Self {
+        #[cfg(target_arch = "wasm32")]
+        let case = FailureCase::backend(wgpu::Backends::GL);
+        #[cfg(not(target_arch = "wasm32"))]
+        let case = FailureCase::never();
+        case
+    }
+
+    /// Tests running on the MoltenVK Vulkan driver on macOS.
+    pub fn molten_vk() -> Self {
+        FailureCase {
+            backends: Some(wgpu::Backends::VULKAN),
+            driver: Some("MoltenVK"),
+            ..FailureCase::default()
+        }
+    }
+
+    /// Test whether `self` applies to `info`.
+    ///
+    /// If it does, return a `FailureReasons` whose set bits indicate
+    /// why. If it doesn't, return `None`.
+    ///
+    /// The caller is responsible for converting the string-valued
+    /// fields of `info` to lower case, to ensure case-insensitive
+    /// matching.
+    fn applies_to(&self, info: &wgt::AdapterInfo) -> Option<FailureReasons> {
+        let mut reasons = FailureReasons::empty();
+
+        if let Some(backends) = self.backends {
+            if !backends.contains(wgpu::Backends::from(info.backend)) {
+                return None;
+            }
+            reasons.set(FailureReasons::BACKEND, true);
+        }
+        if let Some(vendor) = self.vendor {
+            if vendor != info.vendor {
+                return None;
+            }
+            reasons.set(FailureReasons::VENDOR, true);
+        }
+        if let Some(adapter) = self.adapter {
+            let adapter = adapter.to_lowercase();
+            if !info.name.contains(&adapter) {
+                return None;
+            }
+            reasons.set(FailureReasons::ADAPTER, true);
+        }
+        if let Some(driver) = self.driver {
+            let driver = driver.to_lowercase();
+            if !info.driver.contains(&driver) {
+                return None;
+            }
+            reasons.set(FailureReasons::DRIVER, true);
+        }
+
+        // If we got this far but no specific reasons were triggered, then this
+        // must be a wildcard.
+        if reasons.is_empty() {
+            Some(FailureReasons::ALWAYS)
+        } else {
+            Some(reasons)
+        }
+    }
 }
 
 // This information determines if a test should run.
@@ -65,7 +249,11 @@ pub struct TestParameters {
     pub required_features: Features,
     pub required_downlevel_properties: DownlevelCapabilities,
     pub required_limits: Limits,
-    // Backends where test should fail.
+
+    /// Conditions under which this test should be skipped.
+    pub skips: Vec<FailureCase>,
+
+    /// Conditions under which this test should be run, but is expected to fail.
     pub failures: Vec<FailureCase>,
 }
 
@@ -75,6 +263,7 @@ impl Default for TestParameters {
             required_features: Features::empty(),
             required_downlevel_properties: lowest_downlevel_properties(),
             required_limits: Limits::downlevel_webgl2_defaults(),
+            skips: Vec::new(),
             failures: Vec::new(),
         }
     }
@@ -86,7 +275,8 @@ bitflags::bitflags! {
         const BACKEND = 1 << 0;
         const VENDOR = 1 << 1;
         const ADAPTER = 1 << 2;
-        const ALWAYS = 1 << 3;
+        const DRIVER = 1 << 3;
+        const ALWAYS = 1 << 4;
     }
 }
 
@@ -115,87 +305,17 @@ impl TestParameters {
         self
     }
 
-    /// Mark the test as always failing, equivalent to specific_failure(None, None, None)
-    pub fn failure(mut self) -> Self {
-        self.failures.push(FailureCase {
-            backends: None,
-            vendor: None,
-            adapter: None,
-            skip: false,
-        });
-        self
-    }
-
-    /// Mark the test as always failing and needing to be skipped, equivalent to specific_failure(None, None, None)
-    pub fn skip(mut self) -> Self {
-        self.failures.push(FailureCase {
-            backends: None,
-            vendor: None,
-            adapter: None,
-            skip: true,
-        });
-        self
-    }
-
-    /// Mark the test as always failing on a specific backend, equivalent to specific_failure(backend, None, None)
-    pub fn backend_failure(mut self, backends: wgpu::Backends) -> Self {
-        self.failures.push(FailureCase {
-            backends: Some(backends),
-            vendor: None,
-            adapter: None,
-            skip: false,
-        });
+    /// Mark the test as expected to fail under the conditions in `when`, but still run it.
+    pub fn expect_fail(mut self, when: FailureCase) -> Self {
+        self.failures.push(when);
         self
     }
 
-    /// Mark the test as always failing on WebGL. Because limited ability of wasm to recover from errors, we need to wholesale
-    /// skip the test if it's not supported.
-    pub fn webgl2_failure(mut self) -> Self {
-        let _ = &mut self;
-        #[cfg(target_arch = "wasm32")]
-        self.failures.push(FailureCase {
-            backends: Some(wgpu::Backends::GL),
-            vendor: None,
-            adapter: None,
-            skip: true,
-        });
-        self
-    }
-
-    /// Determines if a test should fail under a particular set of conditions. If any of these are None, that means that it will match anything in that field.
-    ///
-    /// ex.
-    /// `specific_failure(Some(wgpu::Backends::DX11 | wgpu::Backends::DX12), None, Some("RTX"), false)`
-    /// means that this test will fail on all cards with RTX in their name on either D3D backend, no matter the vendor ID.
-    ///
-    /// If segfault is set to true, the test won't be run at all due to avoid segfaults.
-    pub fn specific_failure(
-        mut self,
-        backends: Option<Backends>,
-        vendor: Option<u32>,
-        device: Option<&'static str>,
-        skip: bool,
-    ) -> Self {
-        self.failures.push(FailureCase {
-            backends,
-            vendor,
-            adapter: device.as_ref().map(AsRef::as_ref).map(str::to_lowercase),
-            skip,
-        });
+    /// Mark the test as needing to be skipped under the conditions in `when`.
+    pub fn skip(mut self, when: FailureCase) -> Self {
+        self.skips.push(when);
         self
     }
-
-    /// Mark the test as failing on vulkan on mac only
-    pub fn molten_vk_failure(self) -> Self {
-        #[cfg(any(target_os = "macos", target_os = "ios"))]
-        {
-            self.specific_failure(Some(wgpu::Backends::VULKAN), None, None, false)
-        }
-        #[cfg(not(any(target_os = "macos", target_os = "ios")))]
-        {
-            self
-        }
-    }
 }
 
 pub fn initialize_test(parameters: TestParameters, test_function: impl FnOnce(TestingContext)) {
@@ -210,7 +330,15 @@ pub fn initialize_test(parameters: TestParameters, test_function: impl FnOnce(Te
     let (adapter, _surface_guard) = initialize_adapter();
 
     let adapter_info = adapter.get_info();
-    let adapter_lowercase_name = adapter_info.name.to_lowercase();
+
+    // Produce a lower-case version of the adapter info, for comparison against
+    // `parameters.skips` and `parameters.failures`.
+    let adapter_lowercase_info = wgt::AdapterInfo {
+        name: adapter_info.name.to_lowercase(),
+        driver: adapter_info.driver.to_lowercase(),
+        ..adapter_info.clone()
+    };
+
     let adapter_features = adapter.features();
     let adapter_limits = adapter.limits();
     let adapter_downlevel_capabilities = adapter.get_downlevel_capabilities();
@@ -254,7 +382,7 @@ pub fn initialize_test(parameters: TestParameters, test_function: impl FnOnce(Te
 
     let context = TestingContext {
         adapter,
-        adapter_info: adapter_info.clone(),
+        adapter_info,
         adapter_downlevel_capabilities,
         device,
         device_features: parameters.required_features,
@@ -262,105 +390,77 @@ pub fn initialize_test(parameters: TestParameters, test_function: impl FnOnce(Te
         queue,
     };
 
-    let expected_failure_reason = parameters.failures.iter().find_map(|failure| {
-        let always =
-            failure.backends.is_none() && failure.vendor.is_none() && failure.adapter.is_none();
-
-        let expect_failure_backend = failure
-            .backends
-            .map(|f| f.contains(wgpu::Backends::from(adapter_info.backend)));
-        let expect_failure_vendor = failure.vendor.map(|v| v == adapter_info.vendor);
-        let expect_failure_adapter = failure
-            .adapter
-            .as_deref()
-            .map(|f| adapter_lowercase_name.contains(f));
-
-        if expect_failure_backend.unwrap_or(true)
-            && expect_failure_vendor.unwrap_or(true)
-            && expect_failure_adapter.unwrap_or(true)
-        {
-            if always {
-                Some((FailureReasons::ALWAYS, failure.skip))
-            } else {
-                let mut reason = FailureReasons::empty();
-                reason.set(
-                    FailureReasons::BACKEND,
-                    expect_failure_backend.unwrap_or(false),
-                );
-                reason.set(
-                    FailureReasons::VENDOR,
-                    expect_failure_vendor.unwrap_or(false),
-                );
-                reason.set(
-                    FailureReasons::ADAPTER,
-                    expect_failure_adapter.unwrap_or(false),
-                );
-                Some((reason, failure.skip))
-            }
-        } else {
-            None
-        }
-    });
-
-    if let Some((reason, true)) = expected_failure_reason {
-        log::info!("EXPECTED TEST FAILURE SKIPPED: {:?}", reason);
+    // Check if we should skip the test altogether.
+    if let Some(skip_reason) = parameters
+        .skips
+        .iter()
+        .find_map(|case| case.applies_to(&adapter_lowercase_info))
+    {
+        log::info!("EXPECTED TEST FAILURE SKIPPED: {:?}", skip_reason);
         return;
     }
 
+    // Determine if we expect this test to fail, and if so, why.
+    let expected_failure_reason = parameters
+        .failures
+        .iter()
+        .find_map(|case| case.applies_to(&adapter_lowercase_info));
+
+    // Run the test, and catch panics (possibly due to failed assertions).
     let panicked = catch_unwind(AssertUnwindSafe(|| test_function(context))).is_err();
+
+    // Check whether any validation errors were reported during the test run.
     cfg_if::cfg_if!(
         if #[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] {
             let canary_set = wgpu::hal::VALIDATION_CANARY.get_and_reset();
         } else {
-            let canary_set = _surface_guard.check_for_unreported_errors();
+            let canary_set = _surface_guard.unwrap().check_for_unreported_errors();
         }
     );
 
-    let failed = panicked || canary_set;
-
+    // Summarize reasons for actual failure, if any.
     let failure_cause = match (panicked, canary_set) {
-        (true, true) => "PANIC AND VALIDATION ERROR",
-        (true, false) => "PANIC",
-        (false, true) => "VALIDATION ERROR",
-        (false, false) => "",
+        (true, true) => Some("PANIC AND VALIDATION ERROR"),
+        (true, false) => Some("PANIC"),
+        (false, true) => Some("VALIDATION ERROR"),
+        (false, false) => None,
     };
 
-    let expect_failure = expected_failure_reason.is_some();
-
-    if failed == expect_failure {
-        // We got the conditions we expected
-        if let Some((expected_reason, _)) = expected_failure_reason {
-            // Print out reason for the failure
+    // Compare actual results against expectations.
+    match (failure_cause, expected_failure_reason) {
+        // The test passed, as expected.
+        (None, None) => {}
+        // The test failed unexpectedly.
+        (Some(cause), None) => {
+            panic!("UNEXPECTED TEST FAILURE DUE TO {cause}")
+        }
+        // The test passed unexpectedly.
+        (None, Some(reason)) => {
+            panic!("UNEXPECTED TEST PASS: {reason:?}");
+        }
+        // The test failed, as expected.
+        (Some(cause), Some(reason_expected)) => {
             log::info!(
-                "GOT EXPECTED TEST FAILURE DUE TO {}: {:?}",
-                failure_cause,
-                expected_reason
+                "EXPECTED FAILURE DUE TO {} (expected because of {:?})",
+                cause,
+                reason_expected
             );
         }
-    } else if let Some((reason, _)) = expected_failure_reason {
-        // We expected to fail, but things passed
-        panic!("UNEXPECTED TEST PASS: {reason:?}");
-    } else {
-        panic!("UNEXPECTED TEST FAILURE DUE TO {failure_cause}")
     }
 }
 
-fn initialize_adapter() -> (Adapter, SurfaceGuard) {
-    let backends = wgpu::util::backend_bits_from_env().unwrap_or_else(Backends::all);
-    let dx12_shader_compiler = wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default();
-    let instance = Instance::new(wgpu::InstanceDescriptor {
-        backends,
-        dx12_shader_compiler,
-    });
-    let surface_guard;
+fn initialize_adapter() -> (Adapter, Option<SurfaceGuard>) {
+    let instance = initialize_instance();
+    let surface_guard: Option<SurfaceGuard>;
     let compatible_surface;
 
+    // Create a canvas iff we need a WebGL2RenderingContext to have a working device.
     #[cfg(not(all(
         target_arch = "wasm32",
         any(target_os = "emscripten", feature = "webgl")
     )))]
     {
-        surface_guard = SurfaceGuard {};
+        surface_guard = None;
         compatible_surface = None;
     }
     #[cfg(all(
@@ -396,7 +496,7 @@ fn initialize_adapter() -> (Adapter, SurfaceGuard) {
                 .expect("could not create surface from canvas")
         };
 
-        surface_guard = SurfaceGuard { canvas };
+        surface_guard = Some(SurfaceGuard { canvas });
 
         compatible_surface = Some(surface);
     }
@@ -411,12 +511,21 @@ fn initialize_adapter() -> (Adapter, SurfaceGuard) {
     (adapter, surface_guard)
 }
 
-struct SurfaceGuard {
-    #[cfg(all(
-        target_arch = "wasm32",
-        any(target_os = "emscripten", feature = "webgl")
-    ))]
-    canvas: web_sys::HtmlCanvasElement,
+/// Create the `Instance` used by `initialize_adapter` and by tests that need one directly.
+///
+/// Each field comes from its `wgpu::util::*_from_env` helper, else all backends / the default.
+pub fn initialize_instance() -> Instance {
+    Instance::new(wgpu::InstanceDescriptor {
+        backends: wgpu::util::backend_bits_from_env().unwrap_or_else(Backends::all),
+        dx12_shader_compiler: wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default(),
+        gles_minor_version: wgpu::util::gles_minor_version_from_env().unwrap_or_default(),
+    })
+}
+
+// Public because tests that interact with the canvas construct and use it directly.
+pub struct SurfaceGuard {
+    #[cfg(target_arch = "wasm32")]
+    pub canvas: web_sys::HtmlCanvasElement,
 }
 
 impl SurfaceGuard {
@@ -450,11 +559,8 @@ impl Drop for SurfaceGuard {
     }
 }
 
-#[cfg(all(
-    target_arch = "wasm32",
-    any(target_os = "emscripten", feature = "webgl")
-))]
-fn create_html_canvas() -> web_sys::HtmlCanvasElement {
+#[cfg(target_arch = "wasm32")]
+pub fn create_html_canvas() -> web_sys::HtmlCanvasElement {
     use wasm_bindgen::JsCast;
 
     web_sys::window()
diff --git a/tests/tests/bind_group_layout_dedup.rs b/tests/tests/bind_group_layout_dedup.rs
new file mode 100644
index 0000000000..03bc1f1c5a
--- /dev/null
+++ b/tests/tests/bind_group_layout_dedup.rs
@@ -0,0 +1,144 @@
+use wgpu_test::{initialize_test, TestParameters};
+
+#[test]
+fn bind_group_layout_deduplication() { // identical layouts must be interchangeable
+    initialize_test(TestParameters::default(), |ctx| {
+        let entries_1 = &[]; // empty layout; instantiated twice (bgl_1a and bgl_1b)
+
+        let entries_2 = &[wgpu::BindGroupLayoutEntry {
+            binding: 0,
+            visibility: wgpu::ShaderStages::VERTEX,
+            ty: wgpu::BindingType::Buffer {
+                ty: wgpu::BufferBindingType::Uniform,
+                has_dynamic_offset: false,
+                min_binding_size: None,
+            },
+            count: None,
+        }];
+
+        let bgl_1a = ctx
+            .device
+            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                label: None,
+                entries: entries_1,
+            });
+
+        let _bgl_2 = ctx
+            .device
+            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                label: None,
+                entries: entries_2, // a different layout, created between the identical ones
+            });
+
+        let bgl_1b = ctx
+            .device
+            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                label: None,
+                entries: entries_1, // same entries as bgl_1a
+            });
+
+        let bg_1a = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: None,
+            layout: &bgl_1a,
+            entries: &[],
+        });
+
+        let bg_1b = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: None,
+            layout: &bgl_1b,
+            entries: &[],
+        });
+
+        let pipeline_layout = ctx
+            .device
+            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+                label: None,
+                bind_group_layouts: &[&bgl_1b], // the pipeline is built against bgl_1b only
+                push_constant_ranges: &[],
+            });
+
+        let module = ctx
+            .device
+            .create_shader_module(wgpu::ShaderModuleDescriptor {
+                label: None,
+                source: wgpu::ShaderSource::Wgsl(SHADER_SRC.into()),
+            });
+
+        let targets = &[Some(wgpu::ColorTargetState {
+            format: wgpu::TextureFormat::Rgba8Unorm,
+            blend: None,
+            write_mask: Default::default(),
+        })];
+
+        let desc = wgpu::RenderPipelineDescriptor {
+            label: None,
+            layout: Some(&pipeline_layout),
+            vertex: wgpu::VertexState {
+                module: &module,
+                entry_point: "vs_main",
+                buffers: &[],
+            },
+            fragment: Some(wgpu::FragmentState {
+                module: &module,
+                entry_point: "fs_main",
+                targets,
+            }),
+            primitive: wgpu::PrimitiveState::default(),
+            depth_stencil: None,
+            multiview: None,
+            multisample: wgpu::MultisampleState::default(),
+        };
+
+        let pipeline = ctx.device.create_render_pipeline(&desc);
+
+        let texture = ctx.device.create_texture(&wgpu::TextureDescriptor {
+            label: None,
+            dimension: wgpu::TextureDimension::D2,
+            size: wgpu::Extent3d {
+                width: 32,
+                height: 32,
+                depth_or_array_layers: 1,
+            },
+            sample_count: 1,
+            mip_level_count: 1,
+            format: wgpu::TextureFormat::Rgba8Unorm,
+            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
+            view_formats: &[],
+        });
+
+        let texture_view = texture.create_view(&Default::default());
+
+        let mut encoder = ctx.device.create_command_encoder(&Default::default());
+
+        {
+            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+                label: None,
+                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                    view: &texture_view,
+                    resolve_target: None,
+                    ops: Default::default(),
+                })],
+                depth_stencil_attachment: None,
+                occlusion_query_set: None,
+                timestamp_writes: None,
+            });
+
+            pass.set_bind_group(0, &bg_1b, &[]); // built from the pipeline's own layout
+
+            pass.set_pipeline(&pipeline);
+
+            pass.draw(0..6, 0..1);
+
+            pass.set_bind_group(0, &bg_1a, &[]); // built from bgl_1a: must be accepted too
+
+            pass.draw(0..6, 0..1);
+        }
+
+        ctx.queue.submit(Some(encoder.finish()));
+    })
+}
+
+const SHADER_SRC: &str = "
+@vertex fn vs_main() -> @builtin(position) vec4<f32> { return vec4<f32>(1.0); }
+@fragment fn fs_main() -> @location(0) vec4<f32> { return vec4<f32>(1.0); }
+";
diff --git a/tests/tests/clear_texture.rs b/tests/tests/clear_texture.rs
index 7b2024c64c..36f48af359 100644
--- a/tests/tests/clear_texture.rs
+++ b/tests/tests/clear_texture.rs
@@ -1,5 +1,7 @@
 use wasm_bindgen_test::*;
-use wgpu_test::{image::ReadbackBuffers, initialize_test, TestParameters, TestingContext};
+use wgpu_test::{
+    image::ReadbackBuffers, initialize_test, FailureCase, TestParameters, TestingContext,
+};
 
 static TEXTURE_FORMATS_UNCOMPRESSED_GLES_COMPAT: &[wgpu::TextureFormat] = &[
     wgpu::TextureFormat::R8Unorm,
@@ -328,7 +330,7 @@ fn clear_texture_tests(ctx: &TestingContext, formats: &[wgpu::TextureFormat]) {
 fn clear_texture_uncompressed_gles_compat() {
     initialize_test(
         TestParameters::default()
-            .webgl2_failure()
+            .skip(FailureCase::webgl2())
             .features(wgpu::Features::CLEAR_TEXTURE),
         |ctx| {
             clear_texture_tests(&ctx, TEXTURE_FORMATS_UNCOMPRESSED_GLES_COMPAT);
@@ -341,8 +343,8 @@ fn clear_texture_uncompressed_gles_compat() {
 fn clear_texture_uncompressed() {
     initialize_test(
         TestParameters::default()
-            .webgl2_failure()
-            .backend_failure(wgpu::Backends::GL)
+            .skip(FailureCase::webgl2())
+            .expect_fail(FailureCase::backend(wgpu::Backends::GL))
             .features(wgpu::Features::CLEAR_TEXTURE),
         |ctx| {
             clear_texture_tests(&ctx, TEXTURE_FORMATS_UNCOMPRESSED);
@@ -355,7 +357,7 @@ fn clear_texture_uncompressed() {
 fn clear_texture_depth() {
     initialize_test(
         TestParameters::default()
-            .webgl2_failure()
+            .skip(FailureCase::webgl2())
             .downlevel_flags(
                 wgpu::DownlevelFlags::DEPTH_TEXTURE_AND_BUFFER_COPIES
                     | wgpu::DownlevelFlags::COMPUTE_SHADERS,
@@ -385,8 +387,10 @@ fn clear_texture_bc() {
     initialize_test(
         TestParameters::default()
             .features(wgpu::Features::CLEAR_TEXTURE | wgpu::Features::TEXTURE_COMPRESSION_BC)
-            .specific_failure(Some(wgpu::Backends::GL), None, Some("ANGLE"), false) // https://bugs.chromium.org/p/angleproject/issues/detail?id=7056
-            .backend_failure(wgpu::Backends::GL), // compressed texture copy to buffer not yet implemented
+            // https://bugs.chromium.org/p/angleproject/issues/detail?id=7056
+            .expect_fail(FailureCase::backend_adapter(wgpu::Backends::GL, "ANGLE"))
+            // compressed texture copy to buffer not yet implemented
+            .expect_fail(FailureCase::backend(wgpu::Backends::GL)),
         |ctx| {
             clear_texture_tests(&ctx, TEXTURE_FORMATS_BC);
         },
@@ -402,8 +406,10 @@ fn clear_texture_astc() {
                 max_texture_dimension_2d: wgpu::COPY_BYTES_PER_ROW_ALIGNMENT * 12,
                 ..wgpu::Limits::downlevel_defaults()
             })
-            .specific_failure(Some(wgpu::Backends::GL), None, Some("ANGLE"), false) // https://bugs.chromium.org/p/angleproject/issues/detail?id=7056
-            .backend_failure(wgpu::Backends::GL), // compressed texture copy to buffer not yet implemented
+            // https://bugs.chromium.org/p/angleproject/issues/detail?id=7056
+            .expect_fail(FailureCase::backend_adapter(wgpu::Backends::GL, "ANGLE"))
+            // compressed texture copy to buffer not yet implemented
+            .expect_fail(FailureCase::backend(wgpu::Backends::GL)),
         |ctx| {
             clear_texture_tests(&ctx, TEXTURE_FORMATS_ASTC);
         },
@@ -415,8 +421,10 @@ fn clear_texture_etc2() {
     initialize_test(
         TestParameters::default()
             .features(wgpu::Features::CLEAR_TEXTURE | wgpu::Features::TEXTURE_COMPRESSION_ETC2)
-            .specific_failure(Some(wgpu::Backends::GL), None, Some("ANGLE"), false) // https://bugs.chromium.org/p/angleproject/issues/detail?id=7056
-            .backend_failure(wgpu::Backends::GL), // compressed texture copy to buffer not yet implemented
+            // https://bugs.chromium.org/p/angleproject/issues/detail?id=7056
+            .expect_fail(FailureCase::backend_adapter(wgpu::Backends::GL, "ANGLE"))
+            // compressed texture copy to buffer not yet implemented
+            .expect_fail(FailureCase::backend(wgpu::Backends::GL)),
         |ctx| {
             clear_texture_tests(&ctx, TEXTURE_FORMATS_ETC2);
         },
diff --git a/tests/tests/create_surface_error.rs b/tests/tests/create_surface_error.rs
new file mode 100644
index 0000000000..f8962697ce
--- /dev/null
+++ b/tests/tests/create_surface_error.rs
@@ -0,0 +1,28 @@
+//! Test that `create_surface_*()` accurately reports those errors we can provoke.
+
+/// This test applies to those cfgs that have a `create_surface_from_canvas` method, which
+/// include WebGL and WebGPU, but *not* Emscripten GLES.
+#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))]
+#[wasm_bindgen_test::wasm_bindgen_test]
+fn canvas_get_context_returned_null() {
+    // Not using initialize_test() because that goes straight to creating the canvas for us.
+    let instance = wgpu_test::initialize_instance();
+    // Create the canvas; holding it in a `SurfaceGuard` cleans it up on drop.
+    let canvas_g = wgpu_test::SurfaceGuard {
+        canvas: wgpu_test::create_html_canvas(),
+    };
+    // Using a context id that is not "webgl2" or "webgpu" will render the canvas unusable by wgpu.
+    canvas_g.canvas.get_context("2d").unwrap();
+
+    #[allow(clippy::redundant_clone)] // false positive — can't and shouldn't move out.
+    let error = instance
+        .create_surface_from_canvas(canvas_g.canvas.clone())
+        .unwrap_err();
+
+    assert!(
+        error
+            .to_string()
+            .contains("canvas.getContext() returned null"),
+        "{error}"
+    );
+}
diff --git a/tests/tests/device.rs b/tests/tests/device.rs
index 945d5476d7..f43791f86e 100644
--- a/tests/tests/device.rs
+++ b/tests/tests/device.rs
@@ -1,6 +1,6 @@
 use wasm_bindgen_test::*;
 
-use wgpu_test::{initialize_test, TestParameters};
+use wgpu_test::{initialize_test, FailureCase, TestParameters};
 
 #[test]
 #[wasm_bindgen_test]
@@ -13,26 +13,30 @@ fn device_initialization() {
 #[test]
 #[ignore]
 fn device_mismatch() {
-    initialize_test(TestParameters::default().failure(), |ctx| {
-        // Create a bind group uisng a lyaout from another device. This should be a validation
-        // error but currently crashes.
-        let (device2, _) =
-            pollster::block_on(ctx.adapter.request_device(&Default::default(), None)).unwrap();
+    initialize_test(
+        // https://github.com/gfx-rs/wgpu/issues/3927
+        TestParameters::default().expect_fail(FailureCase::always()),
+        |ctx| {
+            // Create a bind group using a layout from another device. This should be a validation
+            // error but currently crashes.
+            let (device2, _) =
+                pollster::block_on(ctx.adapter.request_device(&Default::default(), None)).unwrap();
 
-        {
-            let bind_group_layout =
-                device2.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            {
+                let bind_group_layout =
+                    device2.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                        label: None,
+                        entries: &[],
+                    });
+
+                let _bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
                     label: None,
+                    layout: &bind_group_layout,
                     entries: &[],
                 });
+            }
 
-            let _bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
-                label: None,
-                layout: &bind_group_layout,
-                entries: &[],
-            });
-        }
-
-        ctx.device.poll(wgpu::Maintain::Poll);
-    });
+            ctx.device.poll(wgpu::Maintain::Poll);
+        },
+    );
 }
diff --git a/tests/tests/encoder.rs b/tests/tests/encoder.rs
index 9e541c16a8..5914cd22da 100644
--- a/tests/tests/encoder.rs
+++ b/tests/tests/encoder.rs
@@ -1,5 +1,6 @@
 use wasm_bindgen_test::*;
-use wgpu_test::{initialize_test, TestParameters};
+use wgpu::RenderPassDescriptor;
+use wgpu_test::{fail, initialize_test, FailureCase, TestParameters};
 
 #[test]
 #[wasm_bindgen_test]
@@ -11,3 +12,60 @@ fn drop_encoder() {
         drop(encoder);
     })
 }
+
+#[test]
+fn drop_encoder_after_error() {
+    // This test crashes on DX12 with the exception:
+    //
+    // ID3D12CommandAllocator::Reset: The command allocator cannot be reset because a
+    // command list is currently being recorded with the allocator. [ EXECUTION ERROR
+    // #543: COMMAND_ALLOCATOR_CANNOT_RESET]
+    //
+    // For now, we mark the test as failing on DX12.
+    let parameters =
+        TestParameters::default().expect_fail(FailureCase::backend(wgpu::Backends::DX12));
+    initialize_test(parameters, |ctx| {
+        let mut encoder = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+        let target_tex = ctx.device.create_texture(&wgpu::TextureDescriptor {
+            label: None,
+            size: wgpu::Extent3d {
+                width: 100,
+                height: 100,
+                depth_or_array_layers: 1,
+            },
+            mip_level_count: 1,
+            sample_count: 1,
+            dimension: wgpu::TextureDimension::D2,
+            format: wgpu::TextureFormat::R8Unorm,
+            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
+            view_formats: &[],
+        });
+        let target_view = target_tex.create_view(&wgpu::TextureViewDescriptor::default());
+
+        let mut renderpass = encoder.begin_render_pass(&RenderPassDescriptor {
+            label: Some("renderpass"),
+            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                ops: wgpu::Operations::default(),
+                resolve_target: None,
+                view: &target_view,
+            })],
+            depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
+        });
+
+        // Set a bad viewport (negative width and height) on renderpass, triggering an error.
+        fail(&ctx.device, || { // NOTE(review): presumably asserts an error is raised — confirm
+            renderpass.set_viewport(0.0, 0.0, -1.0, -1.0, 0.0, 1.0);
+            drop(renderpass);
+        });
+
+        // This is the actual interesting error condition. We've created
+        // a CommandEncoder which errored out when processing a command.
+        // The encoder is still open!
+        drop(encoder);
+    })
+}
diff --git a/tests/tests/instance.rs b/tests/tests/instance.rs
index e9ff6afff0..b231e8d879 100644
--- a/tests/tests/instance.rs
+++ b/tests/tests/instance.rs
@@ -6,6 +6,7 @@ fn initialize() {
     let _ = wgpu::Instance::new(wgpu::InstanceDescriptor {
         backends: wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all),
         dx12_shader_compiler: wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default(),
+        gles_minor_version: wgpu::util::gles_minor_version_from_env().unwrap_or_default(),
     });
 }
 
@@ -13,6 +14,7 @@ fn request_adapter_inner(power: wgt::PowerPreference) {
     let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
         backends: wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all),
         dx12_shader_compiler: wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default(),
+        gles_minor_version: wgpu::util::gles_minor_version_from_env().unwrap_or_default(),
     });
 
     let _adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions {
diff --git a/tests/tests/occlusion_query/mod.rs b/tests/tests/occlusion_query/mod.rs
new file mode 100644
index 0000000000..eab0828e41
--- /dev/null
+++ b/tests/tests/occlusion_query/mod.rs
@@ -0,0 +1,128 @@
+use std::borrow::Cow;
+use wgpu_test::{initialize_test, TestParameters};
+
+#[test]
+fn occlusion_query() {
+    initialize_test(TestParameters::default(), |ctx| {
+        // Create depth texture
+        let depth_texture = ctx.device.create_texture(&wgpu::TextureDescriptor {
+            label: Some("Depth texture"),
+            size: wgpu::Extent3d {
+                width: 64,
+                height: 64,
+                depth_or_array_layers: 1,
+            },
+            mip_level_count: 1,
+            sample_count: 1,
+            dimension: wgpu::TextureDimension::D2,
+            format: wgpu::TextureFormat::Depth32Float,
+            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
+            view_formats: &[],
+        });
+        let depth_texture_view = depth_texture.create_view(&wgpu::TextureViewDescriptor::default());
+
+        // Setup pipeline using a simple shader with hardcoded vertices
+        let shader = ctx
+            .device
+            .create_shader_module(wgpu::ShaderModuleDescriptor {
+                label: Some("Shader module"),
+                source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("shader.wgsl"))),
+            });
+        let pipeline = ctx
+            .device
+            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
+                label: Some("Pipeline"),
+                layout: None,
+                vertex: wgpu::VertexState {
+                    module: &shader,
+                    entry_point: "vs_main",
+                    buffers: &[],
+                },
+                fragment: None,
+                primitive: wgpu::PrimitiveState::default(),
+                depth_stencil: Some(wgpu::DepthStencilState {
+                    format: wgpu::TextureFormat::Depth32Float,
+                    depth_write_enabled: true,
+                    depth_compare: wgpu::CompareFunction::Less,
+                    stencil: wgpu::StencilState::default(),
+                    bias: wgpu::DepthBiasState::default(),
+                }),
+                multisample: wgpu::MultisampleState::default(),
+                multiview: None,
+            });
+
+        // Create occlusion query set
+        let query_set = ctx.device.create_query_set(&wgpu::QuerySetDescriptor {
+            label: Some("Query set"),
+            ty: wgpu::QueryType::Occlusion,
+            count: 3, // one query per draw below
+        });
+
+        let mut encoder = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+        {
+            let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+                label: Some("Render pass"),
+                color_attachments: &[],
+                depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment {
+                    view: &depth_texture_view,
+                    depth_ops: Some(wgpu::Operations {
+                        load: wgpu::LoadOp::Clear(1.0),
+                        store: true,
+                    }),
+                    stencil_ops: None,
+                }),
+                timestamp_writes: None,
+                occlusion_query_set: Some(&query_set),
+            });
+            render_pass.set_pipeline(&pipeline);
+
+            // Not occluded (z = 0.5, nothing drawn yet; depth was cleared to 1.0)
+            render_pass.begin_occlusion_query(0);
+            render_pass.draw(4..7, 0..1);
+            render_pass.end_occlusion_query();
+
+            // Not occluded (z = 0.0)
+            render_pass.begin_occlusion_query(1);
+            render_pass.draw(0..3, 0..1);
+            render_pass.end_occlusion_query();
+
+            // Occluded (z = 0.5, behind the z = 0.0 triangle drawn above)
+            render_pass.begin_occlusion_query(2);
+            render_pass.draw(4..7, 0..1);
+            render_pass.end_occlusion_query();
+        }
+
+        // Resolve query set to buffer
+        let query_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("Query buffer"),
+            size: std::mem::size_of::<u64>() as u64 * 3,
+            usage: wgpu::BufferUsages::QUERY_RESOLVE | wgpu::BufferUsages::COPY_SRC,
+            mapped_at_creation: false,
+        });
+        encoder.resolve_query_set(&query_set, 0..3, &query_buffer, 0);
+
+        let mapping_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("Mapping buffer"),
+            size: query_buffer.size(),
+            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+        encoder.copy_buffer_to_buffer(&query_buffer, 0, &mapping_buffer, 0, query_buffer.size());
+
+        ctx.queue.submit(Some(encoder.finish()));
+
+        mapping_buffer
+            .slice(..)
+            .map_async(wgpu::MapMode::Read, |_| ());
+        ctx.device.poll(wgpu::Maintain::Wait);
+        let query_buffer_view = mapping_buffer.slice(..).get_mapped_range();
+        let query_data: &[u64; 3] = bytemuck::from_bytes(&query_buffer_view);
+
+        // WebGPU only defines query results as zero/non-zero
+        assert_ne!(query_data[0], 0);
+        assert_ne!(query_data[1], 0);
+        assert_eq!(query_data[2], 0);
+    })
+}
diff --git a/tests/tests/occlusion_query/shader.wgsl b/tests/tests/occlusion_query/shader.wgsl
new file mode 100644
index 0000000000..6c64f19c2f
--- /dev/null
+++ b/tests/tests/occlusion_query/shader.wgsl
@@ -0,0 +1,7 @@
+@vertex
+fn vs_main(@builtin(vertex_index) in_vertex_index: u32) -> @builtin(position) vec4<f32> {
+    let x = f32(i32(in_vertex_index & 3u) - 1); // low two bits -> x in -1..=2
+    let y = f32(i32(in_vertex_index & 1u) * 2 - 1); // lowest bit -> y is -1 or 1
+    // Bit 2 of the index selects the depth: 0.0 when clear, 0.5 (4 / 8) when set.
+    return vec4<f32>(x, y, f32(in_vertex_index & 4u) / 8.0, 1.0);
+}
diff --git a/tests/tests/partially_bounded_arrays/mod.rs b/tests/tests/partially_bounded_arrays/mod.rs
index 99e637507c..43844e456e 100644
--- a/tests/tests/partially_bounded_arrays/mod.rs
+++ b/tests/tests/partially_bounded_arrays/mod.rs
@@ -86,8 +86,10 @@ fn partially_bounded_array() {
             let mut encoder =
                 device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
             {
-                let mut cpass =
-                    encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
+                let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                    label: None,
+                    timestamp_writes: None,
+                });
                 cpass.set_pipeline(&compute_pipeline);
                 cpass.set_bind_group(0, &bind_group, &[]);
                 cpass.dispatch_workgroups(1, 1, 1);
diff --git a/tests/tests/poll.rs b/tests/tests/poll.rs
index 7409dad093..e27a47a42c 100644
--- a/tests/tests/poll.rs
+++ b/tests/tests/poll.rs
@@ -7,7 +7,7 @@ use wgpu::{
 };
 
 use wasm_bindgen_test::*;
-use wgpu_test::{initialize_test, TestParameters, TestingContext};
+use wgpu_test::{initialize_test, FailureCase, TestParameters, TestingContext};
 
 fn generate_dummy_work(ctx: &TestingContext) -> CommandBuffer {
     let buffer = ctx.device.create_buffer(&BufferDescriptor {
@@ -56,60 +56,75 @@ fn generate_dummy_work(ctx: &TestingContext) -> CommandBuffer {
 #[test]
 #[wasm_bindgen_test]
 fn wait() {
-    initialize_test(TestParameters::default().skip(), |ctx| {
-        let cmd_buf = generate_dummy_work(&ctx);
-
-        ctx.queue.submit(Some(cmd_buf));
-        ctx.device.poll(Maintain::Wait);
-    })
+    initialize_test(
+        TestParameters::default().skip(FailureCase::always()),
+        |ctx| {
+            let cmd_buf = generate_dummy_work(&ctx);
+
+            ctx.queue.submit(Some(cmd_buf));
+            ctx.device.poll(Maintain::Wait);
+        },
+    )
 }
 
 #[test]
 #[wasm_bindgen_test]
 fn double_wait() {
-    initialize_test(TestParameters::default().skip(), |ctx| {
-        let cmd_buf = generate_dummy_work(&ctx);
-
-        ctx.queue.submit(Some(cmd_buf));
-        ctx.device.poll(Maintain::Wait);
-        ctx.device.poll(Maintain::Wait);
-    })
+    initialize_test(
+        TestParameters::default().skip(FailureCase::always()),
+        |ctx| {
+            let cmd_buf = generate_dummy_work(&ctx);
+
+            ctx.queue.submit(Some(cmd_buf));
+            ctx.device.poll(Maintain::Wait);
+            ctx.device.poll(Maintain::Wait);
+        },
+    )
 }
 
 #[test]
 #[wasm_bindgen_test]
 fn wait_on_submission() {
-    initialize_test(TestParameters::default().skip(), |ctx| {
-        let cmd_buf = generate_dummy_work(&ctx);
-
-        let index = ctx.queue.submit(Some(cmd_buf));
-        ctx.device.poll(Maintain::WaitForSubmissionIndex(index));
-    })
+    initialize_test(
+        TestParameters::default().skip(FailureCase::always()),
+        |ctx| {
+            let cmd_buf = generate_dummy_work(&ctx);
+
+            let index = ctx.queue.submit(Some(cmd_buf));
+            ctx.device.poll(Maintain::WaitForSubmissionIndex(index));
+        },
+    )
 }
 
 #[test]
 #[wasm_bindgen_test]
 fn double_wait_on_submission() {
-    initialize_test(TestParameters::default().skip(), |ctx| {
-        let cmd_buf = generate_dummy_work(&ctx);
-
-        let index = ctx.queue.submit(Some(cmd_buf));
-        ctx.device
-            .poll(Maintain::WaitForSubmissionIndex(index.clone()));
-        ctx.device.poll(Maintain::WaitForSubmissionIndex(index));
-    })
+    initialize_test(
+        TestParameters::default().skip(FailureCase::always()),
+        |ctx| {
+            let cmd_buf = generate_dummy_work(&ctx);
+
+            let index = ctx.queue.submit(Some(cmd_buf));
+            ctx.device
+                .poll(Maintain::WaitForSubmissionIndex(index.clone()));
+            ctx.device.poll(Maintain::WaitForSubmissionIndex(index));
+        },
+    )
 }
 
 #[test]
 #[wasm_bindgen_test]
 fn wait_out_of_order() {
-    initialize_test(TestParameters::default().skip(), |ctx| {
-        let cmd_buf1 = generate_dummy_work(&ctx);
-        let cmd_buf2 = generate_dummy_work(&ctx);
-
-        let index1 = ctx.queue.submit(Some(cmd_buf1));
-        let index2 = ctx.queue.submit(Some(cmd_buf2));
-        ctx.device.poll(Maintain::WaitForSubmissionIndex(index2));
-        ctx.device.poll(Maintain::WaitForSubmissionIndex(index1));
-    })
+    initialize_test(
+        TestParameters::default().skip(FailureCase::always()),
+        |ctx| {
+            let cmd_buf1 = generate_dummy_work(&ctx);
+            let cmd_buf2 = generate_dummy_work(&ctx);
+
+            let index1 = ctx.queue.submit(Some(cmd_buf1));
+            let index2 = ctx.queue.submit(Some(cmd_buf2));
+            ctx.device.poll(Maintain::WaitForSubmissionIndex(index2));
+            ctx.device.poll(Maintain::WaitForSubmissionIndex(index1));
+        },
+    )
 }
diff --git a/tests/tests/regression/issue_3457.rs b/tests/tests/regression/issue_3457.rs
index 1582277654..2dccd3d427 100644
--- a/tests/tests/regression/issue_3457.rs
+++ b/tests/tests/regression/issue_3457.rs
@@ -144,6 +144,8 @@ fn pass_reset_vertex_buffer() {
                 },
             })],
             depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
         });
 
         double_rpass.set_pipeline(&double_pipeline);
@@ -177,6 +179,8 @@ fn pass_reset_vertex_buffer() {
                 },
             })],
             depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
         });
 
         single_rpass.set_pipeline(&single_pipeline);
diff --git a/tests/tests/regression/issue_4024.rs b/tests/tests/regression/issue_4024.rs
new file mode 100644
index 0000000000..959f58ffa5
--- /dev/null
+++ b/tests/tests/regression/issue_4024.rs
@@ -0,0 +1,91 @@
+use std::sync::Arc;
+
+use parking_lot::Mutex;
+use wgpu_test::{initialize_test, TestParameters};
+
+use wasm_bindgen_test::wasm_bindgen_test;
+use wgpu::*;
+
+/// The WebGPU specification has very specific requirements about the ordering of map_async
+/// and on_submitted_work_done callbacks. Specifically, all map_async callbacks that are initiated
+/// before a given on_submitted_work_done callback must be invoked before the on_submitted_work_done
+/// callback is invoked.
+///
+/// We previously immediately invoked on_submitted_work_done callbacks if there was no active submission
+/// to add them to. This is incorrect, as we do not immediately invoke map_async callbacks.
+#[wasm_bindgen_test]
+#[test]
+fn queue_submitted_callback_ordering() {
+    initialize_test(TestParameters::default(), |ctx| {
+        // Create a mappable buffer
+        let buffer = ctx.device.create_buffer(&BufferDescriptor {
+            label: Some("mappable buffer"),
+            size: 4,
+            usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+
+        // Encode some work using it. The specifics of this work don't matter, just
+        // that the buffer is used.
+        let mut encoder = ctx
+            .device
+            .create_command_encoder(&CommandEncoderDescriptor {
+                label: Some("encoder"),
+            });
+
+        encoder.clear_buffer(&buffer, 0, None);
+
+        // Submit the work.
+        ctx.queue.submit(Some(encoder.finish()));
+        // Ensure the work is finished.
+        ctx.device.poll(MaintainBase::Wait);
+
+        #[derive(Debug)]
+        struct OrderingContext {
+            /// Incremented every time a callback is invoked.
+            /// This allows the callbacks to know their ordering.
+            counter: u8,
+            /// The value of the counter when the map_async callback was invoked.
+            value_read_map_async: Option<u8>,
+            /// The value of the counter when the queue submitted work done callback was invoked.
+            value_read_queue_submitted: Option<u8>,
+        }
+
+        // Create shared ownership of the ordering context, and clone 2 copies.
+        let ordering = Arc::new(Mutex::new(OrderingContext {
+            counter: 0,
+            value_read_map_async: None,
+            value_read_queue_submitted: None,
+        }));
+        let ordering_clone_map_async = Arc::clone(&ordering);
+        let ordering_clone_queue_submitted = Arc::clone(&ordering);
+
+        // Register the callbacks.
+        buffer.slice(..).map_async(MapMode::Read, move |_| {
+            let mut guard = ordering_clone_map_async.lock();
+            guard.value_read_map_async = Some(guard.counter);
+            guard.counter += 1;
+        });
+
+        // If the bug is present, this callback will be invoked immediately inside this function,
+        // despite the fact there is an outstanding map_async callback.
+        ctx.queue.on_submitted_work_done(move || {
+            let mut guard = ordering_clone_queue_submitted.lock();
+            guard.value_read_queue_submitted = Some(guard.counter);
+            guard.counter += 1;
+        });
+
+        // No GPU work is happening at this point, but we want to process callbacks.
+        ctx.device.poll(MaintainBase::Poll);
+
+        // Extract the ordering out of the arc.
+        let ordering = Arc::try_unwrap(ordering).unwrap().into_inner();
+
+        // There were two callbacks invoked
+        assert_eq!(ordering.counter, 2);
+        // The map async callback was invoked first.
+        assert_eq!(ordering.value_read_map_async, Some(0));
+        // The queue submitted work done callback was invoked second.
+        assert_eq!(ordering.value_read_queue_submitted, Some(1));
+    })
+}
diff --git a/tests/tests/regression/issue_4122.rs b/tests/tests/regression/issue_4122.rs
new file mode 100644
index 0000000000..41b9cd4231
--- /dev/null
+++ b/tests/tests/regression/issue_4122.rs
@@ -0,0 +1,110 @@
+use std::{num::NonZeroU64, ops::Range};
+
+use wasm_bindgen_test::wasm_bindgen_test;
+use wgpu_test::{initialize_test, TestParameters, TestingContext};
+
+fn fill_test(ctx: &TestingContext, range: Range<u64>, size: u64) -> bool {
+    let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+        label: Some("gpu_buffer"),
+        size,
+        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::COPY_SRC,
+        mapped_at_creation: false,
+    });
+
+    let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+        label: Some("cpu_buffer"),
+        size,
+        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+        mapped_at_creation: false,
+    });
+
+    // Initialize the whole buffer with values.
+    let buffer_contents = vec![0xFF_u8; size as usize];
+    ctx.queue.write_buffer(&gpu_buffer, 0, &buffer_contents);
+
+    let mut encoder = ctx
+        .device
+        .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+            label: Some("encoder"),
+        });
+
+    encoder.clear_buffer(
+        &gpu_buffer,
+        range.start,
+        NonZeroU64::new(range.end - range.start),
+    );
+    encoder.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, size);
+
+    ctx.queue.submit(Some(encoder.finish()));
+    cpu_buffer.slice(..).map_async(wgpu::MapMode::Read, |_| ());
+    ctx.device.poll(wgpu::Maintain::Wait);
+
+    let buffer_slice = cpu_buffer.slice(..);
+    let buffer_data = buffer_slice.get_mapped_range();
+
+    let first_clear_byte = buffer_data
+        .iter()
+        .enumerate()
+        .find_map(|(index, byte)| (*byte == 0x00).then_some(index))
+        .expect("No clear happened at all");
+
+    let first_dirty_byte = buffer_data
+        .iter()
+        .enumerate()
+        .skip(first_clear_byte)
+        .find_map(|(index, byte)| (*byte != 0x00).then_some(index))
+        .unwrap_or(size as usize);
+
+    let second_clear_byte = buffer_data
+        .iter()
+        .enumerate()
+        .skip(first_dirty_byte)
+        .find_map(|(index, byte)| (*byte == 0x00).then_some(index));
+
+    if second_clear_byte.is_some() {
+        eprintln!("Found multiple cleared ranges instead of a single clear range of {}..{} on a buffer of size {}.", range.start, range.end, size);
+        return false;
+    }
+
+    let cleared_range = first_clear_byte as u64..first_dirty_byte as u64;
+
+    if cleared_range != range {
+        eprintln!(
+            "Cleared range is {}..{}, but the clear range is {}..{} on a buffer of size {}.",
+            cleared_range.start, cleared_range.end, range.start, range.end, size
+        );
+        return false;
+    }
+
+    eprintln!(
+        "Cleared range is {}..{} on a buffer of size {}.",
+        cleared_range.start, cleared_range.end, size
+    );
+
+    true
+}
+
+/// Nvidia has a bug in vkCmdFillBuffer where the clear range is not properly respected under
+/// certain conditions. See https://github.com/gfx-rs/wgpu/issues/4122 for more information.
+///
+/// This test will fail on nvidia if the bug is not properly worked around.
+#[wasm_bindgen_test]
+#[test]
+fn clear_buffer_bug() {
+    initialize_test(TestParameters::default(), |ctx| {
+        // This hits most of the cases in nvidia's clear buffer bug
+        let mut succeeded = true;
+        for power in 4..14 {
+            let size = 1 << power;
+            for start_offset in (0..=36).step_by(4) {
+                for size_offset in (0..=36).step_by(4) {
+                    let range = start_offset..size + size_offset + start_offset;
+                    let result = fill_test(&ctx, range, 1 << 16);
+
+                    succeeded &= result;
+                }
+            }
+        }
+        assert!(succeeded);
+    });
+}
diff --git a/tests/tests/root.rs b/tests/tests/root.rs
index 8ac63d8c89..85901ae491 100644
--- a/tests/tests/root.rs
+++ b/tests/tests/root.rs
@@ -2,17 +2,22 @@ use wasm_bindgen_test::wasm_bindgen_test_configure;
 
 mod regression {
     mod issue_3457;
+    mod issue_4024;
+    mod issue_4122;
 }
 
+mod bind_group_layout_dedup;
 mod buffer;
 mod buffer_copy;
 mod buffer_usages;
 mod clear_texture;
+mod create_surface_error;
 mod device;
 mod encoder;
 mod example_wgsl;
 mod external_texture;
 mod instance;
+mod occlusion_query;
 mod partially_bounded_arrays;
 mod poll;
 mod queue_transfer;
diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs
index 6855b410bd..da050cb61f 100644
--- a/tests/tests/scissor_tests/mod.rs
+++ b/tests/tests/scissor_tests/mod.rs
@@ -79,6 +79,8 @@ fn scissor_test_impl(ctx: &TestingContext, scissor_rect: Rect, expected_data: [u
                     },
                 })],
                 depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
             render_pass.set_pipeline(&pipeline);
             render_pass.set_scissor_rect(
diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs
index 4508033068..498c16c337 100644
--- a/tests/tests/shader/mod.rs
+++ b/tests/tests/shader/mod.rs
@@ -326,6 +326,7 @@ fn shader_input_output_test(
 
         let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor {
             label: Some(&format!("cpass {test_name}")),
+            timestamp_writes: None,
         });
         cpass.set_pipeline(&pipeline);
         cpass.set_bind_group(0, &bg, &[]);
diff --git a/tests/tests/shader/struct_layout.rs b/tests/tests/shader/struct_layout.rs
index bc433b5820..7da8cfeef8 100644
--- a/tests/tests/shader/struct_layout.rs
+++ b/tests/tests/shader/struct_layout.rs
@@ -4,7 +4,7 @@ use wasm_bindgen_test::*;
 use wgpu::{Backends, DownlevelFlags, Features, Limits};
 
 use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE};
-use wgpu_test::{initialize_test, TestParameters};
+use wgpu_test::{initialize_test, FailureCase, TestParameters};
 
 fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest> {
     let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
@@ -182,7 +182,7 @@ fn uniform_input() {
         TestParameters::default()
             .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
             // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/naga/issues/2034
-            .specific_failure(Some(wgpu::Backends::VULKAN), None, None, false)
+            .expect_fail(FailureCase::backend(wgpu::Backends::VULKAN))
             .limits(Limits::downlevel_defaults()),
         |ctx| {
             shader_input_output_test(
@@ -222,7 +222,7 @@ fn push_constant_input() {
                 max_push_constant_size: MAX_BUFFER_SIZE as u32,
                 ..Limits::downlevel_defaults()
             })
-            .backend_failure(Backends::GL),
+            .expect_fail(FailureCase::backend(Backends::GL)),
         |ctx| {
             shader_input_output_test(
                 ctx,
diff --git a/tests/tests/shader/zero_init_workgroup_mem.rs b/tests/tests/shader/zero_init_workgroup_mem.rs
index a666d2aa28..cbd1b3e561 100644
--- a/tests/tests/shader/zero_init_workgroup_mem.rs
+++ b/tests/tests/shader/zero_init_workgroup_mem.rs
@@ -8,7 +8,7 @@ use wgpu::{
     ShaderStages,
 };
 
-use wgpu_test::{initialize_test, TestParameters, TestingContext};
+use wgpu_test::{initialize_test, FailureCase, TestParameters, TestingContext};
 
 #[test]
 fn zero_init_workgroup_mem() {
@@ -18,13 +18,16 @@ fn zero_init_workgroup_mem() {
             .limits(Limits::downlevel_defaults())
             // remove both of these once we get to https://github.com/gfx-rs/wgpu/issues/3193 or
             // https://github.com/gfx-rs/wgpu/issues/3160
-            .specific_failure(
-                Some(Backends::DX12),
-                Some(5140),
-                Some("Microsoft Basic Render Driver"),
-                true,
-            )
-            .specific_failure(Some(Backends::VULKAN), None, Some("swiftshader"), true),
+            .skip(FailureCase {
+                backends: Some(Backends::DX12),
+                vendor: Some(5140),
+                adapter: Some("Microsoft Basic Render Driver"),
+                ..FailureCase::default()
+            })
+            .skip(FailureCase::backend_adapter(
+                Backends::VULKAN,
+                "swiftshader",
+            )),
         zero_init_workgroup_mem_impl,
     );
 }
diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs
index 68daae873e..a05d1cd5f0 100644
--- a/tests/tests/shader_primitive_index/mod.rs
+++ b/tests/tests/shader_primitive_index/mod.rs
@@ -176,6 +176,7 @@ fn pulling_common(
         .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
     {
         let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+            label: None,
             color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                 ops: wgpu::Operations {
                     load: wgpu::LoadOp::Clear(wgpu::Color::WHITE),
@@ -185,7 +186,8 @@ fn pulling_common(
                 view: &color_view,
             })],
             depth_stencil_attachment: None,
-            label: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
         });
 
         rpass.set_pipeline(&pipeline);
diff --git a/tests/tests/shader_view_format/mod.rs b/tests/tests/shader_view_format/mod.rs
index 04d7467d29..46741b4ea8 100644
--- a/tests/tests/shader_view_format/mod.rs
+++ b/tests/tests/shader_view_format/mod.rs
@@ -1,12 +1,17 @@
 use wgpu::{util::DeviceExt, DownlevelFlags, Limits, TextureFormat};
-use wgpu_test::{image::calc_difference, initialize_test, TestParameters, TestingContext};
+use wgpu_test::{
+    image::calc_difference, initialize_test, FailureCase, TestParameters, TestingContext,
+};
 
 #[test]
 fn reinterpret_srgb_ness() {
     let parameters = TestParameters::default()
         .downlevel_flags(DownlevelFlags::VIEW_FORMATS)
         .limits(Limits::downlevel_defaults())
-        .specific_failure(Some(wgpu::Backends::GL), None, None, true);
+        .skip(FailureCase {
+            backends: Some(wgpu::Backends::GL),
+            ..FailureCase::default()
+        });
     initialize_test(parameters, |ctx| {
         let unorm_data: [[u8; 4]; 4] = [
             [180, 0, 0, 255],
@@ -130,13 +135,15 @@ fn reinterpret(
         .device
         .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
     let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+        label: None,
         color_attachments: &[Some(wgpu::RenderPassColorAttachment {
             ops: wgpu::Operations::default(),
             resolve_target: None,
             view: &target_view,
         })],
         depth_stencil_attachment: None,
-        label: None,
+        timestamp_writes: None,
+        occlusion_query_set: None,
     });
     rpass.set_pipeline(&pipeline);
     rpass.set_bind_group(0, &bind_group, &[]);
diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs
index 707a16a903..edd4f7b057 100644
--- a/tests/tests/vertex_indices/mod.rs
+++ b/tests/tests/vertex_indices/mod.rs
@@ -3,7 +3,7 @@ use std::num::NonZeroU64;
 use wasm_bindgen_test::*;
 use wgpu::util::DeviceExt;
 
-use wgpu_test::{initialize_test, TestParameters, TestingContext};
+use wgpu_test::{initialize_test, FailureCase, TestParameters, TestingContext};
 
 fn pulling_common(
     ctx: TestingContext,
@@ -108,13 +108,15 @@ fn pulling_common(
         .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
 
     let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+        label: None,
         color_attachments: &[Some(wgpu::RenderPassColorAttachment {
             ops: wgpu::Operations::default(),
             resolve_target: None,
             view: &dummy,
         })],
         depth_stencil_attachment: None,
-        label: None,
+        timestamp_writes: None,
+        occlusion_query_set: None,
     });
 
     rpass.set_pipeline(&pipeline);
@@ -148,7 +150,7 @@ fn draw_vertex_offset() {
     initialize_test(
         TestParameters::default()
             .test_features_limits()
-            .backend_failure(wgpu::Backends::DX11),
+            .expect_fail(FailureCase::backend(wgpu::Backends::DX11)),
         |ctx| {
             pulling_common(ctx, &[0, 1, 2, 3, 4, 5], |cmb| {
                 cmb.draw(0..3, 0..1);
@@ -174,7 +176,7 @@ fn draw_instanced_offset() {
     initialize_test(
         TestParameters::default()
             .test_features_limits()
-            .backend_failure(wgpu::Backends::DX11),
+            .expect_fail(FailureCase::backend(wgpu::Backends::DX11)),
         |ctx| {
             pulling_common(ctx, &[0, 1, 2, 3, 4, 5], |cmb| {
                 cmb.draw(0..3, 0..1);
diff --git a/tests/tests/write_texture.rs b/tests/tests/write_texture.rs
index 0578c60352..8b33cae7f5 100644
--- a/tests/tests/write_texture.rs
+++ b/tests/tests/write_texture.rs
@@ -1,6 +1,6 @@
 //! Tests for texture copy
 
-use wgpu_test::{initialize_test, TestParameters};
+use wgpu_test::{initialize_test, FailureCase, TestParameters};
 
 use wasm_bindgen_test::*;
 
@@ -8,7 +8,8 @@ use wasm_bindgen_test::*;
 #[wasm_bindgen_test]
 fn write_texture_subset_2d() {
     let size = 256;
-    let parameters = TestParameters::default().backend_failure(wgpu::Backends::DX12);
+    let parameters =
+        TestParameters::default().expect_fail(FailureCase::backend(wgpu::Backends::DX12));
     initialize_test(parameters, |ctx| {
         let tex = ctx.device.create_texture(&wgpu::TextureDescriptor {
             label: None,
diff --git a/tests/tests/zero_init_texture_after_discard.rs b/tests/tests/zero_init_texture_after_discard.rs
index f83576b1d9..2b757e069a 100644
--- a/tests/tests/zero_init_texture_after_discard.rs
+++ b/tests/tests/zero_init_texture_after_discard.rs
@@ -1,38 +1,46 @@
 use wasm_bindgen_test::*;
 use wgpu::*;
-use wgpu_test::{image::ReadbackBuffers, initialize_test, TestParameters, TestingContext};
+use wgpu_test::{
+    image::ReadbackBuffers, initialize_test, FailureCase, TestParameters, TestingContext,
+};
 
 // Checks if discarding a color target resets its init state, causing a zero read of this texture when copied in after submit of the encoder.
 #[test]
 #[wasm_bindgen_test]
 fn discarding_color_target_resets_texture_init_state_check_visible_on_copy_after_submit() {
-    initialize_test(TestParameters::default().webgl2_failure(), |mut ctx| {
-        let mut case = TestCase::new(&mut ctx, TextureFormat::Rgba8UnormSrgb);
-        case.create_command_encoder();
-        case.discard();
-        case.submit_command_encoder();
+    initialize_test(
+        TestParameters::default().skip(FailureCase::webgl2()),
+        |mut ctx| {
+            let mut case = TestCase::new(&mut ctx, TextureFormat::Rgba8UnormSrgb);
+            case.create_command_encoder();
+            case.discard();
+            case.submit_command_encoder();
 
-        case.create_command_encoder();
-        case.copy_texture_to_buffer();
-        case.submit_command_encoder();
+            case.create_command_encoder();
+            case.copy_texture_to_buffer();
+            case.submit_command_encoder();
 
-        case.assert_buffers_are_zero();
-    });
+            case.assert_buffers_are_zero();
+        },
+    );
 }
 
 // Checks if discarding a color target resets its init state, causing a zero read of this texture when copied in the same encoder to a buffer.
 #[test]
 #[wasm_bindgen_test]
 fn discarding_color_target_resets_texture_init_state_check_visible_on_copy_in_same_encoder() {
-    initialize_test(TestParameters::default().webgl2_failure(), |mut ctx| {
-        let mut case = TestCase::new(&mut ctx, TextureFormat::Rgba8UnormSrgb);
-        case.create_command_encoder();
-        case.discard();
-        case.copy_texture_to_buffer();
-        case.submit_command_encoder();
+    initialize_test(
+        TestParameters::default().skip(FailureCase::webgl2()),
+        |mut ctx| {
+            let mut case = TestCase::new(&mut ctx, TextureFormat::Rgba8UnormSrgb);
+            case.create_command_encoder();
+            case.discard();
+            case.copy_texture_to_buffer();
+            case.submit_command_encoder();
 
-        case.assert_buffers_are_zero();
-    });
+            case.assert_buffers_are_zero();
+        },
+    );
 }
 
 #[test]
@@ -154,6 +162,8 @@ impl<'ctx> TestCase<'ctx> {
                         store: true,
                     }),
                 }),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
             ctx.queue.submit([encoder.finish()]);
         } else {
@@ -237,6 +247,8 @@ impl<'ctx> TestCase<'ctx> {
                         }),
                     },
                 ),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
     }
 
@@ -260,6 +272,8 @@ impl<'ctx> TestCase<'ctx> {
                         }),
                     },
                 ),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
     }
 
@@ -283,6 +297,8 @@ impl<'ctx> TestCase<'ctx> {
                         }),
                     },
                 ),
+                timestamp_writes: None,
+                occlusion_query_set: None,
             });
     }
 
diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml
index dc5352979e..5487a8bdc0 100644
--- a/wgpu-core/Cargo.toml
+++ b/wgpu-core/Cargo.toml
@@ -72,7 +72,7 @@ thiserror = "1"
 
 [dependencies.naga]
 git = "https://github.com/gfx-rs/naga"
-rev = "bac2d82a430fbfcf100ee22b7c3bc12f3d593079"
+rev = "cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c"
 version = "0.13.0"
 features = ["clone", "span", "validate"]
 
diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index a06476a212..e9723fa60c 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -436,6 +436,8 @@ pub struct BindGroupLayoutDescriptor<'a> {
 
 pub(crate) type BindEntryMap = FastHashMap<u32, wgt::BindGroupLayoutEntry>;
 
+pub type BindGroupLayouts<A> = crate::storage::Storage<BindGroupLayout<A>, BindGroupLayoutId>;
+
 /// Bind group layout.
 ///
 /// The lifetime of BGLs is a bit special. They are only referenced on CPU
@@ -450,6 +452,12 @@ pub struct BindGroupLayout<A: hal::Api> {
     pub(crate) device_id: Stored<DeviceId>,
     pub(crate) multi_ref_count: MultiRefCount,
     pub(crate) entries: BindEntryMap,
+    // When a layout is created and there already exists a compatible layout, the new layout
+    // keeps a reference to the older compatible one. In some places we substitute the
+    // bind group layout id with its compatible sibling.
+    // Since this substitution can come at a cost, it is skipped when wgpu-core generates
+    // its own resource IDs.
+    pub(crate) compatible_layout: Option<Valid<BindGroupLayoutId>>,
     #[allow(unused)]
     pub(crate) dynamic_count: usize,
     pub(crate) count_validator: BindingTypeMaxCountValidator,
@@ -472,6 +480,30 @@ impl<A: hal::Api> Resource for BindGroupLayout<A> {
     }
 }
 
+// If a bind group layout needs to be substituted with its compatible equivalent, return the latter.
+pub(crate) fn try_get_bind_group_layout<A: HalApi>(
+    layouts: &BindGroupLayouts<A>,
+    id: BindGroupLayoutId,
+) -> Option<&BindGroupLayout<A>> {
+    let layout = layouts.get(id).ok()?;
+    if let Some(compat) = layout.compatible_layout {
+        return Some(&layouts[compat]);
+    }
+
+    Some(layout)
+}
+
+pub(crate) fn get_bind_group_layout<A: HalApi>(
+    layouts: &BindGroupLayouts<A>,
+    id: Valid<BindGroupLayoutId>,
+) -> (Valid<BindGroupLayoutId>, &BindGroupLayout<A>) {
+    let layout = &layouts[id];
+    layout
+        .compatible_layout
+        .map(|compat| (compat, &layouts[compat]))
+        .unwrap_or((id, layout))
+}
+
 #[derive(Clone, Debug, Error)]
 #[non_exhaustive]
 pub enum CreatePipelineLayoutError {
diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index 9b87028e38..05f90c6bc9 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -1,8 +1,10 @@
 use crate::{
-    binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PipelineLayout},
+    binding_model::{
+        BindGroup, BindGroupLayouts, LateMinBufferBindingSizeMismatch, PipelineLayout,
+    },
     device::SHADER_STAGE_COUNT,
     hal_api::HalApi,
-    id::{BindGroupId, BindGroupLayoutId, PipelineLayoutId, Valid},
+    id::{BindGroupId, PipelineLayoutId, Valid},
     pipeline::LateSizedBufferGroup,
     storage::Storage,
     Stored,
@@ -13,37 +15,42 @@ use arrayvec::ArrayVec;
 type BindGroupMask = u8;
 
 mod compat {
+    use crate::{
+        binding_model::BindGroupLayouts,
+        id::{BindGroupLayoutId, Valid},
+    };
     use std::ops::Range;
 
-    #[derive(Debug)]
-    struct Entry<T> {
-        assigned: Option<T>,
-        expected: Option<T>,
+    #[derive(Debug, Default)]
+    struct Entry {
+        assigned: Option<Valid<BindGroupLayoutId>>,
+        expected: Option<Valid<BindGroupLayoutId>>,
     }
-    impl<T> Default for Entry<T> {
-        fn default() -> Self {
-            Self {
-                assigned: None,
-                expected: None,
-            }
-        }
-    }
-    impl<T: Copy + PartialEq> Entry<T> {
+
+    impl Entry {
         fn is_active(&self) -> bool {
             self.assigned.is_some() && self.expected.is_some()
         }
 
-        fn is_valid(&self) -> bool {
-            self.expected.is_none() || self.expected == self.assigned
+        fn is_valid<A: hal::Api>(&self, bind_group_layouts: &BindGroupLayouts<A>) -> bool {
+            if self.expected.is_none() || self.expected == self.assigned {
+                return true;
+            }
+
+            if let Some(id) = self.assigned {
+                return bind_group_layouts[id].compatible_layout == self.expected;
+            }
+
+            false
         }
     }
 
     #[derive(Debug)]
-    pub struct Manager<T> {
-        entries: [Entry<T>; hal::MAX_BIND_GROUPS],
+    pub(crate) struct BoundBindGroupLayouts {
+        entries: [Entry; hal::MAX_BIND_GROUPS],
     }
 
-    impl<T: Copy + PartialEq> Manager<T> {
+    impl BoundBindGroupLayouts {
         pub fn new() -> Self {
             Self {
                 entries: Default::default(),
@@ -60,7 +67,10 @@ mod compat {
             start_index..end.max(start_index)
         }
 
-        pub fn update_expectations(&mut self, expectations: &[T]) -> Range<usize> {
+        pub fn update_expectations(
+            &mut self,
+            expectations: &[Valid<BindGroupLayoutId>],
+        ) -> Range<usize> {
             let start_index = self
                 .entries
                 .iter()
@@ -79,7 +89,7 @@ mod compat {
             self.make_range(start_index)
         }
 
-        pub fn assign(&mut self, index: usize, value: T) -> Range<usize> {
+        pub fn assign(&mut self, index: usize, value: Valid<BindGroupLayoutId>) -> Range<usize> {
             self.entries[index].assigned = Some(value);
             self.make_range(index)
         }
@@ -91,9 +101,12 @@ mod compat {
                 .filter_map(|(i, e)| if e.is_active() { Some(i) } else { None })
         }
 
-        pub fn invalid_mask(&self) -> super::BindGroupMask {
+        pub fn invalid_mask<A: hal::Api>(
+            &self,
+            bind_group_layouts: &BindGroupLayouts<A>,
+        ) -> super::BindGroupMask {
             self.entries.iter().enumerate().fold(0, |mask, (i, entry)| {
-                if entry.is_valid() {
+                if entry.is_valid(bind_group_layouts) {
                     mask
                 } else {
                     mask | 1u8 << i
@@ -104,32 +117,36 @@ mod compat {
 
     #[test]
     fn test_compatibility() {
-        let mut man = Manager::<i32>::new();
+        fn id(val: u32) -> Valid<BindGroupLayoutId> {
+            BindGroupLayoutId::dummy(val)
+        }
+
+        let mut man = BoundBindGroupLayouts::new();
         man.entries[0] = Entry {
-            expected: Some(3),
-            assigned: Some(2),
+            expected: Some(id(3)),
+            assigned: Some(id(2)),
         };
         man.entries[1] = Entry {
-            expected: Some(1),
-            assigned: Some(1),
+            expected: Some(id(1)),
+            assigned: Some(id(1)),
         };
         man.entries[2] = Entry {
-            expected: Some(4),
-            assigned: Some(5),
+            expected: Some(id(4)),
+            assigned: Some(id(5)),
         };
         // check that we rebind [1] after [0] became compatible
-        assert_eq!(man.assign(0, 3), 0..2);
+        assert_eq!(man.assign(0, id(3)), 0..2);
         // check that nothing is rebound
-        assert_eq!(man.update_expectations(&[3, 2]), 1..1);
+        assert_eq!(man.update_expectations(&[id(3), id(2)]), 1..1);
         // check that [1] and [2] are rebound on expectations change
-        assert_eq!(man.update_expectations(&[3, 1, 5]), 1..3);
+        assert_eq!(man.update_expectations(&[id(3), id(1), id(5)]), 1..3);
         // reset the first two bindings
-        assert_eq!(man.update_expectations(&[4, 6, 5]), 0..0);
+        assert_eq!(man.update_expectations(&[id(4), id(6), id(5)]), 0..0);
         // check that nothing is rebound, even if there is a match,
         // since earlier binding is incompatible.
-        assert_eq!(man.assign(1, 6), 1..1);
+        assert_eq!(man.assign(1, id(6)), 1..1);
         // finally, bind everything
-        assert_eq!(man.assign(0, 4), 0..3);
+        assert_eq!(man.assign(0, id(4)), 0..3);
     }
 }
 
@@ -161,7 +178,7 @@ impl EntryPayload {
 #[derive(Debug)]
 pub(super) struct Binder {
     pub(super) pipeline_layout_id: Option<Valid<PipelineLayoutId>>, //TODO: strongly `Stored`
-    manager: compat::Manager<Valid<BindGroupLayoutId>>,
+    manager: compat::BoundBindGroupLayouts,
     payloads: [EntryPayload; hal::MAX_BIND_GROUPS],
 }
 
@@ -169,14 +186,14 @@ impl Binder {
     pub(super) fn new() -> Self {
         Self {
             pipeline_layout_id: None,
-            manager: compat::Manager::new(),
+            manager: compat::BoundBindGroupLayouts::new(),
             payloads: Default::default(),
         }
     }
 
     pub(super) fn reset(&mut self) {
         self.pipeline_layout_id = None;
-        self.manager = compat::Manager::new();
+        self.manager = compat::BoundBindGroupLayouts::new();
         for payload in self.payloads.iter_mut() {
             payload.reset();
         }
@@ -275,8 +292,11 @@ impl Binder {
             .map(move |index| payloads[index].group_id.as_ref().unwrap().value)
     }
 
-    pub(super) fn invalid_mask(&self) -> BindGroupMask {
-        self.manager.invalid_mask()
+    pub(super) fn invalid_mask<A: hal::Api>(
+        &self,
+        bind_group_layouts: &BindGroupLayouts<A>,
+    ) -> BindGroupMask {
+        self.manager.invalid_mask(bind_group_layouts)
     }
 
     /// Scan active buffer bindings corresponding to layouts without `min_binding_size` specified.
diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index 5c4ca122a8..0a4660a798 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -637,6 +637,8 @@ impl RenderBundleEncoder {
                 RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(),
                 RenderCommand::PopDebugGroup => unimplemented!(),
                 RenderCommand::WriteTimestamp { .. } // Must check the TIMESTAMP_QUERY_INSIDE_PASSES feature
+                | RenderCommand::BeginOcclusionQuery { .. }
+                | RenderCommand::EndOcclusionQuery
                 | RenderCommand::BeginPipelineStatisticsQuery { .. }
                 | RenderCommand::EndPipelineStatisticsQuery => unimplemented!(),
                 RenderCommand::ExecuteBundle(_)
@@ -950,6 +952,8 @@ impl<A: HalApi> RenderBundle<A> {
                     return Err(ExecutionError::Unimplemented("debug-markers"))
                 }
                 RenderCommand::WriteTimestamp { .. }
+                | RenderCommand::BeginOcclusionQuery { .. }
+                | RenderCommand::EndOcclusionQuery
                 | RenderCommand::BeginPipelineStatisticsQuery { .. }
                 | RenderCommand::EndPipelineStatisticsQuery => {
                     return Err(ExecutionError::Unimplemented("queries"))
diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs
index 9f677298b9..ceceb2ba58 100644
--- a/wgpu-core/src/command/clear.rs
+++ b/wgpu-core/src/command/clear.rs
@@ -452,6 +452,8 @@ fn clear_texture_via_render_passes<A: hal::Api>(
                     color_attachments,
                     depth_stencil_attachment,
                     multiview: None,
+                    timestamp_writes: None,
+                    occlusion_query_set: None,
                 });
                 encoder.end_render_pass();
             }
diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index 0a0b4e85e6..c9b20c5a0e 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -1,6 +1,7 @@
 use crate::{
     binding_model::{
-        BindError, BindGroup, LateMinBufferBindingSizeMismatch, PushConstantUploadError,
+        BindError, BindGroup, BindGroupLayouts, LateMinBufferBindingSizeMismatch,
+        PushConstantUploadError,
     },
     command::{
         bind::Binder,
@@ -26,6 +27,11 @@ use crate::{
 };
 
 use hal::CommandEncoder as _;
+#[cfg(any(feature = "serial-pass", feature = "replay"))]
+use serde::Deserialize;
+#[cfg(any(feature = "serial-pass", feature = "trace"))]
+use serde::Serialize;
+
 use thiserror::Error;
 
 use std::{fmt, mem, str};
@@ -94,6 +100,7 @@ pub enum ComputeCommand {
 pub struct ComputePass {
     base: BasePass<ComputeCommand>,
     parent_id: id::CommandEncoderId,
+    timestamp_writes: Option<ComputePassTimestampWrites>,
 
     // Resource binding dedupe state.
     #[cfg_attr(feature = "serial-pass", serde(skip))]
@@ -107,6 +114,7 @@ impl ComputePass {
         Self {
             base: BasePass::new(&desc.label),
             parent_id,
+            timestamp_writes: desc.timestamp_writes.cloned(),
 
             current_bind_groups: BindGroupStateChange::new(),
             current_pipeline: StateChange::new(),
@@ -119,7 +127,10 @@ impl ComputePass {
 
     #[cfg(feature = "trace")]
     pub fn into_command(self) -> crate::device::trace::Command {
-        crate::device::trace::Command::RunComputePass { base: self.base }
+        crate::device::trace::Command::RunComputePass {
+            base: self.base,
+            timestamp_writes: self.timestamp_writes,
+        }
     }
 }
 
@@ -135,9 +146,25 @@ impl fmt::Debug for ComputePass {
     }
 }
 
+/// Describes the writing of timestamp values in a compute pass.
+#[repr(C)]
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))]
+#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))]
+pub struct ComputePassTimestampWrites {
+    /// The query set to write the timestamps to.
+    pub query_set: id::QuerySetId,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+
 #[derive(Clone, Debug, Default)]
 pub struct ComputePassDescriptor<'a> {
     pub label: Label<'a>,
+    /// Defines where and when timestamp values will be written for this pass.
+    pub timestamp_writes: Option<&'a ComputePassTimestampWrites>,
 }
 
 #[derive(Clone, Debug, Error, Eq, PartialEq)]
@@ -257,8 +284,8 @@ struct State<A: HalApi> {
 }
 
 impl<A: HalApi> State<A> {
-    fn is_ready(&self) -> Result<(), DispatchError> {
-        let bind_mask = self.binder.invalid_mask();
+    fn is_ready(&self, bind_group_layouts: &BindGroupLayouts<A>) -> Result<(), DispatchError> {
+        let bind_mask = self.binder.invalid_mask(bind_group_layouts);
         if bind_mask != 0 {
             //let (expected, provided) = self.binder.entries[index as usize].info();
             return Err(DispatchError::IncompatibleBindGroup {
@@ -325,7 +352,11 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         encoder_id: id::CommandEncoderId,
         pass: &ComputePass,
     ) -> Result<(), ComputePassError> {
-        self.command_encoder_run_compute_pass_impl::<A>(encoder_id, pass.base.as_ref())
+        self.command_encoder_run_compute_pass_impl::<A>(
+            encoder_id,
+            pass.base.as_ref(),
+            pass.timestamp_writes.as_ref(),
+        )
     }
 
     #[doc(hidden)]
@@ -333,6 +364,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         &self,
         encoder_id: id::CommandEncoderId,
         base: BasePassRef<ComputeCommand>,
+        timestamp_writes: Option<&ComputePassTimestampWrites>,
     ) -> Result<(), ComputePassError> {
         profiling::scope!("CommandEncoder::run_compute_pass");
         let init_scope = PassErrorScope::Pass(encoder_id);
@@ -363,6 +395,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         if let Some(ref mut list) = cmd_buf.commands {
             list.push(crate::device::trace::Command::RunComputePass {
                 base: BasePass::from_ref(base),
+                timestamp_writes: timestamp_writes.cloned(),
             });
         }
 
@@ -371,6 +404,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token);
         let (pipeline_guard, mut token) = hub.compute_pipelines.read(&mut token);
         let (query_set_guard, mut token) = hub.query_sets.read(&mut token);
+        let (bind_group_layout_guard, mut token) = hub.bind_group_layouts.read(&mut token);
         let (buffer_guard, mut token) = hub.buffers.read(&mut token);
         let (texture_guard, _) = hub.textures.read(&mut token);
 
@@ -385,6 +419,42 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let mut string_offset = 0;
         let mut active_query = None;
 
+        let timestamp_writes = if let Some(tw) = timestamp_writes {
+            let query_set: &resource::QuerySet<A> = cmd_buf
+                .trackers
+                .query_sets
+                .add_single(&*query_set_guard, tw.query_set)
+                .ok_or(ComputePassErrorInner::InvalidQuerySet(tw.query_set))
+                .map_pass_err(init_scope)?;
+
+            // Unlike in render passes we can't delay resetting the query sets since
+            // there is no auxiliary pass.
+            let range = if let (Some(index_a), Some(index_b)) =
+                (tw.beginning_of_pass_write_index, tw.end_of_pass_write_index)
+            {
+                Some(index_a.min(index_b)..index_a.max(index_b) + 1)
+            } else {
+                tw.beginning_of_pass_write_index
+                    .or(tw.end_of_pass_write_index)
+                    .map(|i| i..i + 1)
+            };
+            // Range should always be Some; both values being None should lead to a validation error.
+            // But no point in erroring over that nuance here!
+            if let Some(range) = range {
+                unsafe {
+                    raw.reset_queries(&query_set.raw, range);
+                }
+            }
+
+            Some(hal::ComputePassTimestampWrites {
+                query_set: &query_set.raw,
+                beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
+                end_of_pass_write_index: tw.end_of_pass_write_index,
+            })
+        } else {
+            None
+        };
+
         cmd_buf.trackers.set_size(
             Some(&*buffer_guard),
             Some(&*texture_guard),
@@ -397,7 +467,11 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             Some(&*query_set_guard),
         );
 
-        let hal_desc = hal::ComputePassDescriptor { label: base.label };
+        let hal_desc = hal::ComputePassDescriptor {
+            label: base.label,
+            timestamp_writes,
+        };
+
         unsafe {
             raw.begin_compute_pass(&hal_desc);
         }
@@ -591,7 +665,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                         pipeline: state.pipeline,
                     };
 
-                    state.is_ready().map_pass_err(scope)?;
+                    state
+                        .is_ready(&*bind_group_layout_guard)
+                        .map_pass_err(scope)?;
                     state
                         .flush_states(
                             raw,
@@ -628,7 +704,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                         pipeline: state.pipeline,
                     };
 
-                    state.is_ready().map_pass_err(scope)?;
+                    state
+                        .is_ready(&*bind_group_layout_guard)
+                        .map_pass_err(scope)?;
 
                     device
                         .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)
diff --git a/wgpu-core/src/command/draw.rs b/wgpu-core/src/command/draw.rs
index b629ffaba0..50ca9516b4 100644
--- a/wgpu-core/src/command/draw.rs
+++ b/wgpu-core/src/command/draw.rs
@@ -89,8 +89,8 @@ pub enum RenderCommandError {
     MissingTextureUsage(#[from] MissingTextureUsageError),
     #[error(transparent)]
     PushConstants(#[from] PushConstantUploadError),
-    #[error("Viewport width {0} and/or height {1} are less than or equal to 0")]
-    InvalidViewportDimension(f32, f32),
+    #[error("Viewport has invalid rect {0:?}; origin and/or size is less than or equal to 0, and/or is not contained in the render target {1:?}")]
+    InvalidViewportRect(Rect<f32>, wgt::Extent3d),
     #[error("Viewport minDepth {0} and/or maxDepth {1} are not in [0, 1]")]
     InvalidViewportDepth(f32, f32),
     #[error("Scissor {0:?} is not contained in the render target {1:?}")]
@@ -243,6 +243,10 @@ pub enum RenderCommand {
         query_set_id: id::QuerySetId,
         query_index: u32,
     },
+    BeginOcclusionQuery {
+        query_index: u32,
+    },
+    EndOcclusionQuery,
     BeginPipelineStatisticsQuery {
         query_set_id: id::QuerySetId,
         query_index: u32,
diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs
index 311ade7f43..d97d4b41e3 100644
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@@ -112,6 +112,7 @@ pub struct CommandBuffer<A: HalApi> {
     pub(crate) trackers: Tracker<A>,
     buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
     texture_memory_actions: CommandBufferTextureMemoryActions,
+    pub(crate) pending_query_resets: QueryResetMap<A>,
     limits: wgt::Limits,
     support_clear_texture: bool,
     #[cfg(feature = "trace")]
@@ -140,6 +141,7 @@ impl<A: HalApi> CommandBuffer<A> {
             trackers: Tracker::new(),
             buffer_memory_init_actions: Default::default(),
             texture_memory_actions: Default::default(),
+            pending_query_resets: QueryResetMap::new(),
             limits,
             support_clear_texture: features.contains(wgt::Features::CLEAR_TEXTURE),
             #[cfg(feature = "trace")]
@@ -592,6 +594,10 @@ pub enum PassErrorScope {
     QueryReset,
     #[error("In a write_timestamp command")]
     WriteTimestamp,
+    #[error("In a begin_occlusion_query command")]
+    BeginOcclusionQuery,
+    #[error("In a end_occlusion_query command")]
+    EndOcclusionQuery,
     #[error("In a begin_pipeline_statistics_query command")]
     BeginPipelineStatisticsQuery,
     #[error("In a end_pipeline_statistics_query command")]
diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs
index c34aa48c9c..14e91483e8 100644
--- a/wgpu-core/src/command/query.rs
+++ b/wgpu-core/src/command/query.rs
@@ -19,7 +19,7 @@ use thiserror::Error;
 use wgt::BufferAddress;
 
 #[derive(Debug)]
-pub(super) struct QueryResetMap<A: hal::Api> {
+pub(crate) struct QueryResetMap<A: hal::Api> {
     map: FastHashMap<Index, (Vec<bool>, Epoch)>,
     _phantom: PhantomData<A>,
 }
@@ -47,12 +47,12 @@ impl<A: hal::Api> QueryResetMap<A> {
     }
 
     pub fn reset_queries(
-        self,
+        &mut self,
         raw_encoder: &mut A::CommandEncoder,
         query_set_storage: &Storage<QuerySet<A>, id::QuerySetId>,
         backend: wgt::Backend,
     ) -> Result<(), id::QuerySetId> {
-        for (query_set_id, (state, epoch)) in self.map.into_iter() {
+        for (query_set_id, (state, epoch)) in self.map.drain() {
             let id = Id::zip(query_set_id, epoch, backend);
             let query_set = query_set_storage.get(id).map_err(|_| id)?;
 
@@ -240,6 +240,40 @@ impl<A: HalApi> QuerySet<A> {
         Ok(())
     }
 
+    pub(super) fn validate_and_begin_occlusion_query(
+        &self,
+        raw_encoder: &mut A::CommandEncoder,
+        query_set_id: id::QuerySetId,
+        query_index: u32,
+        reset_state: Option<&mut QueryResetMap<A>>,
+        active_query: &mut Option<(id::QuerySetId, u32)>,
+    ) -> Result<(), QueryUseError> {
+        let needs_reset = reset_state.is_none();
+        let query_set = self.validate_query(
+            query_set_id,
+            SimplifiedQueryType::Occlusion,
+            query_index,
+            reset_state,
+        )?;
+
+        if let Some((_old_id, old_idx)) = active_query.replace((query_set_id, query_index)) {
+            return Err(QueryUseError::AlreadyStarted {
+                active_query_index: old_idx,
+                new_query_index: query_index,
+            });
+        }
+
+        unsafe {
+            // If we don't have a reset state tracker which can defer resets, we must reset now.
+            if needs_reset {
+                raw_encoder.reset_queries(&self.raw, query_index..(query_index + 1));
+            }
+            raw_encoder.begin_query(query_set, query_index);
+        }
+
+        Ok(())
+    }
+
     pub(super) fn validate_and_begin_pipeline_statistics_query(
         &self,
         raw_encoder: &mut A::CommandEncoder,
@@ -275,6 +309,23 @@ impl<A: HalApi> QuerySet<A> {
     }
 }
 
+pub(super) fn end_occlusion_query<A: HalApi>(
+    raw_encoder: &mut A::CommandEncoder,
+    storage: &Storage<QuerySet<A>, id::QuerySetId>,
+    active_query: &mut Option<(id::QuerySetId, u32)>,
+) -> Result<(), QueryUseError> {
+    if let Some((query_set_id, query_index)) = active_query.take() {
+        // We can unwrap here as the validity was validated when the active query was set
+        let query_set = storage.get(query_set_id).unwrap();
+
+        unsafe { raw_encoder.end_query(&query_set.raw, query_index) };
+
+        Ok(())
+    } else {
+        Err(QueryUseError::AlreadyStopped)
+    }
+}
+
 pub(super) fn end_pipeline_statistics_query<A: HalApi>(
     raw_encoder: &mut A::CommandEncoder,
     storage: &Storage<QuerySet<A>, id::QuerySetId>,
@@ -411,6 +462,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             .into());
         }
 
+        // TODO(https://github.com/gfx-rs/wgpu/issues/3993): Need to track initialization state.
         cmd_buf
             .buffer_memory_init_actions
             .extend(dst_buffer.initialization_status.create_action(
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index e8080abba3..f9202ee3f5 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1,13 +1,13 @@
 use crate::{
-    binding_model::BindError,
+    binding_model::{BindError, BindGroupLayouts},
     command::{
         self,
         bind::Binder,
-        end_pipeline_statistics_query,
+        end_occlusion_query, end_pipeline_statistics_query,
         memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState},
         BasePass, BasePassRef, BindGroupStateChange, CommandBuffer, CommandEncoderError,
-        CommandEncoderStatus, DrawError, ExecutionError, MapPassErr, PassErrorScope, QueryResetMap,
-        QueryUseError, RenderCommand, RenderCommandError, StateChange,
+        CommandEncoderStatus, DrawError, ExecutionError, MapPassErr, PassErrorScope, QueryUseError,
+        RenderCommand, RenderCommandError, StateChange,
     },
     device::{
         AttachmentData, Device, MissingDownlevelFlags, MissingFeatures,
@@ -21,7 +21,7 @@ use crate::{
     identity::GlobalIdentityHandlerFactory,
     init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction},
     pipeline::{self, PipelineFlags},
-    resource::{self, Buffer, Texture, TextureView, TextureViewNotRenderableReason},
+    resource::{Buffer, QuerySet, Texture, TextureView, TextureViewNotRenderableReason},
     storage::Storage,
     track::{TextureSelector, UsageConflict, UsageScope},
     validation::{
@@ -179,6 +179,31 @@ impl RenderPassDepthStencilAttachment {
     }
 }
 
+/// Location to write a timestamp to (beginning or end of the pass).
+#[repr(C)]
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
+#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))]
+#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))]
+#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
+pub enum RenderPassTimestampLocation {
+    Beginning = 0,
+    End = 1,
+}
+
+/// Describes the writing of timestamp values in a render pass.
+#[repr(C)]
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))]
+#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))]
+pub struct RenderPassTimestampWrites {
+    /// The query set to write the timestamp to.
+    pub query_set: id::QuerySetId,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+
 /// Describes the attachments of a render pass.
 #[derive(Clone, Debug, Default, PartialEq)]
 pub struct RenderPassDescriptor<'a> {
@@ -187,6 +212,10 @@ pub struct RenderPassDescriptor<'a> {
     pub color_attachments: Cow<'a, [Option<RenderPassColorAttachment>]>,
     /// The depth and stencil attachment of the render pass, if any.
     pub depth_stencil_attachment: Option<&'a RenderPassDepthStencilAttachment>,
+    /// Defines where and when timestamp values will be written for this pass.
+    pub timestamp_writes: Option<&'a RenderPassTimestampWrites>,
+    /// Defines where the occlusion query results will be stored for this pass.
+    pub occlusion_query_set: Option<id::QuerySetId>,
 }
 
 #[cfg_attr(feature = "serial-pass", derive(Deserialize, Serialize))]
@@ -195,6 +224,8 @@ pub struct RenderPass {
     parent_id: id::CommandEncoderId,
     color_targets: ArrayVec<Option<RenderPassColorAttachment>, { hal::MAX_COLOR_ATTACHMENTS }>,
     depth_stencil_target: Option<RenderPassDepthStencilAttachment>,
+    timestamp_writes: Option<RenderPassTimestampWrites>,
+    occlusion_query_set_id: Option<id::QuerySetId>,
 
     // Resource binding dedupe state.
     #[cfg_attr(feature = "serial-pass", serde(skip))]
@@ -210,6 +241,8 @@ impl RenderPass {
             parent_id,
             color_targets: desc.color_attachments.iter().cloned().collect(),
             depth_stencil_target: desc.depth_stencil_attachment.cloned(),
+            timestamp_writes: desc.timestamp_writes.cloned(),
+            occlusion_query_set_id: desc.occlusion_query_set,
 
             current_bind_groups: BindGroupStateChange::new(),
             current_pipeline: StateChange::new(),
@@ -226,6 +259,8 @@ impl RenderPass {
             base: self.base,
             target_colors: self.color_targets.into_iter().collect(),
             target_depth_stencil: self.depth_stencil_target,
+            timestamp_writes: self.timestamp_writes,
+            occlusion_query_set_id: self.occlusion_query_set_id,
         }
     }
 
@@ -386,7 +421,11 @@ struct State {
 }
 
 impl State {
-    fn is_ready(&self, indexed: bool) -> Result<(), DrawError> {
+    fn is_ready<A: hal::Api>(
+        &self,
+        indexed: bool,
+        bind_group_layouts: &BindGroupLayouts<A>,
+    ) -> Result<(), DrawError> {
         // Determine how many vertex buffers have already been bound
         let vertex_buffer_count = self.vertex.inputs.iter().take_while(|v| v.bound).count() as u32;
         // Compare with the needed quantity
@@ -396,7 +435,7 @@ impl State {
             });
         }
 
-        let bind_mask = self.binder.invalid_mask();
+        let bind_mask = self.binder.invalid_mask(bind_group_layouts);
         if bind_mask != 0 {
             //let (expected, provided) = self.binder.entries[index as usize].info();
             return Err(DrawError::IncompatibleBindGroup {
@@ -589,6 +628,10 @@ pub enum RenderPassErrorInner {
         "Multiview pass texture views with more than one array layer must have D2Array dimension"
     )]
     MultiViewDimensionMismatch,
+    #[error("QuerySet {0:?} is invalid")]
+    InvalidQuerySet(id::QuerySetId),
+    #[error("missing occlusion query set")]
+    MissingOcclusionQuerySet,
 }
 
 impl PrettyError for RenderPassErrorInner {
@@ -718,10 +761,13 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> {
         label: Option<&str>,
         color_attachments: &[Option<RenderPassColorAttachment>],
         depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>,
+        timestamp_writes: Option<&RenderPassTimestampWrites>,
+        occlusion_query_set: Option<id::QuerySetId>,
         cmd_buf: &mut CommandBuffer<A>,
         view_guard: &'a Storage<TextureView<A>, id::TextureViewId>,
         buffer_guard: &'a Storage<Buffer<A>, id::BufferId>,
         texture_guard: &'a Storage<Texture<A>, id::TextureId>,
+        query_set_guard: &'a Storage<QuerySet<A>, id::QuerySetId>,
     ) -> Result<Self, RenderPassErrorInner> {
         profiling::scope!("RenderPassInfo::start");
 
@@ -918,7 +964,13 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> {
 
             (is_depth_read_only, is_stencil_read_only) = at.depth_stencil_read_only(ds_aspects)?;
 
-            let usage = if is_depth_read_only && is_stencil_read_only {
+            let usage = if is_depth_read_only
+                && is_stencil_read_only
+                && device
+                    .downlevel
+                    .flags
+                    .contains(wgt::DownlevelFlags::READ_ONLY_DEPTH_STENCIL)
+            {
                 hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::RESOURCE
             } else {
                 hal::TextureUses::DEPTH_STENCIL_WRITE
@@ -1083,6 +1135,45 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> {
             multiview,
         };
 
+        let timestamp_writes = if let Some(tw) = timestamp_writes {
+            let query_set = cmd_buf
+                .trackers
+                .query_sets
+                .add_single(query_set_guard, tw.query_set)
+                .ok_or(RenderPassErrorInner::InvalidQuerySet(tw.query_set))?;
+
+            if let Some(index) = tw.beginning_of_pass_write_index {
+                cmd_buf
+                    .pending_query_resets
+                    .use_query_set(tw.query_set, query_set, index);
+            }
+            if let Some(index) = tw.end_of_pass_write_index {
+                cmd_buf
+                    .pending_query_resets
+                    .use_query_set(tw.query_set, query_set, index);
+            }
+
+            Some(hal::RenderPassTimestampWrites {
+                query_set: &query_set.raw,
+                beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
+                end_of_pass_write_index: tw.end_of_pass_write_index,
+            })
+        } else {
+            None
+        };
+
+        let occlusion_query_set = if let Some(occlusion_query_set) = occlusion_query_set {
+            let query_set = cmd_buf
+                .trackers
+                .query_sets
+                .add_single(query_set_guard, occlusion_query_set)
+                .ok_or(RenderPassErrorInner::InvalidQuerySet(occlusion_query_set))?;
+
+            Some(&query_set.raw)
+        } else {
+            None
+        };
+
         let hal_desc = hal::RenderPassDescriptor {
             label,
             extent,
@@ -1090,6 +1181,8 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> {
             color_attachments: &colors,
             depth_stencil_attachment: depth_stencil,
             multiview,
+            timestamp_writes,
+            occlusion_query_set,
         };
         unsafe {
             cmd_buf.encoder.raw.begin_render_pass(&hal_desc);
@@ -1177,6 +1270,8 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> {
                     clear_value: (0.0, 0),
                 }),
                 multiview: self.multiview,
+                timestamp_writes: None,
+                occlusion_query_set: None,
             };
             unsafe {
                 raw.begin_render_pass(&desc);
@@ -1201,6 +1296,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             pass.base.as_ref(),
             &pass.color_targets,
             pass.depth_stencil_target.as_ref(),
+            pass.timestamp_writes.as_ref(),
+            pass.occlusion_query_set_id,
         )
     }
 
@@ -1211,6 +1308,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         base: BasePassRef<RenderCommand>,
         color_attachments: &[Option<RenderPassColorAttachment>],
         depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>,
+        timestamp_writes: Option<&RenderPassTimestampWrites>,
+        occlusion_query_set_id: Option<id::QuerySetId>,
     ) -> Result<(), RenderPassError> {
         profiling::scope!("CommandEncoder::run_render_pass");
         let init_scope = PassErrorScope::Pass(encoder_id);
@@ -1219,7 +1318,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let mut token = Token::root();
         let (device_guard, mut token) = hub.devices.read(&mut token);
 
-        let (scope, query_reset_state, pending_discard_init_fixups) = {
+        let (scope, pending_discard_init_fixups) = {
             let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token);
 
             // Spell out the type, to placate rust-analyzer.
@@ -1241,6 +1340,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                     base: BasePass::from_ref(base),
                     target_colors: color_attachments.to_vec(),
                     target_depth_stencil: depth_stencil_attachment.cloned(),
+                    timestamp_writes: timestamp_writes.cloned(),
+                    occlusion_query_set_id,
                 });
             }
 
@@ -1252,6 +1353,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token);
             let (render_pipeline_guard, mut token) = hub.render_pipelines.read(&mut token);
             let (query_set_guard, mut token) = hub.query_sets.read(&mut token);
+            let (bind_group_layout_guard, mut token) = hub.bind_group_layouts.read(&mut token);
             let (buffer_guard, mut token) = hub.buffers.read(&mut token);
             let (texture_guard, mut token) = hub.textures.read(&mut token);
             let (view_guard, _) = hub.texture_views.read(&mut token);
@@ -1266,10 +1368,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 base.label,
                 color_attachments,
                 depth_stencil_attachment,
+                timestamp_writes,
+                occlusion_query_set_id,
                 cmd_buf,
                 &*view_guard,
                 &*buffer_guard,
                 &*texture_guard,
+                &*query_set_guard,
             )
             .map_pass_err(init_scope)?;
 
@@ -1301,7 +1406,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             let mut dynamic_offset_count = 0;
             let mut string_offset = 0;
             let mut active_query = None;
-            let mut query_reset_state = QueryResetMap::new();
 
             for command in base.commands {
                 match *command {
@@ -1628,9 +1732,16 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                         depth_max,
                     } => {
                         let scope = PassErrorScope::SetViewport;
-                        if rect.w <= 0.0 || rect.h <= 0.0 {
-                            return Err(RenderCommandError::InvalidViewportDimension(
-                                rect.w, rect.h,
+                        if rect.x < 0.0
+                            || rect.y < 0.0
+                            || rect.w <= 0.0
+                            || rect.h <= 0.0
+                            || rect.x + rect.w > info.extent.width as f32
+                            || rect.y + rect.h > info.extent.height as f32
+                        {
+                            return Err(RenderCommandError::InvalidViewportRect(
+                                *rect,
+                                info.extent,
                             ))
                             .map_pass_err(scope);
                         }
@@ -1713,7 +1824,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                             indirect: false,
                             pipeline: state.pipeline,
                         };
-                        state.is_ready(indexed).map_pass_err(scope)?;
+                        state
+                            .is_ready::<A>(indexed, &bind_group_layout_guard)
+                            .map_pass_err(scope)?;
 
                         let last_vertex = first_vertex + vertex_count;
                         let vertex_limit = state.vertex.vertex_limit;
@@ -1753,7 +1866,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                             indirect: false,
                             pipeline: state.pipeline,
                         };
-                        state.is_ready(indexed).map_pass_err(scope)?;
+                        state
+                            .is_ready::<A>(indexed, &*bind_group_layout_guard)
+                            .map_pass_err(scope)?;
 
                         //TODO: validate that base_vertex + max_index() is
                         // within the provided range
@@ -1798,7 +1913,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                             indirect: true,
                             pipeline: state.pipeline,
                         };
-                        state.is_ready(indexed).map_pass_err(scope)?;
+                        state
+                            .is_ready::<A>(indexed, &*bind_group_layout_guard)
+                            .map_pass_err(scope)?;
 
                         let stride = match indexed {
                             false => mem::size_of::<wgt::DrawIndirectArgs>(),
@@ -1870,7 +1987,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                             indirect: true,
                             pipeline: state.pipeline,
                         };
-                        state.is_ready(indexed).map_pass_err(scope)?;
+                        state
+                            .is_ready::<A>(indexed, &*bind_group_layout_guard)
+                            .map_pass_err(scope)?;
 
                         let stride = match indexed {
                             false => mem::size_of::<wgt::DrawIndirectArgs>(),
@@ -2011,7 +2130,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                             .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
                             .map_pass_err(scope)?;
 
-                        let query_set: &resource::QuerySet<A> = cmd_buf
+                        let query_set = cmd_buf
                             .trackers
                             .query_sets
                             .add_single(&*query_set_guard, query_set_id)
@@ -2023,17 +2142,47 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                                 raw,
                                 query_set_id,
                                 query_index,
-                                Some(&mut query_reset_state),
+                                Some(&mut cmd_buf.pending_query_resets),
+                            )
+                            .map_pass_err(scope)?;
+                    }
+                    RenderCommand::BeginOcclusionQuery { query_index } => {
+                        let scope = PassErrorScope::BeginOcclusionQuery;
+
+                        let query_set_id = occlusion_query_set_id
+                            .ok_or(RenderPassErrorInner::MissingOcclusionQuerySet)
+                            .map_pass_err(scope)?;
+
+                        let query_set = cmd_buf
+                            .trackers
+                            .query_sets
+                            .add_single(&*query_set_guard, query_set_id)
+                            .ok_or(RenderCommandError::InvalidQuerySet(query_set_id))
+                            .map_pass_err(scope)?;
+
+                        query_set
+                            .validate_and_begin_occlusion_query(
+                                raw,
+                                query_set_id,
+                                query_index,
+                                Some(&mut cmd_buf.pending_query_resets),
+                                &mut active_query,
                             )
                             .map_pass_err(scope)?;
                     }
+                    RenderCommand::EndOcclusionQuery => {
+                        let scope = PassErrorScope::EndOcclusionQuery;
+
+                        end_occlusion_query(raw, &*query_set_guard, &mut active_query)
+                            .map_pass_err(scope)?;
+                    }
                     RenderCommand::BeginPipelineStatisticsQuery {
                         query_set_id,
                         query_index,
                     } => {
                         let scope = PassErrorScope::BeginPipelineStatisticsQuery;
 
-                        let query_set: &resource::QuerySet<A> = cmd_buf
+                        let query_set = cmd_buf
                             .trackers
                             .query_sets
                             .add_single(&*query_set_guard, query_set_id)
@@ -2045,7 +2194,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                                 raw,
                                 query_set_id,
                                 query_index,
-                                Some(&mut query_reset_state),
+                                Some(&mut cmd_buf.pending_query_resets),
                                 &mut active_query,
                             )
                             .map_pass_err(scope)?;
@@ -2142,7 +2291,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 info.finish(raw, &*texture_guard).map_pass_err(init_scope)?;
 
             cmd_buf.encoder.close();
-            (trackers, query_reset_state, pending_discard_init_fixups)
+            (trackers, pending_discard_init_fixups)
         };
 
         let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token);
@@ -2162,7 +2311,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 &device_guard[cmd_buf.device_id.value],
             );
 
-            query_reset_state
+            cmd_buf
+                .pending_query_resets
                 .reset_queries(
                     transit,
                     &query_set_guard,
@@ -2544,6 +2694,21 @@ pub mod render_ffi {
         });
     }
 
+    #[no_mangle]
+    pub extern "C" fn wgpu_render_pass_begin_occlusion_query(
+        pass: &mut RenderPass,
+        query_index: u32,
+    ) {
+        pass.base
+            .commands
+            .push(RenderCommand::BeginOcclusionQuery { query_index });
+    }
+
+    #[no_mangle]
+    pub extern "C" fn wgpu_render_pass_end_occlusion_query(pass: &mut RenderPass) {
+        pass.base.commands.push(RenderCommand::EndOcclusionQuery);
+    }
+
     #[no_mangle]
     pub extern "C" fn wgpu_render_pass_begin_pipeline_statistics_query(
         pass: &mut RenderPass,
diff --git a/wgpu-core/src/conv.rs b/wgpu-core/src/conv.rs
index 75a97eb087..90629f08d6 100644
--- a/wgpu-core/src/conv.rs
+++ b/wgpu-core/src/conv.rs
@@ -95,6 +95,10 @@ pub fn map_buffer_usage(usage: wgt::BufferUsages) -> hal::BufferUses {
         hal::BufferUses::INDIRECT,
         usage.contains(wgt::BufferUsages::INDIRECT),
     );
+    u.set(
+        hal::BufferUses::QUERY_RESOLVE,
+        usage.contains(wgt::BufferUsages::QUERY_RESOLVE),
+    );
     u
 }
 
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 3f22459b38..632c83e37f 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -1078,19 +1078,28 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 }
             }
 
-            // If there is an equivalent BGL, just bump the refcount and return it.
-            // This is only applicable for identity filters that are generating new IDs,
-            // so their inputs are `PhantomData` of size 0.
-            if mem::size_of::<Input<G, id::BindGroupLayoutId>>() == 0 {
+            let mut compatible_layout = None;
+            {
                 let (bgl_guard, _) = hub.bind_group_layouts.read(&mut token);
                 if let Some(id) =
                     Device::deduplicate_bind_group_layout(device_id, &entry_map, &*bgl_guard)
                 {
-                    return (id, None);
+                    // If there is an equivalent BGL, just bump the refcount and return it.
+                    // This is only applicable if ids are generated in wgpu. In practice:
+                    //  - wgpu users take this branch and return the existing
+                    //    id without using the indirection layer in BindGroupLayout.
+                    //  - Other users like gecko or the replay tool don't take
+                    //    the branch and instead rely on the indirection to use the
+                    //    proper bind group layout id.
+                    if G::ids_are_generated_in_wgpu() {
+                        return (id, None);
+                    }
+
+                    compatible_layout = Some(id::Valid(id));
                 }
             }
 
-            let layout = match device.create_bind_group_layout(
+            let mut layout = match device.create_bind_group_layout(
                 device_id,
                 desc.label.borrow_option(),
                 entry_map,
@@ -1099,7 +1108,10 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 Err(e) => break e,
             };
 
+            layout.compatible_layout = compatible_layout;
+
             let id = fid.assign(layout, &mut token);
+
             return (id.0, None);
         };
 
@@ -1244,16 +1256,28 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                     .add(trace::Action::CreateBindGroup(fid.id(), desc.clone()));
             }
 
-            let bind_group_layout = match bind_group_layout_guard.get(desc.layout) {
+            let mut bind_group_layout = match bind_group_layout_guard.get(desc.layout) {
                 Ok(layout) => layout,
-                Err(_) => break binding_model::CreateBindGroupError::InvalidLayout,
+                Err(..) => break binding_model::CreateBindGroupError::InvalidLayout,
+            };
+
+            let mut layout_id = id::Valid(desc.layout);
+            if let Some(id) = bind_group_layout.compatible_layout {
+                layout_id = id;
+                bind_group_layout = &bind_group_layout_guard[id];
+            }
+
+            let bind_group = match device.create_bind_group(
+                device_id,
+                bind_group_layout,
+                layout_id,
+                desc,
+                hub,
+                &mut token,
+            ) {
+                Ok(bind_group) => bind_group,
+                Err(e) => break e,
             };
-            let bind_group =
-                match device.create_bind_group(device_id, bind_group_layout, desc, hub, &mut token)
-                {
-                    Ok(bind_group) => bind_group,
-                    Err(e) => break e,
-                };
             let ref_count = bind_group.life_guard.add_ref();
 
             let id = fid.assign(bind_group, &mut token);
@@ -2110,7 +2134,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
 
         let (mut surface_guard, mut token) = self.surfaces.write(&mut token);
         let (adapter_guard, mut token) = hub.adapters.read(&mut token);
-        let (device_guard, _token) = hub.devices.read(&mut token);
+        let (device_guard, mut token) = hub.devices.read(&mut token);
 
         let error = 'outer: loop {
             let device = match device_guard.get(device_id) {
@@ -2183,6 +2207,24 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 break error;
             }
 
+            // Wait for all work to finish before configuring the surface.
+            if let Err(e) = device.maintain(hub, wgt::Maintain::Wait, &mut token) {
+                break e.into();
+            }
+
+            // All textures must be destroyed before the surface can be re-configured.
+            if let Some(present) = surface.presentation.take() {
+                if present.acquired_texture.is_some() {
+                    break E::PreviousOutputExists;
+                }
+            }
+
+            // TODO: Texture views may still be alive that point to the texture.
+            // This will allow the user to render to the surface texture, long after
+            // it has been removed.
+            //
+            // https://github.com/gfx-rs/wgpu/issues/4105
+
             match unsafe {
                 A::get_surface_mut(surface)
                     .unwrap()
@@ -2202,12 +2244,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 }
             }
 
-            if let Some(present) = surface.presentation.take() {
-                if present.acquired_texture.is_some() {
-                    break E::PreviousOutputExists;
-                }
-            }
-
             surface.presentation = Some(present::Presentation {
                 device_id: Stored {
                     value: id::Valid(device_id),
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 75491d10cb..b641567d71 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -219,6 +219,9 @@ struct ActiveSubmission<A: hal::Api> {
     mapped: Vec<id::Valid<id::BufferId>>,
 
     encoders: Vec<EncoderInFlight<A>>,
+
+    /// List of queue "on_submitted_work_done" closures to be called once this
+    /// submission has completed.
     work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
 }
 
@@ -304,6 +307,12 @@ pub(super) struct LifetimeTracker<A: hal::Api> {
     /// Buffers the user has asked us to map, and which are not used by any
     /// queue submission still in flight.
     ready_to_map: Vec<id::Valid<id::BufferId>>,
+
+    /// Queue "on_submitted_work_done" closures that were registered while there are no
+    /// currently pending submissions. These cannot be immediately invoked as they
+    /// must happen _after_ all mapped buffer callbacks are fired, so we defer them
+    /// here until the next time the device is maintained.
+    work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
 }
 
 impl<A: hal::Api> LifetimeTracker<A> {
@@ -316,6 +325,7 @@ impl<A: hal::Api> LifetimeTracker<A> {
             active: Vec::new(),
             free_resources: NonReferencedResources::new(),
             ready_to_map: Vec::new(),
+            work_done_closures: SmallVec::new(),
         }
     }
 
@@ -405,7 +415,7 @@ impl<A: hal::Api> LifetimeTracker<A> {
             .position(|a| a.index > last_done)
             .unwrap_or(self.active.len());
 
-        let mut work_done_closures = SmallVec::new();
+        let mut work_done_closures: SmallVec<_> = self.work_done_closures.drain(..).collect();
         for a in self.active.drain(..done_count) {
             log::trace!("Active submission {} is done", a.index);
             self.free_resources.extend(a.last_resources);
@@ -445,18 +455,16 @@ impl<A: hal::Api> LifetimeTracker<A> {
         }
     }
 
-    pub fn add_work_done_closure(
-        &mut self,
-        closure: SubmittedWorkDoneClosure,
-    ) -> Option<SubmittedWorkDoneClosure> {
+    pub fn add_work_done_closure(&mut self, closure: SubmittedWorkDoneClosure) {
         match self.active.last_mut() {
             Some(active) => {
                 active.work_done_closures.push(closure);
-                None
             }
-            // Note: we can't immediately invoke the closure, since it assumes
-            // nothing is currently locked in the hubs.
-            None => Some(closure),
+            // We must defer the closure until all previously occurring map_async closures
+            // have fired. This is required by the spec.
+            None => {
+                self.work_done_closures.push(closure);
+            }
         }
     }
 }
@@ -762,14 +770,24 @@ impl<A: HalApi> LifetimeTracker<A> {
                 //Note: nothing else can bump the refcount since the guard is locked exclusively
                 //Note: same BGL can appear multiple times in the list, but only the last
                 // encounter could drop the refcount to 0.
-                if guard[id].multi_ref_count.dec_and_check_empty() {
-                    log::debug!("Bind group layout {:?} will be destroyed", id);
-                    #[cfg(feature = "trace")]
-                    if let Some(t) = trace {
-                        t.lock().add(trace::Action::DestroyBindGroupLayout(id.0));
-                    }
-                    if let Some(lay) = hub.bind_group_layouts.unregister_locked(id.0, &mut *guard) {
-                        self.free_resources.bind_group_layouts.push(lay.raw);
+                let mut bgl_to_check = Some(id);
+                while let Some(id) = bgl_to_check.take() {
+                    let bgl = &guard[id];
+                    if bgl.multi_ref_count.dec_and_check_empty() {
+                        // If this layout points to a compatible one, go over the latter
+                        // to decrement the ref count and potentially destroy it.
+                        bgl_to_check = bgl.compatible_layout;
+
+                        log::debug!("Bind group layout {:?} will be destroyed", id);
+                        #[cfg(feature = "trace")]
+                        if let Some(t) = trace {
+                            t.lock().add(trace::Action::DestroyBindGroupLayout(id.0));
+                        }
+                        if let Some(lay) =
+                            hub.bind_group_layouts.unregister_locked(id.0, &mut *guard)
+                        {
+                            self.free_resources.bind_group_layouts.push(lay.raw);
+                        }
                     }
                 }
             }
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 2f749a4495..9a77bf9536 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -1,6 +1,5 @@
 use crate::{
     binding_model,
-    device::life::WaitIdleError,
     hal_api::HalApi,
     hub::Hub,
     id,
@@ -24,7 +23,7 @@ pub mod queue;
 pub mod resource;
 #[cfg(any(feature = "trace", feature = "replay"))]
 pub mod trace;
-pub use resource::Device;
+pub use {life::WaitIdleError, resource::Device};
 
 pub const SHADER_STAGE_COUNT: usize = 3;
 // Should be large enough for the largest possible texture row. This
@@ -181,6 +180,9 @@ impl UserClosures {
     fn fire(self) {
         // Note: this logic is specifically moved out of `handle_mapping()` in order to
         // have nothing locked by the time we execute users callback code.
+
+        // Mappings _must_ be fired before submissions, as the spec requires all mapping callbacks that are registered before
+        // an on_submitted_work_done callback to be fired before the on_submitted_work_done callback.
         for (operation, status) in self.mappings {
             operation.callback.call(status);
         }
@@ -296,6 +298,8 @@ pub enum DeviceError {
     Lost,
     #[error("Not enough memory left")]
     OutOfMemory,
+    #[error("Creation of a resource failed for a reason other than running out of memory.")]
+    ResourceCreationFailed,
 }
 
 impl From<hal::DeviceError> for DeviceError {
@@ -303,6 +307,7 @@ impl From<hal::DeviceError> for DeviceError {
         match error {
             hal::DeviceError::Lost => DeviceError::Lost,
             hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory,
+            hal::DeviceError::ResourceCreationFailed => DeviceError::ResourceCreationFailed,
         }
     }
 }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 6e0be3b297..73fa5de3b0 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1435,17 +1435,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         closure: SubmittedWorkDoneClosure,
     ) -> Result<(), InvalidQueue> {
         //TODO: flush pending writes
-        let closure_opt = {
-            let hub = A::hub(self);
-            let mut token = Token::root();
-            let (device_guard, mut token) = hub.devices.read(&mut token);
-            match device_guard.get(queue_id) {
-                Ok(device) => device.lock_life(&mut token).add_work_done_closure(closure),
-                Err(_) => return Err(InvalidQueue),
-            }
-        };
-        if let Some(closure) = closure_opt {
-            closure.call();
+        let hub = A::hub(self);
+        let mut token = Token::root();
+        let (device_guard, mut token) = hub.devices.read(&mut token);
+        match device_guard.get(queue_id) {
+            Ok(device) => device.lock_life(&mut token).add_work_done_closure(closure),
+            Err(_) => return Err(InvalidQueue),
         }
         Ok(())
     }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 20e057a934..73f1887e10 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1,7 +1,8 @@
 #[cfg(feature = "trace")]
 use crate::device::trace;
 use crate::{
-    binding_model, command, conv,
+    binding_model::{self, get_bind_group_layout, try_get_bind_group_layout},
+    command, conv,
     device::life::WaitIdleError,
     device::{
         AttachmentData, CommandAllocator, MissingDownlevelFlags, MissingFeatures,
@@ -1285,8 +1286,7 @@ impl<A: HalApi> Device<A> {
                     inner: Box::new(inner),
                 })
             })?;
-        let interface =
-            validation::Interface::new(&module, &info, self.features, self.limits.clone());
+        let interface = validation::Interface::new(&module, &info, self.limits.clone());
         let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { module, info });
 
         let hal_desc = hal::ShaderModuleDescriptor {
@@ -1367,7 +1367,11 @@ impl<A: HalApi> Device<A> {
     ) -> Option<id::BindGroupLayoutId> {
         guard
             .iter(self_id.backend())
-            .find(|&(_, bgl)| bgl.device_id.value.0 == self_id && bgl.entries == *entry_map)
+            .find(|&(_, bgl)| {
+                bgl.device_id.value.0 == self_id
+                    && bgl.compatible_layout.is_none()
+                    && bgl.entries == *entry_map
+            })
             .map(|(id, value)| {
                 value.multi_ref_count.inc();
                 id
@@ -1626,6 +1630,7 @@ impl<A: HalApi> Device<A> {
             entries: entry_map,
             #[cfg(debug_assertions)]
             label: label.unwrap_or("").to_string(),
+            compatible_layout: None,
         })
     }
 
@@ -1799,6 +1804,7 @@ impl<A: HalApi> Device<A> {
         &self,
         self_id: id::DeviceId,
         layout: &binding_model::BindGroupLayout<A>,
+        layout_id: id::Valid<id::BindGroupLayoutId>,
         desc: &binding_model::BindGroupDescriptor,
         hub: &Hub<A, G>,
         token: &mut Token<binding_model::BindGroupLayout<A>>,
@@ -2038,7 +2044,7 @@ impl<A: HalApi> Device<A> {
                 value: id::Valid(self_id),
                 ref_count: self.life_guard.add_ref(),
             },
-            layout_id: id::Valid(desc.layout),
+            layout_id,
             life_guard: LifeGuard::new(desc.label.borrow_or_default()),
             used,
             used_buffer_ranges,
@@ -2287,7 +2293,7 @@ impl<A: HalApi> Device<A> {
         let bgl_vec = desc
             .bind_group_layouts
             .iter()
-            .map(|&id| &bgl_guard.get(id).unwrap().raw)
+            .map(|&id| &try_get_bind_group_layout(bgl_guard, id).unwrap().raw)
             .collect::<Vec<_>>();
         let hal_desc = hal::PipelineLayoutDescriptor {
             label: desc.label.borrow_option(),
@@ -2314,8 +2320,9 @@ impl<A: HalApi> Device<A> {
                 .iter()
                 .map(|&id| {
                     // manually add a dependency to BGL
-                    bgl_guard.get(id).unwrap().multi_ref_count.inc();
-                    id::Valid(id)
+                    let (id, layout) = get_bind_group_layout(bgl_guard, id::Valid(id));
+                    layout.multi_ref_count.inc();
+                    id
                 })
                 .collect(),
             push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(),
diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs
index 57f82c181e..ab5806bb90 100644
--- a/wgpu-core/src/device/trace.rs
+++ b/wgpu-core/src/device/trace.rs
@@ -176,11 +176,14 @@ pub enum Command {
     InsertDebugMarker(String),
     RunComputePass {
         base: crate::command::BasePass<crate::command::ComputeCommand>,
+        timestamp_writes: Option<crate::command::ComputePassTimestampWrites>,
     },
     RunRenderPass {
         base: crate::command::BasePass<crate::command::RenderCommand>,
         target_colors: Vec<Option<crate::command::RenderPassColorAttachment>>,
         target_depth_stencil: Option<crate::command::RenderPassDepthStencilAttachment>,
+        timestamp_writes: Option<crate::command::RenderPassTimestampWrites>,
+        occlusion_query_set_id: Option<id::QuerySetId>,
     },
 }
 
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index d3da0be6c2..c0670e085c 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -243,6 +243,7 @@ impl<A: HalApi> Access<PipelineLayout<A>> for RenderBundle<A> {}
 impl<A: HalApi> Access<BindGroupLayout<A>> for Root {}
 impl<A: HalApi> Access<BindGroupLayout<A>> for Device<A> {}
 impl<A: HalApi> Access<BindGroupLayout<A>> for PipelineLayout<A> {}
+impl<A: HalApi> Access<BindGroupLayout<A>> for QuerySet<A> {}
 impl<A: HalApi> Access<BindGroup<A>> for Root {}
 impl<A: HalApi> Access<BindGroup<A>> for Device<A> {}
 impl<A: HalApi> Access<BindGroup<A>> for BindGroupLayout<A> {}
diff --git a/wgpu-core/src/identity.rs b/wgpu-core/src/identity.rs
index 5f7cc0dc5a..fe10bedb0e 100644
--- a/wgpu-core/src/identity.rs
+++ b/wgpu-core/src/identity.rs
@@ -162,9 +162,14 @@ pub trait GlobalIdentityHandlerFactory:
     + IdentityHandlerFactory<id::SamplerId>
     + IdentityHandlerFactory<id::SurfaceId>
 {
+    fn ids_are_generated_in_wgpu() -> bool;
 }
 
-impl GlobalIdentityHandlerFactory for IdentityManagerFactory {}
+impl GlobalIdentityHandlerFactory for IdentityManagerFactory {
+    fn ids_are_generated_in_wgpu() -> bool {
+        true
+    }
+}
 
 pub type Input<G, I> = <<G as IdentityHandlerFactory<I>>::Filter as IdentityHandler<I>>::Input;
 
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 45f01824b7..0aee56ac6e 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -82,9 +82,24 @@ impl Instance {
                     name: "wgpu",
                     flags,
                     dx12_shader_compiler: instance_desc.dx12_shader_compiler.clone(),
+                    gles_minor_version: instance_desc.gles_minor_version,
                 };
-                unsafe { hal::Instance::init(&hal_desc).ok() }
+                match unsafe { hal::Instance::init(&hal_desc) } {
+                    Ok(instance) => {
+                        log::debug!("Instance::new: created {:?} backend", A::VARIANT);
+                        Some(instance)
+                    }
+                    Err(err) => {
+                        log::debug!(
+                            "Instance::new: failed to create {:?} backend: {:?}",
+                            A::VARIANT,
+                            err
+                        );
+                        None
+                    }
+                }
             } else {
+                log::trace!("Instance::new: backend {:?} not requested", A::VARIANT);
                 None
             }
         }
@@ -352,6 +367,7 @@ impl<A: HalApi> Adapter<A> {
             |err| match err {
                 hal::DeviceError::Lost => RequestDeviceError::DeviceLost,
                 hal::DeviceError::OutOfMemory => RequestDeviceError::OutOfMemory,
+                hal::DeviceError::ResourceCreationFailed => RequestDeviceError::Internal,
             },
         )?;
 
diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
index c9df46ad93..7366934d27 100644
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@@ -15,7 +15,7 @@ use std::borrow::Borrow;
 use crate::device::trace::Action;
 use crate::{
     conv,
-    device::{DeviceError, MissingDownlevelFlags},
+    device::{DeviceError, MissingDownlevelFlags, WaitIdleError},
     global::Global,
     hal_api::HalApi,
     hub::Token,
@@ -96,6 +96,18 @@ pub enum ConfigureSurfaceError {
     },
     #[error("Requested usage is not supported")]
     UnsupportedUsage,
+    #[error("Gpu got stuck :(")]
+    StuckGpu,
+}
+
+impl From<WaitIdleError> for ConfigureSurfaceError {
+    fn from(e: WaitIdleError) -> Self {
+        match e {
+            WaitIdleError::Device(d) => ConfigureSurfaceError::Device(d),
+            WaitIdleError::WrongSubmissionIndex(..) => unreachable!(),
+            WaitIdleError::StuckGpu => ConfigureSurfaceError::StuckGpu,
+        }
+    }
 }
 
 #[repr(C)]
@@ -300,15 +312,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
 
             let (texture, _) = hub.textures.unregister(texture_id.value.0, &mut token);
             if let Some(texture) = texture {
-                if let resource::TextureClearMode::RenderPass { clear_views, .. } =
-                    texture.clear_mode
-                {
-                    for clear_view in clear_views {
-                        unsafe {
-                            hal::Device::destroy_texture_view(&device.raw, clear_view);
-                        }
-                    }
-                }
+                texture.clear_mode.destroy_clear_views(&device.raw);
 
                 let suf = A::get_surface_mut(surface);
                 match texture.inner {
@@ -386,10 +390,16 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
 
             // The texture ID got added to the device tracker by `submit()`,
             // and now we are moving it away.
+            log::debug!(
+                "Removing swapchain texture {:?} from the device tracker",
+                texture_id.value
+            );
             device.trackers.lock().textures.remove(texture_id.value);
 
             let (texture, _) = hub.textures.unregister(texture_id.value.0, &mut token);
             if let Some(texture) = texture {
+                texture.clear_mode.destroy_clear_views(&device.raw);
+
                 let suf = A::get_surface_mut(surface);
                 match texture.inner {
                     resource::TextureInner::Surface {
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index fe881c2d06..c0977b80ef 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -384,6 +384,18 @@ pub enum TextureClearMode<A: hal::Api> {
     None,
 }
 
+impl<A: hal::Api> TextureClearMode<A> {
+    /// Consumes `self`, destroying any `RenderPass` clear views on `device`.
+    pub(crate) fn destroy_clear_views(self, device: &A::Device) {
+        if let Self::RenderPass { clear_views, .. } = self {
+            for view in clear_views {
+                // NOTE(review): assumes `device` created these views -- TODO confirm.
+                unsafe { hal::Device::destroy_texture_view(device, view) };
+            }
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct Texture<A: hal::Api> {
     pub(crate) inner: TextureInner<A>,
diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs
index 111b4e9618..69e29fc59f 100644
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -16,8 +16,10 @@ is a corresponding debug assert the checks if that access is valid. This helps
 get bugs caught fast, while still letting users not need to pay for the bounds
 checks.
 
-In wgpu, resource IDs are allocated and re-used, so will always be as low
-as reasonably possible. This allows us to use the ID as an index into an array.
+In wgpu, each resource ID includes a bitfield holding an index.
+Indices are allocated and re-used, so they will always be as low as
+reasonably possible. This allows us to use IDs to index into an array
+of tracking information.
 
 ## Statefulness
 
diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs
index 771adba731..e3ecb916d3 100644
--- a/wgpu-core/src/validation.rs
+++ b/wgpu-core/src/validation.rs
@@ -1,5 +1,4 @@
 use crate::{binding_model::BindEntryMap, FastHashMap, FastHashSet};
-use naga::valid::GlobalUse;
 use std::{collections::hash_map::Entry, fmt};
 use thiserror::Error;
 use wgt::{BindGroupLayoutEntry, BindingType};
@@ -112,7 +111,7 @@ struct SpecializationConstant {
 struct EntryPoint {
     inputs: Vec<Varying>,
     outputs: Vec<Varying>,
-    resources: Vec<(naga::Handle<Resource>, GlobalUse)>,
+    resources: Vec<naga::Handle<Resource>>,
     #[allow(unused)]
     spec_constants: Vec<SpecializationConstant>,
     sampling_pairs: FastHashSet<(naga::Handle<Resource>, naga::Handle<Resource>)>,
@@ -121,7 +120,6 @@ struct EntryPoint {
 
 #[derive(Debug)]
 pub struct Interface {
-    features: wgt::Features,
     limits: wgt::Limits,
     resources: naga::Arena<Resource>,
     entry_points: FastHashMap<(naga::ShaderStage, String), EntryPoint>,
@@ -174,11 +172,6 @@ pub enum BindingError {
     Missing,
     #[error("Visibility flags don't include the shader stage")]
     Invisible,
-    #[error("The shader requires the load/store access flags {required:?} but only {allowed:?} is allowed")]
-    WrongUsage {
-        required: GlobalUse,
-        allowed: GlobalUse,
-    },
     #[error("Type on the shader side does not match the pipeline binding")]
     WrongType,
     #[error("Storage class {binding:?} doesn't match the shader {shader:?}")]
@@ -206,9 +199,9 @@ pub enum BindingError {
     #[error("Texture format {0:?} is not supported for storage use")]
     BadStorageFormat(wgt::TextureFormat),
     #[error(
-        "Storage texture usage {0:?} doesn't have a matching supported `StorageTextureAccess`"
+        "Storage texture with access {0:?} doesn't have a matching supported `StorageTextureAccess`"
     )]
-    UnsupportedTextureStorageAccess(GlobalUse),
+    UnsupportedTextureStorageAccess(naga::StorageAccess),
 }
 
 #[derive(Clone, Debug, Error)]
@@ -379,43 +372,32 @@ fn map_storage_format_from_naga(format: naga::StorageFormat) -> wgt::TextureForm
 }
 
 impl Resource {
-    fn check_binding_use(
-        &self,
-        entry: &BindGroupLayoutEntry,
-        shader_usage: GlobalUse,
-    ) -> Result<(), BindingError> {
-        let allowed_usage = match self.ty {
+    fn check_binding_use(&self, entry: &BindGroupLayoutEntry) -> Result<(), BindingError> {
+        match self.ty {
             ResourceType::Buffer { size } => {
-                let (allowed_usage, min_size) = match entry.ty {
+                let min_size = match entry.ty {
                     BindingType::Buffer {
                         ty,
                         has_dynamic_offset: _,
                         min_binding_size,
                     } => {
-                        let (class, global_use) = match ty {
-                            wgt::BufferBindingType::Uniform => {
-                                (naga::AddressSpace::Uniform, GlobalUse::READ)
-                            }
+                        let class = match ty {
+                            wgt::BufferBindingType::Uniform => naga::AddressSpace::Uniform,
                             wgt::BufferBindingType::Storage { read_only } => {
-                                let mut global_use = GlobalUse::READ | GlobalUse::QUERY;
-                                global_use.set(GlobalUse::WRITE, !read_only);
                                 let mut naga_access = naga::StorageAccess::LOAD;
                                 naga_access.set(naga::StorageAccess::STORE, !read_only);
-                                (
-                                    naga::AddressSpace::Storage {
-                                        access: naga_access,
-                                    },
-                                    global_use,
-                                )
+                                naga::AddressSpace::Storage {
+                                    access: naga_access,
+                                }
                             }
                         };
-                        if !address_space_matches(self.class, class) {
+                        if self.class != class {
                             return Err(BindingError::WrongAddressSpace {
                                 binding: class,
                                 shader: self.class,
                             });
                         }
-                        (global_use, min_binding_size)
+                        min_binding_size
                     }
                     _ => return Err(BindingError::WrongType),
                 };
@@ -425,13 +407,10 @@ impl Resource {
                     }
                     _ => (),
                 }
-                allowed_usage
             }
             ResourceType::Sampler { comparison } => match entry.ty {
                 BindingType::Sampler(ty) => {
-                    if (ty == wgt::SamplerBindingType::Comparison) == comparison {
-                        GlobalUse::READ
-                    } else {
+                    if (ty == wgt::SamplerBindingType::Comparison) != comparison {
                         return Err(BindingError::WrongSamplerComparison);
                     }
                 }
@@ -480,29 +459,26 @@ impl Resource {
                         }
                     }
                 }
-                let (expected_class, usage) = match entry.ty {
+                let expected_class = match entry.ty {
                     BindingType::Texture {
                         sample_type,
                         view_dimension: _,
                         multisampled: multi,
-                    } => {
-                        let class = match sample_type {
-                            wgt::TextureSampleType::Float { .. } => naga::ImageClass::Sampled {
-                                kind: naga::ScalarKind::Float,
-                                multi,
-                            },
-                            wgt::TextureSampleType::Sint => naga::ImageClass::Sampled {
-                                kind: naga::ScalarKind::Sint,
-                                multi,
-                            },
-                            wgt::TextureSampleType::Uint => naga::ImageClass::Sampled {
-                                kind: naga::ScalarKind::Uint,
-                                multi,
-                            },
-                            wgt::TextureSampleType::Depth => naga::ImageClass::Depth { multi },
-                        };
-                        (class, GlobalUse::READ | GlobalUse::QUERY)
-                    }
+                    } => match sample_type {
+                        wgt::TextureSampleType::Float { .. } => naga::ImageClass::Sampled {
+                            kind: naga::ScalarKind::Float,
+                            multi,
+                        },
+                        wgt::TextureSampleType::Sint => naga::ImageClass::Sampled {
+                            kind: naga::ScalarKind::Sint,
+                            multi,
+                        },
+                        wgt::TextureSampleType::Uint => naga::ImageClass::Sampled {
+                            kind: naga::ScalarKind::Uint,
+                            multi,
+                        },
+                        wgt::TextureSampleType::Depth => naga::ImageClass::Depth { multi },
+                    },
                     BindingType::StorageTexture {
                         access,
                         format,
@@ -510,26 +486,15 @@ impl Resource {
                     } => {
                         let naga_format = map_storage_format_to_naga(format)
                             .ok_or(BindingError::BadStorageFormat(format))?;
-                        let (naga_access, usage) = match access {
-                            wgt::StorageTextureAccess::ReadOnly => (
-                                naga::StorageAccess::LOAD,
-                                GlobalUse::READ | GlobalUse::QUERY,
-                            ),
-                            wgt::StorageTextureAccess::WriteOnly => (
-                                naga::StorageAccess::STORE,
-                                GlobalUse::WRITE | GlobalUse::QUERY,
-                            ),
-                            wgt::StorageTextureAccess::ReadWrite => {
-                                (naga::StorageAccess::all(), GlobalUse::all())
-                            }
+                        let naga_access = match access {
+                            wgt::StorageTextureAccess::ReadOnly => naga::StorageAccess::LOAD,
+                            wgt::StorageTextureAccess::WriteOnly => naga::StorageAccess::STORE,
+                            wgt::StorageTextureAccess::ReadWrite => naga::StorageAccess::all(),
                         };
-                        (
-                            naga::ImageClass::Storage {
-                                format: naga_format,
-                                access: naga_access,
-                            },
-                            usage,
-                        )
+                        naga::ImageClass::Storage {
+                            format: naga_format,
+                            access: naga_access,
+                        }
                     }
                     _ => return Err(BindingError::WrongType),
                 };
@@ -539,31 +504,19 @@ impl Resource {
                         shader: class,
                     });
                 }
-                usage
             }
         };
 
-        if allowed_usage.contains(shader_usage) {
-            Ok(())
-        } else {
-            Err(BindingError::WrongUsage {
-                required: shader_usage,
-                allowed: allowed_usage,
-            })
-        }
+        Ok(())
     }
 
-    fn derive_binding_type(
-        &self,
-        shader_usage: GlobalUse,
-        features: wgt::Features,
-    ) -> Result<BindingType, BindingError> {
+    fn derive_binding_type(&self) -> Result<BindingType, BindingError> {
         Ok(match self.ty {
             ResourceType::Buffer { size } => BindingType::Buffer {
                 ty: match self.class {
                     naga::AddressSpace::Uniform => wgt::BufferBindingType::Uniform,
-                    naga::AddressSpace::Storage { .. } => wgt::BufferBindingType::Storage {
-                        read_only: !shader_usage.contains(GlobalUse::WRITE),
+                    naga::AddressSpace::Storage { access } => wgt::BufferBindingType::Storage {
+                        read_only: access == naga::StorageAccess::LOAD,
                     },
                     _ => return Err(BindingError::WrongType),
                 },
@@ -606,19 +559,15 @@ impl Resource {
                         view_dimension,
                         multisampled: multi,
                     },
-                    naga::ImageClass::Storage { format, .. } => BindingType::StorageTexture {
-                        access: if !shader_usage.contains(GlobalUse::READ) {
-                            wgt::StorageTextureAccess::WriteOnly
-                        } else if !features
-                            .contains(wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES)
-                        {
-                            return Err(BindingError::UnsupportedTextureStorageAccess(
-                                shader_usage,
-                            ));
-                        } else if shader_usage.contains(GlobalUse::WRITE) {
-                            wgt::StorageTextureAccess::ReadWrite
-                        } else {
-                            wgt::StorageTextureAccess::ReadOnly
+                    naga::ImageClass::Storage { format, access } => BindingType::StorageTexture {
+                        access: {
+                            const LOAD_STORE: naga::StorageAccess = naga::StorageAccess::all();
+                            match access {
+                                naga::StorageAccess::LOAD => wgt::StorageTextureAccess::ReadOnly,
+                                naga::StorageAccess::STORE => wgt::StorageTextureAccess::WriteOnly,
+                                LOAD_STORE => wgt::StorageTextureAccess::ReadWrite,
+                                _ => unreachable!(),
+                            }
                         },
                         view_dimension,
                         format: {
@@ -863,6 +812,7 @@ impl Interface {
                 location,
                 interpolation,
                 sampling,
+                .. // second_blend_source
             }) => Varying::Local {
                 location,
                 iv: InterfaceVar {
@@ -880,12 +830,7 @@ impl Interface {
         list.push(varying);
     }
 
-    pub fn new(
-        module: &naga::Module,
-        info: &naga::valid::ModuleInfo,
-        features: wgt::Features,
-        limits: wgt::Limits,
-    ) -> Self {
+    pub fn new(module: &naga::Module, info: &naga::valid::ModuleInfo, limits: wgt::Limits) -> Self {
         let mut resources = naga::Arena::new();
         let mut resource_mapping = FastHashMap::default();
         for (var_handle, var) in module.global_variables.iter() {
@@ -949,11 +894,8 @@ impl Interface {
 
             for (var_handle, var) in module.global_variables.iter() {
                 let usage = info[var_handle];
-                if usage.is_empty() {
-                    continue;
-                }
-                if var.binding.is_some() {
-                    ep.resources.push((resource_mapping[&var_handle], usage));
+                if !usage.is_empty() && var.binding.is_some() {
+                    ep.resources.push(resource_mapping[&var_handle]);
                 }
             }
 
@@ -968,7 +910,6 @@ impl Interface {
         }
 
         Self {
-            features,
             limits,
             resources,
             entry_points,
@@ -1000,7 +941,7 @@ impl Interface {
             .ok_or(StageError::MissingEntryPoint(pair.1))?;
 
         // check resources visibility
-        for &(handle, usage) in entry_point.resources.iter() {
+        for &handle in entry_point.resources.iter() {
             let res = &self.resources[handle];
             let result = match given_layouts {
                 Some(layouts) => {
@@ -1026,13 +967,13 @@ impl Interface {
                                 Err(BindingError::Invisible)
                             }
                         })
-                        .and_then(|entry| res.check_binding_use(entry, usage))
+                        .and_then(|entry| res.check_binding_use(entry))
                 }
                 None => derived_layouts
                     .get_mut(res.bind.group as usize)
                     .ok_or(BindingError::Missing)
                     .and_then(|set| {
-                        let ty = res.derive_binding_type(usage, self.features)?;
+                        let ty = res.derive_binding_type()?;
                         match set.entry(res.bind.binding) {
                             Entry::Occupied(e) if e.get().ty != ty => {
                                 return Err(BindingError::InconsistentlyDerivedType)
@@ -1237,72 +1178,3 @@ impl Interface {
         Ok(outputs)
     }
 }
-
-fn address_space_matches(shader: naga::AddressSpace, binding: naga::AddressSpace) -> bool {
-    match (shader, binding) {
-        (
-            naga::AddressSpace::Storage {
-                access: access_shader,
-            },
-            naga::AddressSpace::Storage {
-                access: access_pipeline,
-            },
-        ) => {
-            // Allow read- and write-only usages to match read-write layouts:
-            (access_shader & access_pipeline) == access_shader
-        }
-        (a, b) => a == b,
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::address_space_matches;
-
-    #[test]
-    fn address_space_matches_correctly() {
-        assert!(address_space_matches(
-            naga::AddressSpace::Uniform,
-            naga::AddressSpace::Uniform
-        ));
-
-        assert!(!address_space_matches(
-            naga::AddressSpace::Uniform,
-            naga::AddressSpace::Storage {
-                access: naga::StorageAccess::LOAD
-            }
-        ));
-
-        let test_cases = [
-            (naga::StorageAccess::LOAD, naga::StorageAccess::LOAD, true),
-            (naga::StorageAccess::STORE, naga::StorageAccess::LOAD, false),
-            (naga::StorageAccess::LOAD, naga::StorageAccess::STORE, false),
-            (naga::StorageAccess::STORE, naga::StorageAccess::STORE, true),
-            (
-                naga::StorageAccess::LOAD | naga::StorageAccess::STORE,
-                naga::StorageAccess::LOAD | naga::StorageAccess::STORE,
-                true,
-            ),
-            (
-                naga::StorageAccess::STORE,
-                naga::StorageAccess::LOAD | naga::StorageAccess::STORE,
-                true,
-            ),
-            (
-                naga::StorageAccess::LOAD,
-                naga::StorageAccess::LOAD | naga::StorageAccess::STORE,
-                true,
-            ),
-        ];
-
-        for (shader, binding, expect_match) in test_cases {
-            assert_eq!(
-                expect_match,
-                address_space_matches(
-                    naga::AddressSpace::Storage { access: shader },
-                    naga::AddressSpace::Storage { access: binding }
-                )
-            );
-        }
-    }
-}
diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml
index 550752eb37..225f18256a 100644
--- a/wgpu-hal/Cargo.toml
+++ b/wgpu-hal/Cargo.toml
@@ -103,7 +103,7 @@ d3d12 = { version = "0.7", features = ["libloading"], optional = true }
 # backend: Metal
 block = { version = "0.1", optional = true }
 
-metal = { version = "0.26.0", default_features = false }
+metal = "0.26.0"
 objc = "0.2.5"
 core-graphics-types = "0.1"
 
@@ -120,14 +120,14 @@ android_system_properties = "0.1.1"
 
 [dependencies.naga]
 git = "https://github.com/gfx-rs/naga"
-rev = "bac2d82a430fbfcf100ee22b7c3bc12f3d593079"
+rev = "cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c"
 version = "0.13.0"
 features = ["clone"]
 
 # DEV dependencies
 [dev-dependencies.naga]
 git = "https://github.com/gfx-rs/naga"
-rev = "bac2d82a430fbfcf100ee22b7c3bc12f3d593079"
+rev = "cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c"
 version = "0.13.0"
 features = ["wgsl-in"]
 
diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
index 61c1584a25..5518cdaf4b 100644
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@@ -86,7 +86,7 @@ struct Example<A: hal::Api> {
 }
 
 impl<A: hal::Api> Example<A> {
-    fn init(window: &winit::window::Window) -> Result<Self, hal::InstanceError> {
+    fn init(window: &winit::window::Window) -> Result<Self, Box<dyn std::error::Error>> {
         let instance_desc = hal::InstanceDescriptor {
             name: "example",
             flags: if cfg!(debug_assertions) {
@@ -96,6 +96,7 @@ impl<A: hal::Api> Example<A> {
             },
             // Can't rely on having DXC available, so use FXC instead
             dx12_shader_compiler: wgt::Dx12Compiler::Fxc,
+            gles_minor_version: wgt::Gles3MinorVersion::default(),
         };
         let instance = unsafe { A::Instance::init(&instance_desc)? };
         let mut surface = unsafe {
@@ -107,13 +108,13 @@ impl<A: hal::Api> Example<A> {
         let (adapter, capabilities) = unsafe {
             let mut adapters = instance.enumerate_adapters();
             if adapters.is_empty() {
-                return Err(hal::InstanceError);
+                return Err("no adapters found".into());
             }
             let exposed = adapters.swap_remove(0);
             (exposed.adapter, exposed.capabilities)
         };
-        let surface_caps =
-            unsafe { adapter.surface_capabilities(&surface) }.ok_or(hal::InstanceError)?;
+        let surface_caps = unsafe { adapter.surface_capabilities(&surface) }
+            .ok_or("failed to get surface capabilities")?;
         log::info!("Surface caps: {:#?}", surface_caps);
 
         let hal::OpenDevice { device, mut queue } = unsafe {
@@ -679,6 +680,8 @@ impl<A: hal::Api> Example<A> {
             })],
             depth_stencil_attachment: None,
             multiview: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
         };
         unsafe {
             ctx.encoder.begin_render_pass(&pass_desc);
diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs
index 1bf2ead0f5..0e89783aa1 100644
--- a/wgpu-hal/examples/raw-gles.rs
+++ b/wgpu-hal/examples/raw-gles.rs
@@ -175,6 +175,8 @@ fn fill_screen(exposed: &hal::ExposedAdapter<hal::api::Gles>, width: u32, height
         })],
         depth_stencil_attachment: None,
         multiview: None,
+        timestamp_writes: None,
+        occlusion_query_set: None,
     };
     unsafe {
         encoder.begin_encoding(None).unwrap();
diff --git a/wgpu-hal/src/auxil/dxgi/factory.rs b/wgpu-hal/src/auxil/dxgi/factory.rs
index 123ca4933e..7ae6e745f0 100644
--- a/wgpu-hal/src/auxil/dxgi/factory.rs
+++ b/wgpu-hal/src/auxil/dxgi/factory.rs
@@ -96,7 +96,9 @@ pub fn create_factory(
     required_factory_type: DxgiFactoryType,
     instance_flags: crate::InstanceFlags,
 ) -> Result<(d3d12::DxgiLib, d3d12::DxgiFactory), crate::InstanceError> {
-    let lib_dxgi = d3d12::DxgiLib::new().map_err(|_| crate::InstanceError)?;
+    let lib_dxgi = d3d12::DxgiLib::new().map_err(|e| {
+        crate::InstanceError::with_source(String::from("failed to load dxgi.dll"), e)
+    })?;
 
     let mut factory_flags = d3d12::FactoryCreationFlags::empty();
 
@@ -128,18 +130,22 @@ pub fn create_factory(
             Ok(factory) => Some(factory),
             // We hard error here as we _should have_ been able to make a factory4 but couldn't.
             Err(err) => {
-                log::error!("Failed to create IDXGIFactory4: {}", err);
-                return Err(crate::InstanceError);
+                // err is a Cow<str>, not an Error implementor
+                return Err(crate::InstanceError::new(format!(
+                    "failed to create IDXGIFactory4: {err:?}"
+                )));
             }
         },
         // If we require factory4, hard error.
         Err(err) if required_factory_type == DxgiFactoryType::Factory4 => {
-            log::error!("IDXGIFactory1 creation function not found: {:?}", err);
-            return Err(crate::InstanceError);
+            return Err(crate::InstanceError::with_source(
+                String::from("IDXGIFactory1 creation function not found"),
+                err,
+            ));
         }
         // If we don't print it to info as all win7 will hit this case.
         Err(err) => {
-            log::info!("IDXGIFactory1 creation function not found: {:?}", err);
+            log::info!("IDXGIFactory1 creation function not found: {err:?}");
             None
         }
     };
@@ -153,8 +159,10 @@ pub fn create_factory(
             }
             // If we require factory6, hard error.
             Err(err) if required_factory_type == DxgiFactoryType::Factory6 => {
-                log::warn!("Failed to cast IDXGIFactory4 to IDXGIFactory6: {:?}", err);
-                return Err(crate::InstanceError);
+                // err is a Cow<str>, not an Error implementor
+                return Err(crate::InstanceError::new(format!(
+                    "failed to cast IDXGIFactory4 to IDXGIFactory6: {err:?}"
+                )));
             }
             // If we don't print it to info.
             Err(err) => {
@@ -169,14 +177,18 @@ pub fn create_factory(
         Ok(pair) => match pair.into_result() {
             Ok(factory) => factory,
             Err(err) => {
-                log::error!("Failed to create IDXGIFactory1: {}", err);
-                return Err(crate::InstanceError);
+                // err is a Cow<str>, not an Error implementor
+                return Err(crate::InstanceError::new(format!(
+                    "failed to create IDXGIFactory1: {err:?}"
+                )));
             }
         },
         // We always require at least factory1, so hard error
         Err(err) => {
-            log::error!("IDXGIFactory1 creation function not found: {:?}", err);
-            return Err(crate::InstanceError);
+            return Err(crate::InstanceError::with_source(
+                String::from("IDXGIFactory1 creation function not found"),
+                err,
+            ));
         }
     };
 
@@ -188,8 +200,10 @@ pub fn create_factory(
         }
         // If we require factory2, hard error.
         Err(err) if required_factory_type == DxgiFactoryType::Factory2 => {
-            log::warn!("Failed to cast IDXGIFactory1 to IDXGIFactory2: {:?}", err);
-            return Err(crate::InstanceError);
+            // err is a Cow<str>, not an Error implementor
+            return Err(crate::InstanceError::new(format!(
+                "failed to cast IDXGIFactory1 to IDXGIFactory2: {err:?}"
+            )));
         }
         // If we don't print it to info.
         Err(err) => {
diff --git a/wgpu-hal/src/dx11/adapter.rs b/wgpu-hal/src/dx11/adapter.rs
index a28106a9bb..290a9ade22 100644
--- a/wgpu-hal/src/dx11/adapter.rs
+++ b/wgpu-hal/src/dx11/adapter.rs
@@ -94,7 +94,8 @@ impl super::Adapter {
             | wgt::Features::POLYGON_MODE_LINE
             | wgt::Features::CLEAR_TEXTURE
             | wgt::Features::TEXTURE_FORMAT_16BIT_NORM
-            | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO;
+            | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO
+            | wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER;
         let mut downlevel = wgt::DownlevelFlags::BASE_VERTEX
             | wgt::DownlevelFlags::READ_ONLY_DEPTH_STENCIL
             | wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER
@@ -132,6 +133,8 @@ impl super::Adapter {
             features |= wgt::Features::TIMESTAMP_QUERY;
             features |= wgt::Features::PIPELINE_STATISTICS_QUERY;
             features |= wgt::Features::SHADER_PRIMITIVE_INDEX;
+            features |= wgt::Features::DEPTH32FLOAT_STENCIL8;
+            features |= wgt::Features::RG11B10UFLOAT_RENDERABLE;
         }
 
         if feature_level >= FL10_1 {
@@ -234,6 +237,7 @@ impl super::Adapter {
             max_compute_workgroups_per_dimension,
             // D3D11_BUFFER_DESC represents the buffer size as a 32 bit int.
             max_buffer_size: u32::MAX as u64,
+            max_non_sampler_bindings: u32::MAX,
         };
 
         //
diff --git a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs
index 1c73f3c325..17cd5a22d2 100644
--- a/wgpu-hal/src/dx11/command.rs
+++ b/wgpu-hal/src/dx11/command.rs
@@ -246,7 +246,10 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
         todo!()
     }
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {
+    unsafe fn begin_compute_pass<'a>(
+        &mut self,
+        desc: &crate::ComputePassDescriptor<'a, super::Api>,
+    ) {
         todo!()
     }
 
diff --git a/wgpu-hal/src/dx11/instance.rs b/wgpu-hal/src/dx11/instance.rs
index 1d8c2b51a2..e7a4e2e705 100644
--- a/wgpu-hal/src/dx11/instance.rs
+++ b/wgpu-hal/src/dx11/instance.rs
@@ -8,10 +8,13 @@ impl crate::Instance<super::Api> for super::Instance {
         };
 
         if !enable_dx11 {
-            return Err(crate::InstanceError);
+            return Err(crate::InstanceError::new(String::from(
+                "DX11 support is unstable; set WGPU_UNSTABLE_DX11_BACKEND=1 to enable anyway",
+            )));
         }
 
-        let lib_d3d11 = super::library::D3D11Lib::new().ok_or(crate::InstanceError)?;
+        let lib_d3d11 = super::library::D3D11Lib::new()
+            .ok_or_else(|| crate::InstanceError::new(String::from("failed to load d3d11.dll")))?;
 
         let (lib_dxgi, factory) = auxil::dxgi::factory::create_factory(
             auxil::dxgi::factory::DxgiFactoryType::Factory1,
diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index 27e8e8e05f..02cde913ca 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -242,8 +242,6 @@ impl super::Adapter {
             | wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER
             | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO
             | wgt::Features::POLYGON_MODE_LINE
-            | wgt::Features::POLYGON_MODE_POINT
-            | wgt::Features::VERTEX_WRITABLE_STORAGE
             | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES
             | wgt::Features::TIMESTAMP_QUERY
             | wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES
@@ -259,6 +257,10 @@ impl super::Adapter {
         // write the results there, and issue a bunch of copy commands.
         //| wgt::Features::PIPELINE_STATISTICS_QUERY
 
+        if max_feature_level as u32 >= d3d12::FeatureLevel::L11_1 as u32 {
+            features |= wgt::Features::VERTEX_WRITABLE_STORAGE;
+        }
+
         features.set(
             wgt::Features::CONSERVATIVE_RASTERIZATION,
             options.ConservativeRasterizationTier
@@ -355,7 +357,11 @@ impl super::Adapter {
                     max_compute_workgroup_size_z: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Z,
                     max_compute_workgroups_per_dimension:
                         d3d12_ty::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION,
-                    max_buffer_size: u64::MAX,
+                    // Dx12 does not expose a maximum buffer size in the API.
+                    // This limit is chosen to avoid potential issues with drivers should they internally
+                    // store buffer sizes using 32 bit ints (a situation we have already encountered with vulkan).
+                    max_buffer_size: i32::MAX as u64,
+                    max_non_sampler_bindings: 1_000_000,
                 },
                 alignments: crate::Alignments {
                     buffer_copy_offset: wgt::BufferSize::new(
@@ -377,7 +383,7 @@ impl crate::Adapter<super::Api> for super::Adapter {
     unsafe fn open(
         &self,
         _features: wgt::Features,
-        _limits: &wgt::Limits,
+        limits: &wgt::Limits,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
         let queue = {
             profiling::scope!("ID3D12Device::CreateCommandQueue");
@@ -394,6 +400,7 @@ impl crate::Adapter<super::Api> for super::Adapter {
         let device = super::Device::new(
             self.device.clone(),
             queue.clone(),
+            limits,
             self.private_caps,
             &self.library,
             self.dx12_shader_compiler.clone(),
diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs
index 86fb7e4fe1..719e63a36f 100644
--- a/wgpu-hal/src/dx12/command.rs
+++ b/wgpu-hal/src/dx12/command.rs
@@ -228,6 +228,21 @@ impl super::CommandEncoder {
         self.pass.layout = layout.clone();
         self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1;
     }
+
+    fn write_pass_end_timestamp_if_requested(&mut self) {
+        if let Some((query_set_raw, index)) = self.end_of_pass_timer_query.take() {
+            use crate::CommandEncoder as _;
+            unsafe {
+                self.write_timestamp(
+                    &crate::dx12::QuerySet {
+                        raw: query_set_raw,
+                        raw_ty: d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP,
+                    },
+                    index,
+                );
+            }
+        }
+    }
 }
 
 impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
@@ -656,6 +671,19 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
 
     unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) {
         unsafe { self.begin_pass(super::PassKind::Render, desc.label) };
+
+        // Start timestamp if any (before all other commands but after debug marker)
+        if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() {
+            if let Some(index) = timestamp_writes.beginning_of_pass_write_index {
+                unsafe {
+                    self.write_timestamp(timestamp_writes.query_set, index);
+                }
+            }
+            self.end_of_pass_timer_query = timestamp_writes
+                .end_of_pass_write_index
+                .map(|index| (timestamp_writes.query_set.raw.clone(), index));
+        }
+
         let mut color_views = [d3d12::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_ATTACHMENTS];
         for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) {
             if let Some(cat) = cat.as_ref() {
@@ -825,6 +853,8 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
             }
         }
 
+        self.write_pass_end_timestamp_if_requested();
+
         unsafe { self.end_pass() };
     }
 
@@ -1109,10 +1139,25 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
 
     // compute
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {
+    unsafe fn begin_compute_pass<'a>(
+        &mut self,
+        desc: &crate::ComputePassDescriptor<'a, super::Api>,
+    ) {
         unsafe { self.begin_pass(super::PassKind::Compute, desc.label) };
+
+        if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() {
+            if let Some(index) = timestamp_writes.beginning_of_pass_write_index {
+                unsafe {
+                    self.write_timestamp(timestamp_writes.query_set, index);
+                }
+            }
+            self.end_of_pass_timer_query = timestamp_writes
+                .end_of_pass_write_index
+                .map(|index| (timestamp_writes.query_set.raw.clone(), index));
+        }
     }
     unsafe fn end_compute_pass(&mut self) {
+        self.write_pass_end_timestamp_if_requested();
         unsafe { self.end_pass() };
     }
 
diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs
index 7b39e98ad2..8b44ae9c4b 100644
--- a/wgpu-hal/src/dx12/conv.rs
+++ b/wgpu-hal/src/dx12/conv.rs
@@ -213,12 +213,12 @@ pub fn map_topology(
 
 pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE {
     match mode {
-        wgt::PolygonMode::Point => {
-            log::error!("Point rasterization is not supported");
-            d3d12_ty::D3D12_FILL_MODE_WIREFRAME
-        }
-        wgt::PolygonMode::Line => d3d12_ty::D3D12_FILL_MODE_WIREFRAME,
         wgt::PolygonMode::Fill => d3d12_ty::D3D12_FILL_MODE_SOLID,
+        wgt::PolygonMode::Line => d3d12_ty::D3D12_FILL_MODE_WIREFRAME,
+        wgt::PolygonMode::Point => panic!(
+            "{:?} is not enabled for this backend",
+            wgt::Features::POLYGON_MODE_POINT
+        ),
     }
 }
 
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index ad991d1227..4ad43cc165 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -16,6 +16,7 @@ impl super::Device {
     pub(super) fn new(
         raw: d3d12::Device,
         present_queue: d3d12::CommandQueue,
+        limits: &wgt::Limits,
         private_caps: super::PrivateCapabilities,
         library: &Arc<d3d12::D3D12Lib>,
         dx12_shader_compiler: wgt::Dx12Compiler,
@@ -92,7 +93,7 @@ impl super::Device {
         };
 
         // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1
-        let capacity_views = 1_000_000;
+        let capacity_views = limits.max_non_sampler_bindings as u64;
         let capacity_samplers = 2_048;
 
         let shared = super::DeviceShared {
@@ -180,7 +181,10 @@ impl super::Device {
         })
     }
 
-    pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> {
+    // Blocks until the dedicated present queue is finished with all of its work.
+    //
+    // Once this method completes, the surface is able to be resized or deleted.
+    pub(super) unsafe fn wait_for_present_queue_idle(&self) -> Result<(), crate::DeviceError> {
         let cur_value = self.idler.fence.get_value();
         if cur_value == !0 {
             return Err(crate::DeviceError::Lost);
@@ -644,6 +648,7 @@ impl crate::Device<super::Api> for super::Device {
             free_lists: Vec::new(),
             pass: super::PassState::new(),
             temp: super::Temp::default(),
+            end_of_pass_timer_query: None,
         })
     }
     unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) {
diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs
index 208d2179f7..32d6f1690c 100644
--- a/wgpu-hal/src/dx12/instance.rs
+++ b/wgpu-hal/src/dx12/instance.rs
@@ -12,7 +12,9 @@ impl Drop for super::Instance {
 
 impl crate::Instance<super::Api> for super::Instance {
     unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> {
-        let lib_main = d3d12::D3D12Lib::new().map_err(|_| crate::InstanceError)?;
+        let lib_main = d3d12::D3D12Lib::new().map_err(|e| {
+            crate::InstanceError::with_source(String::from("failed to load d3d12.dll"), e)
+        })?;
 
         if desc.flags.contains(crate::InstanceFlags::VALIDATION) {
             // Enable debug layer
@@ -95,7 +97,9 @@ impl crate::Instance<super::Api> for super::Instance {
                 supports_allow_tearing: self.supports_allow_tearing,
                 swap_chain: None,
             }),
-            _ => Err(crate::InstanceError),
+            _ => Err(crate::InstanceError::new(format!(
+                "window handle {window_handle:?} is not a Win32 handle"
+            ))),
         }
     }
     unsafe fn destroy_surface(&self, _surface: super::Surface) {
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 74e8208502..a231619512 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -347,6 +347,10 @@ pub struct CommandEncoder {
     free_lists: Vec<d3d12::GraphicsCommandList>,
     pass: PassState,
     temp: Temp,
+
+    /// If set, the end of the next render/compute pass will write a timestamp at
+    /// the given pool & location.
+    end_of_pass_timer_query: Option<(d3d12::QueryHeap, u32)>,
 }
 
 unsafe impl Send for CommandEncoder {}
@@ -609,19 +613,23 @@ impl crate::Surface<Api> for Surface {
         let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
         // We always set ALLOW_TEARING on the swapchain no matter
         // what kind of swapchain we want because ResizeBuffers
-        // cannot change if ALLOW_TEARING is applied to the swapchain.
+        // cannot change the swapchain's ALLOW_TEARING flag.
+        //
+        // This does not change the behavior of the swapchain; it just
+        // allows present calls to use tearing.
         if self.supports_allow_tearing {
             flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
         }
 
+        // While `configure`'s contract ensures that no work on the GPU's main queues
+        // is in flight, we still need to wait for the present queue to be idle.
+        unsafe { device.wait_for_present_queue_idle() }?;
+
         let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format);
 
         let swap_chain = match self.swap_chain.take() {
             //Note: this path doesn't properly re-initialize all of the things
             Some(sc) => {
-                // can't have image resources in flight used by GPU
-                let _ = unsafe { device.wait_idle() };
-
                 let raw = unsafe { sc.release_resources() };
                 let result = unsafe {
                     raw.ResizeBuffers(
@@ -769,12 +777,16 @@ impl crate::Surface<Api> for Surface {
     }
 
     unsafe fn unconfigure(&mut self, device: &Device) {
-        if let Some(mut sc) = self.swap_chain.take() {
+        if let Some(sc) = self.swap_chain.take() {
             unsafe {
-                let _ = sc.wait(None);
-                //TODO: this shouldn't be needed,
-                // but it complains that the queue is still used otherwise
-                let _ = device.wait_idle();
+                // While `unconfigure`'s contract ensures that no work on the GPU's main queues
+                // is in flight, we still need to wait for the present queue to be idle.
+
+                // The major failure mode of this function is device loss;
+                // if we have lost the device, we should just continue
+                // cleaning up, without error.
+                let _ = device.wait_for_present_queue_idle();
+
                 let _raw = sc.release_resources();
             }
         }
@@ -833,6 +845,13 @@ impl crate::Queue<Api> for Queue {
                 .signal(&fence.raw, value)
                 .into_device_result("Signal fence")?;
         }
+
+        // Note the lack of synchronization here between the main Direct queue
+        // and the dedicated presentation queue. This is automatically handled
+        // by the D3D runtime by detecting uses of resources derived from the
+        // swapchain. This automatic detection is why you cannot use a swapchain
+        // as a UAV in D3D12.
+
         Ok(())
     }
     unsafe fn present(
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 1497acad91..b72ea0aade 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -403,7 +403,7 @@ impl crate::CommandEncoder<Api> for Encoder {
 
     // compute
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {}
+    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<Api>) {}
     unsafe fn end_compute_pass(&mut self) {}
 
     unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {}
diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs
index 5594dfa237..348f62bc03 100644
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@@ -43,8 +43,9 @@ impl super::Adapter {
                     src = &src[pos + es_sig.len()..];
                 }
                 None => {
-                    log::warn!("ES not found in '{}'", src);
-                    return Err(crate::InstanceError);
+                    return Err(crate::InstanceError::new(format!(
+                        "OpenGL version {src:?} does not contain 'ES'"
+                    )));
                 }
             }
         };
@@ -86,10 +87,9 @@ impl super::Adapter {
                 },
                 minor,
             )),
-            _ => {
-                log::warn!("Unable to extract the version from '{}'", version);
-                Err(crate::InstanceError)
-            }
+            _ => Err(crate::InstanceError::new(format!(
+                "unable to extract OpenGL version from {version:?}"
+            ))),
         }
     }
 
@@ -576,6 +576,7 @@ impl super::Adapter {
             },
             max_compute_workgroups_per_dimension,
             max_buffer_size: i32::MAX as u64,
+            max_non_sampler_bindings: std::u32::MAX,
         };
 
         let mut workarounds = super::Workarounds::empty();
@@ -606,7 +607,7 @@ impl super::Adapter {
         // Drop the GL guard so we can move the context into AdapterShared
         // ( on Wasm the gl handle is just a ref so we tell clippy to allow
         // dropping the ref )
-        #[allow(clippy::drop_ref)]
+        #[cfg_attr(target_arch = "wasm32", allow(clippy::drop_ref))]
         drop(gl);
 
         Some(crate::ExposedAdapter {
@@ -974,27 +975,30 @@ mod tests {
 
     #[test]
     fn test_version_parse() {
-        let error = Err(crate::InstanceError);
-        assert_eq!(Adapter::parse_version("1"), error);
-        assert_eq!(Adapter::parse_version("1."), error);
-        assert_eq!(Adapter::parse_version("1 h3l1o. W0rld"), error);
-        assert_eq!(Adapter::parse_version("1. h3l1o. W0rld"), error);
-        assert_eq!(Adapter::parse_version("1.2.3"), error);
-        assert_eq!(Adapter::parse_version("OpenGL ES 3.1"), Ok((3, 1)));
+        Adapter::parse_version("1").unwrap_err();
+        Adapter::parse_version("1.").unwrap_err();
+        Adapter::parse_version("1 h3l1o. W0rld").unwrap_err();
+        Adapter::parse_version("1. h3l1o. W0rld").unwrap_err();
+        Adapter::parse_version("1.2.3").unwrap_err();
+
+        assert_eq!(Adapter::parse_version("OpenGL ES 3.1").unwrap(), (3, 1));
+        assert_eq!(
+            Adapter::parse_version("OpenGL ES 2.0 Google Nexus").unwrap(),
+            (2, 0)
+        );
+        assert_eq!(Adapter::parse_version("GLSL ES 1.1").unwrap(), (1, 1));
         assert_eq!(
-            Adapter::parse_version("OpenGL ES 2.0 Google Nexus"),
-            Ok((2, 0))
+            Adapter::parse_version("OpenGL ES GLSL ES 3.20").unwrap(),
+            (3, 2)
         );
-        assert_eq!(Adapter::parse_version("GLSL ES 1.1"), Ok((1, 1)));
-        assert_eq!(Adapter::parse_version("OpenGL ES GLSL ES 3.20"), Ok((3, 2)));
         assert_eq!(
             // WebGL 2.0 should parse as OpenGL ES 3.0
-            Adapter::parse_version("WebGL 2.0 (OpenGL ES 3.0 Chromium)"),
-            Ok((3, 0))
+            Adapter::parse_version("WebGL 2.0 (OpenGL ES 3.0 Chromium)").unwrap(),
+            (3, 0)
         );
         assert_eq!(
-            Adapter::parse_version("WebGL GLSL ES 3.00 (OpenGL ES GLSL ES 3.0 Chromium)"),
-            Ok((3, 0))
+            Adapter::parse_version("WebGL GLSL ES 3.00 (OpenGL ES GLSL ES 3.0 Chromium)").unwrap(),
+            (3, 0)
         );
     }
 }
diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs
index 5e3a1c52c8..866211f442 100644
--- a/wgpu-hal/src/gles/command.rs
+++ b/wgpu-hal/src/gles/command.rs
@@ -49,10 +49,7 @@ impl super::CommandBuffer {
 
     fn add_push_constant_data(&mut self, data: &[u32]) -> Range<u32> {
         let data_raw = unsafe {
-            std::slice::from_raw_parts(
-                data.as_ptr() as *const _,
-                data.len() * mem::size_of::<u32>(),
-            )
+            std::slice::from_raw_parts(data.as_ptr() as *const _, mem::size_of_val(data))
         };
         let start = self.data_bytes.len();
         assert!(start < u32::MAX as usize);
@@ -1033,7 +1030,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
 
     // compute
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {
+    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::Api>) {
         if let Some(label) = desc.label {
             let range = self.cmd_buffer.add_marker(label);
             self.cmd_buffer.commands.push(C::PushDebugGroup(range));
diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs
index 86ff3b60b0..dd5d764c6a 100644
--- a/wgpu-hal/src/gles/conv.rs
+++ b/wgpu-hal/src/gles/conv.rs
@@ -279,8 +279,18 @@ pub fn map_primitive_topology(topology: wgt::PrimitiveTopology) -> u32 {
 }
 
 pub(super) fn map_primitive_state(state: &wgt::PrimitiveState) -> super::PrimitiveState {
-    //Note: state.polygon_mode is not supported, see `Features::POLYGON_MODE_LINE` and
-    //`Features::POLYGON_MODE_POINT`
+    match state.polygon_mode {
+        wgt::PolygonMode::Fill => {}
+        wgt::PolygonMode::Line => panic!(
+            "{:?} is not enabled for this backend",
+            wgt::Features::POLYGON_MODE_LINE
+        ),
+        wgt::PolygonMode::Point => panic!(
+            "{:?} is not enabled for this backend",
+            wgt::Features::POLYGON_MODE_POINT
+        ),
+    }
+
     super::PrimitiveState {
         //Note: we are flipping the front face, so that
         // the Y-flip in the generated GLSL keeps the same visibility.
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index cc21f14836..47004724a7 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -527,6 +527,10 @@ impl crate::Device<super::Api> for super::Device {
                     map_flags |= glow::MAP_COHERENT_BIT;
                 }
             }
+            // TODO: may also be required for other calls involving `buffer_sub_data_u8_slice` (e.g. copy buffer to buffer and clear buffer)
+            if desc.usage.intersects(crate::BufferUses::QUERY_RESOLVE) {
+                map_flags |= glow::DYNAMIC_STORAGE_BIT;
+            }
             unsafe { gl.buffer_storage(target, raw_size, None, map_flags) };
         } else {
             assert!(!is_coherent);
@@ -1238,7 +1242,7 @@ impl crate::Device<super::Api> for super::Device {
         Ok(super::QuerySet {
             queries: queries.into_boxed_slice(),
             target: match desc.ty {
-                wgt::QueryType::Occlusion => glow::ANY_SAMPLES_PASSED,
+                wgt::QueryType::Occlusion => glow::ANY_SAMPLES_PASSED_CONSERVATIVE,
                 _ => unimplemented!(),
             },
         })
diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs
index 0c8e754971..d6d3d621f9 100644
--- a/wgpu-hal/src/gles/egl.rs
+++ b/wgpu-hal/src/gles/egl.rs
@@ -283,7 +283,10 @@ fn choose_config(
         }
     }
 
-    Err(crate::InstanceError)
+    // TODO: include diagnostic details that are currently logged
+    Err(crate::InstanceError::new(String::from(
+        "unable to find an acceptable EGL framebuffer configuration",
+    )))
 }
 
 fn gl_debug_message_callback(source: u32, gltype: u32, id: u32, severity: u32, message: &str) {
@@ -482,6 +485,8 @@ struct Inner {
     config: khronos_egl::Config,
     #[cfg_attr(target_os = "emscripten", allow(dead_code))]
     wl_display: Option<*mut raw::c_void>,
+    #[cfg_attr(target_os = "emscripten", allow(dead_code))]
+    force_gles_minor_version: wgt::Gles3MinorVersion,
     /// Method by which the framebuffer should support srgb
     srgb_kind: SrgbFrameBufferKind,
 }
@@ -491,8 +496,14 @@ impl Inner {
         flags: crate::InstanceFlags,
         egl: Arc<EglInstance>,
         display: khronos_egl::Display,
+        force_gles_minor_version: wgt::Gles3MinorVersion,
     ) -> Result<Self, crate::InstanceError> {
-        let version = egl.initialize(display).map_err(|_| crate::InstanceError)?;
+        let version = egl.initialize(display).map_err(|e| {
+            crate::InstanceError::with_source(
+                String::from("failed to initialize EGL display connection"),
+                e,
+            )
+        })?;
         let vendor = egl
             .query_string(Some(display), khronos_egl::VENDOR)
             .unwrap();
@@ -542,9 +553,20 @@ impl Inner {
 
         //TODO: make it so `Device` == EGL Context
         let mut context_attributes = vec![
-            khronos_egl::CONTEXT_CLIENT_VERSION,
+            khronos_egl::CONTEXT_MAJOR_VERSION,
             3, // Request GLES 3.0 or higher
         ];
+
+        if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
+            context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION);
+            context_attributes.push(match force_gles_minor_version {
+                wgt::Gles3MinorVersion::Version0 => 0,
+                wgt::Gles3MinorVersion::Version1 => 1,
+                wgt::Gles3MinorVersion::Version2 => 2,
+                _ => unreachable!(),
+            });
+        }
+
         if flags.contains(crate::InstanceFlags::DEBUG) {
             if version >= (1, 5) {
                 log::info!("\tEGL context: +debug");
@@ -585,8 +607,10 @@ impl Inner {
         let context = match egl.create_context(display, config, None, &context_attributes) {
             Ok(context) => context,
             Err(e) => {
-                log::warn!("unable to create GLES 3.x context: {:?}", e);
-                return Err(crate::InstanceError);
+                return Err(crate::InstanceError::with_source(
+                    String::from("unable to create GLES 3.x context"),
+                    e,
+                ));
             }
         };
 
@@ -609,8 +633,10 @@ impl Inner {
             egl.create_pbuffer_surface(display, config, &attributes)
                 .map(Some)
                 .map_err(|e| {
-                    log::warn!("Error in create_pbuffer_surface: {:?}", e);
-                    crate::InstanceError
+                    crate::InstanceError::with_source(
+                        String::from("error in create_pbuffer_surface"),
+                        e,
+                    )
                 })?
         };
 
@@ -627,6 +653,7 @@ impl Inner {
             config,
             wl_display: None,
             srgb_kind,
+            force_gles_minor_version,
         })
     }
 }
@@ -719,8 +746,10 @@ impl crate::Instance<super::Api> for Instance {
         let egl = match egl_result {
             Ok(egl) => Arc::new(egl),
             Err(e) => {
-                log::info!("Unable to open libEGL: {:?}", e);
-                return Err(crate::InstanceError);
+                return Err(crate::InstanceError::with_source(
+                    String::from("unable to open libEGL"),
+                    e,
+                ));
             }
         };
 
@@ -836,7 +865,7 @@ impl crate::Instance<super::Api> for Instance {
             unsafe { (function)(Some(egl_debug_proc), attributes.as_ptr()) };
         }
 
-        let inner = Inner::create(desc.flags, egl, display)?;
+        let inner = Inner::create(desc.flags, egl, display, desc.gles_minor_version)?;
 
         Ok(Instance {
             wsi: WindowSystemInterface {
@@ -884,8 +913,9 @@ impl crate::Instance<super::Api> for Instance {
                 };
 
                 if ret != 0 {
-                    log::error!("Error returned from ANativeWindow_setBuffersGeometry");
-                    return Err(crate::InstanceError);
+                    return Err(crate::InstanceError::new(format!(
+                        "error {ret} returned from ANativeWindow_setBuffersGeometry",
+                    )));
                 }
             }
             #[cfg(not(target_os = "emscripten"))]
@@ -918,9 +948,12 @@ impl crate::Instance<super::Api> for Instance {
                         )
                         .unwrap();
 
-                    let new_inner =
-                        Inner::create(self.flags, Arc::clone(&inner.egl.instance), display)
-                            .map_err(|_| crate::InstanceError)?;
+                    let new_inner = Inner::create(
+                        self.flags,
+                        Arc::clone(&inner.egl.instance),
+                        display,
+                        inner.force_gles_minor_version,
+                    )?;
 
                     let old_inner = std::mem::replace(inner.deref_mut(), new_inner);
                     inner.wl_display = Some(display_handle.display);
@@ -931,8 +964,9 @@ impl crate::Instance<super::Api> for Instance {
             #[cfg(target_os = "emscripten")]
             (Rwh::Web(_), _) => {}
             other => {
-                log::error!("Unsupported window: {:?}", other);
-                return Err(crate::InstanceError);
+                return Err(crate::InstanceError::new(format!(
+                    "unsupported window: {other:?}"
+                )));
             }
         };
 
diff --git a/wgpu-hal/src/gles/web.rs b/wgpu-hal/src/gles/web.rs
index 49bc5656de..13bce85f84 100644
--- a/wgpu-hal/src/gles/web.rs
+++ b/wgpu-hal/src/gles/web.rs
@@ -25,6 +25,7 @@ impl AdapterContext {
 
 #[derive(Debug)]
 pub struct Instance {
+    /// Set when a canvas is provided, and used to implement [`Instance::enumerate_adapters()`].
     webgl2_context: Mutex<Option<web_sys::WebGl2RenderingContext>>,
 }
 
@@ -65,14 +66,16 @@ impl Instance {
                 // “not supported” could include “insufficient GPU resources” or “the GPU process
                 // previously crashed”. So, we must return it as an `Err` since it could occur
                 // for circumstances outside the application author's control.
-                return Err(crate::InstanceError);
+                return Err(crate::InstanceError::new(String::from(
+                    "canvas.getContext() returned null; webgl2 not available or canvas already in use"
+                )));
             }
             Err(js_error) => {
                 // <https://html.spec.whatwg.org/multipage/canvas.html#dom-canvas-getcontext>
-                // A thrown exception indicates misuse of the canvas state. Ideally we wouldn't
-                // panic in this case, but for now, `InstanceError` conveys no detail, so it
-                // is more informative to panic with a specific message.
-                panic!("canvas.getContext() threw {js_error:?}")
+                // A thrown exception indicates misuse of the canvas state.
+                return Err(crate::InstanceError::new(format!(
+                    "canvas.getContext() threw exception {js_error:?}",
+                )));
             }
         };
 
@@ -82,6 +85,8 @@ impl Instance {
             .dyn_into()
             .expect("canvas context is not a WebGl2RenderingContext");
 
+        // It is not inconsistent to overwrite an existing context, because the only thing that
+        // `self.webgl2_context` is used for is producing the response to `enumerate_adapters()`.
         *self.webgl2_context.lock() = Some(webgl2_context.clone());
 
         Ok(Surface {
@@ -153,7 +158,9 @@ impl crate::Instance<super::Api> for Instance {
 
             self.create_surface_from_canvas(canvas)
         } else {
-            Err(crate::InstanceError)
+            Err(crate::InstanceError::new(format!(
+                "window handle {window_handle:?} is not a web handle"
+            )))
         }
     }
 
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 910e46ca25..f1f4b2109e 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -60,7 +60,7 @@ pub mod dx12;
 /// A dummy API implementation.
 pub mod empty;
 /// GLES API internals.
-#[cfg(all(feature = "gles"))]
+#[cfg(feature = "gles")]
 pub mod gles;
 /// Metal API internals.
 #[cfg(all(feature = "metal", any(target_os = "macos", target_os = "ios")))]
@@ -90,7 +90,7 @@ use std::{
     num::NonZeroU32,
     ops::{Range, RangeInclusive},
     ptr::NonNull,
-    sync::atomic::AtomicBool,
+    sync::{atomic::AtomicBool, Arc},
 };
 
 use bitflags::bitflags;
@@ -118,6 +118,8 @@ pub enum DeviceError {
     OutOfMemory,
     #[error("Device is lost")]
     Lost,
+    #[error("Creation of a resource failed for a reason other than running out of memory.")]
+    ResourceCreationFailed,
 }
 
 #[derive(Clone, Debug, Eq, PartialEq, Error)]
@@ -150,9 +152,42 @@ pub enum SurfaceError {
     Other(&'static str),
 }
 
-#[derive(Clone, Debug, Eq, PartialEq, Error)]
-#[error("Not supported")]
-pub struct InstanceError;
+/// Error occurring while trying to create an instance, or create a surface from an instance;
+/// typically relating to the state of the underlying graphics API or hardware.
+#[derive(Clone, Debug, Error)]
+#[error("{message}")]
+pub struct InstanceError {
+    /// These errors are very platform specific, so do not attempt to encode them as an enum.
+    ///
+    /// This message should describe the problem in sufficient detail to be useful for a
+    /// user-to-developer “why won't this work on my machine” bug report, and otherwise follow
+    /// <https://rust-lang.github.io/api-guidelines/interoperability.html#error-types-are-meaningful-and-well-behaved-c-good-err>.
+    message: String,
+
+    /// Underlying error value, if any is available.
+    #[source]
+    source: Option<Arc<dyn std::error::Error + Send + Sync + 'static>>,
+}
+
+impl InstanceError {
+    #[allow(dead_code)] // may be unused on some platforms
+    pub(crate) fn new(message: String) -> Self {
+        Self {
+            message,
+            source: None,
+        }
+    }
+    #[allow(dead_code)] // may be unused on some platforms
+    pub(crate) fn with_source(
+        message: String,
+        source: impl std::error::Error + Send + Sync + 'static,
+    ) -> Self {
+        Self {
+            message,
+            source: Some(Arc::new(source)),
+        }
+    }
+}
 
 pub trait Api: Clone + Sized {
     type Instance: Instance<Self>;
@@ -192,12 +227,28 @@ pub trait Instance<A: Api>: Sized + WasmNotSend + WasmNotSync {
 }
 
 pub trait Surface<A: Api>: WasmNotSend + WasmNotSync {
+    /// Configures the surface to use the given device.
+    ///
+    /// # Safety
+    ///
+    /// - All gpu work that uses the surface must have been completed.
+    /// - All [`AcquiredSurfaceTexture`]s must have been destroyed.
+    /// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed.
+    /// - All surfaces created using other devices must have been unconfigured before this call.
     unsafe fn configure(
         &mut self,
         device: &A::Device,
         config: &SurfaceConfiguration,
     ) -> Result<(), SurfaceError>;
 
+    /// Unconfigures the surface on the given device.
+    ///
+    /// # Safety
+    ///
+    /// - All gpu work that uses the surface must have been completed.
+    /// - All [`AcquiredSurfaceTexture`]s must have been destroyed.
+    /// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed.
+    /// - The surface must have been configured on the given device.
     unsafe fn unconfigure(&mut self, device: &A::Device);
 
     /// Returns the next texture to be presented by the swapchain for drawing
@@ -463,7 +514,13 @@ pub trait CommandEncoder<A: Api>: WasmNotSend + WasmNotSync + fmt::Debug {
 
     // queries
 
+    /// # Safety
+    ///
+    /// - If `set` is an occlusion query set, it must be the same one as used in the [`RenderPassDescriptor::occlusion_query_set`] parameter.
     unsafe fn begin_query(&mut self, set: &A::QuerySet, index: u32);
+    /// # Safety
+    ///
+    /// - If `set` is an occlusion query set, it must be the same one as used in the [`RenderPassDescriptor::occlusion_query_set`] parameter.
     unsafe fn end_query(&mut self, set: &A::QuerySet, index: u32);
     unsafe fn write_timestamp(&mut self, set: &A::QuerySet, index: u32);
     unsafe fn reset_queries(&mut self, set: &A::QuerySet, range: Range<u32>);
@@ -542,7 +599,7 @@ pub trait CommandEncoder<A: Api>: WasmNotSend + WasmNotSync + fmt::Debug {
     // compute passes
 
     // Begins a compute pass, clears all active bindings.
-    unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor);
+    unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<A>);
     unsafe fn end_compute_pass(&mut self);
 
     unsafe fn set_compute_pipeline(&mut self, pipeline: &A::ComputePipeline);
@@ -718,6 +775,8 @@ bitflags::bitflags! {
         const STORAGE_READ_WRITE = 1 << 8;
         /// The indirect or count buffer in a indirect draw or dispatch.
         const INDIRECT = 1 << 9;
+        /// A buffer used to store query results.
+        const QUERY_RESOLVE = 1 << 10;
         /// The combination of states that a buffer may be in _at the same time_.
         const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() |
             Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() |
@@ -777,6 +836,7 @@ pub struct InstanceDescriptor<'a> {
     pub name: &'a str,
     pub flags: InstanceFlags,
     pub dx12_shader_compiler: wgt::Dx12Compiler,
+    pub gles_minor_version: wgt::Gles3MinorVersion,
 }
 
 #[derive(Clone, Debug)]
@@ -1262,6 +1322,24 @@ pub struct DepthStencilAttachment<'a, A: Api> {
     pub clear_value: (f32, u32),
 }
 
+#[derive(Debug)]
+pub struct RenderPassTimestampWrites<'a, A: Api> {
+    pub query_set: &'a A::QuerySet,
+    pub beginning_of_pass_write_index: Option<u32>,
+    pub end_of_pass_write_index: Option<u32>,
+}
+
+// Rust gets confused about the impl requirements for `A`
+impl<A: Api> Clone for RenderPassTimestampWrites<'_, A> {
+    fn clone(&self) -> Self {
+        Self {
+            query_set: self.query_set,
+            beginning_of_pass_write_index: self.beginning_of_pass_write_index,
+            end_of_pass_write_index: self.end_of_pass_write_index,
+        }
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct RenderPassDescriptor<'a, A: Api> {
     pub label: Label<'a>,
@@ -1270,11 +1348,32 @@ pub struct RenderPassDescriptor<'a, A: Api> {
     pub color_attachments: &'a [Option<ColorAttachment<'a, A>>],
     pub depth_stencil_attachment: Option<DepthStencilAttachment<'a, A>>,
     pub multiview: Option<NonZeroU32>,
+    pub timestamp_writes: Option<RenderPassTimestampWrites<'a, A>>,
+    pub occlusion_query_set: Option<&'a A::QuerySet>,
+}
+
+#[derive(Debug)]
+pub struct ComputePassTimestampWrites<'a, A: Api> {
+    pub query_set: &'a A::QuerySet,
+    pub beginning_of_pass_write_index: Option<u32>,
+    pub end_of_pass_write_index: Option<u32>,
+}
+
+// Rust gets confused about the impl requirements for `A`
+impl<A: Api> Clone for ComputePassTimestampWrites<'_, A> {
+    fn clone(&self) -> Self {
+        Self {
+            query_set: self.query_set,
+            beginning_of_pass_write_index: self.beginning_of_pass_write_index,
+            end_of_pass_write_index: self.end_of_pass_write_index,
+        }
+    }
 }
 
 #[derive(Clone, Debug)]
-pub struct ComputePassDescriptor<'a> {
+pub struct ComputePassDescriptor<'a, A: Api> {
     pub label: Label<'a>,
+    pub timestamp_writes: Option<ComputePassTimestampWrites<'a, A>>,
 }
 
 /// Stores if any API validation error has occurred in this process
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
index c5e6316c19..bc90954b35 100644
--- a/wgpu-hal/src/metal/adapter.rs
+++ b/wgpu-hal/src/metal/adapter.rs
@@ -27,6 +27,33 @@ impl crate::Adapter<super::Api> for super::Adapter {
             .device
             .lock()
             .new_command_queue_with_max_command_buffer_count(MAX_COMMAND_BUFFERS);
+
+        // Acquiring the meaning of timestamp ticks is hard with Metal!
+        // The only thing there is is a method correlating cpu & gpu timestamps (`device.sample_timestamps`).
+        // Users are supposed to call this method twice and calculate the difference,
+        // see "Converting GPU Timestamps into CPU Time":
+        // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time
+        // Not only does this mean we get an approximate value, this is also *very slow*!
+        // Chromium opted to solve this using a linear regression that they stop at some point
+        // https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/dawn/src/dawn/native/metal/DeviceMTL.mm;drc=76be2f9f117654f3fe4faa477b0445114fccedda;bpv=0;bpt=1;l=46
+        // Generally, the assumption is that timestamp values aren't changing over time; after all, all other APIs provide stable values.
+        //
+        // We should do as Chromium does for the general case, but this requires quite some state tracking
+        // and doesn't even provide perfectly accurate values, especially at the start of the application when
+        // we didn't have the chance to sample a lot of values just yet.
+        //
+        // So instead, we're doing the dangerous but easy thing and use our "knowledge" of timestamps
+        // conversions on different devices, after all Metal isn't supported on that many ;)
+        // Based on:
+        // * https://github.com/gfx-rs/wgpu/pull/2528
+        // * https://github.com/gpuweb/gpuweb/issues/1325#issuecomment-761041326
+        let timestamp_period = if self.shared.device.lock().name().starts_with("Intel") {
+            83.333
+        } else {
+            // Known for Apple Silicon (at least M1 & M2, iPad Pro 2018) and AMD GPUs.
+            1.0
+        };
+
         Ok(crate::OpenDevice {
             device: super::Device {
                 shared: Arc::clone(&self.shared),
@@ -34,6 +61,7 @@ impl crate::Adapter<super::Api> for super::Adapter {
             },
             queue: super::Queue {
                 raw: Arc::new(Mutex::new(queue)),
+                timestamp_period,
             },
         })
     }
@@ -745,6 +773,13 @@ impl super::PrivateCapabilities {
             } else {
                 None
             },
+            support_timestamp_query: version.at_least((11, 0), (14, 0), os_is_mac)
+                && device
+                    .supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary),
+            support_timestamp_query_in_passes: version.at_least((11, 0), (14, 0), os_is_mac)
+                && device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary)
+                && device
+                    .supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary),
         }
     }
 
@@ -772,6 +807,12 @@ impl super::PrivateCapabilities {
             | F::DEPTH32FLOAT_STENCIL8
             | F::MULTI_DRAW_INDIRECT;
 
+        features.set(F::TIMESTAMP_QUERY, self.support_timestamp_query);
+        // TODO: Not yet implemented.
+        // features.set(
+        //     F::TIMESTAMP_QUERY_INSIDE_PASSES,
+        //     self.support_timestamp_query_in_passes,
+        // );
         features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc);
         features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr);
         features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc);
@@ -863,6 +904,7 @@ impl super::PrivateCapabilities {
                 max_compute_workgroup_size_z: self.max_threads_per_group,
                 max_compute_workgroups_per_dimension: 0xFFFF,
                 max_buffer_size: self.max_buffer_size,
+                max_non_sampler_bindings: std::u32::MAX,
             },
             alignments: crate::Alignments {
                 buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(),
diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs
index 34712859ef..cc737fd228 100644
--- a/wgpu-hal/src/metal/command.rs
+++ b/wgpu-hal/src/metal/command.rs
@@ -1,4 +1,5 @@
 use super::{conv, AsNative};
+use crate::CommandEncoder as _;
 use std::{borrow::Cow, mem, ops::Range};
 
 // has to match `Temp::binding_sizes`
@@ -321,7 +322,18 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
             _ => {}
         }
     }
-    unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) {}
+    unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) {
+        // TODO: If MTLCounterSamplingPoint::AtDrawBoundary/AtBlitBoundary/AtDispatchBoundary is supported,
+        //       we don't need to insert a new encoder, but can instead use respective current one.
+        //let encoder = self.enter_any().unwrap_or_else(|| self.enter_blit());
+
+        // TODO: Otherwise, we need to create a new blit command encoder with a descriptor that inserts the timestamps.
+        // Note that as of writing creating a new encoder is not exposed by the metal crate.
+        // https://developer.apple.com/documentation/metal/mtlcommandbuffer/3564431-makeblitcommandencoder
+
+        // TODO: Enable respective test in `examples/timestamp-queries/src/tests.rs`.
+    }
+
     unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range<u32>) {
         let encoder = self.enter_blit();
         let raw_range = metal::NSRange {
@@ -339,14 +351,27 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
         _: wgt::BufferSize, // Metal doesn't support queries that are bigger than a single element are not supported
     ) {
         let encoder = self.enter_blit();
-        let size = (range.end - range.start) as u64 * crate::QUERY_SIZE;
-        encoder.copy_from_buffer(
-            &set.raw_buffer,
-            range.start as u64 * crate::QUERY_SIZE,
-            &buffer.raw,
-            offset,
-            size,
-        );
+        match set.ty {
+            wgt::QueryType::Occlusion => {
+                let size = (range.end - range.start) as u64 * crate::QUERY_SIZE;
+                encoder.copy_from_buffer(
+                    &set.raw_buffer,
+                    range.start as u64 * crate::QUERY_SIZE,
+                    &buffer.raw,
+                    offset,
+                    size,
+                );
+            }
+            wgt::QueryType::Timestamp => {
+                let samples = set.counter_sample_buffer.as_ref().unwrap();
+                // `NSRange` is a (location, length) pair, so the second argument must be
+                // the number of queries to resolve rather than the exclusive end index.
+                let sample_range =
+                    metal::NSRange::new(range.start as u64, (range.end - range.start) as u64);
+                encoder.resolve_counters(samples, sample_range, &buffer.raw, offset);
+            }
+            wgt::QueryType::PipelineStatistics(_) => todo!(),
+        }
     }
 
     // render
@@ -361,7 +386,6 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
 
         objc::rc::autoreleasepool(|| {
             let descriptor = metal::RenderPassDescriptor::new();
-            //TODO: set visibility results buffer
 
             for (i, at) in desc.color_attachments.iter().enumerate() {
                 if let Some(at) = at.as_ref() {
@@ -430,6 +454,29 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
                 }
             }
 
+            if let Some(ref timestamp_writes) = desc.timestamp_writes {
+                let sba_descriptor = descriptor.sample_buffer_attachments().object_at(0).unwrap();
+                sba_descriptor.set_sample_buffer(
+                    timestamp_writes
+                        .query_set
+                        .counter_sample_buffer
+                        .as_ref()
+                        .unwrap(),
+                );
+
+                if let Some(start_index) = timestamp_writes.beginning_of_pass_write_index {
+                    sba_descriptor.set_start_of_vertex_sample_index(start_index as _);
+                }
+                if let Some(end_index) = timestamp_writes.end_of_pass_write_index {
+                    sba_descriptor.set_end_of_fragment_sample_index(end_index as _);
+                }
+            }
+
+            if let Some(occlusion_query_set) = desc.occlusion_query_set {
+                descriptor
+                    .set_visibility_result_buffer(Some(occlusion_query_set.raw_buffer.as_ref()))
+            }
+
             let raw = self.raw_cmd_buf.as_ref().unwrap();
             let encoder = raw.new_render_command_encoder(descriptor);
             if let Some(label) = desc.label {
@@ -910,18 +957,44 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
 
     // compute
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {
+    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::Api>) {
         self.begin_pass();
 
-        let raw = self.raw_cmd_buf.as_ref().unwrap();
         debug_assert!(self.state.blit.is_none());
         debug_assert!(self.state.compute.is_none());
         debug_assert!(self.state.render.is_none());
+
+        let raw = self.raw_cmd_buf.as_ref().unwrap();
+
         objc::rc::autoreleasepool(|| {
-            let encoder = raw.new_compute_command_encoder();
+            let descriptor = metal::ComputePassDescriptor::new();
+
+            if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() {
+                let sba_descriptor = descriptor
+                    .sample_buffer_attachments()
+                    .object_at(0 as _)
+                    .unwrap();
+                sba_descriptor.set_sample_buffer(
+                    timestamp_writes
+                        .query_set
+                        .counter_sample_buffer
+                        .as_ref()
+                        .unwrap(),
+                );
+
+                if let Some(start_index) = timestamp_writes.beginning_of_pass_write_index {
+                    sba_descriptor.set_start_of_encoder_sample_index(start_index as _);
+                }
+                if let Some(end_index) = timestamp_writes.end_of_pass_write_index {
+                    sba_descriptor.set_end_of_encoder_sample_index(end_index as _);
+                }
+            }
+
+            let encoder = raw.compute_command_encoder_with_descriptor(descriptor);
             if let Some(label) = desc.label {
                 encoder.set_label(label);
             }
+
             self.state.compute = Some(encoder.to_owned());
         });
     }
@@ -982,3 +1055,20 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
         encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size);
     }
 }
+
+impl Drop for super::CommandEncoder {
+    fn drop(&mut self) {
+        // Metal raises an assert when a MTLCommandEncoder is deallocated without a call
+        // to endEncoding. This isn't documented in the general case at
+        // https://developer.apple.com/documentation/metal/mtlcommandencoder, but for the
+        // more-specific MTLComputeCommandEncoder it is stated as a requirement at
+        // https://developer.apple.com/documentation/metal/mtlcomputecommandencoder. It
+        // appears to be a requirement for all MTLCommandEncoder objects. Failing to call
+        // endEncoding causes a crash with the message 'Command encoder released without
+        // endEncoding'. To prevent this, we explicitly call discard_encoding, which
+        // calls end_encoding on any still-held metal::CommandEncoders.
+        unsafe {
+            self.discard_encoding();
+        }
+    }
+}
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index c3166d26ab..475332b76d 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -1098,11 +1098,51 @@ impl crate::Device<super::Api> for super::Device {
                     }
                     Ok(super::QuerySet {
                         raw_buffer,
+                        counter_sample_buffer: None,
                         ty: desc.ty,
                     })
                 }
-                wgt::QueryType::Timestamp | wgt::QueryType::PipelineStatistics(_) => {
-                    Err(crate::DeviceError::OutOfMemory)
+                wgt::QueryType::Timestamp => {
+                    let size = desc.count as u64 * crate::QUERY_SIZE;
+                    let device = self.shared.device.lock();
+                    let destination_buffer =
+                        device.new_buffer(size, metal::MTLResourceOptions::empty());
+
+                    let csb_desc = metal::CounterSampleBufferDescriptor::new();
+                    csb_desc.set_storage_mode(metal::MTLStorageMode::Shared);
+                    csb_desc.set_sample_count(desc.count as _);
+                    if let Some(label) = desc.label {
+                        csb_desc.set_label(label);
+                    }
+
+                    let counter_sets = device.counter_sets();
+                    let timestamp_counter =
+                        match counter_sets.iter().find(|cs| cs.name() == "timestamp") {
+                            Some(counter) => counter,
+                            None => {
+                                log::error!("Failed to obtain timestamp counter set.");
+                                return Err(crate::DeviceError::ResourceCreationFailed);
+                            }
+                        };
+                    csb_desc.set_counter_set(timestamp_counter);
+
+                    let counter_sample_buffer =
+                        match device.new_counter_sample_buffer_with_descriptor(&csb_desc) {
+                            Ok(buffer) => buffer,
+                            Err(err) => {
+                                log::error!("Failed to create counter sample buffer: {:?}", err);
+                                return Err(crate::DeviceError::ResourceCreationFailed);
+                            }
+                        };
+
+                    Ok(super::QuerySet {
+                        raw_buffer: destination_buffer,
+                        counter_sample_buffer: Some(counter_sample_buffer),
+                        ty: desc.ty,
+                    })
+                }
+                _ => {
+                    todo!()
                 }
             }
         })
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index 6611f29548..76f57002ff 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -100,7 +100,9 @@ impl crate::Instance<Api> for Instance {
             raw_window_handle::RawWindowHandle::AppKit(handle) => Ok(unsafe {
                 Surface::from_view(handle.ns_view, Some(&self.managed_metal_layer_delegate))
             }),
-            _ => Err(crate::InstanceError),
+            _ => Err(crate::InstanceError::new(format!(
+                "window handle {window_handle:?} is not a Metal-compatible handle"
+            ))),
         }
     }
 
@@ -237,6 +239,8 @@ struct PrivateCapabilities {
     supports_preserve_invariance: bool,
     supports_shader_primitive_index: bool,
     has_unified_memory: Option<bool>,
+    support_timestamp_query: bool,
+    support_timestamp_query_in_passes: bool,
 }
 
 #[derive(Clone, Debug)]
@@ -285,18 +289,21 @@ pub struct Adapter {
 
 pub struct Queue {
     raw: Arc<Mutex<metal::CommandQueue>>,
+    timestamp_period: f32,
 }
 
 unsafe impl Send for Queue {}
 unsafe impl Sync for Queue {}
 
 impl Queue {
-    pub unsafe fn queue_from_raw(raw: metal::CommandQueue) -> Self {
+    pub unsafe fn queue_from_raw(raw: metal::CommandQueue, timestamp_period: f32) -> Self {
         Self {
             raw: Arc::new(Mutex::new(raw)),
+            timestamp_period,
         }
     }
 }
+
 pub struct Device {
     shared: Arc<AdapterShared>,
     features: wgt::Features,
@@ -406,8 +413,7 @@ impl crate::Queue<Api> for Queue {
     }
 
     unsafe fn get_timestamp_period(&self) -> f32 {
-        // TODO: This is hard, see https://github.com/gpuweb/gpuweb/issues/1325
-        1.0
+        self.timestamp_period
     }
 }
 
@@ -701,6 +707,8 @@ unsafe impl Sync for ComputePipeline {}
 #[derive(Debug)]
 pub struct QuerySet {
     raw_buffer: metal::Buffer,
+    // Metal has a custom buffer for counters.
+    counter_sample_buffer: Option<metal::CounterSampleBuffer>,
     ty: wgt::QueryType,
 }
 
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 4bac0eede6..bcbab85084 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -755,6 +755,7 @@ impl PhysicalDeviceCapabilities {
             max_compute_workgroup_size_z: max_compute_workgroup_sizes[2],
             max_compute_workgroups_per_dimension,
             max_buffer_size,
+            max_non_sampler_bindings: std::u32::MAX,
         }
     }
 
@@ -983,6 +984,10 @@ impl super::Instance {
                 super::Workarounds::EMPTY_RESOLVE_ATTACHMENT_LISTS,
                 phd_capabilities.properties.vendor_id == db::qualcomm::VENDOR,
             );
+            workarounds.set(
+                super::Workarounds::FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16,
+                phd_capabilities.properties.vendor_id == db::nvidia::VENDOR,
+            );
         };
 
         if phd_capabilities.effective_api_version == vk::API_VERSION_1_0
diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs
index 417367689b..391b754d33 100644
--- a/wgpu-hal/src/vulkan/command.rs
+++ b/wgpu-hal/src/vulkan/command.rs
@@ -45,6 +45,21 @@ impl super::DeviceShared {
     }
 }
 
+impl super::CommandEncoder {
+    fn write_pass_end_timestamp_if_requested(&mut self) {
+        if let Some((query_set, index)) = self.end_of_pass_timer_query.take() {
+            unsafe {
+                self.device.raw.cmd_write_timestamp(
+                    self.active,
+                    vk::PipelineStageFlags::BOTTOM_OF_PIPE,
+                    query_set,
+                    index,
+                );
+            }
+        }
+    }
+}
+
 impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
     unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> {
         if self.free.is_empty() {
@@ -197,15 +212,44 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
     }
 
     unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
-        unsafe {
-            self.device.raw.cmd_fill_buffer(
-                self.active,
-                buffer.raw,
-                range.start,
-                range.end - range.start,
-                0,
-            )
-        };
+        let range_size = range.end - range.start;
+        if self.device.workarounds.contains(
+            super::Workarounds::FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16,
+        ) && range_size >= 4096
+            && range.start % 16 != 0
+        {
+            let rounded_start = wgt::math::align_to(range.start, 16);
+            let prefix_size = rounded_start - range.start;
+
+            unsafe {
+                self.device.raw.cmd_fill_buffer(
+                    self.active,
+                    buffer.raw,
+                    range.start,
+                    prefix_size,
+                    0,
+                )
+            };
+
+            // This will never be zero, as rounding can only add up to 12 bytes, and the total size is at least 4096.
+            let suffix_size = range.end - rounded_start;
+
+            unsafe {
+                self.device.raw.cmd_fill_buffer(
+                    self.active,
+                    buffer.raw,
+                    rounded_start,
+                    suffix_size,
+                    0,
+                )
+            };
+        } else {
+            unsafe {
+                self.device
+                    .raw
+                    .cmd_fill_buffer(self.active, buffer.raw, range.start, range_size, 0)
+            };
+        }
     }
 
     unsafe fn copy_buffer_to_buffer<T>(
@@ -489,6 +533,18 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
             self.rpass_debug_marker_active = true;
         }
 
+        // Start timestamp if any (before all other commands but after debug marker)
+        if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() {
+            if let Some(index) = timestamp_writes.beginning_of_pass_write_index {
+                unsafe {
+                    self.write_timestamp(timestamp_writes.query_set, index);
+                }
+            }
+            self.end_of_pass_timer_query = timestamp_writes
+                .end_of_pass_write_index
+                .map(|index| (timestamp_writes.query_set.raw, index));
+        }
+
         unsafe {
             self.device
                 .raw
@@ -508,10 +564,16 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
     unsafe fn end_render_pass(&mut self) {
         unsafe {
             self.device.raw.cmd_end_render_pass(self.active);
-            if self.rpass_debug_marker_active {
+        }
+
+        // After all other commands but before debug marker, so this is still seen as part of this pass.
+        self.write_pass_end_timestamp_if_requested();
+
+        if self.rpass_debug_marker_active {
+            unsafe {
                 self.end_debug_marker();
-                self.rpass_debug_marker_active = false;
             }
+            self.rpass_debug_marker_active = false;
         }
     }
 
@@ -781,14 +843,27 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
 
     // compute
 
-    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {
+    unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<'_, super::Api>) {
         self.bind_point = vk::PipelineBindPoint::COMPUTE;
         if let Some(label) = desc.label {
             unsafe { self.begin_debug_marker(label) };
             self.rpass_debug_marker_active = true;
         }
+
+        if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() {
+            if let Some(index) = timestamp_writes.beginning_of_pass_write_index {
+                unsafe {
+                    self.write_timestamp(timestamp_writes.query_set, index);
+                }
+            }
+            self.end_of_pass_timer_query = timestamp_writes
+                .end_of_pass_write_index
+                .map(|index| (timestamp_writes.query_set.raw, index));
+        }
     }
     unsafe fn end_compute_pass(&mut self) {
+        self.write_pass_end_timestamp_if_requested();
+
         if self.rpass_debug_marker_active {
             unsafe { self.end_debug_marker() };
             self.rpass_debug_marker_active = false
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 43410be885..cb955e8318 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -1143,7 +1143,7 @@ impl crate::Device<super::Api> for super::Device {
         }
 
         if desc.anisotropy_clamp != 1 {
-            // We only enable anisotropy if it is supported, and wgpu-hal interface guarentees
+            // We only enable anisotropy if it is supported, and wgpu-hal interface guarantees
             // the clamp is in the range [1, 16] which is always supported if anisotropy is.
             vk_info = vk_info
                 .anisotropy_enable(true)
@@ -1188,20 +1188,15 @@ impl crate::Device<super::Api> for super::Device {
             free: Vec::new(),
             discarded: Vec::new(),
             rpass_debug_marker_active: false,
+            end_of_pass_timer_query: None,
         })
     }
     unsafe fn destroy_command_encoder(&self, cmd_encoder: super::CommandEncoder) {
         unsafe {
-            if !cmd_encoder.free.is_empty() {
-                self.shared
-                    .raw
-                    .free_command_buffers(cmd_encoder.raw, &cmd_encoder.free)
-            }
-            if !cmd_encoder.discarded.is_empty() {
-                self.shared
-                    .raw
-                    .free_command_buffers(cmd_encoder.raw, &cmd_encoder.discarded)
-            }
+            // `vkDestroyCommandPool` also frees any command buffers allocated
+            // from that pool, so there's no need to explicitly call
+            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
+            // fields.
             self.shared.raw.destroy_command_pool(cmd_encoder.raw, None);
         }
     }
diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs
index 81ecbaf3e3..18269fff77 100644
--- a/wgpu-hal/src/vulkan/instance.rs
+++ b/wgpu-hal/src/vulkan/instance.rs
@@ -47,6 +47,19 @@ unsafe extern "system" fn debug_utils_messenger_callback(
         return vk::FALSE;
     }
 
+    // Silence Vulkan Validation error "VUID-VkRenderPassBeginInfo-framebuffer-04627"
+    // if the OBS layer is enabled. This is a bug in the OBS layer. As the OBS layer
+    // does not have a version number they increment, there is no way to qualify the
+    // suppression of the error to a specific version of the OBS layer.
+    //
+    // See https://github.com/obsproject/obs-studio/issues/9353
+    const VUID_VKRENDERPASSBEGININFO_FRAMEBUFFER_04627: i32 = 0x45125641;
+    if cd.message_id_number == VUID_VKRENDERPASSBEGININFO_FRAMEBUFFER_04627
+        && user_data.has_obs_layer
+    {
+        return vk::FALSE;
+    }
+
     let level = match message_severity {
         vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE => log::Level::Debug,
         vk::DebugUtilsMessageSeverityFlagsEXT::INFO => log::Level::Info,
@@ -139,12 +152,11 @@ unsafe extern "system" fn debug_utils_messenger_callback(
 }
 
 impl super::Swapchain {
+    /// # Safety
+    ///
+    /// - The device must have been made idle before calling this function.
     unsafe fn release_resources(self, device: &ash::Device) -> Self {
         profiling::scope!("Swapchain::release_resources");
-        {
-            profiling::scope!("vkDeviceWaitIdle");
-            let _ = unsafe { device.device_wait_idle() };
-        };
         unsafe { device.destroy_fence(self.fence, None) };
         self
     }
@@ -173,7 +185,20 @@ impl super::Instance {
         &self.shared
     }
 
-    pub fn required_extensions(
+    /// Return the instance extension names wgpu would like to enable.
+    ///
+    /// Return a vector of the names of instance extensions actually available
+    /// on `entry` that wgpu would like to enable.
+    ///
+    /// The `driver_api_version` argument should be the instance's Vulkan API
+    /// version, as obtained from `vkEnumerateInstanceVersion`. This is the same
+    /// space of values as the `VK_API_VERSION` constants.
+    ///
+    /// Note that wgpu can function without many of these extensions (for
+    /// example, `VK_KHR_wayland_surface` is certainly not going to be available
+    /// everywhere), but if one of these extensions is available at all, wgpu
+    /// assumes that it has been enabled.
+    pub fn desired_extensions(
         entry: &ash::Entry,
         _driver_api_version: u32,
         flags: crate::InstanceFlags,
@@ -181,8 +206,10 @@ impl super::Instance {
         let instance_extensions = entry
             .enumerate_instance_extension_properties(None)
             .map_err(|e| {
-                log::info!("enumerate_instance_extension_properties: {:?}", e);
-                crate::InstanceError
+                crate::InstanceError::with_source(
+                    String::from("enumerate_instance_extension_properties() failed"),
+                    e,
+                )
             })?;
 
         // Check our extensions against the available extensions
@@ -215,8 +242,7 @@ impl super::Instance {
         if cfg!(target_os = "macos") {
             // VK_EXT_metal_surface
             extensions.push(ext::MetalSurface::name());
-            extensions
-                .push(CStr::from_bytes_with_nul(b"VK_KHR_portability_enumeration\0").unwrap());
+            extensions.push(ash::vk::KhrPortabilityEnumerationFn::name());
         }
 
         if flags.contains(crate::InstanceFlags::DEBUG) {
@@ -251,7 +277,7 @@ impl super::Instance {
     ///
     /// - `raw_instance` must be created from `entry`
     /// - `raw_instance` must be created respecting `driver_api_version`, `extensions` and `flags`
-    /// - `extensions` must be a superset of `required_extensions()` and must be created from the
+    /// - `extensions` must be a superset of `desired_extensions()` and must be created from the
     ///   same entry, driver_api_version and flags.
     /// - `android_sdk_version` is ignored and can be `0` for all platforms besides Android
     ///
@@ -354,8 +380,9 @@ impl super::Instance {
         window: vk::Window,
     ) -> Result<super::Surface, crate::InstanceError> {
         if !self.shared.extensions.contains(&khr::XlibSurface::name()) {
-            log::warn!("Vulkan driver does not support VK_KHR_xlib_surface");
-            return Err(crate::InstanceError);
+            return Err(crate::InstanceError::new(String::from(
+                "Vulkan driver does not support VK_KHR_xlib_surface",
+            )));
         }
 
         let surface = {
@@ -379,8 +406,9 @@ impl super::Instance {
         window: vk::xcb_window_t,
     ) -> Result<super::Surface, crate::InstanceError> {
         if !self.shared.extensions.contains(&khr::XcbSurface::name()) {
-            log::warn!("Vulkan driver does not support VK_KHR_xcb_surface");
-            return Err(crate::InstanceError);
+            return Err(crate::InstanceError::new(String::from(
+                "Vulkan driver does not support VK_KHR_xcb_surface",
+            )));
         }
 
         let surface = {
@@ -408,8 +436,9 @@ impl super::Instance {
             .extensions
             .contains(&khr::WaylandSurface::name())
         {
-            log::debug!("Vulkan driver does not support VK_KHR_wayland_surface");
-            return Err(crate::InstanceError);
+            return Err(crate::InstanceError::new(String::from(
+                "Vulkan driver does not support VK_KHR_wayland_surface",
+            )));
         }
 
         let surface = {
@@ -435,8 +464,9 @@ impl super::Instance {
             .extensions
             .contains(&khr::AndroidSurface::name())
         {
-            log::warn!("Vulkan driver does not support VK_KHR_android_surface");
-            return Err(crate::InstanceError);
+            return Err(crate::InstanceError::new(String::from(
+                "Vulkan driver does not support VK_KHR_android_surface",
+            )));
         }
 
         let surface = {
@@ -458,8 +488,9 @@ impl super::Instance {
         hwnd: *mut c_void,
     ) -> Result<super::Surface, crate::InstanceError> {
         if !self.shared.extensions.contains(&khr::Win32Surface::name()) {
-            log::debug!("Vulkan driver does not support VK_KHR_win32_surface");
-            return Err(crate::InstanceError);
+            return Err(crate::InstanceError::new(String::from(
+                "Vulkan driver does not support VK_KHR_win32_surface",
+            )));
         }
 
         let surface = {
@@ -484,8 +515,9 @@ impl super::Instance {
         view: *mut c_void,
     ) -> Result<super::Surface, crate::InstanceError> {
         if !self.shared.extensions.contains(&ext::MetalSurface::name()) {
-            log::warn!("Vulkan driver does not support VK_EXT_metal_surface");
-            return Err(crate::InstanceError);
+            return Err(crate::InstanceError::new(String::from(
+                "Vulkan driver does not support VK_EXT_metal_surface",
+            )));
         }
 
         let layer = unsafe {
@@ -534,20 +566,18 @@ impl crate::Instance<super::Api> for super::Instance {
     unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> {
         use crate::auxil::cstr_from_bytes_until_nul;
 
-        let entry = match unsafe { ash::Entry::load() } {
-            Ok(entry) => entry,
-            Err(err) => {
-                log::info!("Missing Vulkan entry points: {:?}", err);
-                return Err(crate::InstanceError);
-            }
-        };
+        let entry = unsafe { ash::Entry::load() }.map_err(|err| {
+            crate::InstanceError::with_source(String::from("missing Vulkan entry points"), err)
+        })?;
         let driver_api_version = match entry.try_enumerate_instance_version() {
             // Vulkan 1.1+
             Ok(Some(version)) => version,
             Ok(None) => vk::API_VERSION_1_0,
             Err(err) => {
-                log::warn!("try_enumerate_instance_version: {:?}", err);
-                return Err(crate::InstanceError);
+                return Err(crate::InstanceError::with_source(
+                    String::from("try_enumerate_instance_version() failed"),
+                    err,
+                ));
             }
         };
 
@@ -574,11 +604,14 @@ impl crate::Instance<super::Api> for super::Instance {
                 },
             );
 
-        let extensions = Self::required_extensions(&entry, driver_api_version, desc.flags)?;
+        let extensions = Self::desired_extensions(&entry, driver_api_version, desc.flags)?;
 
         let instance_layers = entry.enumerate_instance_layer_properties().map_err(|e| {
             log::info!("enumerate_instance_layer_properties: {:?}", e);
-            crate::InstanceError
+            crate::InstanceError::with_source(
+                String::from("enumerate_instance_layer_properties() failed"),
+                e,
+            )
         })?;
 
         fn find_layer<'layers>(
@@ -593,6 +626,9 @@ impl crate::Instance<super::Api> for super::Instance {
         let nv_optimus_layer = CStr::from_bytes_with_nul(b"VK_LAYER_NV_optimus\0").unwrap();
         let has_nv_optimus = find_layer(&instance_layers, nv_optimus_layer).is_some();
 
+        let obs_layer = CStr::from_bytes_with_nul(b"VK_LAYER_OBS_HOOK\0").unwrap();
+        let has_obs_layer = find_layer(&instance_layers, obs_layer).is_some();
+
         let mut layers: Vec<&'static CStr> = Vec::new();
 
         // Request validation layer if asked.
@@ -609,6 +645,7 @@ impl crate::Instance<super::Api> for super::Instance {
                     .unwrap()
                     .to_owned(),
                     validation_layer_spec_version: layer_properties.spec_version,
+                    has_obs_layer,
                 });
             } else {
                 log::warn!(
@@ -640,6 +677,15 @@ impl crate::Instance<super::Api> for super::Instance {
         #[cfg(not(target_os = "android"))]
         let android_sdk_version = 0;
 
+        let mut flags = vk::InstanceCreateFlags::empty();
+
+        // Avoid VUID-VkInstanceCreateInfo-flags-06559: Only ask the instance to
+        // enumerate incomplete Vulkan implementations (which we need on Mac) if
+        // we managed to find the extension that provides the flag.
+        if extensions.contains(&ash::vk::KhrPortabilityEnumerationFn::name()) {
+            flags |= vk::InstanceCreateFlags::ENUMERATE_PORTABILITY_KHR;
+        }
+
         let vk_instance = {
             let str_pointers = layers
                 .iter()
@@ -650,18 +696,17 @@ impl crate::Instance<super::Api> for super::Instance {
                 })
                 .collect::<Vec<_>>();
 
-            const VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR: u32 = 0x00000001;
             let create_info = vk::InstanceCreateInfo::builder()
-                .flags(vk::InstanceCreateFlags::from_raw(
-                    VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR,
-                ))
+                .flags(flags)
                 .application_info(&app_info)
                 .enabled_layer_names(&str_pointers[..layers.len()])
                 .enabled_extension_names(&str_pointers[layers.len()..]);
 
             unsafe { entry.create_instance(&create_info, None) }.map_err(|e| {
-                log::warn!("create_instance: {:?}", e);
-                crate::InstanceError
+                crate::InstanceError::with_source(
+                    String::from("Entry::create_instance() failed"),
+                    e,
+                )
             })?
         };
 
@@ -717,7 +762,9 @@ impl crate::Instance<super::Api> for super::Instance {
             {
                 self.create_surface_from_view(handle.ui_view)
             }
-            (_, _) => Err(crate::InstanceError),
+            (_, _) => Err(crate::InstanceError::new(format!(
+                "window handle {window_handle:?} is not a Vulkan-compatible handle"
+            ))),
         }
     }
 
@@ -751,13 +798,22 @@ impl crate::Instance<super::Api> for super::Instance {
                 if exposed.info.device_type == wgt::DeviceType::IntegratedGpu
                     && exposed.info.vendor == db::intel::VENDOR
                 {
-                    // See https://gitlab.freedesktop.org/mesa/mesa/-/issues/4688
-                    log::warn!(
-                        "Disabling presentation on '{}' (id {:?}) because of NV Optimus (on Linux)",
-                        exposed.info.name,
-                        exposed.adapter.raw
-                    );
-                    exposed.adapter.private_caps.can_present = false;
+                    // Check whether this is a Mesa driver with a version older than 21.2
+                    if let Some(version) = exposed.info.driver_info.split_once("Mesa ").map(|s| {
+                        s.1.rsplit_once('.')
+                            .map(|v| v.0.parse::<f32>().unwrap_or_default())
+                            .unwrap_or_default()
+                    }) {
+                        if version < 21.2 {
+                            // See https://gitlab.freedesktop.org/mesa/mesa/-/issues/4688
+                            log::warn!(
+                                "Disabling presentation on '{}' (id {:?}) due to NV Optimus and Intel Mesa < v21.2",
+                                exposed.info.name,
+                                exposed.adapter.raw
+                            );
+                            exposed.adapter.private_caps.can_present = false;
+                        }
+                    }
                 }
             }
         }
@@ -772,6 +828,7 @@ impl crate::Surface<super::Api> for super::Surface {
         device: &super::Device,
         config: &crate::SurfaceConfiguration,
     ) -> Result<(), crate::SurfaceError> {
+        // Safety: `configure`'s contract guarantees there are no resources derived from the swapchain in use.
         let old = self
             .swapchain
             .take()
@@ -785,6 +842,7 @@ impl crate::Surface<super::Api> for super::Surface {
 
     unsafe fn unconfigure(&mut self, device: &super::Device) {
         if let Some(sc) = self.swapchain.take() {
+            // Safety: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use.
             let swapchain = unsafe { sc.release_resources(&device.shared.raw) };
             unsafe { swapchain.functor.destroy_swapchain(swapchain.raw, None) };
         }
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 3a0bfd82b9..fe2ee914cd 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -96,6 +96,10 @@ pub struct DebugUtilsMessengerUserData {
 
     /// Validation layer specification version, from `vk::LayerProperties`.
     validation_layer_spec_version: u32,
+
+    /// Whether the OBS layer is present. OBS never increments the version of their layer,
+    /// so there's no reason to have the version.
+    has_obs_layer: bool,
 }
 
 pub struct InstanceShared {
@@ -203,6 +207,28 @@ bitflags::bitflags!(
         /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
         /// to a subpass resolve attachment array. This nulls out that pointer in that case.
         const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
+        /// If the following code returns false, then nvidia will end up filling the wrong range.
+        ///
+        /// ```skip
+        /// fn nvidia_succeeds() -> bool {
+        ///   # let (copy_length, start_offset) = (0, 0);
+        ///     if copy_length >= 4096 {
+        ///         if start_offset % 16 != 0 {
+        ///             if copy_length == 4096 {
+        ///                 return true;
+        ///             }
+        ///             if copy_length % 16 == 0 {
+        ///                 return false;
+        ///             }
+        ///         }
+        ///     }
+        ///     true
+        /// }
+        /// ```
+        ///
+        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
+        /// if they cover a range of 4096 bytes or more.
+        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
     }
 );
 
@@ -400,6 +426,10 @@ pub struct CommandEncoder {
     /// If this is true, the active renderpass enabled a debug span,
     /// and needs to be disabled on renderpass close.
     rpass_debug_marker_active: bool,
+
+    /// If set, the end of the next render/compute pass will write a timestamp at
+    /// the given pool & location.
+    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,
 }
 
 impl fmt::Debug for CommandEncoder {
diff --git a/wgpu-info/src/human.rs b/wgpu-info/src/human.rs
index 66b0e506e2..11d88d955c 100644
--- a/wgpu-info/src/human.rs
+++ b/wgpu-info/src/human.rs
@@ -124,6 +124,7 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
         max_compute_workgroup_size_y,
         max_compute_workgroup_size_z,
         max_compute_workgroups_per_dimension,
+        max_non_sampler_bindings,
     } = limits;
     writeln!(output, "\t\t                        Max Texture Dimension 1d: {max_texture_dimension_1d}")?;
     writeln!(output, "\t\t                        Max Texture Dimension 2d: {max_texture_dimension_2d}")?;
@@ -155,6 +156,10 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
     writeln!(output, "\t\t                    Max Compute Workgroup Size Z: {max_compute_workgroup_size_z}")?;
     writeln!(output, "\t\t            Max Compute Workgroups Per Dimension: {max_compute_workgroups_per_dimension}")?;
 
+    // This one reflects more of a wgpu implementation limitation than a hardware limit
+    // so don't show it here.
+    let _ = max_non_sampler_bindings;
+
     //////////////////////////
     // Downlevel Properties //
     //////////////////////////
diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml
index a782b34dc2..fd0abb0dc9 100644
--- a/wgpu-types/Cargo.toml
+++ b/wgpu-types/Cargo.toml
@@ -42,4 +42,4 @@ web-sys = { version = "0.3.64", features = [
 
 [dev-dependencies]
 serde = { version = "1", features = ["serde_derive"] }
-serde_json = "1.0.96"
+serde_json = "1.0.107"
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index 180dd095ae..c892874afa 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -253,9 +253,14 @@ bitflags::bitflags! {
         /// This is a web and native feature.
         const DEPTH_CLIP_CONTROL = 1 << 0;
         /// Enables use of Timestamp Queries. These queries tell the current gpu timestamp when
-        /// all work before the query is finished. Call [`CommandEncoder::write_timestamp`],
-        /// [`RenderPassEncoder::write_timestamp`], or [`ComputePassEncoder::write_timestamp`] to
-        /// write out a timestamp.
+        /// all work before the query is finished.
+        ///
+        /// This feature allows timestamps to be written out using:
+        /// - [`CommandEncoder::write_timestamp`]
+        /// - [`RenderPassDescriptor::timestamp_writes`]
+        /// - [`ComputePassDescriptor::timestamp_writes`]
+        ///
+        /// For timestamps within passes refer to [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`].
         ///
         /// They must be resolved using [`CommandEncoder::resolve_query_sets`] into a buffer,
         /// then the result must be multiplied by the timestamp period [`Queue::get_timestamp_period`]
@@ -265,8 +270,7 @@ bitflags::bitflags! {
         /// Supported Platforms:
         /// - Vulkan
         /// - DX12
-        ///
-        /// This is currently unimplemented on Metal.
+        /// - Metal - TODO: Not yet supported on command encoder.
         ///
         /// This is a web and native feature.
         const TIMESTAMP_QUERY = 1 << 1;
@@ -447,12 +451,17 @@ bitflags::bitflags! {
         ///
         /// Implies [`Features::TIMESTAMP_QUERY`] is supported.
         ///
+        /// Additionally allows for timestamp queries to be used inside render & compute passes using:
+        /// - [`RenderPassEncoder::write_timestamp`]
+        /// - [`ComputePassEncoder::write_timestamp`]
+        ///
         /// Supported platforms:
         /// - Vulkan
         /// - DX12
         ///
         /// This is currently unimplemented on Metal.
         /// When implemented, it will be supported on Metal on AMD and Intel GPUs, but not Apple GPUs.
+        /// (This is a common limitation of tile-based rasterization GPUs)
         ///
         /// This is a native only feature with a [proposal](https://github.com/gpuweb/gpuweb/blob/0008bd30da2366af88180b511a5d0d0c1dffbc36/proposals/timestamp-query-inside-passes.md) for the web.
         const TIMESTAMP_QUERY_INSIDE_PASSES = 1 << 33;
@@ -670,7 +679,6 @@ bitflags::bitflags! {
         /// This allows only drawing the vertices of polygons/triangles instead of filled
         ///
         /// Supported platforms:
-        /// - DX12
         /// - Vulkan
         ///
         /// This is a native only feature.
@@ -847,7 +855,7 @@ pub struct Limits {
     pub max_texture_array_layers: u32,
     /// Amount of bind groups that can be attached to a pipeline at the same time. Defaults to 4. Higher is "better".
     pub max_bind_groups: u32,
-    /// Maximum binding index allowed in `create_bind_group_layout`. Defaults to 1000.
+    /// Maximum binding index allowed in `create_bind_group_layout`. Defaults to 1000. Higher is "better".
     pub max_bindings_per_bind_group: u32,
     /// Amount of uniform buffer bindings that can be dynamic in a single pipeline. Defaults to 8. Higher is "better".
     pub max_dynamic_uniform_buffers_per_pipeline_layout: u32,
@@ -863,14 +871,15 @@ pub struct Limits {
     pub max_storage_textures_per_shader_stage: u32,
     /// Amount of uniform buffers visible in a single shader stage. Defaults to 12. Higher is "better".
     pub max_uniform_buffers_per_shader_stage: u32,
-    /// Maximum size in bytes of a binding to a uniform buffer. Defaults to 64 KB. Higher is "better".
+    /// Maximum size in bytes of a binding to a uniform buffer. Defaults to 64 KiB. Higher is "better".
     pub max_uniform_buffer_binding_size: u32,
-    /// Maximum size in bytes of a binding to a storage buffer. Defaults to 128 MB. Higher is "better".
+    /// Maximum size in bytes of a binding to a storage buffer. Defaults to 128 MiB. Higher is "better".
     pub max_storage_buffer_binding_size: u32,
     /// Maximum length of `VertexState::buffers` when creating a `RenderPipeline`.
     /// Defaults to 8. Higher is "better".
     pub max_vertex_buffers: u32,
     /// A limit above which buffer allocations are guaranteed to fail.
+    /// Defaults to 256 MiB. Higher is "better".
     ///
     /// Buffer allocations below the maximum buffer size may not succeed depending on available memory,
     /// fragmentation and other factors.
@@ -892,24 +901,25 @@ pub struct Limits {
     pub min_storage_buffer_offset_alignment: u32,
     /// Maximum allowed number of components (scalars) of input or output locations for
     /// inter-stage communication (vertex outputs to fragment inputs). Defaults to 60.
+    /// Higher is "better".
     pub max_inter_stage_shader_components: u32,
     /// Maximum number of bytes used for workgroup memory in a compute entry point. Defaults to
-    /// 16352.
+    /// 16352. Higher is "better".
     pub max_compute_workgroup_storage_size: u32,
     /// Maximum value of the product of the `workgroup_size` dimensions for a compute entry-point.
-    /// Defaults to 256.
+    /// Defaults to 256. Higher is "better".
     pub max_compute_invocations_per_workgroup: u32,
     /// The maximum value of the workgroup_size X dimension for a compute stage `ShaderModule` entry-point.
-    /// Defaults to 256.
+    /// Defaults to 256. Higher is "better".
     pub max_compute_workgroup_size_x: u32,
     /// The maximum value of the workgroup_size Y dimension for a compute stage `ShaderModule` entry-point.
-    /// Defaults to 256.
+    /// Defaults to 256. Higher is "better".
     pub max_compute_workgroup_size_y: u32,
     /// The maximum value of the workgroup_size Z dimension for a compute stage `ShaderModule` entry-point.
-    /// Defaults to 64.
+    /// Defaults to 64. Higher is "better".
     pub max_compute_workgroup_size_z: u32,
     /// The maximum value for each dimension of a `ComputePass::dispatch(x, y, z)` operation.
-    /// Defaults to 65535.
+    /// Defaults to 65535. Higher is "better".
     pub max_compute_workgroups_per_dimension: u32,
     /// Amount of storage available for push constants in bytes. Defaults to 0. Higher is "better".
     /// Requesting more than 0 during device creation requires [`Features::PUSH_CONSTANTS`] to be enabled.
@@ -921,6 +931,12 @@ pub struct Limits {
     /// - DX11 & OpenGL don't natively support push constants, and are emulated with uniforms,
     ///   so this number is less useful but likely 256.
     pub max_push_constant_size: u32,
+
+    /// Maximum number of live non-sampler bindings.
+    ///
+    /// This limit only affects the d3d12 backend. Using a large number will allow the device
+    /// to create many bind groups at the cost of a large up-front allocation at device creation.
+    pub max_non_sampler_bindings: u32,
 }
 
 impl Default for Limits {
@@ -942,7 +958,7 @@ impl Default for Limits {
             max_uniform_buffer_binding_size: 64 << 10,
             max_storage_buffer_binding_size: 128 << 20,
             max_vertex_buffers: 8,
-            max_buffer_size: 1 << 28,
+            max_buffer_size: 256 << 20,
             max_vertex_attributes: 16,
             max_vertex_buffer_array_stride: 2048,
             min_uniform_buffer_offset_alignment: 256,
@@ -955,12 +971,50 @@ impl Default for Limits {
             max_compute_workgroup_size_z: 64,
             max_compute_workgroups_per_dimension: 65535,
             max_push_constant_size: 0,
+            max_non_sampler_bindings: 1_000_000,
         }
     }
 }
 
 impl Limits {
     /// These default limits are guaranteed to be compatible with GLES-3.1, and D3D11
+    ///
+    /// Those limits are as follows (values that differ from the defaults are marked with *):
+    /// ```rust
+    /// # use wgpu_types::Limits;
+    /// assert_eq!(Limits::downlevel_defaults(), Limits {
+    ///     max_texture_dimension_1d: 2048, // *
+    ///     max_texture_dimension_2d: 2048, // *
+    ///     max_texture_dimension_3d: 256, // *
+    ///     max_texture_array_layers: 256,
+    ///     max_bind_groups: 4,
+    ///     max_bindings_per_bind_group: 1000,
+    ///     max_dynamic_uniform_buffers_per_pipeline_layout: 8,
+    ///     max_dynamic_storage_buffers_per_pipeline_layout: 4,
+    ///     max_sampled_textures_per_shader_stage: 16,
+    ///     max_samplers_per_shader_stage: 16,
+    ///     max_storage_buffers_per_shader_stage: 4, // *
+    ///     max_storage_textures_per_shader_stage: 4,
+    ///     max_uniform_buffers_per_shader_stage: 12,
+    ///     max_uniform_buffer_binding_size: 16 << 10, // * (16 KiB)
+    ///     max_storage_buffer_binding_size: 128 << 20, // (128 MiB)
+    ///     max_vertex_buffers: 8,
+    ///     max_vertex_attributes: 16,
+    ///     max_vertex_buffer_array_stride: 2048,
+    ///     max_push_constant_size: 0,
+    ///     min_uniform_buffer_offset_alignment: 256,
+    ///     min_storage_buffer_offset_alignment: 256,
+    ///     max_inter_stage_shader_components: 60,
+    ///     max_compute_workgroup_storage_size: 16352,
+    ///     max_compute_invocations_per_workgroup: 256,
+    ///     max_compute_workgroup_size_x: 256,
+    ///     max_compute_workgroup_size_y: 256,
+    ///     max_compute_workgroup_size_z: 64,
+    ///     max_compute_workgroups_per_dimension: 65535,
+    ///     max_buffer_size: 256 << 20, // (256 MiB)
+    ///     max_non_sampler_bindings: 1_000_000,
+    /// });
+    /// ```
     pub fn downlevel_defaults() -> Self {
         Self {
             max_texture_dimension_1d: 2048,
@@ -991,11 +1045,50 @@ impl Limits {
             max_compute_workgroup_size_y: 256,
             max_compute_workgroup_size_z: 64,
             max_compute_workgroups_per_dimension: 65535,
-            max_buffer_size: 1 << 28,
+            max_buffer_size: 256 << 20,
+            max_non_sampler_bindings: 1_000_000,
         }
     }
 
     /// These default limits are guaranteed to be compatible with GLES-3.0, and D3D11, and WebGL2
+    ///
+    /// Those limits are as follows (different from `downlevel_defaults` are marked with +,
+    /// *'s from `downlevel_defaults` shown as well.):
+    /// ```rust
+    /// # use wgpu_types::Limits;
+    /// assert_eq!(Limits::downlevel_webgl2_defaults(), Limits {
+    ///     max_texture_dimension_1d: 2048, // *
+    ///     max_texture_dimension_2d: 2048, // *
+    ///     max_texture_dimension_3d: 256, // *
+    ///     max_texture_array_layers: 256,
+    ///     max_bind_groups: 4,
+    ///     max_bindings_per_bind_group: 1000,
+    ///     max_dynamic_uniform_buffers_per_pipeline_layout: 8,
+    ///     max_dynamic_storage_buffers_per_pipeline_layout: 0, // +
+    ///     max_sampled_textures_per_shader_stage: 16,
+    ///     max_samplers_per_shader_stage: 16,
+    ///     max_storage_buffers_per_shader_stage: 0, // * +
+    ///     max_storage_textures_per_shader_stage: 0, // +
+    ///     max_uniform_buffers_per_shader_stage: 11, // +
+    ///     max_uniform_buffer_binding_size: 16 << 10, // * (16 KiB)
+    ///     max_storage_buffer_binding_size: 0, // * +
+    ///     max_vertex_buffers: 8,
+    ///     max_vertex_attributes: 16,
+    ///     max_vertex_buffer_array_stride: 255, // +
+    ///     max_push_constant_size: 0,
+    ///     min_uniform_buffer_offset_alignment: 256,
+    ///     min_storage_buffer_offset_alignment: 256,
+    ///     max_inter_stage_shader_components: 60,
+    ///     max_compute_workgroup_storage_size: 0, // +
+    ///     max_compute_invocations_per_workgroup: 0, // +
+    ///     max_compute_workgroup_size_x: 0, // +
+    ///     max_compute_workgroup_size_y: 0, // +
+    ///     max_compute_workgroup_size_z: 0, // +
+    ///     max_compute_workgroups_per_dimension: 0, // +
+    ///     max_buffer_size: 256 << 20, // (256 MiB)
+    ///     max_non_sampler_bindings: 1_000_000,
+    /// });
+    /// ```
     pub fn downlevel_webgl2_defaults() -> Self {
         Self {
             max_uniform_buffers_per_shader_stage: 11,
@@ -1110,6 +1203,7 @@ impl Limits {
         compare!(max_compute_workgroup_size_z, Less);
         compare!(max_compute_workgroups_per_dimension, Less);
         compare!(max_buffer_size, Less);
+        compare!(max_non_sampler_bindings, Less);
     }
 }
 
@@ -1186,8 +1280,8 @@ bitflags::bitflags! {
         const INDIRECT_EXECUTION = 1 << 2;
         /// Supports non-zero `base_vertex` parameter to indexed draw calls.
         const BASE_VERTEX = 1 << 3;
-        /// Supports reading from a depth/stencil buffer while using as a read-only depth/stencil
-        /// attachment.
+        /// Supports reading from a depth/stencil texture while using it as a read-only
+        /// depth/stencil attachment.
         ///
         /// The WebGL2 and GLES backends do not support RODS.
         const READ_ONLY_DEPTH_STENCIL = 1 << 4;
@@ -4218,73 +4312,73 @@ pub struct VertexAttribute {
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))]
 pub enum VertexFormat {
-    /// Two unsigned bytes (u8). `uvec2` in shaders.
+    /// Two unsigned bytes (u8). `vec2<u32>` in shaders.
     Uint8x2 = 0,
-    /// Four unsigned bytes (u8). `uvec4` in shaders.
+    /// Four unsigned bytes (u8). `vec4<u32>` in shaders.
     Uint8x4 = 1,
-    /// Two signed bytes (i8). `ivec2` in shaders.
+    /// Two signed bytes (i8). `vec2<i32>` in shaders.
     Sint8x2 = 2,
-    /// Four signed bytes (i8). `ivec4` in shaders.
+    /// Four signed bytes (i8). `vec4<i32>` in shaders.
     Sint8x4 = 3,
-    /// Two unsigned bytes (u8). [0, 255] converted to float [0, 1] `vec2` in shaders.
+    /// Two unsigned bytes (u8). [0, 255] converted to float [0, 1] `vec2<f32>` in shaders.
     Unorm8x2 = 4,
-    /// Four unsigned bytes (u8). [0, 255] converted to float [0, 1] `vec4` in shaders.
+    /// Four unsigned bytes (u8). [0, 255] converted to float [0, 1] `vec4<f32>` in shaders.
     Unorm8x4 = 5,
-    /// Two signed bytes (i8). [-127, 127] converted to float [-1, 1] `vec2` in shaders.
+    /// Two signed bytes (i8). [-127, 127] converted to float [-1, 1] `vec2<f32>` in shaders.
     Snorm8x2 = 6,
-    /// Four signed bytes (i8). [-127, 127] converted to float [-1, 1] `vec4` in shaders.
+    /// Four signed bytes (i8). [-127, 127] converted to float [-1, 1] `vec4<f32>` in shaders.
     Snorm8x4 = 7,
-    /// Two unsigned shorts (u16). `uvec2` in shaders.
+    /// Two unsigned shorts (u16). `vec2<u32>` in shaders.
     Uint16x2 = 8,
-    /// Four unsigned shorts (u16). `uvec4` in shaders.
+    /// Four unsigned shorts (u16). `vec4<u32>` in shaders.
     Uint16x4 = 9,
-    /// Two signed shorts (i16). `ivec2` in shaders.
+    /// Two signed shorts (i16). `vec2<i32>` in shaders.
     Sint16x2 = 10,
-    /// Four signed shorts (i16). `ivec4` in shaders.
+    /// Four signed shorts (i16). `vec4<i32>` in shaders.
     Sint16x4 = 11,
-    /// Two unsigned shorts (u16). [0, 65535] converted to float [0, 1] `vec2` in shaders.
+    /// Two unsigned shorts (u16). [0, 65535] converted to float [0, 1] `vec2<f32>` in shaders.
     Unorm16x2 = 12,
-    /// Four unsigned shorts (u16). [0, 65535] converted to float [0, 1] `vec4` in shaders.
+    /// Four unsigned shorts (u16). [0, 65535] converted to float [0, 1] `vec4<f32>` in shaders.
     Unorm16x4 = 13,
-    /// Two signed shorts (i16). [-32767, 32767] converted to float [-1, 1] `vec2` in shaders.
+    /// Two signed shorts (i16). [-32767, 32767] converted to float [-1, 1] `vec2<f32>` in shaders.
     Snorm16x2 = 14,
-    /// Four signed shorts (i16). [-32767, 32767] converted to float [-1, 1] `vec4` in shaders.
+    /// Four signed shorts (i16). [-32767, 32767] converted to float [-1, 1] `vec4<f32>` in shaders.
     Snorm16x4 = 15,
-    /// Two half-precision floats (no Rust equiv). `vec2` in shaders.
+    /// Two half-precision floats (no Rust equiv). `vec2<f32>` in shaders.
     Float16x2 = 16,
-    /// Four half-precision floats (no Rust equiv). `vec4` in shaders.
+    /// Four half-precision floats (no Rust equiv). `vec4<f32>` in shaders.
     Float16x4 = 17,
-    /// One single-precision float (f32). `float` in shaders.
+    /// One single-precision float (f32). `f32` in shaders.
     Float32 = 18,
-    /// Two single-precision floats (f32). `vec2` in shaders.
+    /// Two single-precision floats (f32). `vec2<f32>` in shaders.
     Float32x2 = 19,
-    /// Three single-precision floats (f32). `vec3` in shaders.
+    /// Three single-precision floats (f32). `vec3<f32>` in shaders.
     Float32x3 = 20,
-    /// Four single-precision floats (f32). `vec4` in shaders.
+    /// Four single-precision floats (f32). `vec4<f32>` in shaders.
     Float32x4 = 21,
-    /// One unsigned int (u32). `uint` in shaders.
+    /// One unsigned int (u32). `u32` in shaders.
     Uint32 = 22,
-    /// Two unsigned ints (u32). `uvec2` in shaders.
+    /// Two unsigned ints (u32). `vec2<u32>` in shaders.
     Uint32x2 = 23,
-    /// Three unsigned ints (u32). `uvec3` in shaders.
+    /// Three unsigned ints (u32). `vec3<u32>` in shaders.
     Uint32x3 = 24,
-    /// Four unsigned ints (u32). `uvec4` in shaders.
+    /// Four unsigned ints (u32). `vec4<u32>` in shaders.
     Uint32x4 = 25,
-    /// One signed int (i32). `int` in shaders.
+    /// One signed int (i32). `i32` in shaders.
     Sint32 = 26,
-    /// Two signed ints (i32). `ivec2` in shaders.
+    /// Two signed ints (i32). `vec2<i32>` in shaders.
     Sint32x2 = 27,
-    /// Three signed ints (i32). `ivec3` in shaders.
+    /// Three signed ints (i32). `vec3<i32>` in shaders.
     Sint32x3 = 28,
-    /// Four signed ints (i32). `ivec4` in shaders.
+    /// Four signed ints (i32). `vec4<i32>` in shaders.
     Sint32x4 = 29,
-    /// One double-precision float (f64). `double` in shaders. Requires [`Features::VERTEX_ATTRIBUTE_64BIT`].
+    /// One double-precision float (f64). `f64` in shaders. Requires [`Features::VERTEX_ATTRIBUTE_64BIT`].
     Float64 = 30,
-    /// Two double-precision floats (f64). `dvec2` in shaders. Requires [`Features::VERTEX_ATTRIBUTE_64BIT`].
+    /// Two double-precision floats (f64). `vec2<f64>` in shaders. Requires [`Features::VERTEX_ATTRIBUTE_64BIT`].
     Float64x2 = 31,
-    /// Three double-precision floats (f64). `dvec3` in shaders. Requires [`Features::VERTEX_ATTRIBUTE_64BIT`].
+    /// Three double-precision floats (f64). `vec3<f64>` in shaders. Requires [`Features::VERTEX_ATTRIBUTE_64BIT`].
     Float64x3 = 32,
-    /// Four double-precision floats (f64). `dvec4` in shaders. Requires [`Features::VERTEX_ATTRIBUTE_64BIT`].
+    /// Four double-precision floats (f64). `vec4<f64>` in shaders. Requires [`Features::VERTEX_ATTRIBUTE_64BIT`].
     Float64x4 = 33,
 }
 
@@ -6311,17 +6405,38 @@ pub enum Dx12Compiler {
     Dxc {
         /// Path to the `dxil.dll` file, or path to the directory containing `dxil.dll` file. Passing `None` will use standard platform specific dll loading rules.
         dxil_path: Option<PathBuf>,
-        /// Path to the `dxcompiler.dll` file, or path to the directory containing `dxil.dll` file. Passing `None` will use standard platform specific dll loading rules.
+        /// Path to the `dxcompiler.dll` file, or path to the directory containing `dxcompiler.dll` file. Passing `None` will use standard platform specific dll loading rules.
         dxc_path: Option<PathBuf>,
     },
 }
 
+/// Selects which OpenGL ES 3 minor version to request.
+///
+/// When using ANGLE as an OpenGL ES/EGL implementation, explicitly requesting `Version1` can provide a non-conformant ES 3.1 on APIs like D3D11.
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash)]
+pub enum Gles3MinorVersion {
+    /// No explicit minor version is requested; the driver automatically picks the highest available.
+    #[default]
+    Automatic,
+
+    /// Request an ES 3.0 context.
+    Version0,
+
+    /// Request an ES 3.1 context.
+    Version1,
+
+    /// Request an ES 3.2 context.
+    Version2,
+}
+
 /// Options for creating an instance.
 pub struct InstanceDescriptor {
     /// Which `Backends` to enable.
     pub backends: Backends,
     /// Which DX12 shader compiler to use.
     pub dx12_shader_compiler: Dx12Compiler,
+    /// Which OpenGL ES 3 minor version to request.
+    pub gles_minor_version: Gles3MinorVersion,
 }
 
 impl Default for InstanceDescriptor {
@@ -6329,6 +6444,7 @@ impl Default for InstanceDescriptor {
         Self {
             backends: Backends::all(),
             dx12_shader_compiler: Dx12Compiler::default(),
+            gles_minor_version: Gles3MinorVersion::default(),
         }
     }
 }
diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml
index 2842281a19..e34e90235d 100644
--- a/wgpu/Cargo.toml
+++ b/wgpu/Cargo.toml
@@ -154,6 +154,7 @@ web-sys = { workspace = true, features = [
     "GpuCompilationMessageType",
     "GpuComputePassDescriptor",
     "GpuComputePassEncoder",
+    "GpuComputePassTimestampWrite",
     "GpuComputePipeline",
     "GpuComputePipelineDescriptor",
     "GpuCullMode",
diff --git a/wgpu/README.md b/wgpu/README.md
index 331cb3b049..4f4f85a1e5 100644
--- a/wgpu/README.md
+++ b/wgpu/README.md
@@ -35,9 +35,9 @@ The following environment variables can be used to configure how the framework e
 
 - `WGPU_POWER_PREF`
 
-  Options: `low`, `high`
+  Options: `low`, `high`, `none`
 
-  If unset a low power adapter is preferred.
+  If unset, power usage is not considered when choosing an adapter.
 
 - `WGPU_ADAPTER_NAME`
 
diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs
index f418c4dbc2..8eec9adad5 100644
--- a/wgpu/src/backend/direct.rs
+++ b/wgpu/src/backend/direct.rs
@@ -244,10 +244,7 @@ impl Context {
         &self,
         canvas: web_sys::HtmlCanvasElement,
     ) -> Result<Surface, crate::CreateSurfaceError> {
-        let id = self
-            .0
-            .create_surface_webgl_canvas(canvas, ())
-            .map_err(|hal::InstanceError| crate::CreateSurfaceError {})?;
+        let id = self.0.create_surface_webgl_canvas(canvas, ())?;
         Ok(Surface {
             id,
             configured_device: Mutex::default(),
@@ -259,10 +256,7 @@ impl Context {
         &self,
         canvas: web_sys::OffscreenCanvas,
     ) -> Result<Surface, crate::CreateSurfaceError> {
-        let id = self
-            .0
-            .create_surface_webgl_offscreen_canvas(canvas, ())
-            .map_err(|hal::InstanceError| crate::CreateSurfaceError {})?;
+        let id = self.0.create_surface_webgl_offscreen_canvas(canvas, ())?;
         Ok(Surface {
             id,
             configured_device: Mutex::default(),
@@ -1849,12 +1843,21 @@ impl crate::Context for Context {
         _encoder_data: &Self::CommandEncoderData,
         desc: &ComputePassDescriptor,
     ) -> (Self::ComputePassId, Self::ComputePassData) {
+        let timestamp_writes =
+            desc.timestamp_writes
+                .as_ref()
+                .map(|tw| wgc::command::ComputePassTimestampWrites {
+                    query_set: tw.query_set.id.into(),
+                    beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
+                    end_of_pass_write_index: tw.end_of_pass_write_index,
+                });
         (
             Unused,
             wgc::command::ComputePass::new(
                 *encoder,
                 &wgc::command::ComputePassDescriptor {
                     label: desc.label.map(Borrowed),
+                    timestamp_writes: timestamp_writes.as_ref(),
                 },
             ),
         )
@@ -1918,6 +1921,15 @@ impl crate::Context for Context {
             }
         });
 
+        let timestamp_writes =
+            desc.timestamp_writes
+                .as_ref()
+                .map(|tw| wgc::command::RenderPassTimestampWrites {
+                    query_set: tw.query_set.id.into(),
+                    beginning_of_pass_write_index: tw.beginning_of_pass_write_index,
+                    end_of_pass_write_index: tw.end_of_pass_write_index,
+                });
+
         (
             Unused,
             wgc::command::RenderPass::new(
@@ -1926,6 +1938,10 @@ impl crate::Context for Context {
                     label: desc.label.map(Borrowed),
                     color_attachments: Borrowed(&colors),
                     depth_stencil_attachment: depth_stencil.as_ref(),
+                    timestamp_writes: timestamp_writes.as_ref(),
+                    occlusion_query_set: desc
+                        .occlusion_query_set
+                        .map(|query_set| query_set.id.into()),
                 },
             ),
         )
@@ -2940,6 +2956,23 @@ impl crate::Context for Context {
         wgpu_render_pass_write_timestamp(pass_data, *query_set, query_index)
     }
 
+    fn render_pass_begin_occlusion_query(
+        &self,
+        _pass: &mut Self::RenderPassId,
+        pass_data: &mut Self::RenderPassData,
+        query_index: u32,
+    ) {
+        wgpu_render_pass_begin_occlusion_query(pass_data, query_index)
+    }
+
+    fn render_pass_end_occlusion_query(
+        &self,
+        _pass: &mut Self::RenderPassId,
+        pass_data: &mut Self::RenderPassData,
+    ) {
+        wgpu_render_pass_end_occlusion_query(pass_data)
+    }
+
     fn render_pass_begin_pipeline_statistics_query(
         &self,
         _pass: &mut Self::RenderPassId,
diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs
index 1045041746..d64bd8bcb1 100644
--- a/wgpu/src/backend/web.rs
+++ b/wgpu/src/backend/web.rs
@@ -687,6 +687,99 @@ fn map_wgt_features(supported_features: web_sys::GpuSupportedFeatures) -> wgt::F
     features
 }
 
+fn map_wgt_limits(limits: web_sys::GpuSupportedLimits) -> wgt::Limits {
+    wgt::Limits {
+        max_texture_dimension_1d: limits.max_texture_dimension_1d(),
+        max_texture_dimension_2d: limits.max_texture_dimension_2d(),
+        max_texture_dimension_3d: limits.max_texture_dimension_3d(),
+        max_texture_array_layers: limits.max_texture_array_layers(),
+        max_bind_groups: limits.max_bind_groups(),
+        max_bindings_per_bind_group: limits.max_bindings_per_bind_group(),
+        max_dynamic_uniform_buffers_per_pipeline_layout: limits
+            .max_dynamic_uniform_buffers_per_pipeline_layout(),
+        max_dynamic_storage_buffers_per_pipeline_layout: limits
+            .max_dynamic_storage_buffers_per_pipeline_layout(),
+        max_sampled_textures_per_shader_stage: limits.max_sampled_textures_per_shader_stage(),
+        max_samplers_per_shader_stage: limits.max_samplers_per_shader_stage(),
+        max_storage_buffers_per_shader_stage: limits.max_storage_buffers_per_shader_stage(),
+        max_storage_textures_per_shader_stage: limits.max_storage_textures_per_shader_stage(),
+        max_uniform_buffers_per_shader_stage: limits.max_uniform_buffers_per_shader_stage(),
+        max_uniform_buffer_binding_size: limits.max_uniform_buffer_binding_size() as u32,
+        max_storage_buffer_binding_size: limits.max_storage_buffer_binding_size() as u32,
+        max_vertex_buffers: limits.max_vertex_buffers(),
+        max_buffer_size: limits.max_buffer_size() as u64,
+        max_vertex_attributes: limits.max_vertex_attributes(),
+        max_vertex_buffer_array_stride: limits.max_vertex_buffer_array_stride(),
+        min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment(),
+        min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment(),
+        max_inter_stage_shader_components: limits.max_inter_stage_shader_components(),
+        max_compute_workgroup_storage_size: limits.max_compute_workgroup_storage_size(),
+        max_compute_invocations_per_workgroup: limits.max_compute_invocations_per_workgroup(),
+        max_compute_workgroup_size_x: limits.max_compute_workgroup_size_x(),
+        max_compute_workgroup_size_y: limits.max_compute_workgroup_size_y(),
+        max_compute_workgroup_size_z: limits.max_compute_workgroup_size_z(),
+        max_compute_workgroups_per_dimension: limits.max_compute_workgroups_per_dimension(),
+        // The following are not part of WebGPU
+        max_push_constant_size: wgt::Limits::default().max_push_constant_size,
+        max_non_sampler_bindings: wgt::Limits::default().max_non_sampler_bindings,
+    }
+}
+
+fn map_js_sys_limits(limits: &wgt::Limits) -> js_sys::Object {
+    let object = js_sys::Object::new();
+
+    macro_rules! set_properties {
+        (($from:expr) => ($on:expr) : $(($js_ident:ident, $rs_ident:ident)),* $(,)?) => {
+            $(
+                ::js_sys::Reflect::set(
+                    &$on,
+                    &::wasm_bindgen::JsValue::from(stringify!($js_ident)),
+                    // Numbers may be u64, however using `from` on a u64 yields
+                    // errors on the wasm side, since it uses an unsupported api.
+                    // Wasm sends us things that need to fit into u64s by sending
+                    // us f64s instead. So we just send them f64s back.
+                    &::wasm_bindgen::JsValue::from($from.$rs_ident as f64)
+                )
+                    .expect("Setting Object properties should never fail.");
+            )*
+        }
+    }
+
+    set_properties![
+        (limits) => (object):
+        (maxTextureDimension1D, max_texture_dimension_1d),
+        (maxTextureDimension2D, max_texture_dimension_2d),
+        (maxTextureDimension3D, max_texture_dimension_3d),
+        (maxTextureArrayLayers, max_texture_array_layers),
+        (maxBindGroups, max_bind_groups),
+        (maxBindingsPerBindGroup, max_bindings_per_bind_group),
+        (maxDynamicUniformBuffersPerPipelineLayout, max_dynamic_uniform_buffers_per_pipeline_layout),
+        (maxDynamicStorageBuffersPerPipelineLayout, max_dynamic_storage_buffers_per_pipeline_layout),
+        (maxSampledTexturesPerShaderStage, max_sampled_textures_per_shader_stage),
+        (maxSamplersPerShaderStage, max_samplers_per_shader_stage),
+        (maxStorageBuffersPerShaderStage, max_storage_buffers_per_shader_stage),
+        (maxStorageTexturesPerShaderStage, max_storage_textures_per_shader_stage),
+        (maxUniformBuffersPerShaderStage, max_uniform_buffers_per_shader_stage),
+        (maxUniformBufferBindingSize, max_uniform_buffer_binding_size),
+        (maxStorageBufferBindingSize, max_storage_buffer_binding_size),
+        (minUniformBufferOffsetAlignment, min_uniform_buffer_offset_alignment),
+        (minStorageBufferOffsetAlignment, min_storage_buffer_offset_alignment),
+        (maxVertexBuffers, max_vertex_buffers),
+        (maxBufferSize, max_buffer_size),
+        (maxVertexAttributes, max_vertex_attributes),
+        (maxVertexBufferArrayStride, max_vertex_buffer_array_stride),
+        (maxInterStageShaderComponents, max_inter_stage_shader_components),
+        (maxComputeWorkgroupStorageSize, max_compute_workgroup_storage_size),
+        (maxComputeInvocationsPerWorkgroup, max_compute_invocations_per_workgroup),
+        (maxComputeWorkgroupSizeX, max_compute_workgroup_size_x),
+        (maxComputeWorkgroupSizeY, max_compute_workgroup_size_y),
+        (maxComputeWorkgroupSizeZ, max_compute_workgroup_size_z),
+        (maxComputeWorkgroupsPerDimension, max_compute_workgroups_per_dimension),
+    ];
+
+    object
+}
+
 type JsFutureResult = Result<wasm_bindgen::JsValue, wasm_bindgen::JsValue>;
 
 fn future_request_adapter(
@@ -827,13 +920,22 @@ impl Context {
                 // “not supported” could include “insufficient GPU resources” or “the GPU process
                 // previously crashed”. So, we must return it as an `Err` since it could occur
                 // for circumstances outside the application author's control.
-                return Err(crate::CreateSurfaceError {});
+                return Err(crate::CreateSurfaceError {
+                    inner: crate::CreateSurfaceErrorKind::Web(
+                        String::from(
+                            "canvas.getContext() returned null; webgpu not available or canvas already in use"
+                        )
+                    )
+                });
             }
             Err(js_error) => {
                 // <https://html.spec.whatwg.org/multipage/canvas.html#dom-canvas-getcontext>
-                // A thrown exception indicates misuse of the canvas state. Ideally we wouldn't
-                // panic in this case ... TODO
-                panic!("canvas.getContext() threw {js_error:?}")
+                // A thrown exception indicates misuse of the canvas state.
+                return Err(crate::CreateSurfaceError {
+                    inner: crate::CreateSurfaceErrorKind::Web(format!(
+                        "canvas.getContext() threw exception {js_error:?}",
+                    )),
+                });
             }
         };
 
@@ -1014,9 +1116,19 @@ impl crate::context::Context for Context {
             //Error: Tracing isn't supported on the Web target
         }
 
-        // TODO: non-guaranteed limits
         let mut mapped_desc = web_sys::GpuDeviceDescriptor::new();
 
+        // TODO: Migrate to a web_sys api.
+        // See https://github.com/rustwasm/wasm-bindgen/issues/3587
+        let limits_object = map_js_sys_limits(&desc.limits);
+
+        js_sys::Reflect::set(
+            &mapped_desc,
+            &JsValue::from("requiredLimits"),
+            &limits_object,
+        )
+        .expect("Setting Object properties should never fail.");
+
         let required_features = FEATURES_MAPPING
             .iter()
             .copied()
@@ -1070,30 +1182,7 @@ impl crate::context::Context for Context {
         _adapter: &Self::AdapterId,
         adapter_data: &Self::AdapterData,
     ) -> wgt::Limits {
-        let limits = adapter_data.0.limits();
-        wgt::Limits {
-            max_texture_dimension_1d: limits.max_texture_dimension_1d(),
-            max_texture_dimension_2d: limits.max_texture_dimension_2d(),
-            max_texture_dimension_3d: limits.max_texture_dimension_3d(),
-            max_texture_array_layers: limits.max_texture_array_layers(),
-            max_bind_groups: limits.max_bind_groups(),
-            max_bindings_per_bind_group: limits.max_bindings_per_bind_group(),
-            max_dynamic_uniform_buffers_per_pipeline_layout: limits
-                .max_dynamic_uniform_buffers_per_pipeline_layout(),
-            max_dynamic_storage_buffers_per_pipeline_layout: limits
-                .max_dynamic_storage_buffers_per_pipeline_layout(),
-            max_sampled_textures_per_shader_stage: limits.max_sampled_textures_per_shader_stage(),
-            max_samplers_per_shader_stage: limits.max_samplers_per_shader_stage(),
-            max_storage_buffers_per_shader_stage: limits.max_storage_buffers_per_shader_stage(),
-            max_storage_textures_per_shader_stage: limits.max_storage_textures_per_shader_stage(),
-            max_uniform_buffers_per_shader_stage: limits.max_uniform_buffers_per_shader_stage(),
-            max_uniform_buffer_binding_size: limits.max_uniform_buffer_binding_size() as u32,
-            max_storage_buffer_binding_size: limits.max_storage_buffer_binding_size() as u32,
-            max_vertex_buffers: limits.max_vertex_buffers(),
-            max_vertex_attributes: limits.max_vertex_attributes(),
-            max_vertex_buffer_array_stride: limits.max_vertex_buffer_array_stride(),
-            ..wgt::Limits::default()
-        }
+        map_wgt_limits(adapter_data.0.limits())
     }
 
     fn adapter_downlevel_capabilities(
@@ -1256,10 +1345,9 @@ impl crate::context::Context for Context {
     fn device_limits(
         &self,
         _device: &Self::DeviceId,
-        _device_data: &Self::DeviceData,
+        device_data: &Self::DeviceData,
     ) -> wgt::Limits {
-        // TODO
-        wgt::Limits::default()
+        map_wgt_limits(device_data.0.limits())
     }
 
     fn device_downlevel_properties(
@@ -1890,10 +1978,8 @@ impl crate::context::Context for Context {
         buffer_data: &Self::BufferData,
         sub_range: Range<wgt::BufferAddress>,
     ) -> Box<dyn crate::context::BufferMappedRange> {
-        let array_buffer = buffer_data.0.get_mapped_range_with_f64_and_f64(
-            sub_range.start as f64,
-            (sub_range.end - sub_range.start) as f64,
-        );
+        let array_buffer =
+            self.buffer_get_mapped_range_as_array_buffer(_buffer, buffer_data, sub_range);
         let actual_mapping = js_sys::Uint8Array::new(&array_buffer);
         let temporary_mapping = actual_mapping.to_vec();
         Box::new(BufferMappedRange {
@@ -1902,6 +1988,18 @@ impl crate::context::Context for Context {
         })
     }
 
+    fn buffer_get_mapped_range_as_array_buffer(
+        &self,
+        _buffer: &Self::BufferId,
+        buffer_data: &Self::BufferData,
+        sub_range: Range<wgt::BufferAddress>,
+    ) -> js_sys::ArrayBuffer {
+        buffer_data.0.get_mapped_range_with_f64_and_f64(
+            sub_range.start as f64,
+            (sub_range.end - sub_range.start) as f64,
+        )
+    }
+
     fn buffer_unmap(&self, _buffer: &Self::BufferId, buffer_data: &Self::BufferData) {
         buffer_data.0.unmap();
     }
@@ -2559,7 +2657,8 @@ impl crate::context::Context for Context {
         _queue: &Self::QueueId,
         _queue_data: &Self::QueueData,
     ) -> f32 {
-        1.0 //TODO
+        // Timestamp values are always in nanoseconds, see https://gpuweb.github.io/gpuweb/#timestamp
+        1.0
     }
 
     fn queue_on_submitted_work_done(
@@ -2593,15 +2692,19 @@ impl crate::context::Context for Context {
         bind_group_data: &Self::BindGroupData,
         offsets: &[wgt::DynamicOffset],
     ) {
-        pass_data
-            .0
-            .set_bind_group_with_u32_array_and_f64_and_dynamic_offsets_data_length(
-                index,
-                &bind_group_data.0,
-                offsets,
-                0f64,
-                offsets.len() as u32,
-            );
+        if offsets.is_empty() {
+            pass_data.0.set_bind_group(index, &bind_group_data.0);
+        } else {
+            pass_data
+                .0
+                .set_bind_group_with_u32_array_and_f64_and_dynamic_offsets_data_length(
+                    index,
+                    &bind_group_data.0,
+                    offsets,
+                    0f64,
+                    offsets.len() as u32,
+                );
+        }
     }
 
     fn compute_pass_set_push_constants(
@@ -2718,15 +2821,19 @@ impl crate::context::Context for Context {
         bind_group_data: &Self::BindGroupData,
         offsets: &[wgt::DynamicOffset],
     ) {
-        encoder_data
-            .0
-            .set_bind_group_with_u32_array_and_f64_and_dynamic_offsets_data_length(
-                index,
-                &bind_group_data.0,
-                offsets,
-                0f64,
-                offsets.len() as u32,
-            );
+        if offsets.is_empty() {
+            encoder_data.0.set_bind_group(index, &bind_group_data.0);
+        } else {
+            encoder_data
+                .0
+                .set_bind_group_with_u32_array_and_f64_and_dynamic_offsets_data_length(
+                    index,
+                    &bind_group_data.0,
+                    offsets,
+                    0f64,
+                    offsets.len() as u32,
+                );
+        }
     }
 
     fn render_bundle_encoder_set_index_buffer(
@@ -2933,15 +3040,19 @@ impl crate::context::Context for Context {
         bind_group_data: &Self::BindGroupData,
         offsets: &[wgt::DynamicOffset],
     ) {
-        pass_data
-            .0
-            .set_bind_group_with_u32_array_and_f64_and_dynamic_offsets_data_length(
-                index,
-                &bind_group_data.0,
-                offsets,
-                0f64,
-                offsets.len() as u32,
-            );
+        if offsets.is_empty() {
+            pass_data.0.set_bind_group(index, &bind_group_data.0);
+        } else {
+            pass_data
+                .0
+                .set_bind_group_with_u32_array_and_f64_and_dynamic_offsets_data_length(
+                    index,
+                    &bind_group_data.0,
+                    offsets,
+                    0f64,
+                    offsets.len() as u32,
+                );
+        }
     }
 
     fn render_pass_set_index_buffer(
@@ -3217,6 +3328,23 @@ impl crate::context::Context for Context {
         panic!("TIMESTAMP_QUERY_INSIDE_PASSES feature must be enabled to call write_timestamp in a compute pass")
     }
 
+    fn render_pass_begin_occlusion_query(
+        &self,
+        _pass: &mut Self::RenderPassId,
+        _pass_data: &mut Self::RenderPassData,
+        _query_index: u32,
+    ) {
+        // Intentionally a no-op: occlusion queries are not available in Gecko yet.
+    }
+
+    fn render_pass_end_occlusion_query(
+        &self,
+        _pass: &mut Self::RenderPassId,
+        _pass_data: &mut Self::RenderPassData,
+    ) {
+        // Intentionally a no-op: occlusion queries are not available in Gecko yet.
+    }
+
     fn render_pass_begin_pipeline_statistics_query(
         &self,
         _pass: &mut Self::RenderPassId,
diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs
index 33e8b5a5e4..9d0bdd9100 100644
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@@ -307,6 +307,16 @@ pub trait Context: Debug + WasmNotSend + WasmNotSync + Sized {
         buffer_data: &Self::BufferData,
         sub_range: Range<BufferAddress>,
     ) -> Box<dyn BufferMappedRange>;
+    #[cfg(all(
+        target_arch = "wasm32",
+        not(any(target_os = "emscripten", feature = "webgl"))
+    ))]
+    fn buffer_get_mapped_range_as_array_buffer(
+        &self,
+        buffer: &Self::BufferId,
+        buffer_data: &Self::BufferData,
+        sub_range: Range<BufferAddress>,
+    ) -> js_sys::ArrayBuffer;
     fn buffer_unmap(&self, buffer: &Self::BufferId, buffer_data: &Self::BufferData);
     fn texture_create_view(
         &self,
@@ -970,6 +980,17 @@ pub trait Context: Debug + WasmNotSend + WasmNotSync + Sized {
         query_set_data: &Self::QuerySetData,
         query_index: u32,
     );
+    fn render_pass_begin_occlusion_query(
+        &self,
+        pass: &mut Self::RenderPassId,
+        pass_data: &mut Self::RenderPassData,
+        query_index: u32,
+    );
+    fn render_pass_end_occlusion_query(
+        &self,
+        pass: &mut Self::RenderPassId,
+        pass_data: &mut Self::RenderPassData,
+    );
     fn render_pass_begin_pipeline_statistics_query(
         &self,
         pass: &mut Self::RenderPassId,
@@ -1375,6 +1396,16 @@ pub(crate) trait DynContext: Debug + WasmNotSend + WasmNotSync {
         buffer_data: &crate::Data,
         sub_range: Range<BufferAddress>,
     ) -> Box<dyn BufferMappedRange>;
+    #[cfg(all(
+        target_arch = "wasm32",
+        not(any(target_os = "emscripten", feature = "webgl"))
+    ))]
+    fn buffer_get_mapped_range_as_array_buffer(
+        &self,
+        buffer: &ObjectId,
+        buffer_data: &crate::Data,
+        sub_range: Range<BufferAddress>,
+    ) -> js_sys::ArrayBuffer;
     fn buffer_unmap(&self, buffer: &ObjectId, buffer_data: &crate::Data);
     fn texture_create_view(
         &self,
@@ -1986,6 +2017,13 @@ pub(crate) trait DynContext: Debug + WasmNotSend + WasmNotSync {
         query_set_data: &crate::Data,
         query_index: u32,
     );
+    fn render_pass_begin_occlusion_query(
+        &self,
+        pass: &mut ObjectId,
+        pass_data: &mut crate::Data,
+        query_index: u32,
+    );
+    fn render_pass_end_occlusion_query(&self, pass: &mut ObjectId, pass_data: &mut crate::Data);
     fn render_pass_begin_pipeline_statistics_query(
         &self,
         pass: &mut ObjectId,
@@ -2453,6 +2491,21 @@ where
         Context::buffer_get_mapped_range(self, &buffer, buffer_data, sub_range)
     }
 
+    #[cfg(all(
+        target_arch = "wasm32",
+        not(any(target_os = "emscripten", feature = "webgl"))
+    ))]
+    fn buffer_get_mapped_range_as_array_buffer(
+        &self,
+        buffer: &ObjectId,
+        buffer_data: &crate::Data,
+        sub_range: Range<BufferAddress>,
+    ) -> js_sys::ArrayBuffer {
+        let buffer = <T::BufferId>::from(*buffer);
+        let buffer_data = downcast_ref(buffer_data);
+        Context::buffer_get_mapped_range_as_array_buffer(self, &buffer, buffer_data, sub_range)
+    }
+
     fn buffer_unmap(&self, buffer: &ObjectId, buffer_data: &crate::Data) {
         let buffer = <T::BufferId>::from(*buffer);
         let buffer_data = downcast_ref(buffer_data);
@@ -3911,6 +3964,23 @@ where
         )
     }
 
+    fn render_pass_begin_occlusion_query(
+        &self,
+        pass: &mut ObjectId,
+        pass_data: &mut crate::Data,
+        query_index: u32,
+    ) {
+        let mut pass = <T::RenderPassId>::from(*pass);
+        let pass_data = downcast_mut::<T::RenderPassData>(pass_data);
+        Context::render_pass_begin_occlusion_query(self, &mut pass, pass_data, query_index)
+    }
+
+    fn render_pass_end_occlusion_query(&self, pass: &mut ObjectId, pass_data: &mut crate::Data) {
+        let mut pass = <T::RenderPassId>::from(*pass);
+        let pass_data = downcast_mut::<T::RenderPassData>(pass_data);
+        Context::render_pass_end_occlusion_query(self, &mut pass, pass_data)
+    }
+
     fn render_pass_begin_pipeline_statistics_query(
         &self,
         pass: &mut ObjectId,
diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
index 0e2be70b09..94345f1adb 100644
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@@ -15,8 +15,7 @@ mod macros;
 use std::{
     any::Any,
     borrow::Cow,
-    error,
-    fmt::{Debug, Display},
+    error, fmt,
     future::Future,
     marker::PhantomData,
     num::NonZeroU32,
@@ -34,12 +33,12 @@ pub use wgt::{
     BufferBindingType, BufferSize, BufferUsages, Color, ColorTargetState, ColorWrites,
     CommandBufferDescriptor, CompareFunction, CompositeAlphaMode, DepthBiasState,
     DepthStencilState, DeviceType, DownlevelCapabilities, DownlevelFlags, Dx12Compiler,
-    DynamicOffset, Extent3d, Face, Features, FilterMode, FrontFace, ImageDataLayout,
-    ImageSubresourceRange, IndexFormat, InstanceDescriptor, Limits, MultisampleState, Origin2d,
-    Origin3d, PipelineStatisticsTypes, PolygonMode, PowerPreference, PredefinedColorSpace,
-    PresentMode, PresentationTimestamp, PrimitiveState, PrimitiveTopology, PushConstantRange,
-    QueryType, RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor, ShaderLocation,
-    ShaderModel, ShaderStages, StencilFaceState, StencilOperation, StencilState,
+    DynamicOffset, Extent3d, Face, Features, FilterMode, FrontFace, Gles3MinorVersion,
+    ImageDataLayout, ImageSubresourceRange, IndexFormat, InstanceDescriptor, Limits,
+    MultisampleState, Origin2d, Origin3d, PipelineStatisticsTypes, PolygonMode, PowerPreference,
+    PredefinedColorSpace, PresentMode, PresentationTimestamp, PrimitiveState, PrimitiveTopology,
+    PushConstantRange, QueryType, RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor,
+    ShaderLocation, ShaderModel, ShaderStages, StencilFaceState, StencilOperation, StencilState,
     StorageTextureAccess, SurfaceCapabilities, SurfaceStatus, TextureAspect, TextureDimension,
     TextureFormat, TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType,
     TextureUsages, TextureViewDimension, VertexAttribute, VertexFormat, VertexStepMode,
@@ -764,9 +763,21 @@ impl Drop for CommandEncoder {
     }
 }
 
-/// In-progress recording of a render pass.
+/// In-progress recording of a render pass: a list of render commands in a [`CommandEncoder`].
 ///
-/// It can be created with [`CommandEncoder::begin_render_pass`].
+/// It can be created with [`CommandEncoder::begin_render_pass()`], whose [`RenderPassDescriptor`]
+/// specifies the attachments (textures) that will be rendered to.
+///
+/// Most of the methods on `RenderPass` serve one of two purposes, identifiable by their names:
+///
+/// * `draw_*()`: Drawing (that is, encoding a render command, which, when executed by the GPU, will
+///   rasterize something and execute shaders).
+/// * `set_*()`: Setting part of the [render state](https://gpuweb.github.io/gpuweb/#renderstate)
+///   for future drawing commands.
+///
+/// A render pass may contain any number of drawing commands, and before/between each command the
+/// render state may be updated however you wish; each drawing command will be executed using the
+/// render state that has been set when the `draw_*()` function is called.
 ///
 /// Corresponds to [WebGPU `GPURenderPassEncoder`](
 /// https://gpuweb.github.io/gpuweb/#render-pass-encoder).
@@ -851,6 +862,7 @@ impl Drop for RenderBundle {
 /// It can be created with [`Device::create_query_set`].
 ///
 /// Corresponds to [WebGPU `GPUQuerySet`](https://gpuweb.github.io/gpuweb/#queryset).
+#[derive(Debug)]
 pub struct QuerySet {
     context: Arc<C>,
     id: ObjectId,
@@ -863,6 +875,13 @@ pub struct QuerySet {
         not(target_feature = "atomics")
     )
 ))]
+#[cfg(any(
+    not(target_arch = "wasm32"),
+    all(
+        feature = "fragile-send-sync-non-atomic-wasm",
+        not(target_feature = "atomics")
+    )
+))]
 static_assertions::assert_impl_all!(QuerySet: Send, Sync);
 
 impl Drop for QuerySet {
@@ -1029,6 +1048,31 @@ impl<V: Default> Default for Operations<V> {
     }
 }
 
+/// Describes the timestamp writes of a render pass.
+///
+/// For use with [`RenderPassDescriptor`].
+/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
+///
+/// Corresponds to [WebGPU `GPURenderPassTimestampWrites`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrites).
+#[derive(Clone, Debug)]
+pub struct RenderPassTimestampWrites<'a> {
+    /// The query set to write to.
+    pub query_set: &'a QuerySet,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+#[cfg(any(
+    not(target_arch = "wasm32"),
+    all(
+        feature = "fragile-send-sync-non-atomic-wasm",
+        not(target_feature = "atomics")
+    )
+))]
+static_assertions::assert_impl_all!(RenderPassTimestampWrites: Send, Sync);
+
 /// Describes a color attachment to a [`RenderPass`].
 ///
 /// For use with [`RenderPassDescriptor`].
@@ -1336,6 +1380,12 @@ pub struct RenderPassDescriptor<'tex, 'desc> {
     pub color_attachments: &'desc [Option<RenderPassColorAttachment<'tex>>],
     /// The depth and stencil attachment of the render pass, if any.
     pub depth_stencil_attachment: Option<RenderPassDepthStencilAttachment<'tex>>,
+    /// Defines which timestamp values will be written for this pass, and where to write them to.
+    ///
+    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
+    pub timestamp_writes: Option<RenderPassTimestampWrites<'desc>>,
+    /// Defines where the occlusion query results will be stored for this pass.
+    pub occlusion_query_set: Option<&'tex QuerySet>,
 }
 #[cfg(any(
     not(target_arch = "wasm32"),
@@ -1448,17 +1498,53 @@ pub struct RenderPipelineDescriptor<'a> {
 ))]
 static_assertions::assert_impl_all!(RenderPipelineDescriptor: Send, Sync);
 
+/// Describes the timestamp writes of a compute pass.
+///
+/// For use with [`ComputePassDescriptor`].
+/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
+///
+/// Corresponds to [WebGPU `GPUComputePassTimestampWrites`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrites).
+#[derive(Clone, Debug)]
+pub struct ComputePassTimestampWrites<'a> {
+    /// The query set to write to.
+    pub query_set: &'a QuerySet,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+#[cfg(any(
+    not(target_arch = "wasm32"),
+    all(
+        feature = "fragile-send-sync-non-atomic-wasm",
+        not(target_feature = "atomics")
+    )
+))]
+static_assertions::assert_impl_all!(ComputePassTimestampWrites: Send, Sync);
+
 /// Describes the attachments of a compute pass.
 ///
 /// For use with [`CommandEncoder::begin_compute_pass`].
 ///
 /// Corresponds to [WebGPU `GPUComputePassDescriptor`](
 /// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor).
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Default, Debug)]
 pub struct ComputePassDescriptor<'a> {
     /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification.
     pub label: Label<'a>,
+    /// Defines which timestamp values will be written for this pass, and where to write them to.
+    ///
+    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
+    pub timestamp_writes: Option<ComputePassTimestampWrites<'a>>,
 }
+#[cfg(any(
+    not(target_arch = "wasm32"),
+    all(
+        feature = "fragile-send-sync-non-atomic-wasm",
+        not(target_feature = "atomics")
+    )
+))]
 static_assertions::assert_impl_all!(ComputePassDescriptor: Send, Sync);
 
 /// Describes a compute pipeline.
@@ -1613,8 +1699,8 @@ pub enum SurfaceError {
 }
 static_assertions::assert_impl_all!(SurfaceError: Send, Sync);
 
-impl Display for SurfaceError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl fmt::Display for SurfaceError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(f, "{}", match self {
             Self::Timeout => "A timeout was encountered while trying to acquire the next frame",
             Self::Outdated => "The underlying surface has changed, and therefore the swap chain must be updated",
@@ -2657,8 +2743,8 @@ impl Drop for Device {
 pub struct RequestDeviceError;
 static_assertions::assert_impl_all!(RequestDeviceError: Send, Sync);
 
-impl Display for RequestDeviceError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl fmt::Display for RequestDeviceError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(f, "Requesting a device failed")
     }
 }
@@ -2666,28 +2752,76 @@ impl Display for RequestDeviceError {
 impl error::Error for RequestDeviceError {}
 
 /// [`Instance::create_surface()`] or a related function failed.
-#[derive(Clone, PartialEq, Eq, Debug)]
+#[derive(Clone, Debug)]
 #[non_exhaustive]
 pub struct CreateSurfaceError {
-    // TODO: Report diagnostic clues
+    inner: CreateSurfaceErrorKind,
+}
+#[derive(Clone, Debug)]
+enum CreateSurfaceErrorKind {
+    /// Error from [`wgpu_hal`].
+    #[cfg(any(
+        not(target_arch = "wasm32"),
+        target_os = "emscripten",
+        feature = "webgl"
+    ))]
+    // must match dependency cfg
+    Hal(hal::InstanceError),
+
+    /// Error from WebGPU surface creation.
+    #[allow(dead_code)] // may be unused depending on target and features
+    Web(String),
 }
 static_assertions::assert_impl_all!(CreateSurfaceError: Send, Sync);
 
-impl Display for CreateSurfaceError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Creating a surface failed")
+impl fmt::Display for CreateSurfaceError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.inner {
+            #[cfg(any(
+                not(target_arch = "wasm32"),
+                target_os = "emscripten",
+                feature = "webgl"
+            ))]
+            CreateSurfaceErrorKind::Hal(e) => e.fmt(f),
+            CreateSurfaceErrorKind::Web(e) => e.fmt(f),
+        }
     }
 }
 
-impl error::Error for CreateSurfaceError {}
+impl error::Error for CreateSurfaceError {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match &self.inner {
+            #[cfg(any(
+                not(target_arch = "wasm32"),
+                target_os = "emscripten",
+                feature = "webgl"
+            ))]
+            CreateSurfaceErrorKind::Hal(e) => e.source(),
+            CreateSurfaceErrorKind::Web(_) => None,
+        }
+    }
+}
+
+#[cfg(any(
+    not(target_arch = "wasm32"),
+    target_os = "emscripten",
+    feature = "webgl"
+))]
+impl From<hal::InstanceError> for CreateSurfaceError {
+    fn from(e: hal::InstanceError) -> Self {
+        Self {
+            inner: CreateSurfaceErrorKind::Hal(e),
+        }
+    }
+}
 
 /// Error occurred when trying to async map a buffer.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct BufferAsyncError;
 static_assertions::assert_impl_all!(BufferAsyncError: Send, Sync);
 
-impl Display for BufferAsyncError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl fmt::Display for BufferAsyncError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(f, "Error occurred when trying to async map a buffer")
     }
 }
@@ -2722,33 +2856,6 @@ fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
     (offset, size)
 }
 
-#[cfg(test)]
-mod tests {
-    use crate::BufferSize;
-
-    #[test]
-    fn range_to_offset_size_works() {
-        assert_eq!(crate::range_to_offset_size(0..2), (0, BufferSize::new(2)));
-        assert_eq!(crate::range_to_offset_size(2..5), (2, BufferSize::new(3)));
-        assert_eq!(crate::range_to_offset_size(..), (0, None));
-        assert_eq!(crate::range_to_offset_size(21..), (21, None));
-        assert_eq!(crate::range_to_offset_size(0..), (0, None));
-        assert_eq!(crate::range_to_offset_size(..21), (0, BufferSize::new(21)));
-    }
-
-    #[test]
-    #[should_panic]
-    fn range_to_offset_size_panics_for_empty_range() {
-        crate::range_to_offset_size(123..123);
-    }
-
-    #[test]
-    #[should_panic]
-    fn range_to_offset_size_panics_for_unbounded_empty_range() {
-        crate::range_to_offset_size(..0);
-    }
-}
-
 /// Read only view into a mapped buffer.
 #[derive(Debug)]
 pub struct BufferView<'a> {
@@ -2931,6 +3038,26 @@ impl<'a> BufferSlice<'a> {
         BufferView { slice: *self, data }
     }
 
+    /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable
+    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic.
+    ///
+    /// This is useful in wasm builds when you want to pass mapped data directly to js. Unlike `get_mapped_range`
+    /// which unconditionally copies mapped data into the wasm heap, this function directly hands you the
+    /// ArrayBuffer that we mapped the data into in js.
+    #[cfg(all(
+        target_arch = "wasm32",
+        not(any(target_os = "emscripten", feature = "webgl"))
+    ))]
+    pub fn get_mapped_range_as_array_buffer(&self) -> js_sys::ArrayBuffer {
+        let end = self.buffer.map_context.lock().add(self.offset, self.size);
+        DynContext::buffer_get_mapped_range_as_array_buffer(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            self.offset..end,
+        )
+    }
+
     /// Synchronously and immediately map a buffer for writing. If the buffer is not immediately mappable
     /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic.
     pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> {
@@ -3348,11 +3475,14 @@ impl CommandEncoder {
 
 impl<'a> RenderPass<'a> {
     /// Sets the active bind group for a given bind group index. The bind group layout
-    /// in the active pipeline when any `draw()` function is called must match the layout of this bind group.
+    /// in the active pipeline when any `draw_*()` method is called must match the layout of
+    /// this bind group.
     ///
     /// If the bind group have dynamic offsets, provide them in binding order.
     /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
     /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
+    ///
+    /// Subsequent draw calls’ shader executions will be able to access data in these bind groups.
     pub fn set_bind_group(
         &mut self,
         index: u32,
@@ -3386,6 +3516,8 @@ impl<'a> RenderPass<'a> {
     /// Sets the blend color as used by some of the blending modes.
     ///
     /// Subsequent blending tests will test against this value.
+    /// If this method has not been called, the blend constant defaults to [`Color::TRANSPARENT`]
+    /// (all components zero).
     pub fn set_blend_constant(&mut self, color: Color) {
         DynContext::render_pass_set_blend_constant(
             &*self.parent.context,
@@ -3439,6 +3571,11 @@ impl<'a> RenderPass<'a> {
     /// After transformation into [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
     ///
     /// Subsequent draw calls will discard any fragments which fall outside the scissor rectangle.
+    /// If this method has not been called, the scissor rectangle defaults to the entire bounds of
+    /// the render targets.
+    ///
+    /// The function of the scissor rectangle resembles [`set_viewport()`](Self::set_viewport),
+    /// but it does not affect the coordinate system, only which fragments are discarded.
     pub fn set_scissor_rect(&mut self, x: u32, y: u32, width: u32, height: u32) {
         DynContext::render_pass_set_scissor_rect(
             &*self.parent.context,
@@ -3454,7 +3591,9 @@ impl<'a> RenderPass<'a> {
     /// Sets the viewport used during the rasterization stage to linearly map
     /// from [normalized device coordinates](https://www.w3.org/TR/webgpu/#ndc) to [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
     ///
-    /// Subsequent draw calls will draw any fragments in this region.
+    /// Subsequent draw calls will only draw within this region.
+    /// If this method has not been called, the viewport defaults to the entire bounds of the render
+    /// targets.
     pub fn set_viewport(&mut self, x: f32, y: f32, w: f32, h: f32, min_depth: f32, max_depth: f32) {
         DynContext::render_pass_set_viewport(
             &*self.parent.context,
@@ -3472,6 +3611,7 @@ impl<'a> RenderPass<'a> {
     /// Sets the stencil reference.
     ///
     /// Subsequent stencil tests will test against this value.
+    /// If this method has not been called, the stencil reference value defaults to `0`.
     pub fn set_stencil_reference(&mut self, reference: u32) {
         DynContext::render_pass_set_stencil_reference(
             &*self.parent.context,
@@ -3499,6 +3639,9 @@ impl<'a> RenderPass<'a> {
     ///     }
     /// }
     /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
     pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
         DynContext::render_pass_draw(
             &*self.parent.context,
@@ -3559,6 +3702,9 @@ impl<'a> RenderPass<'a> {
     ///     }
     /// }
     /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
     pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
         DynContext::render_pass_draw_indexed(
             &*self.parent.context,
@@ -3575,6 +3721,9 @@ impl<'a> RenderPass<'a> {
     /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
     ///
     /// The structure expected in `indirect_buffer` must conform to [`DrawIndirect`](crate::util::DrawIndirect).
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
     pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: BufferAddress) {
         DynContext::render_pass_draw_indirect(
             &*self.parent.context,
@@ -3593,6 +3742,9 @@ impl<'a> RenderPass<'a> {
     /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
     ///
     /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirect`](crate::util::DrawIndexedIndirect).
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
     pub fn draw_indexed_indirect(
         &mut self,
         indirect_buffer: &'a Buffer,
@@ -3610,6 +3762,9 @@ impl<'a> RenderPass<'a> {
 
     /// Execute a [render bundle][RenderBundle], which is a set of pre-recorded commands
     /// that can be run together.
+    ///
+    /// Commands in the bundle do not inherit this render pass's current render state, and after the
+    /// bundle has executed, the state is **cleared** (reset to defaults, not the previous state).
     pub fn execute_bundles<I: IntoIterator<Item = &'a RenderBundle> + 'a>(
         &mut self,
         render_bundles: I,
@@ -3635,8 +3790,10 @@ impl<'a> RenderPass<'a> {
     /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
     ///
     /// The structure expected in `indirect_buffer` must conform to [`DrawIndirect`](crate::util::DrawIndirect).
-    ///
     /// These draw structures are expected to be tightly packed.
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
     pub fn multi_draw_indirect(
         &mut self,
         indirect_buffer: &'a Buffer,
@@ -3661,8 +3818,10 @@ impl<'a> RenderPass<'a> {
     /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
     ///
     /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirect`](crate::util::DrawIndexedIndirect).
-    ///
     /// These draw structures are expected to be tightly packed.
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
     pub fn multi_draw_indexed_indirect(
         &mut self,
         indirect_buffer: &'a Buffer,
@@ -3692,7 +3851,6 @@ impl<'a> RenderPass<'a> {
     /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
     ///
     /// The structure expected in `indirect_buffer` must conform to [`DrawIndirect`](crate::util::DrawIndirect).
-    ///
     /// These draw structures are expected to be tightly packed.
     ///
     /// The structure expected in `count_buffer` is the following:
@@ -3703,6 +3861,9 @@ impl<'a> RenderPass<'a> {
     ///     count: u32, // Number of draw calls to issue.
     /// }
     /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
     pub fn multi_draw_indirect_count(
         &mut self,
         indirect_buffer: &'a Buffer,
@@ -3747,6 +3908,9 @@ impl<'a> RenderPass<'a> {
     ///     count: u32, // Number of draw calls to issue.
     /// }
     /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
     pub fn multi_draw_indexed_indirect_count(
         &mut self,
         indirect_buffer: &'a Buffer,
@@ -3845,6 +4009,29 @@ impl<'a> RenderPass<'a> {
     }
 }
 
+impl<'a> RenderPass<'a> {
+    /// Start an occlusion query on this render pass. It can be ended with
+    /// `end_occlusion_query`. Occlusion queries may not be nested.
+    pub fn begin_occlusion_query(&mut self, query_index: u32) {
+        DynContext::render_pass_begin_occlusion_query(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            query_index,
+        );
+    }
+
+    /// End the occlusion query on this render pass. It can be started with
+    /// `begin_occlusion_query`. Occlusion queries may not be nested.
+    pub fn end_occlusion_query(&mut self) {
+        DynContext::render_pass_end_occlusion_query(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+        );
+    }
+}
+
 /// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
 impl<'a> RenderPass<'a> {
     /// Start a pipeline statistics query on this render pass. It can be ended with
@@ -4481,13 +4668,16 @@ impl Queue {
     /// Gets the amount of nanoseconds each tick of a timestamp query represents.
     ///
     /// Returns zero if timestamp queries are unsupported.
+    ///
+    /// Timestamp values are expressed in nanoseconds on WebGPU, see <https://gpuweb.github.io/gpuweb/#timestamp>.
+    /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required.
     pub fn get_timestamp_period(&self) -> f32 {
         DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref())
     }
 
     /// Registers a callback when the previous call to submit finishes running on the gpu. This callback
-    /// being called implies that all mapped buffer callbacks attached to the same submission have also
-    /// been called.
+    /// being called implies that all mapped buffer callbacks which were registered before this call will
+    /// have been called.
     ///
     /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
     /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
@@ -4706,8 +4896,8 @@ impl<T> Clone for Id<T> {
 impl<T> Copy for Id<T> {}
 
 #[cfg(feature = "expose-ids")]
-impl<T> Debug for Id<T> {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+impl<T> fmt::Debug for Id<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         f.debug_tuple("Id").field(&self.0).finish()
     }
 }
@@ -5007,8 +5197,8 @@ impl error::Error for Error {
     }
 }
 
-impl Display for Error {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             Error::OutOfMemory { .. } => f.write_str("Out of Memory"),
             Error::Validation { description, .. } => f.write_str(description),
@@ -5047,3 +5237,30 @@ mod send_sync {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::BufferSize;
+
+    #[test]
+    fn range_to_offset_size_works() {
+        assert_eq!(crate::range_to_offset_size(0..2), (0, BufferSize::new(2)));
+        assert_eq!(crate::range_to_offset_size(2..5), (2, BufferSize::new(3)));
+        assert_eq!(crate::range_to_offset_size(..), (0, None));
+        assert_eq!(crate::range_to_offset_size(21..), (21, None));
+        assert_eq!(crate::range_to_offset_size(0..), (0, None));
+        assert_eq!(crate::range_to_offset_size(..21), (0, BufferSize::new(21)));
+    }
+
+    #[test]
+    #[should_panic]
+    fn range_to_offset_size_panics_for_empty_range() {
+        crate::range_to_offset_size(123..123);
+    }
+
+    #[test]
+    #[should_panic]
+    fn range_to_offset_size_panics_for_unbounded_empty_range() {
+        crate::range_to_offset_size(..0);
+    }
+}
diff --git a/wgpu/src/util/init.rs b/wgpu/src/util/init.rs
index 9d186b685a..987e8400fd 100644
--- a/wgpu/src/util/init.rs
+++ b/wgpu/src/util/init.rs
@@ -30,6 +30,7 @@ pub fn power_preference_from_env() -> Option<PowerPreference> {
         {
             Ok("low") => PowerPreference::LowPower,
             Ok("high") => PowerPreference::HighPerformance,
+            Ok("none") => PowerPreference::None,
             _ => return None,
         },
     )
@@ -114,3 +115,22 @@ pub fn dx12_shader_compiler_from_env() -> Option<wgt::Dx12Compiler> {
         },
     )
 }
+
+/// Choose which minor OpenGL ES version to use from the environment variable `WGPU_GLES_MINOR_VERSION`.
+///
+/// Possible values are `0`, `1`, `2` or `automatic`. Case insensitive.
+pub fn gles_minor_version_from_env() -> Option<wgt::Gles3MinorVersion> {
+    Some(
+        match std::env::var("WGPU_GLES_MINOR_VERSION")
+            .as_deref()
+            .map(str::to_lowercase)
+            .as_deref()
+        {
+            Ok("automatic") => wgt::Gles3MinorVersion::Automatic,
+            Ok("0") => wgt::Gles3MinorVersion::Version0,
+            Ok("1") => wgt::Gles3MinorVersion::Version1,
+            Ok("2") => wgt::Gles3MinorVersion::Version2,
+            _ => return None,
+        },
+    )
+}