diff --git a/crates/cubecl-wgpu/Cargo.toml b/crates/cubecl-wgpu/Cargo.toml index a390d6424..be4ecfe2d 100644 --- a/crates/cubecl-wgpu/Cargo.toml +++ b/crates/cubecl-wgpu/Cargo.toml @@ -27,7 +27,7 @@ cubecl-common = { path = "../cubecl-common", version = "0.1.1" } cubecl-core = { path = "../cubecl-core", version = "0.1.1" } bytemuck = { workspace = true } -wgpu = { version = "0.20.1", features = ["fragile-send-sync-non-atomic-wasm"] } +wgpu = { version = "22.0.0", features = ["fragile-send-sync-non-atomic-wasm"] } pollster = { workspace = true } log = { workspace = true } diff --git a/crates/cubecl-wgpu/src/compute/server.rs b/crates/cubecl-wgpu/src/compute/server.rs index 8f4885e11..9b970d220 100644 --- a/crates/cubecl-wgpu/src/compute/server.rs +++ b/crates/cubecl-wgpu/src/compute/server.rs @@ -124,6 +124,7 @@ where module: &module, entry_point: "main", compilation_options: Default::default(), + cache: None, }), ) } diff --git a/crates/cubecl-wgpu/src/runtime.rs b/crates/cubecl-wgpu/src/runtime.rs index c4217f35b..2b2c7db24 100644 --- a/crates/cubecl-wgpu/src/runtime.rs +++ b/crates/cubecl-wgpu/src/runtime.rs @@ -155,6 +155,10 @@ pub async fn select_device( label: None, required_features: adapter.features(), required_limits: limits, + // The default is MemoryHints::Performance, which tries to do some bigger + // block allocations. However, we already batch allocations, so we + // can use MemoryHints::MemoryUsage to lower memory usage. + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, )