diff --git a/wgpu/CHANGELOG.md b/wgpu/CHANGELOG.md
index 89060ad991..dcfb7660c9 100644
--- a/wgpu/CHANGELOG.md
+++ b/wgpu/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Change Log
 
+### v0.8 (2021-04-29)
+- See https://github.com/gfx-rs/wgpu/blob/v0.8/CHANGELOG.md#v08-2021-04-29
+- Naga is the default shader conversion path on Metal, Vulkan, and OpenGL
+- SPIRV-Cross is optionally enabled with the "cross" feature
+- All of the examples (except "texture-array") now use WGSL
+
 ### v0.7 (2021-01-31)
 - See https://github.com/gfx-rs/wgpu/blob/v0.7/CHANGELOG.md#v07-2020-08-30
 - Features:
diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml
index 888bb60b69..826b35a777 100644
--- a/wgpu/Cargo.toml
+++ b/wgpu/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "wgpu"
-version = "0.7.0"
+version = "0.8.0"
 authors = ["wgpu developers"]
 edition = "2018"
 description = "Rusty WebGPU API wrapper"
@@ -45,7 +45,6 @@ rev = "e5ddb94be0221b0f53a8f43adfb15458daebfd7c"
 arrayvec = "0.5"
 log = "0.4"
 parking_lot = "0.11"
-profiling = { version = "1", default-features = false }
 raw-window-handle = "0.3"
 serde = { version = "1", features = ["derive"], optional = true }
 smallvec = "1"
diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs
index 796e296d96..943cf6ec78 100644
--- a/wgpu/src/backend/direct.rs
+++ b/wgpu/src/backend/direct.rs
@@ -395,7 +395,6 @@ mod pass_impl {
             &mut self,
             render_bundles: I,
         ) {
-            profiling::scope!("RenderPass::execute_bundles wrapper");
             let temp_render_bundles = render_bundles.cloned().collect::<SmallVec<[_; 4]>>();
             unsafe {
                 wgpu_render_pass_execute_bundles(
@@ -858,7 +857,6 @@ impl crate::Context for Context {
         device: &Self::DeviceId,
         desc: &BindGroupDescriptor,
     ) -> Self::BindGroupId {
-        profiling::scope!("Device::create_bind_group wrapper");
         use wgc::binding_model as bm;
 
         let mut arrayed_texture_views = Vec::new();
@@ -953,8 +951,6 @@ impl crate::Context for Context {
         device: &Self::DeviceId,
         desc: &PipelineLayoutDescriptor,
     ) -> Self::PipelineLayoutId {
-        profiling::scope!("Device::create_pipeline_layout wrapper");
-
         // Limit is always less or equal to wgc::MAX_BIND_GROUPS, so this is always right
         // Guards following ArrayVec
         assert!(
@@ -998,7 +994,6 @@ impl crate::Context for Context {
         device: &Self::DeviceId,
         desc: &RenderPipelineDescriptor,
     ) -> Self::RenderPipelineId {
-        profiling::scope!("Device::create_render_pipeline wrapper");
         use wgc::pipeline as pipe;
 
         let vertex_buffers: ArrayVec<[_; wgc::device::MAX_VERTEX_BUFFERS]> = desc
@@ -1316,8 +1311,6 @@ impl crate::Context for Context {
         mode: MapMode,
         range: Range<wgt::BufferAddress>,
     ) -> Self::MapAsyncFuture {
-        profiling::scope!("Buffer::buffer_map_async wrapper");
-
         let (future, completion) = native_gpu_future::new_gpu_future();
 
         extern "C" fn buffer_map_future_wrapper(
@@ -1728,7 +1721,6 @@ impl crate::Context for Context {
         encoder: &Self::CommandEncoderId,
         desc: &crate::RenderPassDescriptor<'a, '_>,
     ) -> Self::RenderPassId {
-        profiling::scope!("CommandEncoder::begin_render_pass wrapper");
         let colors = desc
             .color_attachments
             .iter()
diff --git a/wgpu/src/util/belt.rs b/wgpu/src/util/belt.rs
index d94ae0a0cb..d787159e79 100644
--- a/wgpu/src/util/belt.rs
+++ b/wgpu/src/util/belt.rs
@@ -112,8 +112,6 @@ impl StagingBelt {
             self.free_chunks.swap_remove(index)
         } else {
             let size = self.chunk_size.max(size.get());
-            #[cfg(not(target_arch = "wasm32"))]
-            profiling::scope!("Creating chunk of size {}");
             Chunk {
                 buffer: device.create_buffer(&BufferDescriptor {
                     label: Some("staging"),
@@ -148,9 +146,6 @@ impl StagingBelt {
     /// At this point, all the partially used staging buffers are closed until
     /// the GPU is done copying the data from them.
     pub fn finish(&mut self) {
-        #[cfg(not(target_arch = "wasm32"))]
-        profiling::scope!("Finishing chunks");
-
         for chunk in self.active_chunks.drain(..) {
             chunk.buffer.unmap();
             self.closed_chunks.push(chunk);