From 97a61ef19bf3b72cf698c278cdb7597f94487dbf Mon Sep 17 00:00:00 2001
From: Andreas Reich
Date: Wed, 9 Oct 2024 10:11:11 +0200
Subject: [PATCH] more profiling scopes

---
 .../chroma_subsampling_converter.rs           | 349 ++++++++++++++++++
 .../image_data_to_texture.rs                  |   4 +
 2 files changed, 353 insertions(+)
 create mode 100644 crates/viewer/re_renderer/src/resource_managers/chroma_subsampling_converter.rs

diff --git a/crates/viewer/re_renderer/src/resource_managers/chroma_subsampling_converter.rs b/crates/viewer/re_renderer/src/resource_managers/chroma_subsampling_converter.rs
new file mode 100644
index 000000000000..1fe490cffe44
--- /dev/null
+++ b/crates/viewer/re_renderer/src/resource_managers/chroma_subsampling_converter.rs
@@ -0,0 +1,349 @@
+use smallvec::smallvec;
+
+use crate::{
+    allocator::create_and_fill_uniform_buffer,
+    include_shader_module,
+    renderer::{screen_triangle_vertex_shader, DrawData, DrawError, Renderer},
+    wgpu_resources::{
+        BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle,
+        GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc, TextureDesc,
+    },
+    DebugLabel, RenderContext,
+};
+
+use super::ColorPrimaries;
+
+/// Supported chroma subsampling input formats.
+///
+/// Keep indices in sync with `yuv_converter.wgsl`
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, Debug)]
+pub enum YuvPixelLayout {
+    /// 4:2:0 subsampling with a separate Y plane, followed by a UV plane.
+    ///
+    /// Expects single channel texture format.
+    ///
+    /// First comes entire image in Y in one plane,
+    /// followed by a plane with interleaved lines ordered as U0, V0, U1, V1, etc.
+    ///
+    ///          width
+    ///          __________
+    ///          |         |
+    /// height   |    Y    |
+    ///          |         |
+    ///          |_________|
+    /// height/2 | U,V,U,… |
+    ///          |_________|
+    Y_UV12 = 0,
+
+    /// YUV 4:2:2 subsampling, single plane.
+    ///
+    /// Expects single channel texture format.
+    ///
+    /// The order of the channels is Y0, U0, Y1, V0, all in the same plane.
+    ///
+    ///             width * 2
+    ///          __________________
+    ///          |                 |
+    /// height   | Y0, U0, Y1, V0… |
+    ///          |_________________|
+    ///
+    YUYV16 = 1,
+}
+
+impl YuvPixelLayout {
+    /// Given the dimensions of the output picture, what are the expected dimensions of the input data texture.
+    pub fn data_texture_width_height(&self, [decoded_width, decoded_height]: [u32; 2]) -> [u32; 2] {
+        match self {
+            Self::Y_UV12 => [decoded_width, decoded_height + decoded_height / 2],
+            Self::YUYV16 => [decoded_width * 2, decoded_height],
+        }
+    }
+
+    /// What format the input data texture is expected to be in.
+    pub fn data_texture_format(&self) -> wgpu::TextureFormat {
+        // TODO(andreas): How to deal with higher precision formats here?
+        //
+        // Our shader currently works with 8 bit integer formats here since while
+        // _technically_ YUV formats have nothing to do with concrete bit depth,
+        // practically there's underlying expectation for 8 bits per channel
+        // as long as the data is Bt.709 or Bt.601.
+        // In other words: The conversions implementations we have today expect 0-255 as the value range.
+
+        #[allow(clippy::match_same_arms)]
+        match self {
+            Self::Y_UV12 => wgpu::TextureFormat::R8Uint,
+            // TODO(andreas): Why not use [`wgpu::TextureFormat::Rg8Uint`] here?
+            Self::YUYV16 => wgpu::TextureFormat::R8Uint,
+        }
+    }
+
+    /// Size of the buffer needed to create the data texture, i.e. the raw input data.
+    pub fn num_data_buffer_bytes(&self, [decoded_width, decoded_height]: [u32; 2]) -> usize {
+        let num_pixels = decoded_width as usize * decoded_height as usize;
+        match self {
+            Self::Y_UV12 => 12 * num_pixels / 8,
+            Self::YUYV16 => 16 * num_pixels / 8,
+        }
+    }
+}
+
+mod gpu_data {
+    use crate::wgpu_buffer_types;
+
+    #[repr(C)]
+    #[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)]
+    pub struct UniformBuffer {
+        /// Uses [`super::YuvPixelLayout`].
+        pub format: u32,
+
+        /// Uses [`super::ColorPrimaries`].
+        pub primaries: u32,
+
+        /// Width and height of the conversion target texture.
+        pub target_texture_size: [u32; 2],
+
+        pub _end_padding: [wgpu_buffer_types::PaddingRow; 16 - 1],
+    }
+}
+
+/// A work item for the subsampling converter.
+pub struct YuvFormatConversionTask {
+    /// Bind group holding the uniform buffer and the input data texture view.
+    bind_group: GpuBindGroup,
+    /// Render target that receives the converted image.
+    target_texture: GpuTexture,
+}
+
+impl DrawData for YuvFormatConversionTask {
+    type Renderer = YuvFormatConverter;
+}
+
+impl YuvFormatConversionTask {
+    /// sRGB encoded 8 bit texture.
+    ///
+    /// Not using [`wgpu::TextureFormat::Rgba8UnormSrgb`] since consumers typically consume this
+    /// texture with software EOTF ("to linear") for more flexibility.
+    pub const OUTPUT_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm;
+
+    /// Creates a new conversion task that can be used with [`YuvFormatConverter`].
+    ///
+    /// Does *not* validate that the input data has the expected format,
+    /// see methods of [`YuvPixelLayout`] for details.
+    pub fn new(
+        ctx: &RenderContext,
+        format: YuvPixelLayout,
+        primaries: ColorPrimaries,
+        input_data: &GpuTexture,
+        output_label: &DebugLabel,
+        output_width_height: [u32; 2],
+    ) -> Self {
+        re_tracing::profile_function!();
+
+        let target_texture = ctx.gpu_resources.textures.alloc(
+            &ctx.device,
+            &TextureDesc {
+                label: output_label.clone(),
+                size: wgpu::Extent3d {
+                    width: output_width_height[0],
+                    height: output_width_height[1],
+                    depth_or_array_layers: 1,
+                },
+                mip_level_count: 1, // We don't have mipmap level generation yet!
+                sample_count: 1,
+                dimension: wgpu::TextureDimension::D2,
+                format: Self::OUTPUT_FORMAT,
+                usage: wgpu::TextureUsages::TEXTURE_BINDING
+                    | wgpu::TextureUsages::COPY_DST
+                    | wgpu::TextureUsages::RENDER_ATTACHMENT,
+            },
+        );
+
+        let renderer = ctx.renderer::<YuvFormatConverter>();
+
+        let uniform_buffer = create_and_fill_uniform_buffer(
+            ctx,
+            format!("{output_label}_conversion").into(),
+            gpu_data::UniformBuffer {
+                format: format as _,
+                primaries: primaries as _,
+                target_texture_size: output_width_height,
+
+                _end_padding: Default::default(),
+            },
+        );
+
+        let bind_group = ctx.gpu_resources.bind_groups.alloc(
+            &ctx.device,
+            &ctx.gpu_resources,
+            &BindGroupDesc {
+                label: "YuvFormatConversionTask::bind_group".into(),
+                entries: smallvec![
+                    uniform_buffer,
+                    BindGroupEntry::DefaultTextureView(input_data.handle),
+                ],
+                layout: renderer.bind_group_layout,
+            },
+        );
+
+        Self {
+            bind_group,
+            target_texture,
+        }
+    }
+
+    /// Runs the conversion from the input texture data.
+    ///
+    /// Records one render pass on the frame's `before_view_builder_encoder` that draws into the
+    /// target texture, then returns that texture.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`DrawError`] if [`YuvFormatConverter`]'s draw call fails.
+    pub fn convert_input_data_to_texture(
+        self,
+        ctx: &RenderContext,
+    ) -> Result<GpuTexture, DrawError> {
+        re_tracing::profile_function!();
+
+        // TODO(andreas): Does this have to be on the global view encoder?
+        // If this ever becomes a problem we could easily schedule this to another encoder as long as
+        // we guarantee that the conversion is enqueued before the resulting texture is used.
+        // Given that we already have this neatly encapsulated work package this would be quite easy to do!
+        let mut encoder = ctx.active_frame.before_view_builder_encoder.lock();
+        let mut pass = encoder
+            .get()
+            .begin_render_pass(&wgpu::RenderPassDescriptor {
+                label: self.target_texture.creation_desc.label.get(),
+                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                    view: &self.target_texture.default_view,
+                    resolve_target: None,
+                    ops: wgpu::Operations {
+                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
+                        store: wgpu::StoreOp::Store,
+                    },
+                })],
+                ..Default::default()
+            });
+
+        ctx.renderer::<YuvFormatConverter>().draw(
+            &ctx.gpu_resources.render_pipelines.resources(),
+            crate::draw_phases::DrawPhase::Opaque, // Don't care about the phase.
+            &mut pass,
+            &self,
+        )?;
+
+        Ok(self.target_texture)
+    }
+}
+
+/// Converter for chroma subsampling formats.
+///
+/// Takes chroma subsampled data and draws to a fullscreen sRGB output texture.
+/// Implemented as a [`Renderer`] in order to make use of the existing mechanisms for storing renderer data.
+/// (we need some place to lazily create the render pipeline, store a handle to it and encapsulate the draw logic!)
+pub struct YuvFormatConverter {
+    render_pipeline: GpuRenderPipelineHandle,
+    bind_group_layout: GpuBindGroupLayoutHandle,
+}
+
+impl Renderer for YuvFormatConverter {
+    type RendererDrawData = YuvFormatConversionTask;
+
+    /// Creates the bind group layout, pipeline layout and render pipeline for YUV conversion.
+    fn create_renderer(ctx: &RenderContext) -> Self {
+        let vertex_handle = screen_triangle_vertex_shader(ctx);
+
+        let bind_group_layout = ctx.gpu_resources.bind_group_layouts.get_or_create(
+            &ctx.device,
+            &BindGroupLayoutDesc {
+                label: "ChromaSubsamplingConverter".into(),
+                entries: vec![
+                    // Uniform buffer with some information.
+                    wgpu::BindGroupLayoutEntry {
+                        binding: 0,
+                        visibility: wgpu::ShaderStages::FRAGMENT,
+                        ty: wgpu::BindingType::Buffer {
+                            ty: wgpu::BufferBindingType::Uniform,
+                            has_dynamic_offset: false,
+                            min_binding_size: (std::mem::size_of::<gpu_data::UniformBuffer>()
+                                as u64)
+                                .try_into()
+                                .ok(),
+                        },
+                        count: None,
+                    },
+                    // Input data texture.
+                    wgpu::BindGroupLayoutEntry {
+                        binding: 1,
+                        visibility: wgpu::ShaderStages::FRAGMENT,
+                        ty: wgpu::BindingType::Texture {
+                            multisampled: false,
+                            view_dimension: wgpu::TextureViewDimension::D2,
+                            sample_type: wgpu::TextureSampleType::Uint,
+                        },
+                        count: None,
+                    },
+                ],
+            },
+        );
+
+        let pipeline_layout = ctx.gpu_resources.pipeline_layouts.get_or_create(
+            ctx,
+            &PipelineLayoutDesc {
+                label: "ChromaSubsamplingConverter".into(),
+                // Note that this is a fairly unusual layout for us with the first entry
+                // not being the globally set bind group!
+                entries: vec![bind_group_layout],
+            },
+        );
+
+        let shader_modules = &ctx.gpu_resources.shader_modules;
+        let render_pipeline = ctx.gpu_resources.render_pipelines.get_or_create(
+            ctx,
+            &RenderPipelineDesc {
+                label: "YuvFormatConverter::render_pipeline".into(),
+                pipeline_layout,
+                vertex_entrypoint: "main".into(),
+                vertex_handle,
+                fragment_entrypoint: "fs_main".into(),
+                fragment_handle: shader_modules.get_or_create(
+                    ctx,
+                    &include_shader_module!("../../shader/conversions/yuv_converter.wgsl"),
+                ),
+                vertex_buffers: smallvec![],
+                render_targets: smallvec![Some(YuvFormatConversionTask::OUTPUT_FORMAT.into())],
+                primitive: wgpu::PrimitiveState::default(),
+                depth_stencil: None,
+                multisample: wgpu::MultisampleState::default(),
+            },
+        );
+
+        Self {
+            render_pipeline,
+            bind_group_layout,
+        }
+    }
+
+    /// Draws one triangle (3 vertices, 1 instance) using the task's bind group at index 0.
+    fn draw(
+        &self,
+        render_pipelines: &crate::wgpu_resources::GpuRenderPipelinePoolAccessor<'_>,
+        _phase: crate::draw_phases::DrawPhase,
+        pass: &mut wgpu::RenderPass<'_>,
+        draw_data: &Self::RendererDrawData,
+    ) -> Result<(), DrawError> {
+        let pipeline = render_pipelines.get(self.render_pipeline)?;
+
+        pass.set_pipeline(pipeline);
+        pass.set_bind_group(0, &draw_data.bind_group, &[]);
+        pass.draw(0..3, 0..1);
+
+        Ok(())
+    }
+
+    fn participated_phases() -> &'static [crate::draw_phases::DrawPhase] {
+        // Doesn't participate in regular rendering.
+        &[]
+    }
+}
diff --git a/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs b/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs
index 7455882b033f..613dbdbfe4f4 100644
--- a/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs
+++ b/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs
@@ -300,9 +300,13 @@ fn copy_data_to_texture(
     )?;
 
     if buffer_info.buffer_size_padded as usize == data.len() {
+        re_tracing::profile_scope!("bulk_copy");
+
         // Fast path: Just copy the data over as-is.
         gpu_read_buffer.extend_from_slice(data)?;
     } else {
+        re_tracing::profile_scope!("row_by_row_copy");
+
         // Copy row by row in order to jump over padding bytes.
         let bytes_per_row_unpadded = buffer_info.bytes_per_row_unpadded as usize;
         let num_padding_bytes_per_row =