From cbf55d5070ce0273f016f8a90422e3fb396f5fc7 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Thu, 10 Oct 2024 15:56:16 +0200 Subject: [PATCH 01/13] transfer_image_data_to_texture can now update existing textures --- .../image_data_to_texture.rs | 136 ++++++++++++++---- .../re_renderer/src/resource_managers/mod.rs | 3 +- .../src/resource_managers/texture_manager.rs | 18 +-- .../src/resource_managers/yuv_converter.rs | 71 ++++----- 4 files changed, 160 insertions(+), 68 deletions(-) diff --git a/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs b/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs index 1ba94392a9ac..0ea13be26f50 100644 --- a/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs +++ b/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs @@ -111,6 +111,20 @@ pub enum ImageDataToTextureError { #[error("Gpu-based conversion for texture {label:?} did not succeed: {err}")] GpuBasedConversionError { label: DebugLabel, err: DrawError }, + #[error("Texture {label:?} has invalid texture usage flags: {actual_usage:?}, expected at least {required_usage:?}")] + InvalidTargetTextureUsageFlags { + label: DebugLabel, + actual_usage: wgpu::TextureUsages, + required_usage: wgpu::TextureUsages, + }, + + #[error("Texture {label:?} has invalid texture format: {actual_format:?}, expected at least {required_format:?}")] + InvalidTargetTextureFormat { + label: DebugLabel, + actual_format: wgpu::TextureFormat, + required_format: wgpu::TextureFormat, + }, + // TODO(andreas): As we stop using `wgpu::TextureFormat` for input, this should become obsolete. #[error("Unsupported texture format {0:?}")] UnsupportedTextureFormat(wgpu::TextureFormat), @@ -120,6 +134,8 @@ pub enum ImageDataToTextureError { /// /// Arbitrary (potentially gpu based) conversions may be performed to upload the data to the GPU. 
pub struct ImageDataDesc<'a> { + /// If this desc is not used for a texture update, this label is used for the target texture. + /// Otherwise, it may still be used for any intermediate resources that may be required during the conversion process. pub label: DebugLabel, /// Data for the highest mipmap level. @@ -141,7 +157,11 @@ pub struct ImageDataDesc<'a> { } impl<'a> ImageDataDesc<'a> { - fn validate(&self, limits: &wgpu::Limits) -> Result<(), ImageDataToTextureError> { + fn validate( + &self, + limits: &wgpu::Limits, + target_texture_desc: &TextureDesc, + ) -> Result<(), ImageDataToTextureError> { let Self { label, data, @@ -149,6 +169,24 @@ impl<'a> ImageDataDesc<'a> { width_height, } = self; + if !target_texture_desc + .usage + .contains(self.target_texture_usage_requirements()) + { + return Err(ImageDataToTextureError::InvalidTargetTextureUsageFlags { + label: target_texture_desc.label.clone(), + actual_usage: target_texture_desc.usage, + required_usage: self.target_texture_usage_requirements(), + }); + } + if target_texture_desc.format != self.target_texture_format() { + return Err(ImageDataToTextureError::InvalidTargetTextureFormat { + label: target_texture_desc.label.clone(), + actual_format: target_texture_desc.format, + required_format: self.target_texture_format(), + }); + } + if width_height[0] == 0 || width_height[1] == 0 { return Err(ImageDataToTextureError::ZeroSize(label.clone())); } @@ -189,6 +227,48 @@ impl<'a> ImageDataDesc<'a> { Ok(()) } + + /// The texture usages required in order to store this image data. + pub fn target_texture_usage_requirements(&self) -> wgpu::TextureUsages { + match self.format { + SourceImageDataFormat::WgpuCompatible(_) => wgpu::TextureUsages::COPY_DST, // Data arrives via raw data copy. + SourceImageDataFormat::Yuv { .. } => { + YuvFormatConversionTask::REQUIRED_TARGET_TEXTURE_USAGE_FLAGS + } + } + } + + /// The texture format required in order to store this image data. 
+ pub fn target_texture_format(&self) -> wgpu::TextureFormat { + match self.format { + SourceImageDataFormat::WgpuCompatible(format) => format, + SourceImageDataFormat::Yuv { .. } => YuvFormatConversionTask::OUTPUT_FORMAT, + } + } + + /// Creates a texture that can hold the image data. + pub fn create_target_texture( + &self, + ctx: &RenderContext, + texture_usages: wgpu::TextureUsages, + ) -> GpuTexture { + ctx.gpu_resources.textures.alloc( + &ctx.device, + &TextureDesc { + label: self.label.clone(), + size: wgpu::Extent3d { + width: self.width_height[0], + height: self.width_height[1], + depth_or_array_layers: 1, + }, + mip_level_count: 1, // No mipmapping support yet. + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: self.target_texture_format(), + usage: self.target_texture_usage_requirements() | texture_usages, + }, + ) + } } /// Takes raw image data and transfers & converts it to a GPU texture. @@ -206,10 +286,11 @@ impl<'a> ImageDataDesc<'a> { pub fn transfer_image_data_to_texture( ctx: &RenderContext, image_data: ImageDataDesc<'_>, -) -> Result { + target_texture: &GpuTexture, +) -> Result<(), ImageDataToTextureError> { re_tracing::profile_function!(); - image_data.validate(&ctx.device.limits())?; + image_data.validate(&ctx.device.limits(), &target_texture.creation_desc)?; let ImageDataDesc { label, @@ -236,29 +317,37 @@ pub fn transfer_image_data_to_texture( SourceImageDataFormat::WgpuCompatible(_) => label.clone(), SourceImageDataFormat::Yuv { .. } => format!("{label}_source_data").into(), }; - let data_texture = ctx.gpu_resources.textures.alloc( - &ctx.device, - &TextureDesc { - label: data_texture_label, - size: wgpu::Extent3d { - width: data_texture_width, - height: data_texture_height, - depth_or_array_layers: 1, + + let data_texture = match source_format { + // Needs intermediate data texture. + SourceImageDataFormat::Yuv { .. 
} => ctx.gpu_resources.textures.alloc( + &ctx.device, + &TextureDesc { + label: data_texture_label, + size: wgpu::Extent3d { + width: data_texture_width, + height: data_texture_height, + depth_or_array_layers: 1, + }, + mip_level_count: 1, // We don't have mipmap level generation yet! + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: data_texture_format, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, }, - mip_level_count: 1, // We don't have mipmap level generation yet! - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: data_texture_format, - usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, - }, - ); + ), + + // Target is directly written to. + SourceImageDataFormat::WgpuCompatible(_) => target_texture.clone(), + }; + copy_data_to_texture(ctx, &data_texture, data.as_ref())?; // Build a converter task, feeding in the raw data. let converter_task = match source_format { SourceImageDataFormat::WgpuCompatible(_) => { // No further conversion needed, we're done here! - return Ok(data_texture); + return Ok(()); } SourceImageDataFormat::Yuv { format, @@ -270,19 +359,16 @@ pub fn transfer_image_data_to_texture( range, primaries, &data_texture, - &label, - output_width_height, + target_texture, ), }; // Once there's different gpu based conversions, we should probably trait-ify this so we can keep the basic steps. // Note that we execute the task right away, but the way things are set up (by means of using the `Renderer` framework) // it would be fairly easy to schedule this differently! 
- let output_texture = converter_task + converter_task .convert_input_data_to_texture(ctx) - .map_err(|err| ImageDataToTextureError::GpuBasedConversionError { label, err })?; - - Ok(output_texture) + .map_err(|err| ImageDataToTextureError::GpuBasedConversionError { label, err }) } fn copy_data_to_texture( diff --git a/crates/viewer/re_renderer/src/resource_managers/mod.rs b/crates/viewer/re_renderer/src/resource_managers/mod.rs index c3210fc9e034..5f62146856b8 100644 --- a/crates/viewer/re_renderer/src/resource_managers/mod.rs +++ b/crates/viewer/re_renderer/src/resource_managers/mod.rs @@ -11,7 +11,8 @@ mod texture_manager; mod yuv_converter; pub use image_data_to_texture::{ - ColorPrimaries, ImageDataDesc, ImageDataToTextureError, SourceImageDataFormat, + transfer_image_data_to_texture, ColorPrimaries, ImageDataDesc, ImageDataToTextureError, + SourceImageDataFormat, }; pub use texture_manager::{GpuTexture2D, TextureManager2D, TextureManager2DError}; pub use yuv_converter::{YuvPixelLayout, YuvRange}; diff --git a/crates/viewer/re_renderer/src/resource_managers/texture_manager.rs b/crates/viewer/re_renderer/src/resource_managers/texture_manager.rs index 0ed8b37eebaf..5caa15dac09e 100644 --- a/crates/viewer/re_renderer/src/resource_managers/texture_manager.rs +++ b/crates/viewer/re_renderer/src/resource_managers/texture_manager.rs @@ -229,10 +229,10 @@ impl TextureManager2D { // Currently we don't store any data in the texture manager. // In the future we might handle (lazy?) mipmap generation in here or keep track of lazy upload processing. 
- Ok(GpuTexture2D(transfer_image_data_to_texture( - render_ctx, - creation_desc, - )?)) + let texture = + creation_desc.create_target_texture(render_ctx, wgpu::TextureUsages::TEXTURE_BINDING); + transfer_image_data_to_texture(render_ctx, creation_desc, &texture)?; + Ok(GpuTexture2D(texture)) } /// Creates a new 2D texture resource and schedules data upload to the GPU if a texture @@ -277,11 +277,11 @@ impl TextureManager2D { // Run potentially expensive texture creation code: let tex_creation_desc = try_create_texture_desc() .map_err(|err| TextureManager2DError::DataCreation(err))?; - let texture = GpuTexture2D(transfer_image_data_to_texture( - render_ctx, - tex_creation_desc, - )?); - entry.insert(texture).clone() + + let texture = tex_creation_desc + .create_target_texture(render_ctx, wgpu::TextureUsages::TEXTURE_BINDING); + transfer_image_data_to_texture(render_ctx, tex_creation_desc, &texture)?; + entry.insert(GpuTexture2D(texture)).clone() } }; diff --git a/crates/viewer/re_renderer/src/resource_managers/yuv_converter.rs b/crates/viewer/re_renderer/src/resource_managers/yuv_converter.rs index 075c24f13ccb..0f597748324a 100644 --- a/crates/viewer/re_renderer/src/resource_managers/yuv_converter.rs +++ b/crates/viewer/re_renderer/src/resource_managers/yuv_converter.rs @@ -6,9 +6,9 @@ use crate::{ renderer::{screen_triangle_vertex_shader, DrawData, DrawError, Renderer}, wgpu_resources::{ BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle, - GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc, TextureDesc, + GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc, }, - DebugLabel, RenderContext, + RenderContext, }; use super::ColorPrimaries; @@ -276,12 +276,18 @@ impl DrawData for YuvFormatConversionTask { } impl YuvFormatConversionTask { + /// Format that a target texture must have in order to be used as output of this converter. + /// /// sRGB encoded 8 bit texture. 
/// /// Not using [`wgpu::TextureFormat::Rgba8UnormSrgb`] since consumers typically consume this /// texture with software EOTF ("to linear") for more flexibility. pub const OUTPUT_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm; + /// Usage flags that a target texture must have in order to be used as output of this converter. + pub const REQUIRED_TARGET_TEXTURE_USAGE_FLAGS: wgpu::TextureUsages = + wgpu::TextureUsages::RENDER_ATTACHMENT; + /// Creates a new conversion task that can be used with [`YuvFormatConverter`]. /// /// Does *not* validate that the input data has the expected format, @@ -292,37 +298,41 @@ impl YuvFormatConversionTask { yuv_range: YuvRange, primaries: ColorPrimaries, input_data: &GpuTexture, - output_label: &DebugLabel, - output_width_height: [u32; 2], + target_texture: &GpuTexture, ) -> Self { - let target_texture = ctx.gpu_resources.textures.alloc( - &ctx.device, - &TextureDesc { - label: output_label.clone(), - size: wgpu::Extent3d { - width: output_width_height[0], - height: output_width_height[1], - depth_or_array_layers: 1, - }, - mip_level_count: 1, // We don't have mipmap level generation yet! - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: Self::OUTPUT_FORMAT, - usage: wgpu::TextureUsages::TEXTURE_BINDING - | wgpu::TextureUsages::COPY_DST - | wgpu::TextureUsages::RENDER_ATTACHMENT, - }, - ); - + // TODO: + // let target_texture = ctx.gpu_resources.textures.alloc( + // &ctx.device, + // &TextureDesc { + // label: output_label.clone(), + // size: wgpu::Extent3d { + // width: output_width_height[0], + // height: output_width_height[1], + // depth_or_array_layers: 1, + // }, + // mip_level_count: 1, // We don't have mipmap level generation yet! 
+ // sample_count: 1, + // dimension: wgpu::TextureDimension::D2, + // format: Self::OUTPUT_FORMAT, + // usage: output_usage_flags | wgpu::TextureUsages::RENDER_ATTACHMENT, + // }, + // ); + + // TODO: validate target_texture + + let target_label = target_texture.creation_desc.label.clone(); let renderer = ctx.renderer::(); let uniform_buffer = create_and_fill_uniform_buffer( ctx, - format!("{output_label}_conversion").into(), + format!("{target_label}_conversion").into(), gpu_data::UniformBuffer { yuv_layout: yuv_layout as _, primaries: primaries as _, - target_texture_size: output_width_height, + target_texture_size: [ + target_texture.creation_desc.size.width, + target_texture.creation_desc.size.height, + ], yuv_range: (yuv_range as u32).into(), _end_padding: Default::default(), @@ -344,15 +354,12 @@ impl YuvFormatConversionTask { Self { bind_group, - target_texture, + target_texture: target_texture.clone(), } } /// Runs the conversion from the input texture data. - pub fn convert_input_data_to_texture( - self, - ctx: &RenderContext, - ) -> Result { + pub fn convert_input_data_to_texture(self, ctx: &RenderContext) -> Result<(), DrawError> { // TODO(andreas): Does this have to be on the global view encoder? // If this ever becomes a problem we could easily schedule this to another encoder as long as // we guarantee that the conversion is enqueued before the resulting texture is used. @@ -378,9 +385,7 @@ impl YuvFormatConversionTask { crate::draw_phases::DrawPhase::Opaque, // Don't care about the phase. 
&mut pass, &self, - )?; - - Ok(self.target_texture) + ) } } From 4673d8f7a123b0b623ca5708451f89911f99fe15 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 9 Oct 2024 10:33:01 +0200 Subject: [PATCH 02/13] wire up native_decoder to transfer_image_data_to_texture --- crates/store/re_video/src/decode/av1.rs | 223 +++++++----------- crates/store/re_video/src/decode/mod.rs | 36 +++ .../src/allocator/cpu_write_gpu_read_belt.rs | 2 +- crates/viewer/re_renderer/src/renderer/mod.rs | 2 +- .../image_data_to_texture.rs | 16 +- .../src/video/decoder/native_decoder.rs | 86 ++++--- crates/viewer/re_renderer/src/video/mod.rs | 3 + .../src/wgpu_resources/resource.rs | 2 +- .../src/gpu_bridge/image_to_gpu.rs | 12 +- 9 files changed, 192 insertions(+), 190 deletions(-) diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index 983b918a7fb6..066f557d2253 100644 --- a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -6,7 +6,10 @@ use dav1d::{PixelLayout, PlanarImageComponent}; use crate::Time; -use super::{Chunk, Error, Frame, OutputCallback, PixelFormat, Result, SyncDecoder}; +use super::{ + Chunk, ColorPrimaries, Error, Frame, OutputCallback, PixelFormat, Result, SyncDecoder, + YuvPixelLayout, YuvRange, +}; pub struct SyncDav1dDecoder { decoder: dav1d::Decoder, @@ -112,163 +115,107 @@ fn output_picture(picture: &dav1d::Picture, on_output: &(dyn Fn(Result) + // TODO(jan): support other parameters? 
// What do these even do: // - matrix_coefficients - // - color_range - // - color_primaries // - transfer_characteristics - let frame = Frame { - data: match picture.pixel_layout() { - PixelLayout::I400 => i400_to_rgba(picture), - PixelLayout::I420 => i420_to_rgba(picture), - PixelLayout::I422 => i422_to_rgba(picture), - PixelLayout::I444 => i444_to_rgba(picture), + let data = match picture.pixel_layout() { + PixelLayout::I400 => picture.plane(PlanarImageComponent::Y).to_vec(), + PixelLayout::I420 | PixelLayout::I422 | PixelLayout::I444 => { + let mut data = Vec::with_capacity( + picture.stride(PlanarImageComponent::Y) as usize + + picture.stride(PlanarImageComponent::U) as usize + + picture.stride(PlanarImageComponent::V) as usize, + ); + data.extend_from_slice(&picture.plane(PlanarImageComponent::Y)); + data.extend_from_slice(&picture.plane(PlanarImageComponent::U)); + data.extend_from_slice(&picture.plane(PlanarImageComponent::V)); + data // TODO: how badly does this break with hdr? 
+ } + }; + + let format = PixelFormat::Yuv { + layout: match picture.pixel_layout() { + PixelLayout::I400 => YuvPixelLayout::Y400, + PixelLayout::I420 => YuvPixelLayout::Y_U_V420, + PixelLayout::I422 => YuvPixelLayout::Y_U_V422, + PixelLayout::I444 => YuvPixelLayout::Y_U_V444, + }, + range: match picture.color_range() { + dav1d::pixel::YUVRange::Limited => YuvRange::Limited, + dav1d::pixel::YUVRange::Full => YuvRange::Full, }, + primaries: color_primaries(picture), + }; + + let frame = Frame { + data, width: picture.width(), height: picture.height(), - format: PixelFormat::Rgba8Unorm, + format, timestamp: Time(picture.timestamp().unwrap_or(0)), duration: Time(picture.duration()), }; on_output(Ok(frame)); } -fn rgba_from_yuv(y: u8, u: u8, v: u8) -> [u8; 4] { - let (y, u, v) = (f32::from(y), f32::from(u), f32::from(v)); - - // Adjust for color range - let y = (y - 16.0) / 219.0; - let u = (u - 128.0) / 224.0; - let v = (v - 128.0) / 224.0; - - // BT.601 coefficients - let r = y + 1.402 * v; - let g = y - 0.344136 * u - 0.714136 * v; - let b = y + 1.772 * u; - - [ - (r.clamp(0.0, 1.0) * 255.0) as u8, - (g.clamp(0.0, 1.0) * 255.0) as u8, - (b.clamp(0.0, 1.0) * 255.0) as u8, - 255, // Alpha channel, fully opaque - ] -} - -fn i400_to_rgba(picture: &dav1d::Picture) -> Vec { - re_tracing::profile_function!(); - - let width = picture.width() as usize; - let height = picture.height() as usize; - let y_plane = picture.plane(PlanarImageComponent::Y); - let y_stride = picture.stride(PlanarImageComponent::Y) as usize; - - let mut rgba = Vec::with_capacity(width * height * 4); - - for y in 0..height { - for x in 0..width { - let y_value = y_plane[y * y_stride + x]; - let rgba_pixel = rgba_from_yuv(y_value, 128, 128); - - let offset = y * width * 4 + x * 4; - rgba[offset] = rgba_pixel[0]; - rgba[offset + 1] = rgba_pixel[1]; - rgba[offset + 2] = rgba_pixel[2]; - rgba[offset + 3] = rgba_pixel[3]; +fn color_primaries(picture: &dav1d::Picture) -> ColorPrimaries { + 
#[allow(clippy::match_same_arms)] + match picture.color_primaries() { + dav1d::pixel::ColorPrimaries::Reserved + | dav1d::pixel::ColorPrimaries::Reserved0 + | dav1d::pixel::ColorPrimaries::Unspecified => { + // This happens quite often. Don't issue a warning, that would be noise! + + if picture.transfer_characteristic() == dav1d::pixel::TransferCharacteristic::SRGB { + // If the transfer characteristic is sRGB, assume BT.709 primaries, would be quite odd otherwise. + // TODO(andreas): Other transfer characteristics may also hint at primaries. + ColorPrimaries::Bt709 + } else { + // Best guess: If the picture is 720p+ assume Bt709 because Rec709 + // is the "HD" standard. + // TODO(#7594): 4k/UHD material should probably assume Bt2020? + // else if picture.height() >= 720 { + // ColorPrimaries::Bt709 + // } else { + // ColorPrimaries::Bt601 + // } + // + // ... then again, eyeballing VLC it looks like it just always assumes BT.709. + // The handwavy test case employed here was the same video in low & high resolution + // without specified primaries. Both looked the same. + ColorPrimaries::Bt709 + } + } - let width = picture.width() as usize; - let height = picture.height() as usize; - let y_plane = picture.plane(PlanarImageComponent::Y); - let u_plane = picture.plane(PlanarImageComponent::U); - let v_plane = picture.plane(PlanarImageComponent::V); - let y_stride = picture.stride(PlanarImageComponent::Y) as usize; - let uv_stride = picture.stride(PlanarImageComponent::U) as usize; + dav1d::pixel::ColorPrimaries::BT709 => ColorPrimaries::Bt709, - let mut rgba = vec![0u8; width * height * 4]; + // NTSC standard. Close enough to BT.601 for now. TODO(andreas): Is it worth warning? 
+ dav1d::pixel::ColorPrimaries::BT470M => ColorPrimaries::Bt601, - for y in 0..height { - for x in 0..width { - let y_value = y_plane[y * y_stride + x]; - let u_value = u_plane[(y / 2) * uv_stride + (x / 2)]; - let v_value = v_plane[(y / 2) * uv_stride + (x / 2)]; - let rgba_pixel = rgba_from_yuv(y_value, u_value, v_value); + // PAL standard. Close enough to BT.601 for now. TODO(andreas): Is it worth warning? + dav1d::pixel::ColorPrimaries::BT470BG => ColorPrimaries::Bt601, - let offset = y * width * 4 + x * 4; - rgba[offset] = rgba_pixel[0]; - rgba[offset + 1] = rgba_pixel[1]; - rgba[offset + 2] = rgba_pixel[2]; - rgba[offset + 3] = rgba_pixel[3]; + // These are both using BT.601 primaries. + dav1d::pixel::ColorPrimaries::ST170M | dav1d::pixel::ColorPrimaries::ST240M => { + ColorPrimaries::Bt601 } - } - - rgba -} - -fn i422_to_rgba(picture: &dav1d::Picture) -> Vec { - re_tracing::profile_function!(); - - let width = picture.width() as usize; - let height = picture.height() as usize; - let y_plane = picture.plane(PlanarImageComponent::Y); - let u_plane = picture.plane(PlanarImageComponent::U); - let v_plane = picture.plane(PlanarImageComponent::V); - let y_stride = picture.stride(PlanarImageComponent::Y) as usize; - let uv_stride = picture.stride(PlanarImageComponent::U) as usize; - - let mut rgba = vec![0u8; width * height * 4]; - - for y in 0..height { - for x in 0..width { - let y_value = y_plane[y * y_stride + x]; - let u_value = u_plane[y * uv_stride + (x / 2)]; - let v_value = v_plane[y * uv_stride + (x / 2)]; - let rgba_pixel = rgba_from_yuv(y_value, u_value, v_value); - let offset = y * width * 4 + x * 4; - rgba[offset] = rgba_pixel[0]; - rgba[offset + 1] = rgba_pixel[1]; - rgba[offset + 2] = rgba_pixel[2]; - rgba[offset + 3] = rgba_pixel[3]; + // Is st428 also HDR? Not sure. 
+ // BT2020 and P3 variants definitely are ;) + dav1d::pixel::ColorPrimaries::BT2020 + | dav1d::pixel::ColorPrimaries::ST428 + | dav1d::pixel::ColorPrimaries::P3DCI + | dav1d::pixel::ColorPrimaries::P3Display => { + // TODO(#7594): HDR support. + re_log::warn_once!("Video specified HDR color primaries. Rerun doesn't handle HDR colors correctly yet. Color artifacts may be visible."); + ColorPrimaries::Bt709 } - } - - rgba -} - -fn i444_to_rgba(picture: &dav1d::Picture) -> Vec { - re_tracing::profile_function!(); - - let width = picture.width() as usize; - let height = picture.height() as usize; - let y_plane = picture.plane(PlanarImageComponent::Y); - let u_plane = picture.plane(PlanarImageComponent::U); - let v_plane = picture.plane(PlanarImageComponent::V); - let y_stride = picture.stride(PlanarImageComponent::Y) as usize; - let u_stride = picture.stride(PlanarImageComponent::U) as usize; - let v_stride = picture.stride(PlanarImageComponent::V) as usize; - let mut rgba = vec![0u8; width * height * 4]; - - for y in 0..height { - for x in 0..width { - let y_value = y_plane[y * y_stride + x]; - let u_value = u_plane[y * u_stride + x]; - let v_value = v_plane[y * v_stride + x]; - let rgba_pixel = rgba_from_yuv(y_value, u_value, v_value); - - let offset = y * width * 4 + x * 4; - rgba[offset] = rgba_pixel[0]; - rgba[offset + 1] = rgba_pixel[1]; - rgba[offset + 2] = rgba_pixel[2]; - rgba[offset + 3] = rgba_pixel[3]; + dav1d::pixel::ColorPrimaries::Film | dav1d::pixel::ColorPrimaries::Tech3213 => { + re_log::warn_once!( + "Video specified unsupported color primaries {:?}. 
Color artifacts may be visible.", + picture.color_primaries() + ); + ColorPrimaries::Bt709 } } - - rgba } diff --git a/crates/store/re_video/src/decode/mod.rs b/crates/store/re_video/src/decode/mod.rs index 7fd77960f6de..21a455dfb36e 100644 --- a/crates/store/re_video/src/decode/mod.rs +++ b/crates/store/re_video/src/decode/mod.rs @@ -59,7 +59,43 @@ pub struct Frame { pub duration: Time, } +/// Pixel format/layout used by [`Frame::data`]. pub enum PixelFormat { Rgb8Unorm, Rgba8Unorm, + + Yuv { + layout: YuvPixelLayout, + range: YuvRange, + // TODO(andreas): color primaries should also apply to RGB data, + // but for now we just always assume RGB to be BT.709 ~= sRGB. + primaries: ColorPrimaries, + }, +} + +/// Pixel layout used by [`PixelFormat::Yuv`]. +/// +/// For details see `re_renderer`'s `YuvPixelLayout` type. +#[allow(non_camel_case_types)] +pub enum YuvPixelLayout { + Y_U_V444, + Y_U_V422, + Y_U_V420, + Y400, +} + +/// Yuv value range used by [`PixelFormat::Yuv`]. +/// +/// For details see `re_renderer`'s `YuvRange` type. +pub enum YuvRange { + Limited, + Full, +} + +/// Color primaries used by [`PixelFormat::Yuv`]. +/// +/// For details see `re_renderer`'s `ColorPrimaries` type. 
+pub enum ColorPrimaries { + Bt601, + Bt709, } diff --git a/crates/viewer/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs b/crates/viewer/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs index 4652705d9b95..59e270458f4f 100644 --- a/crates/viewer/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs +++ b/crates/viewer/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs @@ -5,7 +5,7 @@ use crate::{ wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, GpuTexture}, }; -#[derive(thiserror::Error, Debug, PartialEq, Eq)] +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)] pub enum CpuWriteGpuReadError { #[error("Attempting to allocate an empty buffer.")] ZeroSizeBufferAllocation, diff --git a/crates/viewer/re_renderer/src/renderer/mod.rs b/crates/viewer/re_renderer/src/renderer/mod.rs index 6ab0caec0d25..f29c19da86e3 100644 --- a/crates/viewer/re_renderer/src/renderer/mod.rs +++ b/crates/viewer/re_renderer/src/renderer/mod.rs @@ -50,7 +50,7 @@ pub trait DrawData { type Renderer: Renderer + Send + Sync; } -#[derive(thiserror::Error, Debug)] +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)] pub enum DrawError { #[error(transparent)] Pool(#[from] PoolError), diff --git a/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs b/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs index 0ea13be26f50..bb95d05341c9 100644 --- a/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs +++ b/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs @@ -60,7 +60,7 @@ pub enum SourceImageDataFormat { /// YUV (== `YCbCr`) formats, typically using chroma downsampling. Yuv { - format: YuvPixelLayout, + layout: YuvPixelLayout, primaries: ColorPrimaries, range: YuvRange, }, @@ -75,7 +75,7 @@ impl From for SourceImageDataFormat { } /// Error that can occur when converting image data to a texture. 
-#[derive(thiserror::Error, Debug)] +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)] pub enum ImageDataToTextureError { #[error("Texture {0:?} has zero width or height!")] ZeroSize(DebugLabel), @@ -211,7 +211,7 @@ impl<'a> ImageDataDesc<'a> { .ok_or(ImageDataToTextureError::UnsupportedTextureFormat(*format))? as usize } - SourceImageDataFormat::Yuv { format, .. } => { + SourceImageDataFormat::Yuv { layout: format, .. } => { format.num_data_buffer_bytes(*width_height) } }; @@ -303,13 +303,13 @@ pub fn transfer_image_data_to_texture( // Reminder: We can't use raw buffers because of WebGL compatibility. let [data_texture_width, data_texture_height] = match source_format { SourceImageDataFormat::WgpuCompatible(_) => output_width_height, - SourceImageDataFormat::Yuv { format, .. } => { - format.data_texture_width_height(output_width_height) + SourceImageDataFormat::Yuv { layout, .. } => { + layout.data_texture_width_height(output_width_height) } }; let data_texture_format = match source_format { SourceImageDataFormat::WgpuCompatible(format) => format, - SourceImageDataFormat::Yuv { format, .. } => format.data_texture_format(), + SourceImageDataFormat::Yuv { layout, .. } => layout.data_texture_format(), }; // Allocate gpu belt data and upload it. 
@@ -350,12 +350,12 @@ pub fn transfer_image_data_to_texture( return Ok(()); } SourceImageDataFormat::Yuv { - format, + layout, primaries, range, } => YuvFormatConversionTask::new( ctx, - format, + layout, range, primaries, &data_texture, diff --git a/crates/viewer/re_renderer/src/video/decoder/native_decoder.rs b/crates/viewer/re_renderer/src/video/decoder/native_decoder.rs index a02cb4fbf807..1673001223ad 100644 --- a/crates/viewer/re_renderer/src/video/decoder/native_decoder.rs +++ b/crates/viewer/re_renderer/src/video/decoder/native_decoder.rs @@ -6,7 +6,15 @@ use re_video::{Chunk, Frame, Time}; use parking_lot::Mutex; -use crate::{video::DecodingError, RenderContext}; +use crate::{ + resource_managers::{ + transfer_image_data_to_texture, ColorPrimaries, ImageDataDesc, SourceImageDataFormat, + YuvPixelLayout, YuvRange, + }, + video::DecodingError, + wgpu_resources::GpuTexture, + RenderContext, +}; use super::{latest_at_idx, TimedDecodingError, VideoChunkDecoder, VideoTexture}; @@ -100,7 +108,7 @@ impl VideoChunkDecoder for NativeDecoder { if frame_time_range.contains(&presentation_timestamp) && video_texture.time_range != frame_time_range { - copy_video_frame_to_texture(&render_ctx.queue, frame, &video_texture.texture.texture)?; + copy_video_frame_to_texture(render_ctx, frame, &video_texture.texture)?; video_texture.time_range = frame_time_range; } @@ -125,58 +133,66 @@ impl VideoChunkDecoder for NativeDecoder { } fn copy_video_frame_to_texture( - queue: &wgpu::Queue, + ctx: &RenderContext, frame: &Frame, - texture: &wgpu::Texture, + target_texture: &GpuTexture, ) -> Result<(), DecodingError> { let format = match frame.format { re_video::PixelFormat::Rgb8Unorm => { + // TODO(andreas): `ImageDataDesc` should have RGB handling! 
return copy_video_frame_to_texture( - queue, + ctx, &Frame { data: crate::pad_rgb_to_rgba(&frame.data, 255_u8), format: re_video::PixelFormat::Rgba8Unorm, ..*frame }, - texture, + target_texture, ); } - - re_video::PixelFormat::Rgba8Unorm => wgpu::TextureFormat::Rgba8Unorm, + re_video::PixelFormat::Rgba8Unorm | re_video::PixelFormat::Yuv { .. } => { + wgpu::TextureFormat::Rgba8Unorm + } }; re_tracing::profile_function!(); - let size = wgpu::Extent3d { - width: frame.width, - height: frame.height, - depth_or_array_layers: 1, + let format = match &frame.format { + re_video::PixelFormat::Rgb8Unorm | re_video::PixelFormat::Rgba8Unorm => { + SourceImageDataFormat::WgpuCompatible(wgpu::TextureFormat::Rgba8Unorm) + } + re_video::PixelFormat::Yuv { + layout, + range, + primaries, + } => SourceImageDataFormat::Yuv { + layout: match layout { + re_video::decode::YuvPixelLayout::Y_U_V444 => YuvPixelLayout::Y_U_V444, + re_video::decode::YuvPixelLayout::Y_U_V422 => YuvPixelLayout::Y_U_V422, + re_video::decode::YuvPixelLayout::Y_U_V420 => YuvPixelLayout::Y_U_V420, + re_video::decode::YuvPixelLayout::Y400 => YuvPixelLayout::Y400, + }, + primaries: match primaries { + re_video::decode::ColorPrimaries::Bt601 => ColorPrimaries::Bt601, + re_video::decode::ColorPrimaries::Bt709 => ColorPrimaries::Bt709, + }, + range: match range { + re_video::decode::YuvRange::Limited => YuvRange::Limited, + re_video::decode::YuvRange::Full => YuvRange::Full, + }, + }, }; - let width_blocks = frame.width / format.block_dimensions().0; - - #[allow(clippy::unwrap_used)] // block_copy_size can only fail for weird compressed formats - let block_size = format - .block_copy_size(Some(wgpu::TextureAspect::All)) - .unwrap(); - - let bytes_per_row_unaligned = width_blocks * block_size; - - queue.write_texture( - wgpu::ImageCopyTexture { - texture, - mip_level: 0, - origin: wgpu::Origin3d::ZERO, - aspect: wgpu::TextureAspect::All, - }, - &frame.data, - wgpu::ImageDataLayout { - offset: 0, - bytes_per_row: 
Some(bytes_per_row_unaligned), - rows_per_image: None, + transfer_image_data_to_texture( + ctx, + ImageDataDesc { + label: "video_texture_upload".into(), + data: std::borrow::Cow::Borrowed(frame.data.as_slice()), + format, + width_height: [frame.width, frame.height], }, - size, - ); + target_texture, + )?; Ok(()) } diff --git a/crates/viewer/re_renderer/src/video/mod.rs b/crates/viewer/re_renderer/src/video/mod.rs index 659fed6131e5..c60d6fce3cbc 100644 --- a/crates/viewer/re_renderer/src/video/mod.rs +++ b/crates/viewer/re_renderer/src/video/mod.rs @@ -60,6 +60,9 @@ pub enum DecodingError { #[cfg(not(target_arch = "wasm32"))] #[error("Native video decoding not supported in native debug builds.")] NoNativeDebug, + + #[error("Failed to create gpu texture from decoded video data: {0}")] + ImageDataToTextureError(#[from] crate::resource_managers::ImageDataToTextureError), } pub type FrameDecodingResult = Result; diff --git a/crates/viewer/re_renderer/src/wgpu_resources/resource.rs b/crates/viewer/re_renderer/src/wgpu_resources/resource.rs index b6ed1645a7fd..e7c345babcf9 100644 --- a/crates/viewer/re_renderer/src/wgpu_resources/resource.rs +++ b/crates/viewer/re_renderer/src/wgpu_resources/resource.rs @@ -1,6 +1,6 @@ use std::sync::atomic::AtomicU64; -#[derive(thiserror::Error, Debug, PartialEq, Eq)] +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)] pub enum PoolError { #[error("Requested resource isn't available because the handle is no longer valid")] ResourceNotAvailable, diff --git a/crates/viewer/re_viewer_context/src/gpu_bridge/image_to_gpu.rs b/crates/viewer/re_viewer_context/src/gpu_bridge/image_to_gpu.rs index ba6f0907fb2e..41f5c2e3983f 100644 --- a/crates/viewer/re_viewer_context/src/gpu_bridge/image_to_gpu.rs +++ b/crates/viewer/re_viewer_context/src/gpu_bridge/image_to_gpu.rs @@ -267,7 +267,7 @@ pub fn texture_creation_desc_from_color_image<'a>( // PixelFormat::Y_U_V24_FullRange | PixelFormat::Y_U_V24_LimitedRange => { SourceImageDataFormat::Yuv { 
- format: YuvPixelLayout::Y_U_V444, + layout: YuvPixelLayout::Y_U_V444, range, primaries, } @@ -275,7 +275,7 @@ pub fn texture_creation_desc_from_color_image<'a>( PixelFormat::Y_U_V16_FullRange | PixelFormat::Y_U_V16_LimitedRange => { SourceImageDataFormat::Yuv { - format: YuvPixelLayout::Y_U_V422, + layout: YuvPixelLayout::Y_U_V422, range, primaries, } @@ -283,7 +283,7 @@ pub fn texture_creation_desc_from_color_image<'a>( PixelFormat::Y_U_V12_FullRange | PixelFormat::Y_U_V12_LimitedRange => { SourceImageDataFormat::Yuv { - format: YuvPixelLayout::Y_U_V420, + layout: YuvPixelLayout::Y_U_V420, range, primaries, } @@ -291,20 +291,20 @@ pub fn texture_creation_desc_from_color_image<'a>( PixelFormat::Y8_FullRange | PixelFormat::Y8_LimitedRange => { SourceImageDataFormat::Yuv { - format: YuvPixelLayout::Y400, + layout: YuvPixelLayout::Y400, range, primaries, } } PixelFormat::NV12 => SourceImageDataFormat::Yuv { - format: YuvPixelLayout::Y_UV420, + layout: YuvPixelLayout::Y_UV420, range, primaries, }, PixelFormat::YUY2 => SourceImageDataFormat::Yuv { - format: YuvPixelLayout::YUYV422, + layout: YuvPixelLayout::YUYV422, range, primaries, }, From 3c8bd6b91e7fc50e614525ae69ee67f9368e08a1 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Thu, 10 Oct 2024 18:17:22 +0200 Subject: [PATCH 03/13] handle strides in YUV data --- crates/store/re_video/src/decode/av1.rs | 82 ++++++++++++++++++++----- crates/store/re_video/src/decode/mod.rs | 4 ++ 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index 066f557d2253..0a85dfe5ec53 100644 --- a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -2,9 +2,8 @@ use std::sync::atomic::{AtomicBool, Ordering}; -use dav1d::{PixelLayout, PlanarImageComponent}; - use crate::Time; +use dav1d::{PixelLayout, PlanarImageComponent}; use super::{ Chunk, ColorPrimaries, Error, Frame, OutputCallback, PixelFormat, Result, 
SyncDecoder, @@ -117,18 +116,73 @@ fn output_picture(picture: &dav1d::Picture, on_output: &(dyn Fn(Result) + // - matrix_coefficients // - transfer_characteristics - let data = match picture.pixel_layout() { - PixelLayout::I400 => picture.plane(PlanarImageComponent::Y).to_vec(), - PixelLayout::I420 | PixelLayout::I422 | PixelLayout::I444 => { - let mut data = Vec::with_capacity( - picture.stride(PlanarImageComponent::Y) as usize - + picture.stride(PlanarImageComponent::U) as usize - + picture.stride(PlanarImageComponent::V) as usize, - ); - data.extend_from_slice(&picture.plane(PlanarImageComponent::Y)); - data.extend_from_slice(&picture.plane(PlanarImageComponent::U)); - data.extend_from_slice(&picture.plane(PlanarImageComponent::V)); - data // TODO: how badly does this break with hdr? + let data = { + re_tracing::profile_scope!("copy_picture_data"); + + match picture.pixel_layout() { + PixelLayout::I400 => picture.plane(PlanarImageComponent::Y).to_vec(), + PixelLayout::I420 | PixelLayout::I422 | PixelLayout::I444 => { + // TODO(#7594): If `picture.bit_depth()` isn't 8 we have a problem: + // We can't handle high bit depths yet and the YUV converter at the other side + // bases its opinion on what an acceptable number of incoming bytes is on this. + // So we just clamp to that expectation, ignoring `picture.stride(PlanarImageComponent::Y)` & friends. + // Note that `bit_depth` is either 8 or 16, which is semi-independent `bits_per_component` (which is None/8/10/12). + if picture.bit_depth() != 8 { + re_log::warn_once!( + "Video uses unsupported bit depth larger than 8 bit per color component which is currently unsupported." 
+ ); + } + + let height_y = picture.height() as usize; + let height_uv = match picture.pixel_layout() { + PixelLayout::I400 => 0, + PixelLayout::I420 => height_y / 2, + PixelLayout::I422 | PixelLayout::I444 => height_y, + }; + + let packed_stride_y = picture.width() as usize; + let actual_stride_y = picture.stride(PlanarImageComponent::Y) as usize; + + let packed_stride_uv = match picture.pixel_layout() { + PixelLayout::I400 => 0, + PixelLayout::I420 | PixelLayout::I422 => packed_stride_y / 2, + PixelLayout::I444 => packed_stride_y, + }; + let actual_stride_uv = picture.stride(PlanarImageComponent::U) as usize; // U / V stride is always the same. + + let num_packed_bytes_y = packed_stride_y * height_y; + let num_packed_bytes_uv = packed_stride_uv * height_uv; + + let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2); + + // We could make our image ingestion pipeline even more sophisticated and pass that stride information through. + // But given that this is a matter of replacing a single large memcpy with a few hundred _still_ quite large ones, + // this should not make a lot of difference (citation needed!). 
+ { + let plane = picture.plane(PlanarImageComponent::Y); + if actual_stride_y != packed_stride_y { + for y in 0..height_y { + let offset = y * actual_stride_y; + data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]); + } + } else { + data.extend_from_slice(&plane[0..num_packed_bytes_y]); + } + } + for comp in [PlanarImageComponent::U, PlanarImageComponent::V] { + let plane = picture.plane(comp); + if actual_stride_uv != packed_stride_uv { + for y in 0..height_uv { + let offset = y * actual_stride_uv; + data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]); + } + } else { + data.extend_from_slice(&plane[0..num_packed_bytes_uv]); + } + } + + data + } } }; diff --git a/crates/store/re_video/src/decode/mod.rs b/crates/store/re_video/src/decode/mod.rs index 21a455dfb36e..77f641b41db9 100644 --- a/crates/store/re_video/src/decode/mod.rs +++ b/crates/store/re_video/src/decode/mod.rs @@ -60,6 +60,7 @@ pub struct Frame { } /// Pixel format/layout used by [`Frame::data`]. +#[derive(Debug)] pub enum PixelFormat { Rgb8Unorm, Rgba8Unorm, @@ -77,6 +78,7 @@ pub enum PixelFormat { /// /// For details see `re_renderer`'s `YuvPixelLayout` type. #[allow(non_camel_case_types)] +#[derive(Debug)] pub enum YuvPixelLayout { Y_U_V444, Y_U_V422, @@ -87,6 +89,7 @@ pub enum YuvPixelLayout { /// Yuv value range used by [`PixelFormat::Yuv`]. /// /// For details see `re_renderer`'s `YuvRange` type. +#[derive(Debug)] pub enum YuvRange { Limited, Full, @@ -95,6 +98,7 @@ pub enum YuvRange { /// Color primaries used by [`PixelFormat::Yuv`]. /// /// For details see `re_renderer`'s `ColorPrimaries` type. 
+#[derive(Debug)] pub enum ColorPrimaries { Bt601, Bt709, From 0881d5c138d8ea8734e2ad5a31afc087e3965713 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 12:12:53 +0200 Subject: [PATCH 04/13] add profiling scopes for slow yuv plane copy paths --- crates/store/re_video/src/decode/av1.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index 0a85dfe5ec53..ebf45c3b8832 100644 --- a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -161,6 +161,8 @@ fn output_picture(picture: &dav1d::Picture, on_output: &(dyn Fn(Result) + { let plane = picture.plane(PlanarImageComponent::Y); if actual_stride_y != packed_stride_y { + re_tracing::profile_scope!("slow path copy y-plane"); + for y in 0..height_y { let offset = y * actual_stride_y; data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]); @@ -172,6 +174,8 @@ fn output_picture(picture: &dav1d::Picture, on_output: &(dyn Fn(Result) + for comp in [PlanarImageComponent::U, PlanarImageComponent::V] { let plane = picture.plane(comp); if actual_stride_uv != packed_stride_uv { + re_tracing::profile_scope!("slow path copy u/v-plane"); + for y in 0..height_uv { let offset = y * actual_stride_uv; data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]); From c5afa8ce05b789712811bde348171ef63cbca070 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 12:35:23 +0200 Subject: [PATCH 05/13] improve av1 fast path further --- crates/store/re_video/src/decode/av1.rs | 68 +++++++++++++++---------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index ebf45c3b8832..8fa2f57e0f60 100644 --- a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -153,39 +153,51 @@ fn output_picture(picture: &dav1d::Picture, on_output: &(dyn Fn(Result) + let 
num_packed_bytes_y = packed_stride_y * height_y; let num_packed_bytes_uv = packed_stride_uv * height_uv; - let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2); - - // We could make our image ingestion pipeline even more sophisticated and pass that stride information through. - // But given that this is a matter of replacing a single large memcpy with a few hundred _still_ quite large ones, - // this should not make a lot of difference (citation needed!). - { - let plane = picture.plane(PlanarImageComponent::Y); - if actual_stride_y != packed_stride_y { - re_tracing::profile_scope!("slow path copy y-plane"); - - for y in 0..height_y { - let offset = y * actual_stride_y; - data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]); + if actual_stride_y == packed_stride_y && actual_stride_uv == packed_stride_uv { + // Best case scenario: There's no additional strides at all, so we can just copy the data directly. + // TODO(andreas): This still has *significant* overhead for 8k video. Can we take ownership of the data instead without a copy? + re_tracing::profile_scope!("fast path"); + let plane_y = &picture.plane(PlanarImageComponent::Y)[0..num_packed_bytes_y]; + let plane_u = &picture.plane(PlanarImageComponent::U)[0..num_packed_bytes_uv]; + let plane_v = &picture.plane(PlanarImageComponent::V)[0..num_packed_bytes_uv]; + [plane_y, plane_u, plane_v].concat() + } else { + // At least either y or u/v have strides. + // + // We could make our image ingestion pipeline even more sophisticated and pass that stride information through. + // But given that this is a matter of replacing a single large memcpy with a few hundred _still_ quite large ones, + // this should not make a lot of difference (citation needed!). 
+ + let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2); + { + let plane = picture.plane(PlanarImageComponent::Y); + if actual_stride_y != packed_stride_y { + re_tracing::profile_scope!("slow path, y-plane"); + + for y in 0..height_y { + let offset = y * actual_stride_y; + data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]); + } + } else { + data.extend_from_slice(&plane[0..num_packed_bytes_y]); } - } else { - data.extend_from_slice(&plane[0..num_packed_bytes_y]); } - } - for comp in [PlanarImageComponent::U, PlanarImageComponent::V] { - let plane = picture.plane(comp); - if actual_stride_uv != packed_stride_uv { - re_tracing::profile_scope!("slow path copy u/v-plane"); - - for y in 0..height_uv { - let offset = y * actual_stride_uv; - data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]); + for comp in [PlanarImageComponent::U, PlanarImageComponent::V] { + let plane = picture.plane(comp); + if actual_stride_uv != packed_stride_uv { + re_tracing::profile_scope!("slow path, u/v-plane"); + + for y in 0..height_uv { + let offset = y * actual_stride_uv; + data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]); + } + } else { + data.extend_from_slice(&plane[0..num_packed_bytes_uv]); } - } else { - data.extend_from_slice(&plane[0..num_packed_bytes_uv]); } - } - data + data + } } } }; From 06b051573a0cb75506812028b163aa1f2c2471b8 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 12:46:39 +0200 Subject: [PATCH 06/13] =?UTF-8?q?clean=20up=20todos=20and=20'=E2=80=A6'=20?= =?UTF-8?q?usage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/store/re_video/src/decode/av1.rs | 2 +- .../src/resource_managers/yuv_converter.rs | 20 ------------------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index 8fa2f57e0f60..7e6f902f3c0c 100644 --- 
a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -249,7 +249,7 @@ fn color_primaries(picture: &dav1d::Picture) -> ColorPrimaries { // ColorPrimaries::Bt601 // } // - // ... then again, eyeballing VLC it looks like it just always assumes BT.709. + // …then again, eyeballing VLC it looks like it just always assumes BT.709. // The handwavy test case employed here was the same video in low & high resolution // without specified primaries. Both looked the same. ColorPrimaries::Bt709 diff --git a/crates/viewer/re_renderer/src/resource_managers/yuv_converter.rs b/crates/viewer/re_renderer/src/resource_managers/yuv_converter.rs index 0f597748324a..9aea41c22b29 100644 --- a/crates/viewer/re_renderer/src/resource_managers/yuv_converter.rs +++ b/crates/viewer/re_renderer/src/resource_managers/yuv_converter.rs @@ -300,26 +300,6 @@ impl YuvFormatConversionTask { input_data: &GpuTexture, target_texture: &GpuTexture, ) -> Self { - // TODO: - // let target_texture = ctx.gpu_resources.textures.alloc( - // &ctx.device, - // &TextureDesc { - // label: output_label.clone(), - // size: wgpu::Extent3d { - // width: output_width_height[0], - // height: output_width_height[1], - // depth_or_array_layers: 1, - // }, - // mip_level_count: 1, // We don't have mipmap level generation yet! 
- // sample_count: 1, - // dimension: wgpu::TextureDimension::D2, - // format: Self::OUTPUT_FORMAT, - // usage: output_usage_flags | wgpu::TextureUsages::RENDER_ATTACHMENT, - // }, - // ); - - // TODO: validate target_texture - let target_label = target_texture.creation_desc.label.clone(); let renderer = ctx.renderer::(); From c8bb8c2520471debe05dd653c100de646cbe30d4 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 16:43:26 +0200 Subject: [PATCH 07/13] pass through debug name to `output_picture` --- crates/store/re_video/src/decode/av1.rs | 131 +++++++++--------- .../re_renderer/src/video/decoder/mod.rs | 2 +- 2 files changed, 69 insertions(+), 64 deletions(-) diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index 7e6f902f3c0c..808f54d862f7 100644 --- a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -12,10 +12,29 @@ use super::{ pub struct SyncDav1dDecoder { decoder: dav1d::Decoder, + debug_name: String, +} + +impl SyncDecoder for SyncDav1dDecoder { + fn submit_chunk(&mut self, should_stop: &AtomicBool, chunk: Chunk, on_output: &OutputCallback) { + re_tracing::profile_function!(); + self.submit_chunk(chunk, on_output); + self.output_frames(should_stop, on_output); + } + + /// Clear and reset everything + fn reset(&mut self) { + re_tracing::profile_function!(); + + self.decoder.flush(); + + debug_assert!(matches!(self.decoder.get_picture(), Err(dav1d::Error::Again)), + "There should be no pending pictures, since we output them directly after submitting a chunk."); + } } impl SyncDav1dDecoder { - pub fn new() -> Result { + pub fn new(debug_name: String) -> Result { re_tracing::profile_function!(); // TODO(#7671): enable this warning again on Linux when the `nasm` feature actually does something @@ -39,78 +58,63 @@ impl SyncDav1dDecoder { let decoder = dav1d::Decoder::with_settings(&settings)?; - Ok(Self { decoder }) + Ok(Self { + decoder, + debug_name, + }) } -} 
-impl SyncDecoder for SyncDav1dDecoder { - fn submit_chunk(&mut self, should_stop: &AtomicBool, chunk: Chunk, on_output: &OutputCallback) { + fn submit_chunk(&mut self, chunk: Chunk, on_output: &OutputCallback) { re_tracing::profile_function!(); - submit_chunk(&mut self.decoder, chunk, on_output); - output_frames(should_stop, &mut self.decoder, on_output); + econtext::econtext_function_data!(format!("chunk timestamp: {:?}", chunk.timestamp)); + + re_tracing::profile_scope!("send_data"); + match self.decoder.send_data( + chunk.data, + None, + Some(chunk.timestamp.0), + Some(chunk.duration.0), + ) { + Ok(()) => {} + Err(err) => { + debug_assert!(err != dav1d::Error::Again, "Bug in AV1 decoder: send_data returned `Error::Again`. This shouldn't happen, since we process all images in a chunk right away"); + on_output(Err(Error::Dav1d(err))); + } + }; } - /// Clear and reset everything - fn reset(&mut self) { + /// Returns the number of new frames. + fn output_frames(&mut self, should_stop: &AtomicBool, on_output: &OutputCallback) -> usize { re_tracing::profile_function!(); - - self.decoder.flush(); - - debug_assert!(matches!(self.decoder.get_picture(), Err(dav1d::Error::Again)), - "There should be no pending pictures, since we output them directly after submitting a chunk."); - } -} - -fn submit_chunk(decoder: &mut dav1d::Decoder, chunk: Chunk, on_output: &OutputCallback) { - re_tracing::profile_function!(); - econtext::econtext_function_data!(format!("chunk timestamp: {:?}", chunk.timestamp)); - - re_tracing::profile_scope!("send_data"); - match decoder.send_data( - chunk.data, - None, - Some(chunk.timestamp.0), - Some(chunk.duration.0), - ) { - Ok(()) => {} - Err(err) => { - debug_assert!(err != dav1d::Error::Again, "Bug in AV1 decoder: send_data returned `Error::Again`. This shouldn't happen, since we process all images in a chunk right away"); - on_output(Err(Error::Dav1d(err))); - } - }; -} - -/// Returns the number of new frames. 
-fn output_frames( - should_stop: &AtomicBool, - decoder: &mut dav1d::Decoder, - on_output: &OutputCallback, -) -> usize { - re_tracing::profile_function!(); - let mut count = 0; - while !should_stop.load(Ordering::SeqCst) { - let picture = { - econtext::econtext!("get_picture"); - decoder.get_picture() - }; - match picture { - Ok(picture) => { - output_picture(&picture, on_output); - count += 1; - } - Err(dav1d::Error::Again) => { - // We need to submit more chunks to get more pictures - break; - } - Err(err) => { - on_output(Err(Error::Dav1d(err))); + let mut count = 0; + while !should_stop.load(Ordering::SeqCst) { + let picture = { + econtext::econtext!("get_picture"); + self.decoder.get_picture() + }; + match picture { + Ok(picture) => { + output_picture(&self.debug_name, &picture, on_output); + count += 1; + } + Err(dav1d::Error::Again) => { + // We need to submit more chunks to get more pictures + break; + } + Err(err) => { + on_output(Err(Error::Dav1d(err))); + } } } + count } - count } -fn output_picture(picture: &dav1d::Picture, on_output: &(dyn Fn(Result) + Send + Sync)) { +fn output_picture( + debug_name: &str, + picture: &dav1d::Picture, + on_output: &(dyn Fn(Result) + Send + Sync), +) { // TODO(jan): support other parameters? // What do these even do: // - matrix_coefficients @@ -129,7 +133,8 @@ fn output_picture(picture: &dav1d::Picture, on_output: &(dyn Fn(Result) + // Note that `bit_depth` is either 8 or 16, which is semi-independent `bits_per_component` (which is None/8/10/12). if picture.bit_depth() != 8 { re_log::warn_once!( - "Video uses unsupported bit depth larger than 8 bit per color component which is currently unsupported." + "Video {debug_name:?} uses {} bis per component. 
Only a bit depth of 8 bit is currently unsupported.", + picture.bits_per_component().map_or(picture.bit_depth(), |bpc| bpc.0) ); } diff --git a/crates/viewer/re_renderer/src/video/decoder/mod.rs b/crates/viewer/re_renderer/src/video/decoder/mod.rs index cf5add19a795..c62c58857c64 100644 --- a/crates/viewer/re_renderer/src/video/decoder/mod.rs +++ b/crates/viewer/re_renderer/src/video/decoder/mod.rs @@ -139,7 +139,7 @@ impl VideoDecoder { if cfg!(debug_assertions) { return Err(DecodingError::NoNativeDebug); // because debug builds of rav1d are EXTREMELY slow } else { - let av1_decoder = re_video::decode::av1::SyncDav1dDecoder::new() + let av1_decoder = re_video::decode::av1::SyncDav1dDecoder::new(debug_name.clone()) .map_err(|err| DecodingError::StartDecoder(err.to_string()))?; let decoder = native_decoder::NativeDecoder::new(debug_name, Box::new(av1_decoder))?; From fb183893dfa1e37cf722f5b5d29763674fe270c3 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 16:45:48 +0200 Subject: [PATCH 08/13] even more debug name! 
--- crates/store/re_video/src/decode/av1.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index 808f54d862f7..da3276900e6d 100644 --- a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -176,28 +176,28 @@ fn output_picture( let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2); { let plane = picture.plane(PlanarImageComponent::Y); - if actual_stride_y != packed_stride_y { + if packed_stride_y != actual_stride_y { + data.extend_from_slice(&plane[0..num_packed_bytes_y]); + } else { re_tracing::profile_scope!("slow path, y-plane"); for y in 0..height_y { let offset = y * actual_stride_y; data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]); } - } else { - data.extend_from_slice(&plane[0..num_packed_bytes_y]); } } for comp in [PlanarImageComponent::U, PlanarImageComponent::V] { let plane = picture.plane(comp); - if actual_stride_uv != packed_stride_uv { + if actual_stride_uv == packed_stride_uv { + data.extend_from_slice(&plane[0..num_packed_bytes_uv]); + } else { re_tracing::profile_scope!("slow path, u/v-plane"); for y in 0..height_uv { let offset = y * actual_stride_uv; data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]); } - } else { - data.extend_from_slice(&plane[0..num_packed_bytes_uv]); } } @@ -218,7 +218,7 @@ fn output_picture( dav1d::pixel::YUVRange::Limited => YuvRange::Limited, dav1d::pixel::YUVRange::Full => YuvRange::Full, }, - primaries: color_primaries(picture), + primaries: color_primaries(debug_name, picture), }; let frame = Frame { @@ -232,7 +232,7 @@ fn output_picture( on_output(Ok(frame)); } -fn color_primaries(picture: &dav1d::Picture) -> ColorPrimaries { +fn color_primaries(debug_name: &str, picture: &dav1d::Picture) -> ColorPrimaries { #[allow(clippy::match_same_arms)] match picture.color_primaries() { 
dav1d::pixel::ColorPrimaries::Reserved @@ -281,13 +281,13 @@ fn color_primaries(picture: &dav1d::Picture) -> ColorPrimaries { | dav1d::pixel::ColorPrimaries::P3DCI | dav1d::pixel::ColorPrimaries::P3Display => { // TODO(#7594): HDR support. - re_log::warn_once!("Video specified HDR color primaries. Rerun doesn't handle HDR colors correctly yet. Color artifacts may be visible."); + re_log::warn_once!("Video {debug_name:?} specified HDR color primaries. Rerun doesn't handle HDR colors correctly yet. Color artifacts may be visible."); ColorPrimaries::Bt709 } dav1d::pixel::ColorPrimaries::Film | dav1d::pixel::ColorPrimaries::Tech3213 => { re_log::warn_once!( - "Video specified unsupported color primaries {:?}. Color artifacts may be visible.", + "Video {debug_name:?} specified unsupported color primaries {:?}. Color artifacts may be visible.", picture.color_primaries() ); ColorPrimaries::Bt709 From f489be875815cd11e025d4697c0cf7c3a93495d9 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 16:47:38 +0200 Subject: [PATCH 09/13] more explicit unreachableness of Rgb8unorm later down in copy_video_frame_to_texture --- .../viewer/re_renderer/src/video/decoder/native_decoder.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/viewer/re_renderer/src/video/decoder/native_decoder.rs b/crates/viewer/re_renderer/src/video/decoder/native_decoder.rs index 1673001223ad..fccdb1fe13ec 100644 --- a/crates/viewer/re_renderer/src/video/decoder/native_decoder.rs +++ b/crates/viewer/re_renderer/src/video/decoder/native_decoder.rs @@ -158,9 +158,14 @@ fn copy_video_frame_to_texture( re_tracing::profile_function!(); let format = match &frame.format { - re_video::PixelFormat::Rgb8Unorm | re_video::PixelFormat::Rgba8Unorm => { + re_video::PixelFormat::Rgb8Unorm => { + unreachable!("Handled explicitly earlier in this function"); + } + + re_video::PixelFormat::Rgba8Unorm => { SourceImageDataFormat::WgpuCompatible(wgpu::TextureFormat::Rgba8Unorm) } + 
re_video::PixelFormat::Yuv { layout, range, From 5f872b386d2d559b14da48123df11f81e40628fd Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 16:52:39 +0200 Subject: [PATCH 10/13] =?UTF-8?q?not=20break=20almost=20=5Feverything=5F?= =?UTF-8?q?=20else=20=F0=9F=98=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../re_renderer/src/resource_managers/image_data_to_texture.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs b/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs index bb95d05341c9..d0a2183887f1 100644 --- a/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs +++ b/crates/viewer/re_renderer/src/resource_managers/image_data_to_texture.rs @@ -258,7 +258,7 @@ impl<'a> ImageDataDesc<'a> { label: self.label.clone(), size: wgpu::Extent3d { width: self.width_height[0], - height: self.width_height[0], + height: self.width_height[1], depth_or_array_layers: 1, }, mip_level_count: 1, // No mipmapping support yet. 
From 4e201d1058eee5b7c939874e592037fbe0712aad Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 17:08:50 +0200 Subject: [PATCH 11/13] fix re_video example --- crates/store/re_video/examples/frames.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/store/re_video/examples/frames.rs b/crates/store/re_video/examples/frames.rs index 877c20bdff1d..0ad5684d7ea1 100644 --- a/crates/store/re_video/examples/frames.rs +++ b/crates/store/re_video/examples/frames.rs @@ -36,15 +36,16 @@ fn main() { video.config.coded_height ); - let mut decoder = create_decoder(&video); + let mut decoder = create_decoder(video_path, &video); write_video_frames(&video, decoder.as_mut(), &output_dir); } -fn create_decoder(video: &VideoData) -> Box { +fn create_decoder(debug_name: &str, video: &VideoData) -> Box { if video.config.is_av1() { Box::new( - re_video::decode::av1::SyncDav1dDecoder::new().expect("Failed to start AV1 decoder"), + re_video::decode::av1::SyncDav1dDecoder::new(debug_name.to_owned()) + .expect("Failed to start AV1 decoder"), ) } else { panic!("Unsupported codec: {}", video.human_readable_codec_string()); From 5740e2fb326aa3b46e8480d919252c8733c15eba Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 17:33:46 +0200 Subject: [PATCH 12/13] fix oversight on trying to make conditions easier to read, caused padding handling to break again --- crates/store/re_video/src/decode/av1.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index da3276900e6d..db663b1b58ce 100644 --- a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -176,7 +176,7 @@ fn output_picture( let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2); { let plane = picture.plane(PlanarImageComponent::Y); - if packed_stride_y != actual_stride_y { + if packed_stride_y == actual_stride_y { 
data.extend_from_slice(&plane[0..num_packed_bytes_y]); } else { re_tracing::profile_scope!("slow path, y-plane"); From 62943344856bb2c734607e95d3671e3adae4ef2c Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 11 Oct 2024 17:58:26 +0200 Subject: [PATCH 13/13] add a comment pointing to how common it is to change colors depending on --- crates/store/re_video/src/decode/av1.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index db663b1b58ce..61c29e2a0f4f 100644 --- a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -254,6 +254,9 @@ fn color_primaries(debug_name: &str, picture: &dav1d::Picture) -> ColorPrimaries // ColorPrimaries::Bt601 // } // + // This is also what the mpv player does (and probably others): + // https://wiki.x266.mov/docs/colorimetry/primaries#2-unspecified + // // …then again, eyeballing VLC it looks like it just always assumes BT.709. // The handwavy test case employed here was the same video in low & high resolution // without specified primaries. Both looked the same.