From 187b673e124f9f45bee5b2a63b7c73aebbb30655 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 8 Oct 2024 15:21:04 +0200 Subject: [PATCH] Refactor video decoding (#7625) ### What * Closes #7583 This refactors the video decoder to unify the native and web decoder logic. It changes how error handling is done quite a bit, so this will require some testing. This PR also hides the spinner if the current frame is less than 400ms outdated. This means a small hiccup during decoding will not cause the spinner to show up, but jumping a big step in the video stream (moving the time cursor by a large step) will make the spinner show up immediately, making Rerun feel responsive. ### Checklist * [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I've included a screenshot or gif (if applicable) * [x] I have tested the web demo (if applicable): * Using examples from latest `main` build: [rerun.io/viewer](https://rerun.io/viewer/pr/7625?manifest_url=https://app.rerun.io/version/main/examples_manifest.json) * Using full set of examples from `nightly` build: [rerun.io/viewer](https://rerun.io/viewer/pr/7625?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json) * [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG * [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)! * [x] I have noted any breaking changes to the log API in `CHANGELOG.md` and the migration guide - [PR Build Summary](https://build.rerun.io/pr/7625) - [Recent benchmark results](https://build.rerun.io/graphs/crates.html) - [Wasm size tracking](https://build.rerun.io/graphs/sizes.html) To run all checks from `main`, comment on the PR with `@rerun-bot full-check`. 
--- crates/store/re_video/examples/frames.rs | 6 +- crates/store/re_video/src/demux/mod.rs | 31 +- crates/store/re_video/src/demux/mp4.rs | 150 +++---- crates/store/re_video/src/lib.rs | 9 + crates/store/re_video/src/mp4.rs | 93 ---- crates/viewer/re_data_ui/src/blob.rs | 24 +- .../re_renderer/src/video/decoder/mod.rs | 413 ++++++++++++++++-- .../src/video/decoder/native_av1.rs | 278 ++---------- .../src/video/decoder/no_native_decoder.rs | 21 - .../re_renderer/src/video/decoder/web.rs | 373 +++------------- crates/viewer/re_renderer/src/video/mod.rs | 22 +- .../src/visualizers/videos.rs | 35 +- scripts/lint.py | 1 + 13 files changed, 642 insertions(+), 814 deletions(-) delete mode 100644 crates/store/re_video/src/mp4.rs delete mode 100644 crates/viewer/re_renderer/src/video/decoder/no_native_decoder.rs diff --git a/crates/store/re_video/examples/frames.rs b/crates/store/re_video/examples/frames.rs index 1b95e8fd1868..663b74df8d07 100644 --- a/crates/store/re_video/examples/frames.rs +++ b/crates/store/re_video/examples/frames.rs @@ -13,8 +13,6 @@ use std::{ use indicatif::ProgressBar; use parking_lot::Mutex; -use re_video::demux::mp4::load_mp4; - fn main() { // frames let args: Vec<_> = std::env::args().collect(); @@ -27,11 +25,11 @@ fn main() { println!("Decoding {video_path}"); let video = std::fs::read(video_path).expect("failed to read video"); - let video = load_mp4(&video).expect("failed to load video"); + let video = re_video::VideoData::load_mp4(&video).expect("failed to load video"); println!( "{} {}x{}", - video.segments.len(), + video.gops.len(), video.config.coded_width, video.config.coded_height ); diff --git a/crates/store/re_video/src/demux/mod.rs b/crates/store/re_video/src/demux/mod.rs index ce2f7e5abdb1..8b70c440e0bb 100644 --- a/crates/store/re_video/src/demux/mod.rs +++ b/crates/store/re_video/src/demux/mod.rs @@ -1,6 +1,6 @@ //! Video demultiplexing. //! -//! 
Parses a video file into a raw [`VideoData`] struct, which contains basic metadata and a list of [`Segment`]s. +//! Parses a video file into a raw [`VideoData`] struct, which contains basic metadata and a list of [`GroupOfPictures`]s. //! //! The entry point is [`VideoData::load_from_bytes`] //! which produces an instance of [`VideoData`] from any supported video container. @@ -26,9 +26,9 @@ pub struct VideoData { /// Duration of the video, in time units. pub duration: Time, - /// We split video into segments, each beginning with a key frame, + /// We split video into GOPs, each beginning with a key frame, /// followed by any number of delta frames. - pub segments: Vec, + pub gops: Vec, /// Samples contain the byte offsets into `data` for each frame. /// @@ -54,7 +54,8 @@ impl VideoData { /// at the very least the should be a way to extract only metadata. pub fn load_from_bytes(data: &[u8], media_type: &str) -> Result { match media_type { - "video/mp4" => mp4::load_mp4(data), + "video/mp4" => Self::load_mp4(data), + media_type => { if media_type.starts_with("video/") { Err(VideoLoadError::UnsupportedMimeType { @@ -111,7 +112,7 @@ impl VideoData { pub fn frame_timestamps_ns(&self) -> impl Iterator + '_ { // Segments are guaranteed to be sorted among each other, but within a segment, // presentation timestamps may not be sorted since this is sorted by decode timestamps. - self.segments.iter().flat_map(|seg| { + self.gops.iter().flat_map(|seg| { self.samples[seg.range()] .iter() .map(|sample| sample.composition_timestamp.into_nanos(self.timescale)) @@ -138,18 +139,20 @@ impl VideoData { } } -/// A segment of a video. +/// A Group of Pictures (GOP) always starts with an I-frame, followed by delta-frames. +/// +/// See for more. #[derive(Debug, Clone)] -pub struct Segment { - /// Decode timestamp of the first sample in this segment, in time units. +pub struct GroupOfPictures { + /// Decode timestamp of the first sample in this GOP, in time units. 
pub start: Time, - /// Range of samples contained in this segment. + /// Range of samples contained in this GOP. pub sample_range: Range, } -impl Segment { - /// The segment's `sample_range` mapped to `usize` for slicing. +impl GroupOfPictures { + /// The GOP's `sample_range` mapped to `usize` for slicing. pub fn range(&self) -> Range { Range { start: self.sample_range.start as usize, @@ -163,11 +166,15 @@ impl Segment { pub struct Sample { /// Time at which this sample appears in the decoded bitstream, in time units. /// + /// Samples should be decoded in this order. + /// /// `decode_timestamp <= composition_timestamp` pub decode_timestamp: Time, /// Time at which this sample appears in the frame stream, in time units. /// + /// The frame should be shown at this time. + /// /// `decode_timestamp <= composition_timestamp` pub composition_timestamp: Time, @@ -245,7 +252,7 @@ impl std::fmt::Debug for VideoData { .field("config", &self.config) .field("timescale", &self.timescale) .field("duration", &self.duration) - .field("segments", &self.segments) + .field("gops", &self.gops) .field( "samples", &self.samples.iter().enumerate().collect::>(), diff --git a/crates/store/re_video/src/demux/mp4.rs b/crates/store/re_video/src/demux/mp4.rs index 04721da582ba..14732f1f0469 100644 --- a/crates/store/re_video/src/demux/mp4.rs +++ b/crates/store/re_video/src/demux/mp4.rs @@ -1,89 +1,91 @@ #![allow(clippy::map_err_ignore)] -use super::{Config, Sample, Segment, VideoData, VideoLoadError}; +use super::{Config, GroupOfPictures, Sample, VideoData, VideoLoadError}; use crate::{Time, Timescale}; -pub fn load_mp4(bytes: &[u8]) -> Result { - let mp4 = re_mp4::Mp4::read_bytes(bytes)?; - - let mp4_tracks = mp4.tracks().iter().map(|(k, t)| (*k, t.kind)).collect(); - - let track = mp4 - .tracks() - .values() - .find(|t| t.kind == Some(re_mp4::TrackKind::Video)) - .ok_or_else(|| VideoLoadError::NoVideoTrack)?; - - let codec = track - .codec_string(&mp4) - .ok_or_else(|| 
VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?; - let description = track - .raw_codec_config(&mp4) - .ok_or_else(|| VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?; - - let coded_height = track.height; - let coded_width = track.width; - - let config = Config { - codec, - description, - coded_height, - coded_width, - }; - - let timescale = Timescale::new(track.timescale); - let duration = Time::new(track.duration as i64); - let mut samples = Vec::::new(); - let mut segments = Vec::::new(); - let mut segment_sample_start_index = 0; - let data = track.data.clone(); - - for sample in &track.samples { - if sample.is_sync && !samples.is_empty() { - let start = samples[segment_sample_start_index].decode_timestamp; - let sample_range = segment_sample_start_index as u32..samples.len() as u32; - segments.push(Segment { +impl VideoData { + pub fn load_mp4(bytes: &[u8]) -> Result { + let mp4 = re_mp4::Mp4::read_bytes(bytes)?; + + let mp4_tracks = mp4.tracks().iter().map(|(k, t)| (*k, t.kind)).collect(); + + let track = mp4 + .tracks() + .values() + .find(|t| t.kind == Some(re_mp4::TrackKind::Video)) + .ok_or_else(|| VideoLoadError::NoVideoTrack)?; + + let codec = track + .codec_string(&mp4) + .ok_or_else(|| VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?; + let description = track + .raw_codec_config(&mp4) + .ok_or_else(|| VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?; + + let coded_height = track.height; + let coded_width = track.width; + + let config = Config { + codec, + description, + coded_height, + coded_width, + }; + + let timescale = Timescale::new(track.timescale); + let duration = Time::new(track.duration as i64); + let mut samples = Vec::::new(); + let mut gops = Vec::::new(); + let mut gop_sample_start_index = 0; + let data = track.data.clone(); + + for sample in &track.samples { + if sample.is_sync && !samples.is_empty() { + let start = 
samples[gop_sample_start_index].decode_timestamp; + let sample_range = gop_sample_start_index as u32..samples.len() as u32; + gops.push(GroupOfPictures { + start, + sample_range, + }); + gop_sample_start_index = samples.len(); + } + + let decode_timestamp = Time::new(sample.decode_timestamp as i64); + let composition_timestamp = Time::new(sample.composition_timestamp as i64); + let duration = Time::new(sample.duration as i64); + + let byte_offset = sample.offset as u32; + let byte_length = sample.size as u32; + + samples.push(Sample { + decode_timestamp, + composition_timestamp, + duration, + byte_offset, + byte_length, + }); + } + + if !samples.is_empty() { + let start = samples[gop_sample_start_index].decode_timestamp; + let sample_range = gop_sample_start_index as u32..samples.len() as u32; + gops.push(GroupOfPictures { start, sample_range, }); - segment_sample_start_index = samples.len(); } - let decode_timestamp = Time::new(sample.decode_timestamp as i64); - let composition_timestamp = Time::new(sample.composition_timestamp as i64); - let duration = Time::new(sample.duration as i64); - - let byte_offset = sample.offset as u32; - let byte_length = sample.size as u32; - - samples.push(Sample { - decode_timestamp, - composition_timestamp, + Ok(Self { + config, + timescale, duration, - byte_offset, - byte_length, - }); - } - - if !samples.is_empty() { - let start = samples[segment_sample_start_index].decode_timestamp; - let sample_range = segment_sample_start_index as u32..samples.len() as u32; - segments.push(Segment { - start, - sample_range, - }); + gops, + samples, + data, + mp4_tracks, + }) } - - Ok(VideoData { - config, - timescale, - duration, - segments, - samples, - data, - mp4_tracks, - }) } fn unknown_codec_fourcc(mp4: &re_mp4::Mp4, track: &re_mp4::Track) -> re_mp4::FourCC { diff --git a/crates/store/re_video/src/lib.rs b/crates/store/re_video/src/lib.rs index c0a600738310..dbde69d6d1b1 100644 --- a/crates/store/re_video/src/lib.rs +++ 
b/crates/store/re_video/src/lib.rs @@ -88,6 +88,15 @@ impl Time { } } +impl std::ops::Add for Time { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self::Output { + Self(self.0.saturating_add(rhs.0)) + } +} + impl std::ops::Sub for Time { type Output = Self; diff --git a/crates/store/re_video/src/mp4.rs b/crates/store/re_video/src/mp4.rs deleted file mode 100644 index 23a551cbb4eb..000000000000 --- a/crates/store/re_video/src/mp4.rs +++ /dev/null @@ -1,93 +0,0 @@ -#![allow(clippy::map_err_ignore)] - -use super::{Config, Sample, Segment, Time, Timescale, VideoData, VideoLoadError}; - -pub fn load_mp4(bytes: &[u8]) -> Result { - let mp4 = re_mp4::Mp4::read_bytes(bytes)?; - - let mp4_tracks = mp4.tracks().iter().map(|(k, t)| (*k, t.kind)).collect(); - - let track = mp4 - .tracks() - .values() - .find(|t| t.kind == Some(re_mp4::TrackKind::Video)) - .ok_or_else(|| VideoLoadError::NoVideoTrack)?; - - let codec = track - .codec_string(&mp4) - .ok_or_else(|| VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?; - let description = track - .raw_codec_config(&mp4) - .ok_or_else(|| VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?; - - let coded_height = track.height; - let coded_width = track.width; - - let config = Config { - codec, - description, - coded_height, - coded_width, - }; - - let timescale = Timescale::new(track.timescale); - let duration = Time::new(track.duration); - let mut samples = Vec::::new(); - let mut segments = Vec::::new(); - let mut segment_sample_start_index = 0; - let data = track.data.clone(); - - for sample in &track.samples { - if sample.is_sync && !samples.is_empty() { - let start = samples[segment_sample_start_index].decode_timestamp; - let sample_range = segment_sample_start_index as u32..samples.len() as u32; - segments.push(Segment { - start, - sample_range, - }); - segment_sample_start_index = samples.len(); - } - - let decode_timestamp = Time::new(sample.decode_timestamp); - let 
composition_timestamp = Time::new(sample.composition_timestamp); - let duration = Time::new(sample.duration); - - let byte_offset = sample.offset as u32; - let byte_length = sample.size as u32; - - samples.push(Sample { - decode_timestamp, - composition_timestamp, - duration, - byte_offset, - byte_length, - }); - } - - if !samples.is_empty() { - let start = samples[segment_sample_start_index].decode_timestamp; - let sample_range = segment_sample_start_index as u32..samples.len() as u32; - segments.push(Segment { - start, - sample_range, - }); - } - - Ok(VideoData { - config, - timescale, - duration, - segments, - samples, - data, - mp4_tracks, - }) -} - -fn unknown_codec_fourcc(mp4: &re_mp4::Mp4, track: &re_mp4::Track) -> re_mp4::FourCC { - let stsd = &track.trak(mp4).mdia.minf.stbl.stsd; - match &stsd.contents { - re_mp4::StsdBoxContent::Unknown(four_cc) => *four_cc, - _ => Default::default(), - } -} diff --git a/crates/viewer/re_data_ui/src/blob.rs b/crates/viewer/re_data_ui/src/blob.rs index 77db7cff569d..719b152849b4 100644 --- a/crates/viewer/re_data_ui/src/blob.rs +++ b/crates/viewer/re_data_ui/src/blob.rs @@ -246,21 +246,11 @@ fn show_video_blob_info( ); match video.frame_at(render_ctx, decode_stream_id, timestamp_in_seconds) { - Ok(frame) => { - let is_pending; - let texture = match frame { - VideoFrameTexture::Ready(texture) => { - is_pending = false; - texture - } - - VideoFrameTexture::Pending(placeholder) => { - is_pending = true; - ui.ctx().request_repaint(); - placeholder - } - }; - + Ok(VideoFrameTexture { + texture, + is_pending, + show_spinner, + }) => { let response = crate::image::texture_preview_ui( render_ctx, ui, @@ -270,6 +260,10 @@ fn show_video_blob_info( ); if is_pending { + ui.ctx().request_repaint(); // Keep polling for an up-to-date texture + } + + if show_spinner { // Shrink slightly: let smaller_rect = egui::Rect::from_center_size( response.rect.center(), diff --git a/crates/viewer/re_renderer/src/video/decoder/mod.rs 
b/crates/viewer/re_renderer/src/video/decoder/mod.rs index 990a6466c1e6..1ce8d1363c89 100644 --- a/crates/viewer/re_renderer/src/video/decoder/mod.rs +++ b/crates/viewer/re_renderer/src/video/decoder/mod.rs @@ -1,66 +1,393 @@ #[cfg(target_arch = "wasm32")] mod web; -#[cfg(not(target_arch = "wasm32"))] -mod no_native_decoder; - #[cfg(feature = "video_av1")] #[cfg(not(target_arch = "wasm32"))] mod native_av1; +use std::{ops::Range, sync::Arc, time::Duration}; + +use web_time::Instant; + +use re_video::{Chunk, Time}; + use crate::{ resource_managers::GpuTexture2D, wgpu_resources::{GpuTexturePool, TextureDesc}, RenderContext, }; -use std::{sync::Arc, time::Duration}; - -use super::{DecodeHardwareAcceleration, DecodingError, FrameDecodingResult}; +use super::{DecodeHardwareAcceleration, DecodingError, VideoFrameTexture}; +/// Ignore hickups lasting shorter than this. +/// /// Delaying error reports (and showing last-good images meanwhile) allows us to skip over /// transient errors without flickering. -#[allow(unused)] -pub const DECODING_ERROR_REPORTING_DELAY: Duration = Duration::from_millis(400); +/// +/// Same with showing a spinner: if we show it too fast, it is annoying. +const DECODING_GRACE_DELAY: Duration = Duration::from_millis(400); + +#[allow(unused)] // Unused for certain build flags +struct TimedDecodingError { + time_of_first_error: Instant, + latest_error: DecodingError, +} + +impl TimedDecodingError { + #[allow(unused)] // Unused for certain build flags + pub fn new(latest_error: DecodingError) -> Self { + Self { + time_of_first_error: Instant::now(), + latest_error, + } + } +} + +/// A texture of a specific video frame. +struct VideoTexture { + pub texture: GpuTexture2D, + + /// What part of the video this video frame covers. + pub time_range: Range