From 779fbcd6a5c5d36871425177f0c40ceb5c1eefee Mon Sep 17 00:00:00 2001 From: jprochazk Date: Wed, 25 Sep 2024 13:12:14 +0200 Subject: [PATCH 1/4] some promising results --- .../re_renderer/src/video/decoder/native.rs | 2 +- .../re_renderer/src/video/decoder/web.rs | 155 +++++++++++++++--- crates/viewer/re_renderer/src/video/mod.rs | 6 +- 3 files changed, 137 insertions(+), 26 deletions(-) diff --git a/crates/viewer/re_renderer/src/video/decoder/native.rs b/crates/viewer/re_renderer/src/video/decoder/native.rs index 292e08a86cb9..dff3ca2a7d82 100644 --- a/crates/viewer/re_renderer/src/video/decoder/native.rs +++ b/crates/viewer/re_renderer/src/video/decoder/native.rs @@ -41,7 +41,7 @@ impl VideoDecoder { pub fn frame_at( &mut self, _render_ctx: &RenderContext, - _timestamp_s: f64, + _presentation_timestamp_s: f64, ) -> FrameDecodingResult { FrameDecodingResult::Error(DecodingError::NoNativeSupport) } diff --git a/crates/viewer/re_renderer/src/video/decoder/web.rs b/crates/viewer/re_renderer/src/video/decoder/web.rs index d94d7fa38b2d..e25e2949de39 100644 --- a/crates/viewer/re_renderer/src/video/decoder/web.rs +++ b/crates/viewer/re_renderer/src/video/decoder/web.rs @@ -107,6 +107,8 @@ impl VideoDecoder { render_context: &RenderContext, data: Arc, ) -> Result { + re_log::debug!("{:?}", data.samples); + let frames = Arc::new(Mutex::new(Vec::with_capacity(16))); let decode_error = Arc::new(Mutex::new(None)); @@ -116,6 +118,7 @@ impl VideoDecoder { let frames = frames.clone(); let decode_error = decode_error.clone(); move |frame: web_sys::VideoFrame| { + web_sys::console::log_1(&frame); let composition_timestamp = Time::from_micros(frame.timestamp().unwrap_or(0.0), timescale); let duration = Time::from_micros(frame.duration().unwrap_or(0.0), timescale); @@ -167,7 +170,7 @@ impl VideoDecoder { pub fn frame_at( &mut self, render_ctx: &RenderContext, - timestamp_s: f64, + presentation_timestamp_s: f64, ) -> FrameDecodingResult { if let Some(error) = 
self.decode_error.lock().clone() { // TODO(emilk): if there is a decoding error in one segment or sample, @@ -177,7 +180,7 @@ impl VideoDecoder { return FrameDecodingResult::Error(error); } - let result = self.frame_at_internal(timestamp_s); + let result = self.frame_at_internal(presentation_timestamp_s); match &result { FrameDecodingResult::Ready(_) => { self.error_on_last_frame_at = false; @@ -200,27 +203,127 @@ impl VideoDecoder { result } - fn frame_at_internal(&mut self, timestamp_s: f64) -> FrameDecodingResult { - if timestamp_s < 0.0 { + fn frame_at_internal(&mut self, presentation_timestamp_s: f64) -> FrameDecodingResult { + if presentation_timestamp_s < 0.0 { return FrameDecodingResult::Error(DecodingError::NegativeTimestamp); } - let timescale = self.data.timescale; - let timestamp = Time::from_secs(timestamp_s, timescale); + let presentation_timestamp = Time::from_secs(presentation_timestamp_s, self.data.timescale); + + if let Err(err) = self.enqueue_requested_segments2(presentation_timestamp) { + return FrameDecodingResult::Error(err); + } - let Some(requested_segment_idx) = - latest_at_idx(&self.data.segments, |segment| segment.start, &timestamp) + self.try_present_frame(presentation_timestamp) + } + + fn enqueue_requested_segments2( + &mut self, + presentation_timestamp: Time, + ) -> Result<(), DecodingError> { + // Some terminology: + // - presentation timestamp = composition timestamp + // = the time at which the frame should be shown + // - decode timestamp + // = determines the decoding order of samples + // + // Note: `composition >= decode` for any given sample. + // For some codecs, the two timestamps are the same. + // We must enqueue samples in decode order, but show them in composition order. + + // 1. Find the latest sample where `decode_timestamp <= presentation_timestamp`. + // Because `composition >= decode`, we never have to look further ahead in the + // video than this. 
+ let Some(decode_sample_idx) = latest_at_idx( + &self.data.samples, + |sample| sample.decode_timestamp, + &presentation_timestamp, + ) else { + return Err(DecodingError::EmptyVideo); + }; + + // 2. Search _backwards_, starting at `decode_sample_idx`, looking for + // the first sample where `sample.composition_timestamp <= presentation_timestamp`. + // This is the sample the user requested. + let Some(requested_sample_idx) = self.data.samples[..=decode_sample_idx] + .iter() + .rposition(|sample| sample.composition_timestamp <= presentation_timestamp) else { - return FrameDecodingResult::Error(DecodingError::EmptyVideo); + return Err(DecodingError::EmptyVideo); + }; + + // 3. Do a binary search through segments by the decode timestamp of the found sample + // to find the segment that contains the sample. + let Some(requested_segment_idx) = latest_at_idx( + &self.data.segments, + |segment| segment.start, + &self.data.samples[requested_sample_idx].decode_timestamp, + ) else { + return Err(DecodingError::EmptyVideo); + }; + + re_log::debug!("decode={decode_sample_idx} segment={requested_segment_idx} sample={requested_sample_idx}"); + + // 4. Enqueue segments as needed. + // + // We maintain a buffer of 2 segments, so we can always smoothly transition to the next segment. + // We can always start decoding from any segment, because segments always begin with a keyframe. + // + // Backward seeks or seeks across many segments trigger a reset of the decoder, + // because decoding all the samples between the previous sample and the requested + // one would mean decoding and immediately discarding more frames than we need. 
+ if requested_segment_idx != self.current_segment_idx { + let segment_distance = requested_segment_idx.checked_sub(self.current_segment_idx); + if segment_distance == Some(1) { + // forward seek to next segment - queue up the one _after_ requested + self.enqueue_segment(requested_segment_idx + 1); + } else { + // Startup, forward seek by N>1, or backward seek across segments -> reset decoder + self.reset()?; + self.enqueue_segment(requested_segment_idx); + self.enqueue_segment(requested_segment_idx + 1); + } + } else if requested_sample_idx != self.current_sample_idx { + // special case: handle seeking backwards within a single segment + // this is super inefficient, but it's the only way to handle it + // while maintaining a buffer of 2 segments + let sample_distance = requested_sample_idx as isize - self.current_sample_idx as isize; + if sample_distance < 0 { + self.reset()?; + self.enqueue_segment(requested_segment_idx); + self.enqueue_segment(requested_segment_idx + 1); + } + } + + // At this point, we have the requested segments enqueued. They will be output + // in _composition timestamp_ order, so presenting the frame is a binary search + // through the frame buffer as usual. + + self.current_segment_idx = requested_segment_idx; + self.current_sample_idx = requested_sample_idx; + + Ok(()) + } + + fn enqueue_requested_segments( + &mut self, + presentation_timestamp: Time, + ) -> Result<(), DecodingError> { + let Some(requested_segment_idx) = latest_at_idx( + &self.data.segments, + |segment| segment.start, + &presentation_timestamp, + ) else { + return Err(DecodingError::EmptyVideo); }; let requested_segment = &self.data.segments[requested_segment_idx]; let Some(requested_sample_idx) = latest_at_idx( &self.data.samples[requested_segment.range()], |sample| sample.decode_timestamp, - &timestamp, + &presentation_timestamp, ) else { // This should never happen, because segments are never empty. 
- return FrameDecodingResult::Error(DecodingError::EmptySegment); + return Err(DecodingError::EmptySegment); }; // Enqueue segments as needed. We maintain a buffer of 2 segments, so we can @@ -238,9 +341,7 @@ impl VideoDecoder { self.enqueue_segment(requested_segment_idx + 1); } else { // Startup, forward seek by N>1, or backward seek across segments -> reset decoder - if let Err(err) = self.reset() { - return FrameDecodingResult::Error(err); - } + self.reset()?; self.enqueue_segment(requested_segment_idx); self.enqueue_segment(requested_segment_idx + 1); } @@ -250,9 +351,7 @@ impl VideoDecoder { // while maintaining a buffer of 2 segments let sample_distance = requested_sample_idx as isize - self.current_sample_idx as isize; if sample_distance < 0 { - if let Err(err) = self.reset() { - return FrameDecodingResult::Error(err); - } + self.reset()?; self.enqueue_segment(requested_segment_idx); self.enqueue_segment(requested_segment_idx + 1); } @@ -261,11 +360,19 @@ impl VideoDecoder { self.current_segment_idx = requested_segment_idx; self.current_sample_idx = requested_sample_idx; + Ok(()) + } + + fn try_present_frame(&mut self, presentation_timestamp: Time) -> FrameDecodingResult { + let timescale = self.data.timescale; + let mut frames = self.frames.lock(); - let Some(frame_idx) = - latest_at_idx(&frames, |frame| frame.composition_timestamp, &timestamp) - else { + let Some(frame_idx) = latest_at_idx( + &frames, + |frame| frame.composition_timestamp, + &presentation_timestamp, + ) else { // no buffered frames - texture will be blank // Don't return a zeroed texture, because we may just be behind on decoding // and showing an old frame is better than showing a blank frame, @@ -287,7 +394,7 @@ impl VideoDecoder { // This handles the case when we have a buffered frame that's older than the requested timestamp. // We don't want to show this frame to the user, because it's not actually the one they requested, // so instead return the last decoded frame. 
- if timestamp.into_millis(timescale) - frame_timestamp_ms > frame_duration_ms { + if presentation_timestamp.into_millis(timescale) - frame_timestamp_ms > frame_duration_ms { return FrameDecodingResult::Pending(self.texture.clone()); } @@ -351,10 +458,11 @@ impl VideoDecoder { } else { EncodedVideoChunkType::Delta }; - // TODO(jan): use `composition_timestamp` instead let chunk = EncodedVideoChunkInit::new( &data, - sample.decode_timestamp.into_micros(self.data.timescale), + sample + .composition_timestamp + .into_micros(self.data.timescale), type_, ); chunk.set_duration(sample.duration.into_micros(self.data.timescale)); @@ -368,6 +476,7 @@ impl VideoDecoder { return; }; + web_sys::console::log_1(&chunk); if let Err(err) = self.decoder.decode(&chunk) { *self.decode_error.lock() = Some(DecodingError::DecodeChunk(js_error_to_string(&err))); } diff --git a/crates/viewer/re_renderer/src/video/mod.rs b/crates/viewer/re_renderer/src/video/mod.rs index fe2119721ac4..92592ccddb0b 100644 --- a/crates/viewer/re_renderer/src/video/mod.rs +++ b/crates/viewer/re_renderer/src/video/mod.rs @@ -122,7 +122,7 @@ impl Video { &self, render_context: &RenderContext, decoder_stream_id: VideoDecodingStreamId, - timestamp_s: f64, + presentation_timestamp_s: f64, ) -> FrameDecodingResult { re_tracing::profile_function!(); @@ -152,7 +152,9 @@ impl Video { }; decoder_entry.frame_index = render_context.active_frame_idx(); - decoder_entry.decoder.frame_at(render_context, timestamp_s) + decoder_entry + .decoder + .frame_at(render_context, presentation_timestamp_s) } /// Removes all decoders that have been unused in the last frame. 
From 5d0c9e988ee522d578a49905c81117769edd93c0 Mon Sep 17 00:00:00 2001 From: jprochazk Date: Wed, 25 Sep 2024 18:32:00 +0200 Subject: [PATCH 2/4] debug logging --- crates/store/re_video/src/lib.rs | 16 ++-- .../re_renderer/src/video/decoder/web.rs | 73 +++---------------- 2 files changed, 15 insertions(+), 74 deletions(-) diff --git a/crates/store/re_video/src/lib.rs b/crates/store/re_video/src/lib.rs index c211fd89f647..fc56be110b60 100644 --- a/crates/store/re_video/src/lib.rs +++ b/crates/store/re_video/src/lib.rs @@ -120,7 +120,7 @@ impl VideoData { } /// A segment of a video. -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct Segment { /// Decode timestamp of the first sample in this segment, in time units. pub start: Time, @@ -275,18 +275,14 @@ impl std::fmt::Debug for VideoData { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Video") .field("config", &self.config) + .field("timescale", &self.timescale) .field("duration", &self.duration) .field("segments", &self.segments) + .field( + "samples", + &self.samples.iter().enumerate().collect::<Vec<_>>(), + ) .field("data", &self.data.len()) .finish() } } - -impl std::fmt::Debug for Segment { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Segment") - .field("timestamp", &self.start) - .field("samples", &self.sample_range.len()) - .finish() - } -} diff --git a/crates/viewer/re_renderer/src/video/decoder/web.rs b/crates/viewer/re_renderer/src/video/decoder/web.rs index e25e2949de39..0146b5ba0cb0 100644 --- a/crates/viewer/re_renderer/src/video/decoder/web.rs +++ b/crates/viewer/re_renderer/src/video/decoder/web.rs @@ -107,7 +107,7 @@ impl VideoDecoder { render_context: &RenderContext, data: Arc, ) -> Result { - re_log::debug!("{:?}", data.samples); + re_log::debug!("{:?}", data.segments); let frames = Arc::new(Mutex::new(Vec::with_capacity(16))); let decode_error = Arc::new(Mutex::new(None)); @@ -118,7 +118,7 @@ impl VideoDecoder { let frames 
= frames.clone(); let decode_error = decode_error.clone(); move |frame: web_sys::VideoFrame| { - web_sys::console::log_1(&frame); + // web_sys::console::log_1(&frame); let composition_timestamp = Time::from_micros(frame.timestamp().unwrap_or(0.0), timescale); let duration = Time::from_micros(frame.duration().unwrap_or(0.0), timescale); @@ -209,14 +209,14 @@ impl VideoDecoder { } let presentation_timestamp = Time::from_secs(presentation_timestamp_s, self.data.timescale); - if let Err(err) = self.enqueue_requested_segments2(presentation_timestamp) { + if let Err(err) = self.enqueue_requested_segments(presentation_timestamp) { return FrameDecodingResult::Error(err); } self.try_present_frame(presentation_timestamp) } - fn enqueue_requested_segments2( + fn enqueue_requested_segments( &mut self, presentation_timestamp: Time, ) -> Result<(), DecodingError> { @@ -304,65 +304,6 @@ impl VideoDecoder { Ok(()) } - fn enqueue_requested_segments( - &mut self, - presentation_timestamp: Time, - ) -> Result<(), DecodingError> { - let Some(requested_segment_idx) = latest_at_idx( - &self.data.segments, - |segment| segment.start, - &presentation_timestamp, - ) else { - return Err(DecodingError::EmptyVideo); - }; - let requested_segment = &self.data.segments[requested_segment_idx]; - - let Some(requested_sample_idx) = latest_at_idx( - &self.data.samples[requested_segment.range()], - |sample| sample.decode_timestamp, - &presentation_timestamp, - ) else { - // This should never happen, because segments are never empty. - return Err(DecodingError::EmptySegment); - }; - - // Enqueue segments as needed. We maintain a buffer of 2 segments, so we can - // always smoothly transition to the next segment. - // We can always start decoding from any segment, because segments always begin - // with a keyframe. 
- // Backward seeks or seeks across many segments trigger a reset of the decoder, - // because decoding all the samples between the previous sample and the requested - // one would mean decoding and immediately discarding more frames than we otherwise - // need to. - if requested_segment_idx != self.current_segment_idx { - let segment_distance = requested_segment_idx.checked_sub(self.current_segment_idx); - if segment_distance == Some(1) { - // forward seek to next segment - queue up the one _after_ requested - self.enqueue_segment(requested_segment_idx + 1); - } else { - // Startup, forward seek by N>1, or backward seek across segments -> reset decoder - self.reset()?; - self.enqueue_segment(requested_segment_idx); - self.enqueue_segment(requested_segment_idx + 1); - } - } else if requested_sample_idx != self.current_sample_idx { - // special case: handle seeking backwards within a single segment - // this is super inefficient, but it's the only way to handle it - // while maintaining a buffer of 2 segments - let sample_distance = requested_sample_idx as isize - self.current_sample_idx as isize; - if sample_distance < 0 { - self.reset()?; - self.enqueue_segment(requested_segment_idx); - self.enqueue_segment(requested_segment_idx + 1); - } - } - - self.current_segment_idx = requested_segment_idx; - self.current_sample_idx = requested_sample_idx; - - Ok(()) - } - fn try_present_frame(&mut self, presentation_timestamp: Time) -> FrameDecodingResult { let timescale = self.data.timescale; @@ -476,7 +417,7 @@ impl VideoDecoder { return; }; - web_sys::console::log_1(&chunk); + // web_sys::console::log_1(&chunk); if let Err(err) = self.decoder.decode(&chunk) { *self.decode_error.lock() = Some(DecodingError::DecodeChunk(js_error_to_string(&err))); } @@ -484,6 +425,8 @@ impl VideoDecoder { /// Reset the video decoder and discard all frames. 
fn reset(&mut self) -> Result<(), DecodingError> { + re_log::debug!("resetting video decoder"); + self.decoder .reset() .map_err(|err| DecodingError::ResetFailure(js_error_to_string(&err)))?; @@ -503,6 +446,8 @@ fn copy_video_frame_to_texture( frame: &web_sys::VideoFrame, texture: &wgpu::Texture, ) { + web_sys::console::log_1(&frame); + let size = wgpu::Extent3d { width: frame.display_width(), height: frame.display_height(), From 025ae798b09242f97b91663faaa70b031507e53f Mon Sep 17 00:00:00 2001 From: jprochazk Date: Thu, 26 Sep 2024 15:06:46 +0200 Subject: [PATCH 3/4] remove debug logging --- crates/viewer/re_renderer/src/video/decoder/web.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/crates/viewer/re_renderer/src/video/decoder/web.rs b/crates/viewer/re_renderer/src/video/decoder/web.rs index 0146b5ba0cb0..56112e4950dc 100644 --- a/crates/viewer/re_renderer/src/video/decoder/web.rs +++ b/crates/viewer/re_renderer/src/video/decoder/web.rs @@ -107,8 +107,6 @@ impl VideoDecoder { render_context: &RenderContext, data: Arc, ) -> Result { - re_log::debug!("{:?}", data.segments); - let frames = Arc::new(Mutex::new(Vec::with_capacity(16))); let decode_error = Arc::new(Mutex::new(None)); @@ -118,7 +116,6 @@ impl VideoDecoder { let frames = frames.clone(); let decode_error = decode_error.clone(); move |frame: web_sys::VideoFrame| { - // web_sys::console::log_1(&frame); let composition_timestamp = Time::from_micros(frame.timestamp().unwrap_or(0.0), timescale); let duration = Time::from_micros(frame.duration().unwrap_or(0.0), timescale); @@ -261,8 +258,6 @@ impl VideoDecoder { return Err(DecodingError::EmptyVideo); }; - re_log::debug!("decode={decode_sample_idx} segment={requested_segment_idx} sample={requested_sample_idx}"); - // 4. Enqueue segments as needed. // // We maintain a buffer of 2 segments, so we can always smoothly transition to the next segment. 
@@ -417,7 +412,6 @@ impl VideoDecoder { return; }; - // web_sys::console::log_1(&chunk); if let Err(err) = self.decoder.decode(&chunk) { *self.decode_error.lock() = Some(DecodingError::DecodeChunk(js_error_to_string(&err))); } @@ -446,8 +440,6 @@ fn copy_video_frame_to_texture( frame: &web_sys::VideoFrame, texture: &wgpu::Texture, ) { - web_sys::console::log_1(&frame); - let size = wgpu::Extent3d { width: frame.display_width(), height: frame.display_height(), From 4da00c4487192c05669bc8bbb2eb60055c51ff56 Mon Sep 17 00:00:00 2001 From: jprochazk Date: Thu, 26 Sep 2024 15:19:58 +0200 Subject: [PATCH 4/4] nit --- crates/viewer/re_renderer/src/video/decoder/web.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/viewer/re_renderer/src/video/decoder/web.rs b/crates/viewer/re_renderer/src/video/decoder/web.rs index 56112e4950dc..7656f54f3a8a 100644 --- a/crates/viewer/re_renderer/src/video/decoder/web.rs +++ b/crates/viewer/re_renderer/src/video/decoder/web.rs @@ -240,7 +240,7 @@ impl VideoDecoder { // 2. Search _backwards_, starting at `decode_sample_idx`, looking for // the first sample where `sample.composition_timestamp <= presentation_timestamp`. - // This is the sample the user requested. + // This is the sample which when decoded will be presented at the timestamp the user requested. let Some(requested_sample_idx) = self.data.samples[..=decode_sample_idx] .iter() .rposition(|sample| sample.composition_timestamp <= presentation_timestamp)