Almost got it to compile!

emilk committed Oct 1, 2024 (1 parent 308df1c, commit 0b2c27c)

Showing 6 changed files with 123 additions and 65 deletions.
14 changes: 7 additions & 7 deletions crates/store/re_video/src/decode/av1.rs
@@ -9,7 +9,9 @@ use crossbeam::{
 };
 use dav1d::{PixelLayout, PlanarImageComponent};
 
-use super::{Chunk, Frame, PixelFormat, TimeMs};
+use crate::Time;
+
+use super::{Chunk, Frame, PixelFormat};
 
 pub struct Decoder {
     _thread: std::thread::JoinHandle<()>,
@@ -379,12 +381,10 @@ fn i444_to_rgba(picture: &dav1d::Picture) -> Vec<u8> {
     rgba
 }
 
-// We need to convert between `TimeMs` and `i64` because `dav1d` uses `i64` for timestamps.
-fn time_to_i64(time: TimeMs) -> i64 {
-    // multiply by 1000 to lose less precision
-    (time.as_f64() * 1000.0) as i64
+fn time_to_i64(time: Time) -> i64 {
+    time.0 as _ // TODO: what is the timescale of dav1d?
}
 
-fn i64_to_time(i64: i64) -> TimeMs {
-    TimeMs::new(i64 as f64 / 1000.0)
+fn i64_to_time(i64: i64) -> Time {
+    Time::new(i64 as _) // TODO: what is the timescale of dav1d?
}
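
The two TODOs leave the dav1d timescale question open. dav1d does not interpret the `i64` timestamp itself; it is effectively an opaque value passed through from input data to output picture, so the raw cast is sound as long as producer and consumer agree on the unit. If an explicit rescale were ever needed, it could look like this sketch (a standalone helper, not part of this commit; `from_scale` and `to_scale` are assumed to be ticks-per-second values such as the track's `Timescale`):

    fn rescale_time(value: i64, from_scale: u64, to_scale: u64) -> i64 {
        // Widen to i128 so the intermediate product cannot overflow i64.
        ((value as i128 * to_scale as i128) / from_scale as i128) as i64
    }

For example, `rescale_time(90_000, 90_000, 1_000_000)` maps one second at a 90 kHz timescale to 1_000_000 microseconds.
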
10 changes: 5 additions & 5 deletions crates/store/re_video/src/decode/mod.rs
@@ -2,21 +2,21 @@
 pub mod av1;
 
-use crate::TimeMs;
+use crate::Time;
 
 pub struct Chunk {
     pub data: Vec<u8>,
-    pub timestamp: TimeMs,
-    pub duration: TimeMs,
+    pub timestamp: Time,
+    pub duration: Time,
 }
 
 pub struct Frame {
     pub data: Vec<u8>,
     pub width: u32,
     pub height: u32,
     pub format: PixelFormat,
-    pub timestamp: TimeMs,
-    pub duration: TimeMs,
+    pub timestamp: Time,
+    pub duration: Time,
 }
 
 pub enum PixelFormat {
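
Together with the `av1` module this forms a callback-based decoder API: construct an `av1::Decoder` with an on-frame closure, feed it `Chunk`s, and the closure receives decoded `Frame`s (the `_thread` field above suggests this happens on a decoder thread). A usage sketch based on the call sites later in this commit; the function and variable names here are hypothetical:

    fn make_decoder() -> re_video::av1::Decoder {
        re_video::av1::Decoder::new(|frame: re_video::Frame| {
            // Invoked for every decoded frame, in decode order:
            eprintln!("{}x{} frame at {:?}", frame.width, frame.height, frame.timestamp);
        })
    }

    fn push_sample(
        decoder: &re_video::av1::Decoder,
        bytes: &[u8],
        timestamp: re_video::Time,
        duration: re_video::Time,
    ) {
        decoder.decode(re_video::Chunk {
            data: bytes.to_vec(),
            timestamp,
            duration,
        });
    }
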
4 changes: 3 additions & 1 deletion crates/store/re_video/src/demux/mod.rs
@@ -156,11 +156,13 @@ impl Segment {
 #[derive(Debug, Clone)]
 pub struct Sample {
     /// Time at which this sample appears in the decoded bitstream, in time units.
+    ///
+    /// `decode_timestamp <= composition_timestamp`
     pub decode_timestamp: Time,
 
     /// Time at which this sample appears in the frame stream, in time units.
     ///
-    /// `composition >= decode`
+    /// `decode_timestamp <= composition_timestamp`
     pub composition_timestamp: Time,
 
     /// Duration of the sample, in time units.
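
Both doc-comments now state the invariant the same way round: `decode_timestamp <= composition_timestamp`. The two timestamps typically differ only when the stream contains B-frames, which are decoded after the frames they reference but presented before them. A self-contained illustration with made-up values (not from this commit):

    struct Ts {
        decode_timestamp: u64,
        composition_timestamp: u64,
    }

    fn main() {
        // Decode order: I, P, B; presentation order: I, B, P.
        let samples = [
            Ts { decode_timestamp: 0, composition_timestamp: 1 }, // I
            Ts { decode_timestamp: 1, composition_timestamp: 3 }, // P
            Ts { decode_timestamp: 2, composition_timestamp: 2 }, // B
        ];
        // The documented invariant holds for every sample:
        assert!(samples
            .iter()
            .all(|s| s.decode_timestamp <= s.composition_timestamp));
    }
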
20 changes: 15 additions & 5 deletions crates/store/re_video/src/lib.rs
@@ -1,10 +1,10 @@
 //! Video decoding library.
-pub mod decode;
-pub mod demux;
+mod decode;
+mod demux;
 
-pub use decode::{av1, Frame};
-pub use demux::{VideoData, VideoLoadError};
+pub use decode::{av1, Chunk, Frame, PixelFormat};
+pub use demux::{Sample, VideoData, VideoLoadError};
 pub use re_mp4::{TrackId, TrackKind};
 
 use ordered_float::OrderedFloat;
@@ -26,10 +26,11 @@ impl TimeMs {
 
 /// A value in time units.
 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Time(u64);
+pub struct Time(u64); // TODO: i64
 
 impl Time {
     pub const ZERO: Self = Self(0);
+    pub const MAX: Self = Self(u64::MAX);
 
     /// Create a new value in _time units_.
     ///
@@ -83,6 +84,15 @@ impl Time {
     }
 }
 
+impl std::ops::Sub for Time {
+    type Output = Self;
+
+    #[inline]
+    fn sub(self, rhs: Self) -> Self::Output {
+        Self(self.0.saturating_sub(rhs.0))
+    }
+}
+
 /// The number of time units per second.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Timescale(u64);
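
`Time` counts ticks of a per-track `Timescale` (ticks per second), and the new `Sub` impl deliberately saturates so that subtracting a later time from an earlier one cannot underflow while the representation is still unsigned (hence the `TODO: i64`). A sketch of the seconds/ticks relationship, using free functions rather than the crate's actual API:

    fn ticks_from_secs(secs: f64, ticks_per_second: u64) -> u64 {
        (secs * ticks_per_second as f64).round() as u64
    }

    fn secs_from_ticks(ticks: u64, ticks_per_second: u64) -> f64 {
        ticks as f64 / ticks_per_second as f64
    }

    fn main() {
        // 90 kHz is a common video timescale in MP4 files:
        assert_eq!(ticks_from_secs(1.5, 90_000), 135_000);
        assert_eq!(secs_from_ticks(135_000, 90_000), 1.5);
    }
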
138 changes: 92 additions & 46 deletions crates/viewer/re_renderer/src/video/decoder/native.rs
@@ -13,8 +13,7 @@ use crate::{video::FrameDecodingResult, RenderContext};
 use super::latest_at_idx;
 
 use parking_lot::Mutex;
-use re_video::TimeMs;
-use re_video::{decode::Frame, Time};
+use re_video::{Frame, Time};
 
 use super::alloc_video_frame_texture;
@@ -24,10 +23,10 @@ pub struct VideoDecoder {
     queue: Arc<wgpu::Queue>,
     texture: GpuTexture2D,
     zeroed_texture: GpuTexture2D,
-    decoder: re_video::decode::av1::Decoder,
+    decoder: re_video::av1::Decoder,
 
     frames: Arc<Mutex<Vec<Frame>>>,
-    last_used_frame_timestamp: TimeMs,
+    last_used_frame_timestamp: Time,
     current_segment_idx: usize,
     current_sample_idx: usize,
 }
@@ -37,13 +36,14 @@ impl VideoDecoder {
         render_context: &RenderContext,
         data: Arc<re_video::VideoData>,
         _hw_acceleration: DecodeHardwareAcceleration,
-    ) -> Option<Self> {
+    ) -> Result<Self, DecodingError> {
         let frames = Arc::new(Mutex::new(Vec::new()));
 
-        let decoder = re_video::decode::av1::Decoder::new({
+        // TODO: check that data is av1, and return an error otherwise.
+        // TEMP: assuming `av1`, because the `re_video` demuxer will panic if it's not.
+        let decoder = re_video::av1::Decoder::new({
             let frames = frames.clone();
-            move |frame: re_video::decode::Frame| {
+            move |frame: re_video::Frame| {
                 frames.lock().push(frame);
             }
         });
Expand All @@ -63,15 +63,15 @@ impl VideoDecoder {
data.config.coded_height as u32,
);

Some(Self {
Ok(Self {
data,
queue,
texture,
zeroed_texture,
decoder,

frames,
last_used_frame_timestamp: TimeMs::new(f64::MAX),
last_used_frame_timestamp: Time::MAX,
current_segment_idx: usize::MAX,
current_sample_idx: usize::MAX,
})
@@ -128,12 +128,12 @@
                 requested_segment_idx as isize - self.current_segment_idx as isize;
             if segment_distance == 1 {
                 // forward seek to next segment - queue up the one _after_ requested
-                self.enqueue_segment(requested_segment_idx + 1);
+                self.enqueue_segment(requested_segment_idx + 1);
             } else {
                 // forward seek by N>1 OR backward seek across segments - reset
                 self.reset();
-                self.enqueue_all(requested_segment_idx);
-                self.enqueue_all(requested_segment_idx + 1);
+                self.enqueue_segment(requested_segment_idx);
+                self.enqueue_segment(requested_segment_idx + 1);
             }
         } else if requested_sample_idx != self.current_sample_idx {
             // special case: handle seeking backwards within a single segment
@@ -142,8 +142,8 @@
             let sample_distance = requested_sample_idx as isize - self.current_sample_idx as isize;
             if sample_distance < 0 {
                 self.reset();
-                self.enqueue_all(requested_segment_idx);
-                self.enqueue_all(requested_segment_idx + 1);
+                self.enqueue_segment(requested_segment_idx);
+                self.enqueue_segment(requested_segment_idx + 1);
             }
         }
 
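
The branching above only decides which segments to enqueue; mapping the requested presentation timestamp to `requested_segment_idx` and `requested_sample_idx` is delegated to the imported `latest_at_idx` helper, whose definition is not part of this diff. A binary search for the last element at or before the needle would fit how it is used; a sketch under that assumption:

    fn latest_at_idx<T, K: Ord>(items: &[T], key: impl Fn(&T) -> K, needle: &K) -> Option<usize> {
        // `partition_point` returns the first index whose key is greater than
        // the needle; the element just before it (if any) is the match.
        let idx = items.partition_point(|item| key(item) <= *needle);
        idx.checked_sub(1)
    }

    fn main() {
        let segment_start_times = [0_u64, 120, 240, 360];
        assert_eq!(latest_at_idx(&segment_start_times, |t| *t, &250), Some(2));
        assert_eq!(latest_at_idx(&segment_start_times, |t| *t, &119), Some(0));
    }
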
@@ -171,61 +171,54 @@
         let frame_idx = 0;
         let frame = &frames[frame_idx];
 
-        // https://w3c.github.io/webcodecs/#output-videoframes 1. 1. states:
-        //   Let timestamp and duration be the timestamp and duration from the EncodedVideoChunk associated with output.
-        // we always provide both, so they should always be available
-        let frame_timestamp_ms = frame.timestamp().map(TimeMs::new).unwrap_or_default();
-        let frame_duration_ms = frame.duration().map(TimeMs::new).unwrap_or_default();
-
         // This handles the case when we have a buffered frame that's older than the requested timestamp.
         // We don't want to show this frame to the user, because it's not actually the one they requested,
         // so instead return the last decoded frame.
-        if presentation_timestamp - frame_timestamp_ms > frame_duration_ms {
+        if presentation_timestamp - frame.timestamp > frame.duration {
             return Ok(VideoFrameTexture::Pending(self.texture.clone()));
         }
 
-        if self.last_used_frame_timestamp != frame_timestamp_ms {
+        if self.last_used_frame_timestamp != frame.timestamp {
+            self.last_used_frame_timestamp = frame.timestamp;
             copy_video_frame_to_texture(&self.queue, frame, &self.texture.texture);
-            self.last_used_frame_timestamp = frame_timestamp_ms;
         }
 
-        self.texture.clone()
+        Ok(VideoFrameTexture::Ready(self.texture.clone()))
     }
 
     /// Enqueue all samples in the given segment.
     ///
     /// Does nothing if the index is out of bounds.
-    fn enqueue_all(&self, segment_idx: usize) {
+    fn enqueue_segment(&self, segment_idx: usize) -> Result<(), DecodingError> {
         let Some(segment) = self.data.segments.get(segment_idx) else {
-            return;
+            return Ok(());
         };
 
-        self.enqueue(&segment.samples[0], true);
-        for sample in &segment.samples[1..] {
-            self.enqueue(sample, false);
+        let samples = &self.data.samples[segment.range()];
+
+        // The first sample in a segment is always a key frame:
+        self.enqueue_sample(&samples[0], true)?;
+        for sample in &samples[1..] {
+            self.enqueue_sample(sample, false)?;
         }
+
+        Ok(())
     }
 
     /// Enqueue the given sample.
-    fn enqueue(&self, sample: &re_video::Sample, is_key: bool) {
-        let data = Uint8Array::from(&self.data.get(sample));
-        let type_ = if is_key {
-            EncodedVideoChunkType::Key
-        } else {
-            EncodedVideoChunkType::Delta
-        };
-        let chunk = EncodedVideoChunkInit::new(&data, sample.timestamp.as_f64(), type_);
-        chunk.set_duration(sample.duration.as_f64());
-        let Some(chunk) = EncodedVideoChunk::new(&chunk)
-            .inspect_err(|err| {
-                re_log::error!("failed to create video chunk: {}", js_error_to_string(err));
-            })
-            .ok()
-        else {
-            return;
+    fn enqueue_sample(&self, sample: &re_video::Sample, is_key: bool) -> Result<(), DecodingError> {
+        let data = &self.data.data[sample.byte_offset as usize
+            ..sample.byte_offset as usize + sample.byte_length as usize]; // TODO: range check
+
+        let chunk = re_video::Chunk {
+            data: data.to_vec(),
+            timestamp: sample.decode_timestamp,
+            duration: sample.duration,
        };
 
-        self.decoder.decode(&chunk);
+        self.decoder.decode(chunk);
+
+        Ok(())
     }
 
     /// Reset the video decoder and discard all frames.
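
The `TODO: range check` in `enqueue_sample` refers to the raw indexing `self.data.data[offset..offset + length]`, which panics if a corrupt sample points outside the buffer. A bounds-checked variant could be built on `slice::get`, as in this sketch (a standalone helper; the `u32` field types and the error plumbing are assumptions, not part of the commit):

    fn checked_sample_bytes(data: &[u8], byte_offset: u32, byte_length: u32) -> Option<&[u8]> {
        let start = byte_offset as usize;
        // `checked_add` guards the offset + length sum against overflow;
        // `get` turns an out-of-bounds range into `None` instead of a panic.
        let end = start.checked_add(byte_length as usize)?;
        data.get(start..end)
    }

A caller could then map `None` to a `DecodingError` and propagate it with `?`, matching the new `Result` return type.
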
@@ -236,3 +229,56 @@ impl VideoDecoder {
         drop(frames.drain(..));
     }
 }
+
+fn copy_video_frame_to_texture(
+    queue: &Queue,
+    frame: &Frame,
+    texture: &wgpu::Texture,
+) -> Result<(), DecodingError> {
+    let size = wgpu::Extent3d {
+        width: frame.display_width(),
+        height: frame.display_height(),
+        depth_or_array_layers: 1,
+    };
+    let source = {
+        // TODO(jan): The wgpu version we're using doesn't support `VideoFrame` yet.
+        // This got fixed in https://github.com/gfx-rs/wgpu/pull/6170 but hasn't shipped yet.
+        // So instead, we just pretend this is a `HtmlVideoElement` instead.
+        // SAFETY: Depends on the fact that `wgpu` passes the object through as-is,
+        // and doesn't actually inspect it in any way. The browser then does its own
+        // typecheck that doesn't care what kind of image source wgpu gave it.
+        #[allow(unsafe_code)]
+        let frame = unsafe {
+            std::mem::transmute::<web_sys::VideoFrame, web_sys::HtmlVideoElement>(
+                frame.clone().expect("Failed to clone the video frame"),
+            )
+        };
+        // Fake width & height to work around wgpu validating this as if it was a `HtmlVideoElement`.
+        // Since it thinks this is a `HtmlVideoElement`, it will want to call `videoWidth` and `videoHeight`
+        // on it to validate the size.
+        // We simply redirect `displayWidth`/`displayHeight` to `videoWidth`/`videoHeight` to make it work!
+        let display_width = js_sys::Reflect::get(&frame, &"displayWidth".into())
+            .expect("Failed to get displayWidth property from VideoFrame.");
+        js_sys::Reflect::set(&frame, &"videoWidth".into(), &display_width)
+            .expect("Failed to set videoWidth property.");
+        let display_height = js_sys::Reflect::get(&frame, &"displayHeight".into())
+            .expect("Failed to get displayHeight property from VideoFrame.");
+        js_sys::Reflect::set(&frame, &"videoHeight".into(), &display_height)
+            .expect("Failed to set videoHeight property.");
+
+        wgpu_types::ImageCopyExternalImage {
+            source: wgpu_types::ExternalImageSource::HTMLVideoElement(frame),
+            origin: wgpu_types::Origin2d { x: 0, y: 0 },
+            flip_y: false,
+        }
+    };
+    let dest = wgpu::ImageCopyTextureTagged {
+        texture,
+        mip_level: 0,
+        origin: wgpu::Origin3d { x: 0, y: 0, z: 0 },
+        aspect: wgpu::TextureAspect::All,
+        color_space: wgpu::PredefinedColorSpace::Srgb,
+        premultiplied_alpha: false,
+    };
+    queue.copy_external_image_to_texture(&source, dest, size);
+
+    Ok(())
+}
2 changes: 1 addition & 1 deletion crates/viewer/re_renderer/src/video/decoder/web.rs
@@ -150,7 +150,7 @@ impl VideoDecoder {
             decoder_output,
             hw_acceleration,
 
-            last_used_frame_timestamp: Time::new(u64::MAX),
+            last_used_frame_timestamp: Time::MAX,
             current_segment_idx: usize::MAX,
             current_sample_idx: usize::MAX,
 
