Fix playback of HDR AV1 videos in native viewer (#7978)
### What
* Closes #7973
* Part of #7594

Best reviewed with whitespace changes ignored.

#### Before
<img width="1639" alt="Screenshot 2024-11-02 at 12 02 13"
src="https://github.com/user-attachments/assets/500e5b10-c82f-483b-9e2d-c5cc75f29e52">


#### After
<img width="1639" alt="Screenshot 2024-11-02 at 12 00 59"
src="https://github.com/user-attachments/assets/c7067210-bb47-4cf7-8633-30a8b3edb42a">

#### VLC (for reference)
<img width="1552" alt="Screenshot 2024-11-02 at 16 43 10"
src="https://github.com/user-attachments/assets/f85be55c-3878-4fa0-b6fd-3b54f015622a">

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using examples from latest `main` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7978?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7978?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!
* [x] If applicable, I have noted any breaking changes to the log API in
`CHANGELOG.md` and the migration guide

- [PR Build Summary](https://build.rerun.io/pr/7978)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.

---------

Co-authored-by: Andreas Reich <[email protected]>
emilk and Wumpf authored Nov 2, 2024
1 parent df8dc62 commit 04435c4
Showing 6 changed files with 139 additions and 104 deletions.
189 changes: 103 additions & 86 deletions crates/store/re_video/src/decode/av1.rs
@@ -105,7 +105,8 @@ impl SyncDav1dDecoder {
};
match picture {
Ok(picture) => {
output_picture(&self.debug_name, &picture, on_output);
let frame = create_frame(&self.debug_name, &picture);
on_output(frame);
count += 1;
}
Err(dav1d::Error::Again) => {
@@ -121,98 +122,115 @@ impl SyncDav1dDecoder {
}
}

fn output_picture(
debug_name: &str,
picture: &dav1d::Picture,
on_output: &(dyn Fn(Result<Frame>) + Send + Sync),
) {
let data = {
re_tracing::profile_scope!("copy_picture_data");

match picture.pixel_layout() {
PixelLayout::I400 => picture.plane(PlanarImageComponent::Y).to_vec(),
PixelLayout::I420 | PixelLayout::I422 | PixelLayout::I444 => {
// TODO(#7594): If `picture.bit_depth()` isn't 8 we have a problem:
// We can't handle high bit depths yet and the YUV converter at the other side
// bases its opinion on what an acceptable number of incoming bytes is on this.
// So we just clamp to that expectation, ignoring `picture.stride(PlanarImageComponent::Y)` & friends.
// Note that `bit_depth` is either 8 or 16, which is semi-independent of `bits_per_component` (which is None/8/10/12).
if picture.bit_depth() != 8 {
re_log::warn_once!(
"Video {debug_name:?} uses {} bis per component. Only a bit depth of 8 bit is currently unsupported.",
picture.bits_per_component().map_or(picture.bit_depth(), |bpc| bpc.0)
);
}
fn create_frame(debug_name: &str, picture: &dav1d::Picture) -> Result<Frame> {
re_tracing::profile_function!();

let bits_per_component = picture
.bits_per_component()
.map_or(picture.bit_depth(), |bpc| bpc.0);

let bytes_per_component = if bits_per_component == 8 {
1
} else if 8 < bits_per_component && bits_per_component <= 16 {
// TODO(#7594): Support HDR video.
// We currently handle HDR videos by throwing away the lowest bits,
// and doing so rather slowly on CPU. It works, but the colors won't be perfectly correct.
re_log::warn_once!(
"{debug_name:?} is a High-Dynamic-Range (HDR) video with {bits_per_component} bits per component. Rerun does not support this fully. Color accuracy and performance may suffer.",
);
// Note that `bit_depth` is either 8 or 16, which is semi-independent of `bits_per_component` (which is None/8/10/12).
2
} else {
return Err(Error::BadBitsPerComponent(bits_per_component));
};

let mut data = match picture.pixel_layout() {
// Monochrome
PixelLayout::I400 => picture.plane(PlanarImageComponent::Y).to_vec(),

PixelLayout::I420 | PixelLayout::I422 | PixelLayout::I444 => {
let height_y = picture.height() as usize;
let height_uv = match picture.pixel_layout() {
PixelLayout::I400 => 0,
PixelLayout::I420 => height_y / 2,
PixelLayout::I422 | PixelLayout::I444 => height_y,
};

let packed_stride_y = bytes_per_component * picture.width() as usize;
let actual_stride_y = picture.stride(PlanarImageComponent::Y) as usize;

let height_y = picture.height() as usize;
let height_uv = match picture.pixel_layout() {
PixelLayout::I400 => 0,
PixelLayout::I420 => height_y / 2,
PixelLayout::I422 | PixelLayout::I444 => height_y,
};

let packed_stride_y = picture.width() as usize;
let actual_stride_y = picture.stride(PlanarImageComponent::Y) as usize;

let packed_stride_uv = match picture.pixel_layout() {
PixelLayout::I400 => 0,
PixelLayout::I420 | PixelLayout::I422 => packed_stride_y / 2,
PixelLayout::I444 => packed_stride_y,
};
let actual_stride_uv = picture.stride(PlanarImageComponent::U) as usize; // U / V stride is always the same.

let num_packed_bytes_y = packed_stride_y * height_y;
let num_packed_bytes_uv = packed_stride_uv * height_uv;

if actual_stride_y == packed_stride_y && actual_stride_uv == packed_stride_uv {
// Best case scenario: There's no additional strides at all, so we can just copy the data directly.
// TODO(andreas): This still has *significant* overhead for 8k video. Can we take ownership of the data instead without a copy?
re_tracing::profile_scope!("fast path");
let plane_y = &picture.plane(PlanarImageComponent::Y)[0..num_packed_bytes_y];
let plane_u = &picture.plane(PlanarImageComponent::U)[0..num_packed_bytes_uv];
let plane_v = &picture.plane(PlanarImageComponent::V)[0..num_packed_bytes_uv];
[plane_y, plane_u, plane_v].concat()
} else {
// At least either y or u/v have strides.
//
// We could make our image ingestion pipeline even more sophisticated and pass that stride information through.
// But given that this is a matter of replacing a single large memcpy with a few hundred _still_ quite large ones,
// this should not make a lot of difference (citation needed!).

let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2);
{
let plane = picture.plane(PlanarImageComponent::Y);
if packed_stride_y == actual_stride_y {
data.extend_from_slice(&plane[0..num_packed_bytes_y]);
} else {
re_tracing::profile_scope!("slow path, y-plane");

for y in 0..height_y {
let offset = y * actual_stride_y;
data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]);
}
let packed_stride_uv = match picture.pixel_layout() {
PixelLayout::I400 => 0,
PixelLayout::I420 | PixelLayout::I422 => packed_stride_y / 2,
PixelLayout::I444 => packed_stride_y,
};
let actual_stride_uv = picture.stride(PlanarImageComponent::U) as usize; // U / V stride is always the same.

let num_packed_bytes_y = packed_stride_y * height_y;
let num_packed_bytes_uv = packed_stride_uv * height_uv;

if actual_stride_y == packed_stride_y && actual_stride_uv == packed_stride_uv {
// Best case scenario: There's no additional strides at all, so we can just copy the data directly.
// TODO(andreas): This still has *significant* overhead for 8k video. Can we take ownership of the data instead without a copy?
re_tracing::profile_scope!("fast path");
let plane_y = &picture.plane(PlanarImageComponent::Y)[0..num_packed_bytes_y];
let plane_u = &picture.plane(PlanarImageComponent::U)[0..num_packed_bytes_uv];
let plane_v = &picture.plane(PlanarImageComponent::V)[0..num_packed_bytes_uv];
[plane_y, plane_u, plane_v].concat()
} else {
// At least either y or u/v have strides.
//
// We could make our image ingestion pipeline even more sophisticated and pass that stride information through.
// But given that this is a matter of replacing a single large memcpy with a few hundred _still_ quite large ones,
// this should not make a lot of difference (citation needed!).

let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2);
{
let plane = picture.plane(PlanarImageComponent::Y);
if packed_stride_y == actual_stride_y {
data.extend_from_slice(&plane[0..num_packed_bytes_y]);
} else {
re_tracing::profile_scope!("slow path, y-plane");

for y in 0..height_y {
let offset = y * actual_stride_y;
data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]);
}
}
for comp in [PlanarImageComponent::U, PlanarImageComponent::V] {
let plane = picture.plane(comp);
if actual_stride_uv == packed_stride_uv {
data.extend_from_slice(&plane[0..num_packed_bytes_uv]);
} else {
re_tracing::profile_scope!("slow path, u/v-plane");

for y in 0..height_uv {
let offset = y * actual_stride_uv;
data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]);
}
}
for comp in [PlanarImageComponent::U, PlanarImageComponent::V] {
let plane = picture.plane(comp);
if actual_stride_uv == packed_stride_uv {
data.extend_from_slice(&plane[0..num_packed_bytes_uv]);
} else {
re_tracing::profile_scope!("slow path, u/v-plane");

for y in 0..height_uv {
let offset = y * actual_stride_uv;
data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]);
}
}

data
}

data
}
}
};

if bytes_per_component == 2 {
re_tracing::profile_scope!("Truncate HDR"); // costs around 1.5ms per megapixel on MacBook Pro M3 Max
let rshift = bits_per_component - 8; // we throw away the low bits
data = data
.chunks(2)
.map(|c| {
let lo = c[0] as u16;
let hi = c[1] as u16;
let full = (hi << 8) | lo;
(full >> rshift) as u8
})
.collect();
}

let format = PixelFormat::Yuv {
layout: match picture.pixel_layout() {
PixelLayout::I400 => YuvPixelLayout::Y400,
@@ -227,7 +245,7 @@ fn output_picture(
coefficients: yuv_matrix_coefficients(debug_name, picture),
};

let frame = Frame {
Ok(Frame {
content: FrameContent {
data,
width: picture.width(),
@@ -238,8 +256,7 @@
presentation_timestamp: Time(picture.timestamp().unwrap_or(0)),
duration: Time(picture.duration()),
},
};
on_output(Ok(frame));
})
}

fn yuv_matrix_coefficients(debug_name: &str, picture: &dav1d::Picture) -> YuvMatrixCoefficients {
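
For reference, here is a minimal standalone sketch of the bit-depth truncation that `create_frame` performs above (the same `chunks(2)` + right-shift arithmetic). `truncate_to_8bit` is a hypothetical helper written for illustration, and it assumes the 16-bit samples are laid out in little-endian byte order, as the `(hi << 8) | lo` code above does:

```rust
/// Hypothetical helper mirroring the "Truncate HDR" step above:
/// keep only the 8 most significant of the `bits_per_component` meaningful bits.
fn truncate_to_8bit(data: &[u8], bits_per_component: usize) -> Vec<u8> {
    let rshift = bits_per_component - 8; // we throw away the low bits
    data.chunks(2)
        .map(|c| {
            let full = u16::from_le_bytes([c[0], c[1]]); // assumes little-endian sample layout
            (full >> rshift) as u8
        })
        .collect()
}

fn main() {
    // A 10-bit sample at full scale (1023) maps to full scale in 8 bits (255)…
    assert_eq!(truncate_to_8bit(&1023u16.to_le_bytes(), 10), vec![255]);
    // …and mid-gray (512 in 10 bits) maps to 128 in 8 bits.
    assert_eq!(truncate_to_8bit(&512u16.to_le_bytes(), 10), vec![128]);
}
```
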
19 changes: 11 additions & 8 deletions crates/store/re_video/src/decode/mod.rs
@@ -110,7 +110,10 @@ pub enum Error {

#[cfg(target_arch = "wasm32")]
#[error(transparent)]
WebDecoderError(#[from] webcodecs::Error),
WebDecoder(#[from] webcodecs::Error),

#[error("Unsupported bits per component: {0}")]
BadBitsPerComponent(usize),
}

pub type Result<T = (), E = Error> = std::result::Result<T, E>;
@@ -167,14 +170,14 @@ pub fn new_decoder(
{
if cfg!(debug_assertions) {
return Err(Error::NoNativeAv1Debug); // because debug builds of rav1d are EXTREMELY slow
} else {
re_log::trace!("Decoding AV1…");
return Ok(Box::new(async_decoder_wrapper::AsyncDecoderWrapper::new(
debug_name.to_owned(),
Box::new(av1::SyncDav1dDecoder::new(debug_name.to_owned())?),
on_output,
)));
}

re_log::trace!("Decoding AV1…");
return Ok(Box::new(async_decoder_wrapper::AsyncDecoderWrapper::new(
debug_name.to_owned(),
Box::new(av1::SyncDav1dDecoder::new(debug_name.to_owned())?),
on_output,
)));
}
}

2 changes: 1 addition & 1 deletion crates/store/re_video/src/decode/webcodecs.rs
@@ -190,7 +190,7 @@ fn init_video_decoder(
};

let on_error = Closure::wrap(Box::new(move |err: js_sys::Error| {
on_output_callback(Err(super::Error::WebDecoderError(Error::Decoding(
on_output_callback(Err(super::Error::WebDecoder(Error::Decoding(
js_error_to_string(&err),
))));
}) as Box<dyn Fn(js_sys::Error)>);
25 changes: 17 additions & 8 deletions crates/viewer/re_data_ui/src/video.rs
@@ -33,14 +33,23 @@ pub fn show_video_blob_info(
)),
);
if let Some(bit_depth) = data.config.stsd.contents.bit_depth() {
let mut bit_depth = bit_depth.to_string();
if data.is_monochrome() == Some(true) {
bit_depth = format!("{bit_depth} (monochrome)");
}

ui.list_item_flat_noninteractive(
PropertyContent::new("Bit depth").value_text(bit_depth),
);
ui.list_item_flat_noninteractive(PropertyContent::new("Bit depth").value_fn(
|ui, _| {
ui.label(bit_depth.to_string());
if 8 < bit_depth {
// TODO(#7594): HDR videos
ui.warning_label("HDR").on_hover_ui(|ui| {
ui.label(
"High-dynamic-range videos not yet supported by Rerun",
);
ui.hyperlink("https://github.com/rerun-io/rerun/issues/7594");
});
}
if data.is_monochrome() == Some(true) {
ui.label("(monochrome)");
}
},
));
}
if let Some(subsampling_mode) = data.subsampling_mode() {
// Don't show subsampling mode for monochrome, doesn't make sense usually.
6 changes: 6 additions & 0 deletions crates/viewer/re_ui/src/ui_ext.rs
@@ -18,6 +18,12 @@ pub trait UiExt {
fn ui(&self) -> &egui::Ui;
fn ui_mut(&mut self) -> &mut egui::Ui;

/// Shows a warning label.
fn warning_label(&mut self, warning_text: &str) -> egui::Response {
let label = egui::Label::new(self.ui().ctx().warning_text(warning_text));
self.ui_mut().add(label)
}

/// Shows a small error label with the given text on hover and copies the text to the clipboard on click.
fn error_label(&mut self, error_text: &str) -> egui::Response {
let label = egui::Label::new(self.ui().ctx().error_text("Error"))
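
As a usage note, the new `warning_label` helper composes with egui's hover UI just like the existing `error_label`. The sketch below mirrors the call site added in `re_data_ui/src/video.rs` above; the wrapper function `hdr_warning` is hypothetical:

```rust
use re_ui::UiExt as _; // brings `warning_label` (and `error_label`) into scope

/// Hypothetical wrapper: shows an "HDR" warning chip with an explanatory hover,
/// mirroring the bit-depth UI added in `re_data_ui/src/video.rs`.
fn hdr_warning(ui: &mut egui::Ui) {
    ui.warning_label("HDR").on_hover_ui(|ui| {
        ui.label("High-dynamic-range videos are not yet fully supported by Rerun.");
        ui.hyperlink("https://github.com/rerun-io/rerun/issues/7594");
    });
}
```
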
2 changes: 1 addition & 1 deletion crates/viewer/re_viewer/src/web_tools.rs
@@ -143,7 +143,7 @@ pub fn url_to_receiver(
re_rrdp_comms::stream_recording(url, Some(ui_waker)).map_err(|err| err.into())
}
#[cfg(not(feature = "rrdp"))]
EndpointCategory::DataPlatform(url) => {
EndpointCategory::DataPlatform(_url) => {
anyhow::bail!("Missing 'rrdp' feature flag");
}
EndpointCategory::WebEventListener(url) => {
