Skip to content

Commit

Permalink
Refactor video decoding (#7625)
Browse files Browse the repository at this point in the history
### What
* Closes #7583 

This refactors the video decoder to unify the native and web decoder
logic.

It changes how error handling is done quite a bit, so this will require
some testing.

This PR also hides the spinner if the current frame is less than 400ms
outdated.
This means a small hiccup during decoding will not cause the spinner to
show up,
but jumping a big step in the video stream (moving the time cursor by a
large step), will make the spinner show up immediately, making Rerun
feel responsive.

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using examples from latest `main` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7625?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7625?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!
* [x] I have noted any breaking changes to the log API in
`CHANGELOG.md` and the migration guide

- [PR Build Summary](https://build.rerun.io/pr/7625)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.
  • Loading branch information
emilk authored Oct 8, 2024
1 parent 87e76f8 commit 187b673
Show file tree
Hide file tree
Showing 13 changed files with 642 additions and 814 deletions.
6 changes: 2 additions & 4 deletions crates/store/re_video/examples/frames.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ use std::{
use indicatif::ProgressBar;
use parking_lot::Mutex;

use re_video::demux::mp4::load_mp4;

fn main() {
// frames <video.mp4>
let args: Vec<_> = std::env::args().collect();
Expand All @@ -27,11 +25,11 @@ fn main() {
println!("Decoding {video_path}");

let video = std::fs::read(video_path).expect("failed to read video");
let video = load_mp4(&video).expect("failed to load video");
let video = re_video::VideoData::load_mp4(&video).expect("failed to load video");

println!(
"{} {}x{}",
video.segments.len(),
video.gops.len(),
video.config.coded_width,
video.config.coded_height
);
Expand Down
31 changes: 19 additions & 12 deletions crates/store/re_video/src/demux/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Video demultiplexing.
//!
//! Parses a video file into a raw [`VideoData`] struct, which contains basic metadata and a list of [`Segment`]s.
//! Parses a video file into a raw [`VideoData`] struct, which contains basic metadata and a list of [`GroupOfPictures`]s.
//!
//! The entry point is [`VideoData::load_from_bytes`]
//! which produces an instance of [`VideoData`] from any supported video container.
Expand All @@ -26,9 +26,9 @@ pub struct VideoData {
/// Duration of the video, in time units.
pub duration: Time,

/// We split video into segments, each beginning with a key frame,
/// We split video into GOPs, each beginning with a key frame,
/// followed by any number of delta frames.
pub segments: Vec<Segment>,
pub gops: Vec<GroupOfPictures>,

/// Samples contain the byte offsets into `data` for each frame.
///
Expand All @@ -54,7 +54,8 @@ impl VideoData {
/// at the very least there should be a way to extract only metadata.
pub fn load_from_bytes(data: &[u8], media_type: &str) -> Result<Self, VideoLoadError> {
match media_type {
"video/mp4" => mp4::load_mp4(data),
"video/mp4" => Self::load_mp4(data),

media_type => {
if media_type.starts_with("video/") {
Err(VideoLoadError::UnsupportedMimeType {
Expand Down Expand Up @@ -111,7 +112,7 @@ impl VideoData {
pub fn frame_timestamps_ns(&self) -> impl Iterator<Item = i64> + '_ {
// GOPs are guaranteed to be sorted among each other, but within a GOP,
// presentation timestamps may not be sorted since this is sorted by decode timestamps.
self.segments.iter().flat_map(|seg| {
self.gops.iter().flat_map(|seg| {
self.samples[seg.range()]
.iter()
.map(|sample| sample.composition_timestamp.into_nanos(self.timescale))
Expand All @@ -138,18 +139,20 @@ impl VideoData {
}
}

/// A segment of a video.
/// A Group of Pictures (GOP) always starts with an I-frame, followed by delta-frames.
///
/// See <https://en.wikipedia.org/wiki/Group_of_pictures> for more.
#[derive(Debug, Clone)]
pub struct Segment {
/// Decode timestamp of the first sample in this segment, in time units.
pub struct GroupOfPictures {
/// Decode timestamp of the first sample in this GOP, in time units.
pub start: Time,

/// Range of samples contained in this segment.
/// Range of samples contained in this GOP.
pub sample_range: Range<u32>,
}

impl Segment {
/// The segment's `sample_range` mapped to `usize` for slicing.
impl GroupOfPictures {
/// The GOP's `sample_range` mapped to `usize` for slicing.
pub fn range(&self) -> Range<usize> {
Range {
start: self.sample_range.start as usize,
Expand All @@ -163,11 +166,15 @@ impl Segment {
pub struct Sample {
/// Time at which this sample appears in the decoded bitstream, in time units.
///
/// Samples should be decoded in this order.
///
/// `decode_timestamp <= composition_timestamp`
pub decode_timestamp: Time,

/// Time at which this sample appears in the frame stream, in time units.
///
/// The frame should be shown at this time.
///
/// `decode_timestamp <= composition_timestamp`
pub composition_timestamp: Time,

Expand Down Expand Up @@ -245,7 +252,7 @@ impl std::fmt::Debug for VideoData {
.field("config", &self.config)
.field("timescale", &self.timescale)
.field("duration", &self.duration)
.field("segments", &self.segments)
.field("gops", &self.gops)
.field(
"samples",
&self.samples.iter().enumerate().collect::<Vec<_>>(),
Expand Down
150 changes: 76 additions & 74 deletions crates/store/re_video/src/demux/mp4.rs
Original file line number Diff line number Diff line change
@@ -1,89 +1,91 @@
#![allow(clippy::map_err_ignore)]

use super::{Config, Sample, Segment, VideoData, VideoLoadError};
use super::{Config, GroupOfPictures, Sample, VideoData, VideoLoadError};

use crate::{Time, Timescale};

pub fn load_mp4(bytes: &[u8]) -> Result<VideoData, VideoLoadError> {
let mp4 = re_mp4::Mp4::read_bytes(bytes)?;

let mp4_tracks = mp4.tracks().iter().map(|(k, t)| (*k, t.kind)).collect();

let track = mp4
.tracks()
.values()
.find(|t| t.kind == Some(re_mp4::TrackKind::Video))
.ok_or_else(|| VideoLoadError::NoVideoTrack)?;

let codec = track
.codec_string(&mp4)
.ok_or_else(|| VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?;
let description = track
.raw_codec_config(&mp4)
.ok_or_else(|| VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?;

let coded_height = track.height;
let coded_width = track.width;

let config = Config {
codec,
description,
coded_height,
coded_width,
};

let timescale = Timescale::new(track.timescale);
let duration = Time::new(track.duration as i64);
let mut samples = Vec::<Sample>::new();
let mut segments = Vec::<Segment>::new();
let mut segment_sample_start_index = 0;
let data = track.data.clone();

for sample in &track.samples {
if sample.is_sync && !samples.is_empty() {
let start = samples[segment_sample_start_index].decode_timestamp;
let sample_range = segment_sample_start_index as u32..samples.len() as u32;
segments.push(Segment {
impl VideoData {
pub fn load_mp4(bytes: &[u8]) -> Result<Self, VideoLoadError> {
let mp4 = re_mp4::Mp4::read_bytes(bytes)?;

let mp4_tracks = mp4.tracks().iter().map(|(k, t)| (*k, t.kind)).collect();

let track = mp4
.tracks()
.values()
.find(|t| t.kind == Some(re_mp4::TrackKind::Video))
.ok_or_else(|| VideoLoadError::NoVideoTrack)?;

let codec = track
.codec_string(&mp4)
.ok_or_else(|| VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?;
let description = track
.raw_codec_config(&mp4)
.ok_or_else(|| VideoLoadError::UnsupportedCodec(unknown_codec_fourcc(&mp4, track)))?;

let coded_height = track.height;
let coded_width = track.width;

let config = Config {
codec,
description,
coded_height,
coded_width,
};

let timescale = Timescale::new(track.timescale);
let duration = Time::new(track.duration as i64);
let mut samples = Vec::<Sample>::new();
let mut gops = Vec::<GroupOfPictures>::new();
let mut gop_sample_start_index = 0;
let data = track.data.clone();

for sample in &track.samples {
if sample.is_sync && !samples.is_empty() {
let start = samples[gop_sample_start_index].decode_timestamp;
let sample_range = gop_sample_start_index as u32..samples.len() as u32;
gops.push(GroupOfPictures {
start,
sample_range,
});
gop_sample_start_index = samples.len();
}

let decode_timestamp = Time::new(sample.decode_timestamp as i64);
let composition_timestamp = Time::new(sample.composition_timestamp as i64);
let duration = Time::new(sample.duration as i64);

let byte_offset = sample.offset as u32;
let byte_length = sample.size as u32;

samples.push(Sample {
decode_timestamp,
composition_timestamp,
duration,
byte_offset,
byte_length,
});
}

if !samples.is_empty() {
let start = samples[gop_sample_start_index].decode_timestamp;
let sample_range = gop_sample_start_index as u32..samples.len() as u32;
gops.push(GroupOfPictures {
start,
sample_range,
});
segment_sample_start_index = samples.len();
}

let decode_timestamp = Time::new(sample.decode_timestamp as i64);
let composition_timestamp = Time::new(sample.composition_timestamp as i64);
let duration = Time::new(sample.duration as i64);

let byte_offset = sample.offset as u32;
let byte_length = sample.size as u32;

samples.push(Sample {
decode_timestamp,
composition_timestamp,
Ok(Self {
config,
timescale,
duration,
byte_offset,
byte_length,
});
}

if !samples.is_empty() {
let start = samples[segment_sample_start_index].decode_timestamp;
let sample_range = segment_sample_start_index as u32..samples.len() as u32;
segments.push(Segment {
start,
sample_range,
});
gops,
samples,
data,
mp4_tracks,
})
}

Ok(VideoData {
config,
timescale,
duration,
segments,
samples,
data,
mp4_tracks,
})
}

fn unknown_codec_fourcc(mp4: &re_mp4::Mp4, track: &re_mp4::Track) -> re_mp4::FourCC {
Expand Down
9 changes: 9 additions & 0 deletions crates/store/re_video/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ impl Time {
}
}

impl std::ops::Add for Time {
type Output = Self;

#[inline]
fn add(self, rhs: Self) -> Self::Output {
Self(self.0.saturating_add(rhs.0))
}
}

impl std::ops::Sub for Time {
type Output = Self;

Expand Down
93 changes: 0 additions & 93 deletions crates/store/re_video/src/mp4.rs

This file was deleted.

Loading

0 comments on commit 187b673

Please sign in to comment.