Skip to content

Commit

Permalink
improve av1 fast path further
Browse files Browse the repository at this point in the history
  • Loading branch information
Wumpf committed Oct 11, 2024
1 parent ff09910 commit df56fe4
Showing 1 changed file with 40 additions and 28 deletions.
68 changes: 40 additions & 28 deletions crates/store/re_video/src/decode/av1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,39 +153,51 @@ fn output_picture(picture: &dav1d::Picture, on_output: &(dyn Fn(Result<Frame>) +
let num_packed_bytes_y = packed_stride_y * height_y;
let num_packed_bytes_uv = packed_stride_uv * height_uv;

let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2);

// We could make our image ingestion pipeline even more sophisticated and pass that stride information through.
// But given that this is a matter of replacing a single large memcpy with a few hundred _still_ quite large ones,
// this should not make a lot of difference (citation needed!).
{
let plane = picture.plane(PlanarImageComponent::Y);
if actual_stride_y != packed_stride_y {
re_tracing::profile_scope!("slow path copy y-plane");

for y in 0..height_y {
let offset = y * actual_stride_y;
data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]);
if actual_stride_y == packed_stride_y && actual_stride_uv == packed_stride_uv {
// Best case scenario: Neither the y nor the u/v planes have row padding (actual stride equals packed stride), so we can just copy the data directly.
// TODO(andreas): This still has *significant* overhead for 8k video. Can we take ownership of the data instead without a copy?
re_tracing::profile_scope!("fast path");
let plane_y = &picture.plane(PlanarImageComponent::Y)[0..num_packed_bytes_y];
let plane_u = &picture.plane(PlanarImageComponent::U)[0..num_packed_bytes_uv];
let plane_v = &picture.plane(PlanarImageComponent::V)[0..num_packed_bytes_uv];
[plane_y, plane_u, plane_v].concat()
} else {
// At least one of the y or u/v planes has an actual stride that differs from its packed stride.
//
// We could make our image ingestion pipeline even more sophisticated and pass that stride information through.
// But given that this is a matter of replacing a single large memcpy with a few hundred _still_ quite large ones,
// this should not make a lot of difference (citation needed!).

let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2);
{
let plane = picture.plane(PlanarImageComponent::Y);
if actual_stride_y != packed_stride_y {
re_tracing::profile_scope!("slow path, y-plane");

for y in 0..height_y {
let offset = y * actual_stride_y;
data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]);
}
} else {
data.extend_from_slice(&plane[0..num_packed_bytes_y]);
}
} else {
data.extend_from_slice(&plane[0..num_packed_bytes_y]);
}
}
for comp in [PlanarImageComponent::U, PlanarImageComponent::V] {
let plane = picture.plane(comp);
if actual_stride_uv != packed_stride_uv {
re_tracing::profile_scope!("slow path copy u/v-plane");

for y in 0..height_uv {
let offset = y * actual_stride_uv;
data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]);
for comp in [PlanarImageComponent::U, PlanarImageComponent::V] {
let plane = picture.plane(comp);
if actual_stride_uv != packed_stride_uv {
re_tracing::profile_scope!("slow path, u/v-plane");

for y in 0..height_uv {
let offset = y * actual_stride_uv;
data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]);
}
} else {
data.extend_from_slice(&plane[0..num_packed_bytes_uv]);
}
} else {
data.extend_from_slice(&plane[0..num_packed_bytes_uv]);
}
}

data
data
}
}
}
};
Expand Down

0 comments on commit df56fe4

Please sign in to comment.