Skip to content

Commit

Permalink
Merge pull request #410 from jellyfin/improve-vt-overlay-perf
Browse files Browse the repository at this point in the history
avfilter/vf_overlay_videotoolbox: add fast code path for bgra overlay
  • Loading branch information
gnattu authored Jul 7, 2024
2 parents b28524f + 87e34ff commit 0d2ae62
Showing 1 changed file with 224 additions and 50 deletions.
274 changes: 224 additions & 50 deletions debian/patches/0062-avfilter-add-vf_overlay_videotoolbox.patch
Original file line number Diff line number Diff line change
Expand Up @@ -120,26 +120,26 @@ Signed-off-by: Gnattu OC <[email protected]>
create mode 100644 libavfilter/metal/vf_overlay_videotoolbox.metal
create mode 100644 libavfilter/vf_overlay_videotoolbox.m

diff --git a/configure b/configure
index 23066efa32..a7c349d126 100755
--- a/configure
+++ b/configure
@@ -3720,6 +3720,7 @@ overlay_qsv_filter_select="qsvvpp"
Index: FFmpeg/configure
===================================================================
--- FFmpeg.orig/configure
+++ FFmpeg/configure
@@ -3722,6 +3722,7 @@ overlay_qsv_filter_select="qsvvpp"
overlay_vaapi_filter_deps="vaapi VAProcPipelineCaps_blend_flags"
overlay_vulkan_filter_deps="vulkan spirv_compiler"
overlay_rkrga_filter_deps="rkrga"
+overlay_videotoolbox_filter_deps="metal corevideo coreimage videotoolbox"
owdenoise_filter_deps="gpl"
pad_opencl_filter_deps="opencl"
pan_filter_deps="swresample"
diff --git a/doc/filters.texi b/doc/filters.texi
index e0436a5755..bfb77562cb 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19033,6 +19033,58 @@ See @ref{framesync}.

Index: FFmpeg/doc/filters.texi
===================================================================
--- FFmpeg.orig/doc/filters.texi
+++ FFmpeg/doc/filters.texi
@@ -18351,6 +18351,58 @@ See @ref{framesync}.
This filter also supports the @ref{framesync} options.

+@section overlay_videotoolbox
+
+Overlay one video on top of another.
Expand Down Expand Up @@ -193,13 +193,13 @@ index e0436a5755..bfb77562cb 100644
+@end itemize
+
@section owdenoise

Apply Overcomplete Wavelet denoiser.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index f6c1d641d6..ea1389ab57 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -401,6 +401,9 @@ OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \
Index: FFmpeg/libavfilter/Makefile
===================================================================
--- FFmpeg.orig/libavfilter/Makefile
+++ FFmpeg/libavfilter/Makefile
@@ -403,6 +403,9 @@ OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)
OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o
OBJS-$(CONFIG_OVERLAY_VAAPI_FILTER) += vf_overlay_vaapi.o framesync.o vaapi_vpp.o
OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o vulkan_filter.o
Expand All @@ -209,43 +209,43 @@ index f6c1d641d6..ea1389ab57 100644
OBJS-$(CONFIG_OVERLAY_RKRGA_FILTER) += vf_overlay_rkrga.o framesync.o
OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o
OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 149bf50997..ec9d975ecb 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -378,6 +378,7 @@ extern const AVFilter ff_vf_overlay_vaapi;
Index: FFmpeg/libavfilter/allfilters.c
===================================================================
--- FFmpeg.orig/libavfilter/allfilters.c
+++ FFmpeg/libavfilter/allfilters.c
@@ -380,6 +380,7 @@ extern const AVFilter ff_vf_overlay_vaap
extern const AVFilter ff_vf_overlay_vulkan;
extern const AVFilter ff_vf_overlay_cuda;
extern const AVFilter ff_vf_overlay_rkrga;
+extern const AVFilter ff_vf_overlay_videotoolbox;
extern const AVFilter ff_vf_owdenoise;
extern const AVFilter ff_vf_pad;
extern const AVFilter ff_vf_pad_opencl;
diff --git a/libavfilter/metal/utils.h b/libavfilter/metal/utils.h
index 7350d42a35..d79c79751c 100644
--- a/libavfilter/metal/utils.h
+++ b/libavfilter/metal/utils.h
@@ -55,5 +55,4 @@ CVMetalTextureRef ff_metal_texture_from_pixbuf(void *avclass,
Index: FFmpeg/libavfilter/metal/utils.h
===================================================================
--- FFmpeg.orig/libavfilter/metal/utils.h
+++ FFmpeg/libavfilter/metal/utils.h
@@ -55,5 +55,4 @@ CVMetalTextureRef ff_metal_texture_from_
int plane,
MTLPixelFormat format)
API_AVAILABLE(macos(10.11), ios(8.0));
-
#endif /* AVFILTER_METAL_UTILS_H */
diff --git a/libavfilter/metal/utils.m b/libavfilter/metal/utils.m
index f365d3ceea..db5c5f6f10 100644
--- a/libavfilter/metal/utils.m
+++ b/libavfilter/metal/utils.m
@@ -55,6 +55,9 @@ CVMetalTextureRef ff_metal_texture_from_pixbuf(void *ctx,
Index: FFmpeg/libavfilter/metal/utils.m
===================================================================
--- FFmpeg.orig/libavfilter/metal/utils.m
+++ FFmpeg/libavfilter/metal/utils.m
@@ -55,6 +55,9 @@ CVMetalTextureRef ff_metal_texture_from_
{
CVMetalTextureRef tex = NULL;
CVReturn ret;
+ bool is_planer = CVPixelBufferIsPlanar(pixbuf);
+ size_t width = is_planer ? CVPixelBufferGetWidthOfPlane(pixbuf, plane) : CVPixelBufferGetWidth(pixbuf);
+ size_t height = is_planer ? CVPixelBufferGetHeightOfPlane(pixbuf, plane) : CVPixelBufferGetHeight(pixbuf);

ret = CVMetalTextureCacheCreateTextureFromImage(
NULL,
@@ -62,8 +65,8 @@ CVMetalTextureRef ff_metal_texture_from_pixbuf(void *ctx,
@@ -62,8 +65,8 @@ CVMetalTextureRef ff_metal_texture_from_
pixbuf,
NULL,
format,
Expand All @@ -256,12 +256,11 @@ index f365d3ceea..db5c5f6f10 100644
plane,
&tex
);
diff --git a/libavfilter/metal/vf_overlay_videotoolbox.metal b/libavfilter/metal/vf_overlay_videotoolbox.metal
new file mode 100644
index 0000000000..936e57e03e
Index: FFmpeg/libavfilter/metal/vf_overlay_videotoolbox.metal
===================================================================
--- /dev/null
+++ b/libavfilter/metal/vf_overlay_videotoolbox.metal
@@ -0,0 +1,58 @@
+++ FFmpeg/libavfilter/metal/vf_overlay_videotoolbox.metal
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2024 Gnattu OC <[email protected]>
+ *
Expand Down Expand Up @@ -320,16 +319,52 @@ index 0000000000..936e57e03e
+ dest.write(result_color, gid);
+ }
+}
Index: libavfilter/vf_overlay_videotoolbox.m
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
+
+/*
+ * Blend shader for a separate-plane YUV main frame and a BGRA overlay (mask).
+ *
+ * Inputs:
+ *   source_y  (texture 0) - luma plane of the main frame, read per thread at gid
+ *   source_uv (texture 1) - interleaved chroma plane of the main frame, read at gid >> 1
+ *   mask      (texture 2) - overlay texture; its r/g/b/a channels are read
+ *   params    (buffer 5)  - supplies x_position / y_position, the overlay's
+ *                           top-left corner in main-frame luma coordinates
+ * Outputs:
+ *   dest_y    (texture 3) - blended luma, written at gid
+ *   dest_uv   (texture 4) - blended chroma, written at gid >> 1
+ *
+ * Threads outside the overlay rectangle copy the main frame through unchanged.
+ * Threads inside it convert the overlay pixel to YUV and alpha-blend it over
+ * the main pixel as main * (1 - a) + overlay * a.
+ *
+ * NOTE(review): chroma is addressed at gid >> 1, which presumably assumes a
+ * 4:2:0 biplanar (NV12/P010-like) layout - confirm against the caller.
+ * NOTE(review): up to four luma threads share one loc_uv and each writes
+ * dest_uv there, so inside the overlay the stored chroma depends on which
+ * thread's write lands last - confirm this is acceptable.
+ */
+kernel void blend_shader_bgra_overlay(
+ texture2d<float, access::read> source_y [[ texture(0) ]],
+ texture2d<float, access::read> source_uv [[ texture(1) ]],
+ texture2d<float, access::read> mask [[ texture(2) ]],
+ texture2d<float, access::write> dest_y [[ texture(3) ]],
+ texture2d<float, access::write> dest_uv [[ texture(4) ]],
+ constant mtlBlendParams& params [[ buffer(5) ]],
+ uint2 gid [[ thread_position_in_grid ]])
+{
+ const auto mask_size = uint2(mask.get_width(),
+ mask.get_height());
+ const auto loc_overlay = uint2(params.x_position, params.y_position); // overlay top-left corner
+ const auto loc_uv = gid >> 1; // chroma texel covering this luma position (half resolution on both axes)
+ if (gid.x < loc_overlay.x || // left of the overlay
+ gid.y < loc_overlay.y || // above the overlay
+ gid.x >= mask_size.x + loc_overlay.x || // right of the overlay
+ gid.y >= mask_size.y + loc_overlay.y) // below the overlay
+ {
+ // Outside the overlay rectangle: pass the main frame through untouched.
+ float4 source_color_y = source_y.read(gid);
+ float4 source_color_uv = source_uv.read(loc_uv);
+ dest_y.write(source_color_y, gid);
+ dest_uv.write(source_color_uv, loc_uv);
+ } else {
+ float4 source_color_y = source_y.read(gid);
+ float4 source_color_uv = source_uv.read(loc_uv);
+ float4 mask_color = mask.read(gid - loc_overlay); // overlay-local coordinate
+ // RGB -> YUV with a 0.0625 luma offset and a 0.5 chroma offset.
+ // NOTE(review): the coefficients look like BT.709 limited range - confirm.
+ float y_overlay = 0.183 * mask_color.r + 0.614 * mask_color.g + 0.062 * mask_color.b + 0.0625f;
+ float u_overlay = -0.101 * mask_color.r - 0.339 * mask_color.g + 0.439 * mask_color.b + 0.5f;
+ float v_overlay = 0.439 * mask_color.r - 0.399 * mask_color.g - 0.040 * mask_color.b + 0.5f;
+ float alpha_color = mask_color.a;
+ float3 main_color = float3(source_color_y.x, source_color_uv.x, source_color_uv.y); // (Y, U, V) of the main frame
+ float3 overlay_color = float3(y_overlay, u_overlay, v_overlay);
+ float3 result_color = main_color * (1.0f - alpha_color) + (overlay_color * alpha_color); // alpha blend, overlay on top
+ dest_y.write(float4(result_color.x, 0.0f, 0.0f, 1.0f), gid); // only .x carries luma
+ dest_uv.write(float4(result_color.y, result_color.z, 0.0f, 1.0f), loc_uv); // .xy carry U and V
+ }
+}
Index: FFmpeg/libavfilter/vf_overlay_videotoolbox.m
===================================================================
diff --git a/libavfilter/vf_overlay_videotoolbox.m b/libavfilter/vf_overlay_videotoolbox.m
new file mode 100644
--- /dev/null (revision 913e5ef1730481306c9607c554aea3043ea0ecd4)
+++ b/libavfilter/vf_overlay_videotoolbox.m (revision 913e5ef1730481306c9607c554aea3043ea0ecd4)
@@ -0,0 +1,609 @@
--- /dev/null
+++ FFmpeg/libavfilter/vf_overlay_videotoolbox.m
@@ -0,0 +1,749 @@
+/*
+ * Copyright (C) 2024 Gnattu OC <[email protected]>
+ *
Expand Down Expand Up @@ -463,6 +498,43 @@ new file mode 100644
+ ff_objc_release(&buffer);
+}
+
+static void call_kernel_bgra_overlay(AVFilterContext *avctx, // filter context; its priv data supplies the Metal queue, pipeline and params buffer
+ id<MTLTexture> dst_y, // output luma texture (bound at index 3); its size also sets the dispatch size
+ id<MTLTexture> dst_uv, // output chroma texture (bound at index 4)
+ id<MTLTexture> main_y, // main-frame luma texture (bound at index 0)
+ id<MTLTexture> main_uv, // main-frame chroma texture (bound at index 1)
+ id<MTLTexture> overlay, // overlay texture (bound at index 2)
+ uint x_position, // overlay x origin, written to the params buffer
+ uint y_position) API_AVAILABLE(macos(10.11), ios(9.0)) // overlay y origin, written to the params buffer
+{
+ OverlayVideoToolboxContext *ctx = avctx->priv;
+ // Both the command buffer and the encoder are autoreleased by Objective-C by default.
+ // Use CFBridgingRetain to take explicit ownership; both are released at the end of this function.
+ id<MTLCommandBuffer> buffer = CFBridgingRetain(ctx->mtl_queue.commandBuffer);
+ id<MTLComputeCommandEncoder> encoder = CFBridgingRetain((__bridge id<MTLCommandBuffer>)buffer.computeCommandEncoder);
+
+ MtlBlendParams *params = (MtlBlendParams *)ctx->mtl_params_buffer.contents; // write the overlay origin directly into the params buffer
+ *params = (MtlBlendParams) {
+ .x_position = x_position,
+ .y_position = y_position,
+ };
+
+ [(__bridge id<MTLComputeCommandEncoder>)encoder setTexture: main_y atIndex: 0]; // texture/buffer indices 0-5 follow the slots declared by blend_shader_bgra_overlay
+ [(__bridge id<MTLComputeCommandEncoder>)encoder setTexture: main_uv atIndex: 1];
+ [(__bridge id<MTLComputeCommandEncoder>)encoder setTexture: overlay atIndex: 2];
+ [(__bridge id<MTLComputeCommandEncoder>)encoder setTexture: dst_y atIndex: 3];
+ [(__bridge id<MTLComputeCommandEncoder>)encoder setTexture: dst_uv atIndex: 4];
+ [(__bridge id<MTLComputeCommandEncoder>)encoder setBuffer: ctx->mtl_params_buffer offset: 0 atIndex: 5];
+ ff_metal_compute_encoder_dispatch(ctx->mtl_device, ctx->mtl_pipeline, (__bridge id<MTLComputeCommandEncoder>)encoder, dst_y.width, dst_y.height); // dispatch over the luma plane dimensions
+ [(__bridge id<MTLComputeCommandEncoder>)encoder endEncoding];
+
+ [(__bridge id<MTLCommandBuffer>)buffer commit];
+ [(__bridge id<MTLCommandBuffer>)buffer waitUntilCompleted]; // wait for the command buffer to finish before returning
+
+ ff_objc_release(&encoder); // balance the CFBridgingRetain calls above
+ ff_objc_release(&buffer);
+}
+
+// Copies and/or converts one pixel buffer to another.
+// This transparently handles pixel format and color spaces, and will do a conversion if needed.
+static int transfer_pixel_buffer(OverlayVideoToolboxContext *ctx, CVPixelBufferRef source, CVPixelBufferRef destination)
Expand Down Expand Up @@ -623,6 +695,87 @@ new file mode 100644
+ return ff_filter_frame(outlink, output);
+}
+
+/**
+ * Framesync event callback for the BGRA-overlay fast path.
+ *
+ * Fetches the current main and overlay frames, allocates an output frame and
+ * either copies the main frame through (when there is no overlay frame) or
+ * blends the BGRA overlay onto the main frame's Y and UV planes with the
+ * blend_shader_bgra_overlay compute kernel.
+ *
+ * Ownership: the main and overlay frames are borrowed from framesync and are
+ * not freed here. The output frame is handed to ff_filter_frame() on success
+ * and freed on every error path. Every CVMetalTexture created here is released
+ * before returning, including when only some of them could be created.
+ *
+ * @param fs framesync context; fs->parent is the filter context
+ * @return the result of ff_filter_frame() on success, a negative AVERROR code
+ *         on failure (AVERROR_BUG without a main frame, AVERROR(ENOMEM) when
+ *         the output cannot be allocated, AVERROR(ENOSYS) when a Metal
+ *         texture cannot be created)
+ */
+static int overlay_vt_blend_bgra_overlay(FFFrameSync *fs) API_AVAILABLE(macos(10.11), ios(9.0))
+{
+    AVFilterContext *avctx = fs->parent;
+    OverlayVideoToolboxContext *ctx = avctx->priv;
+    AVFilterLink *outlink = avctx->outputs[0];
+    AVFilterLink *inlink_main = avctx->inputs[0];
+    AVFrame *input_main, *input_overlay;
+    AVFrame *output = NULL;
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink_main->hw_frames_ctx->data;
+    const AVPixFmtDescriptor *in_main_desc;
+
+    // NULL-initialised so the shared cleanup can tell which ones were created.
+    CVMetalTextureRef main_y = NULL, main_uv = NULL, dst_y = NULL, dst_uv = NULL, overlay = NULL;
+    // Declared before any goto so no jump bypasses an object-pointer initialisation.
+    id<MTLTexture> tex_main_y, tex_main_uv, tex_overlay, tex_dst_y, tex_dst_uv;
+
+    MTLPixelFormat mtl_format_y, mtl_format_uv;
+    int ret;
+
+    in_main_desc = av_pix_fmt_desc_get(frames_ctx->sw_format);
+
+    // read main and overlay frames from inputs
+    ret = ff_framesync_get_frame(fs, 0, &input_main, 0);
+    if (ret < 0)
+        return ret;
+    ret = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
+    if (ret < 0)
+        return ret;
+
+    if (!input_main)
+        return AVERROR_BUG;
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output)
+        return AVERROR(ENOMEM);
+
+    // From here on every failure must go through cleanup so output is freed.
+    ret = av_frame_copy_props(output, input_main);
+    if (ret < 0)
+        goto cleanup;
+
+    // No overlay frame available: just copy the main frame to the output.
+    if (!input_overlay) {
+        ret = transfer_pixel_buffer(ctx, (CVPixelBufferRef)input_main->data[3], (CVPixelBufferRef)output->data[3]);
+        goto cleanup;
+    }
+
+    // Pick 16-bit texture formats when the main format needs more than 8 bits per sample.
+    mtl_format_y = (in_main_desc->comp[0].depth + in_main_desc->comp[0].shift) > 8 ? MTLPixelFormatR16Unorm : MTLPixelFormatR8Unorm;
+    mtl_format_uv = (in_main_desc->comp[0].depth + in_main_desc->comp[0].shift) > 8 ? MTLPixelFormatRG16Unorm : MTLPixelFormatRG8Unorm;
+    main_y = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)input_main->data[3], 0, mtl_format_y);
+    main_uv = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)input_main->data[3], 1, mtl_format_uv);
+    overlay = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)input_overlay->data[3], 0, MTLPixelFormatBGRA8Unorm);
+    dst_y = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)output->data[3], 0, mtl_format_y);
+    dst_uv = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)output->data[3], 1, mtl_format_uv);
+
+    if (!overlay || !main_y || !main_uv || !dst_y || !dst_uv) {
+        ret = AVERROR(ENOSYS);
+        goto cleanup;
+    }
+
+    tex_main_y = CVMetalTextureGetTexture(main_y);
+    tex_main_uv = CVMetalTextureGetTexture(main_uv);
+    tex_overlay = CVMetalTextureGetTexture(overlay);
+    tex_dst_y = CVMetalTextureGetTexture(dst_y);
+    tex_dst_uv = CVMetalTextureGetTexture(dst_uv);
+
+    call_kernel_bgra_overlay(avctx,
+                             tex_dst_y, tex_dst_uv,
+                             tex_main_y, tex_main_uv,
+                             tex_overlay,
+                             ctx->x_position, ctx->y_position);
+    ret = 0;
+
+cleanup:
+    // CFRelease(NULL) crashes, so release only the textures that exist.
+    if (main_y)
+        CFRelease(main_y);
+    if (main_uv)
+        CFRelease(main_uv);
+    if (overlay)
+        CFRelease(overlay);
+    if (dst_y)
+        CFRelease(dst_y);
+    if (dst_uv)
+        CFRelease(dst_uv);
+
+    if (ret < 0) {
+        av_frame_free(&output);
+        return ret;
+    }
+
+    // ff_filter_frame() takes ownership of output.
+    return ff_filter_frame(outlink, output);
+}
+
+static av_cold void do_uninit(AVFilterContext *avctx) API_AVAILABLE(macos(10.11), ios(9.0))
+{
+ OverlayVideoToolboxContext *ctx = avctx->priv;
Expand Down Expand Up @@ -824,6 +977,28 @@ new file mode 100644
+ return AVERROR(ENOSYS);
+ }
+
+ // Use fast code path for BGRA overlay
+ if (overlay_frames->sw_format == AV_PIX_FMT_BGRA) {
+ NSError *err = nil;
+ ff_objc_release(&ctx->mtl_pipeline);
+ ff_objc_release(&ctx->mtl_function);
+ ctx->mtl_function = [ctx->mtl_library newFunctionWithName: @"blend_shader_bgra_overlay"];
+ if (!ctx->mtl_function) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to create Metal function!\n");
+ overlay_videotoolbox_uninit(avctx);
+ return AVERROR_EXTERNAL;
+ }
+ ctx->mtl_pipeline = [ctx->mtl_device
+ newComputePipelineStateWithFunction: ctx->mtl_function
+ error: &err];
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String);
+ overlay_videotoolbox_uninit(avctx);
+ return AVERROR_EXTERNAL;
+ }
+ ctx->fs.on_event = &overlay_vt_blend_bgra_overlay;
+ }
+
+ ctx->device_ref = av_buffer_ref(main_frames->device_ref);
+ if (!ctx->device_ref) {
+ av_log(ctx, AV_LOG_ERROR, "A device reference create failed.\n");
Expand Down Expand Up @@ -939,4 +1114,3 @@ new file mode 100644
+ FILTER_OUTPUTS(overlay_videotoolbox_outputs),
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

0 comments on commit 0d2ae62

Please sign in to comment.