Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

avfilter/vf_overlay_videotoolbox: add fast code path for bgra overlay #410

Merged
merged 1 commit into from
Jul 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 224 additions & 50 deletions debian/patches/0062-avfilter-add-vf_overlay_videotoolbox.patch
Original file line number Diff line number Diff line change
Expand Up @@ -120,26 +120,26 @@ Signed-off-by: Gnattu OC <[email protected]>
create mode 100644 libavfilter/metal/vf_overlay_videotoolbox.metal
create mode 100644 libavfilter/vf_overlay_videotoolbox.m

diff --git a/configure b/configure
index 23066efa32..a7c349d126 100755
--- a/configure
+++ b/configure
@@ -3720,6 +3720,7 @@ overlay_qsv_filter_select="qsvvpp"
Index: FFmpeg/configure
===================================================================
--- FFmpeg.orig/configure
+++ FFmpeg/configure
@@ -3722,6 +3722,7 @@ overlay_qsv_filter_select="qsvvpp"
overlay_vaapi_filter_deps="vaapi VAProcPipelineCaps_blend_flags"
overlay_vulkan_filter_deps="vulkan spirv_compiler"
overlay_rkrga_filter_deps="rkrga"
+overlay_videotoolbox_filter_deps="metal corevideo coreimage videotoolbox"
owdenoise_filter_deps="gpl"
pad_opencl_filter_deps="opencl"
pan_filter_deps="swresample"
diff --git a/doc/filters.texi b/doc/filters.texi
index e0436a5755..bfb77562cb 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19033,6 +19033,58 @@ See @ref{framesync}.

Index: FFmpeg/doc/filters.texi
===================================================================
--- FFmpeg.orig/doc/filters.texi
+++ FFmpeg/doc/filters.texi
@@ -18351,6 +18351,58 @@ See @ref{framesync}.
This filter also supports the @ref{framesync} options.

+@section overlay_videotoolbox
+
+Overlay one video on top of another.
Expand Down Expand Up @@ -193,13 +193,13 @@ index e0436a5755..bfb77562cb 100644
+@end itemize
+
@section owdenoise

Apply Overcomplete Wavelet denoiser.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index f6c1d641d6..ea1389ab57 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -401,6 +401,9 @@ OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \
Index: FFmpeg/libavfilter/Makefile
===================================================================
--- FFmpeg.orig/libavfilter/Makefile
+++ FFmpeg/libavfilter/Makefile
@@ -403,6 +403,9 @@ OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)
OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o
OBJS-$(CONFIG_OVERLAY_VAAPI_FILTER) += vf_overlay_vaapi.o framesync.o vaapi_vpp.o
OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o vulkan_filter.o
Expand All @@ -209,43 +209,43 @@ index f6c1d641d6..ea1389ab57 100644
OBJS-$(CONFIG_OVERLAY_RKRGA_FILTER) += vf_overlay_rkrga.o framesync.o
OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o
OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 149bf50997..ec9d975ecb 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -378,6 +378,7 @@ extern const AVFilter ff_vf_overlay_vaapi;
Index: FFmpeg/libavfilter/allfilters.c
===================================================================
--- FFmpeg.orig/libavfilter/allfilters.c
+++ FFmpeg/libavfilter/allfilters.c
@@ -380,6 +380,7 @@ extern const AVFilter ff_vf_overlay_vaap
extern const AVFilter ff_vf_overlay_vulkan;
extern const AVFilter ff_vf_overlay_cuda;
extern const AVFilter ff_vf_overlay_rkrga;
+extern const AVFilter ff_vf_overlay_videotoolbox;
extern const AVFilter ff_vf_owdenoise;
extern const AVFilter ff_vf_pad;
extern const AVFilter ff_vf_pad_opencl;
diff --git a/libavfilter/metal/utils.h b/libavfilter/metal/utils.h
index 7350d42a35..d79c79751c 100644
--- a/libavfilter/metal/utils.h
+++ b/libavfilter/metal/utils.h
@@ -55,5 +55,4 @@ CVMetalTextureRef ff_metal_texture_from_pixbuf(void *avclass,
Index: FFmpeg/libavfilter/metal/utils.h
===================================================================
--- FFmpeg.orig/libavfilter/metal/utils.h
+++ FFmpeg/libavfilter/metal/utils.h
@@ -55,5 +55,4 @@ CVMetalTextureRef ff_metal_texture_from_
int plane,
MTLPixelFormat format)
API_AVAILABLE(macos(10.11), ios(8.0));
-
#endif /* AVFILTER_METAL_UTILS_H */
diff --git a/libavfilter/metal/utils.m b/libavfilter/metal/utils.m
index f365d3ceea..db5c5f6f10 100644
--- a/libavfilter/metal/utils.m
+++ b/libavfilter/metal/utils.m
@@ -55,6 +55,9 @@ CVMetalTextureRef ff_metal_texture_from_pixbuf(void *ctx,
Index: FFmpeg/libavfilter/metal/utils.m
===================================================================
--- FFmpeg.orig/libavfilter/metal/utils.m
+++ FFmpeg/libavfilter/metal/utils.m
@@ -55,6 +55,9 @@ CVMetalTextureRef ff_metal_texture_from_
{
CVMetalTextureRef tex = NULL;
CVReturn ret;
+ bool is_planer = CVPixelBufferIsPlanar(pixbuf);
+ size_t width = is_planer ? CVPixelBufferGetWidthOfPlane(pixbuf, plane) : CVPixelBufferGetWidth(pixbuf);
+ size_t height = is_planer ? CVPixelBufferGetHeightOfPlane(pixbuf, plane) : CVPixelBufferGetHeight(pixbuf);

ret = CVMetalTextureCacheCreateTextureFromImage(
NULL,
@@ -62,8 +65,8 @@ CVMetalTextureRef ff_metal_texture_from_pixbuf(void *ctx,
@@ -62,8 +65,8 @@ CVMetalTextureRef ff_metal_texture_from_
pixbuf,
NULL,
format,
Expand All @@ -256,12 +256,11 @@ index f365d3ceea..db5c5f6f10 100644
plane,
&tex
);
diff --git a/libavfilter/metal/vf_overlay_videotoolbox.metal b/libavfilter/metal/vf_overlay_videotoolbox.metal
new file mode 100644
index 0000000000..936e57e03e
Index: FFmpeg/libavfilter/metal/vf_overlay_videotoolbox.metal
===================================================================
--- /dev/null
+++ b/libavfilter/metal/vf_overlay_videotoolbox.metal
@@ -0,0 +1,58 @@
+++ FFmpeg/libavfilter/metal/vf_overlay_videotoolbox.metal
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2024 Gnattu OC <[email protected]>
+ *
Expand Down Expand Up @@ -320,16 +319,52 @@ index 0000000000..936e57e03e
+ dest.write(result_color, gid);
+ }
+}
Index: libavfilter/vf_overlay_videotoolbox.m
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
+
+/*
+ * Blend shader for a separated YUV main frame (one Y texture plus one
+ * interleaved UV texture) and a BGRA overlay ("mask").
+ *
+ * One thread is launched per luma pixel. The chroma texel is addressed with
+ * gid >> 1, i.e. the chroma plane is treated as subsampled by two in both
+ * directions, so four luma threads map onto the same chroma texel.
+ *
+ * Pixels outside the overlay rectangle are copied through unchanged. Pixels
+ * inside it are alpha-blended with the overlay after converting the overlay
+ * colour from RGB to YUV (coefficients and the 0.0625 / 0.5 offsets look like
+ * BT.709 limited range -- NOTE(review): confirm against the intended matrix).
+ */
+kernel void blend_shader_bgra_overlay(
+    texture2d<float, access::read>  source_y  [[ texture(0) ]],
+    texture2d<float, access::read>  source_uv [[ texture(1) ]],
+    texture2d<float, access::read>  mask      [[ texture(2) ]],
+    texture2d<float, access::write> dest_y    [[ texture(3) ]],
+    texture2d<float, access::write> dest_uv   [[ texture(4) ]],
+    constant mtlBlendParams& params [[ buffer(5) ]],
+    uint2 gid [[ thread_position_in_grid ]])
+{
+    const auto mask_size = uint2(mask.get_width(),
+                                 mask.get_height());
+    const auto loc_overlay = uint2(params.x_position, params.y_position);
+    const auto loc_uv = gid >> 1;
+
+    // All four luma threads of a 2x2 block target the same chroma texel.
+    // Letting each of them write it makes the stored value depend on thread
+    // scheduling whenever they compute different results (different overlay
+    // pixels, or a block straddling the overlay edge). Only the thread with
+    // even x and even y owns the chroma write, which makes it deterministic.
+    const bool writes_uv = ((gid.x | gid.y) & 1u) == 0u;
+
+    const float4 source_color_y = source_y.read(gid);
+    const float4 source_color_uv = source_uv.read(loc_uv);
+
+    if (gid.x < loc_overlay.x ||
+        gid.y < loc_overlay.y ||
+        gid.x >= mask_size.x + loc_overlay.x ||
+        gid.y >= mask_size.y + loc_overlay.y)
+    {
+        // Outside the overlay: pass the main frame through untouched.
+        dest_y.write(source_color_y, gid);
+        if (writes_uv)
+            dest_uv.write(source_color_uv, loc_uv);
+    } else {
+        // Overlay coordinates are relative to the overlay's top-left corner.
+        float4 mask_color = mask.read(gid - loc_overlay);
+        float y_overlay =  0.183 * mask_color.r + 0.614 * mask_color.g + 0.062 * mask_color.b + 0.0625f;
+        float u_overlay = -0.101 * mask_color.r - 0.339 * mask_color.g + 0.439 * mask_color.b + 0.5f;
+        float v_overlay =  0.439 * mask_color.r - 0.399 * mask_color.g - 0.040 * mask_color.b + 0.5f;
+        float alpha_color = mask_color.a;
+        float3 main_color = float3(source_color_y.x, source_color_uv.x, source_color_uv.y);
+        float3 overlay_color = float3(y_overlay, u_overlay, v_overlay);
+        // Straight (non-premultiplied) alpha blend of overlay over main.
+        float3 result_color = main_color * (1.0f - alpha_color) + (overlay_color * alpha_color);
+        dest_y.write(float4(result_color.x, 0.0f, 0.0f, 1.0f), gid);
+        if (writes_uv)
+            dest_uv.write(float4(result_color.y, result_color.z, 0.0f, 1.0f), loc_uv);
+    }
+}
Index: FFmpeg/libavfilter/vf_overlay_videotoolbox.m
===================================================================
diff --git a/libavfilter/vf_overlay_videotoolbox.m b/libavfilter/vf_overlay_videotoolbox.m
new file mode 100644
--- /dev/null (revision 913e5ef1730481306c9607c554aea3043ea0ecd4)
+++ b/libavfilter/vf_overlay_videotoolbox.m (revision 913e5ef1730481306c9607c554aea3043ea0ecd4)
@@ -0,0 +1,609 @@
--- /dev/null
+++ FFmpeg/libavfilter/vf_overlay_videotoolbox.m
@@ -0,0 +1,749 @@
+/*
+ * Copyright (C) 2024 Gnattu OC <[email protected]>
+ *
Expand Down Expand Up @@ -463,6 +498,43 @@ new file mode 100644
+ ff_objc_release(&buffer);
+}
+
+/*
+ * Encode and run one dispatch of the currently configured compute pipeline
+ * for the BGRA-overlay path, then block until the GPU has finished.
+ *
+ * Texture slots 0-4 and buffer slot 5 are bound to main Y, main UV, overlay,
+ * destination Y, destination UV and the blend parameters respectively; the
+ * grid is sized from the destination luma texture.
+ */
+static void call_kernel_bgra_overlay(AVFilterContext *avctx,
+                                     id<MTLTexture> dst_y,
+                                     id<MTLTexture> dst_uv,
+                                     id<MTLTexture> main_y,
+                                     id<MTLTexture> main_uv,
+                                     id<MTLTexture> overlay,
+                                     uint x_position,
+                                     uint y_position) API_AVAILABLE(macos(10.11), ios(9.0))
+{
+    OverlayVideoToolboxContext *ctx = avctx->priv;
+    MtlBlendParams *blend_params = (MtlBlendParams *)ctx->mtl_params_buffer.contents;
+    // The command buffer and its encoder would otherwise be autoreleased;
+    // take an explicit reference with CFBridgingRetain so their lifetime is
+    // managed C-style and ended by ff_objc_release() below.
+    id<MTLCommandBuffer> cmd_buffer = CFBridgingRetain(ctx->mtl_queue.commandBuffer);
+    id<MTLComputeCommandEncoder> cmd_encoder = CFBridgingRetain([(__bridge id<MTLCommandBuffer>)cmd_buffer computeCommandEncoder]);
+
+    // Publish the overlay offset to the shader through the shared buffer.
+    *blend_params = (MtlBlendParams) {
+        .x_position = x_position,
+        .y_position = y_position,
+    };
+
+    // Bind the parameter buffer and the outputs, then the inputs.
+    [(__bridge id<MTLComputeCommandEncoder>)cmd_encoder setBuffer: ctx->mtl_params_buffer offset: 0 atIndex: 5];
+    [(__bridge id<MTLComputeCommandEncoder>)cmd_encoder setTexture: dst_uv atIndex: 4];
+    [(__bridge id<MTLComputeCommandEncoder>)cmd_encoder setTexture: dst_y atIndex: 3];
+    [(__bridge id<MTLComputeCommandEncoder>)cmd_encoder setTexture: overlay atIndex: 2];
+    [(__bridge id<MTLComputeCommandEncoder>)cmd_encoder setTexture: main_uv atIndex: 1];
+    [(__bridge id<MTLComputeCommandEncoder>)cmd_encoder setTexture: main_y atIndex: 0];
+
+    ff_metal_compute_encoder_dispatch(ctx->mtl_device,
+                                      ctx->mtl_pipeline,
+                                      (__bridge id<MTLComputeCommandEncoder>)cmd_encoder,
+                                      dst_y.width,
+                                      dst_y.height);
+    [(__bridge id<MTLComputeCommandEncoder>)cmd_encoder endEncoding];
+
+    // Submit and wait synchronously for completion.
+    [(__bridge id<MTLCommandBuffer>)cmd_buffer commit];
+    [(__bridge id<MTLCommandBuffer>)cmd_buffer waitUntilCompleted];
+
+    ff_objc_release(&cmd_encoder);
+    ff_objc_release(&cmd_buffer);
+}
+
+// Copies and/or converts one pixel buffer to another.
+// This transparently handles pixel format and color spaces, and will do a conversion if needed.
+static int transfer_pixel_buffer(OverlayVideoToolboxContext *ctx, CVPixelBufferRef source, CVPixelBufferRef destination)
Expand Down Expand Up @@ -623,6 +695,87 @@ new file mode 100644
+ return ff_filter_frame(outlink, output);
+}
+
+/*
+ * Framesync callback for the fast path used when the overlay input is BGRA.
+ *
+ * Fetches the current main and overlay frames, allocates an output frame,
+ * wraps the planes of the backing CVPixelBuffers (frame->data[3]) as Metal
+ * textures and runs the BGRA-overlay kernel on them. When no overlay frame
+ * is available the main frame is transferred to the output as-is.
+ *
+ * Returns the result of ff_filter_frame() on success, or a negative AVERROR
+ * code on failure. On every failure path the output frame and any Metal
+ * textures created so far are released.
+ */
+static int overlay_vt_blend_bgra_overlay(FFFrameSync *fs) API_AVAILABLE(macos(10.11), ios(9.0))
+{
+    AVFilterContext *avctx = fs->parent;
+    OverlayVideoToolboxContext *ctx = avctx->priv;
+    AVFilterLink *outlink = avctx->outputs[0];
+    AVFilterLink *inlink_main = avctx->inputs[0];
+    AVFrame *input_main, *input_overlay;
+    AVFrame *output = NULL;
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink_main->hw_frames_ctx->data;
+    const AVPixFmtDescriptor *in_main_desc;
+
+    // Initialised to NULL so the failure path can release exactly the
+    // textures that were successfully created.
+    CVMetalTextureRef main_y = NULL, main_uv = NULL, dst_y = NULL, dst_uv = NULL, overlay = NULL;
+    id<MTLTexture> tex_main_y, tex_main_uv, tex_overlay, tex_dst_y, tex_dst_uv;
+
+    MTLPixelFormat mtl_format_y, mtl_format_uv;
+    int high_bit_depth;
+    int ret;
+
+    in_main_desc = av_pix_fmt_desc_get(frames_ctx->sw_format);
+
+    // read main and overlay frames from inputs
+    ret = ff_framesync_get_frame(fs, 0, &input_main, 0);
+    if (ret < 0)
+        return ret;
+    ret = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
+    if (ret < 0)
+        return ret;
+
+    if (!input_main)
+        return AVERROR_BUG;
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output)
+        return AVERROR(ENOMEM);
+
+    ret = av_frame_copy_props(output, input_main);
+    if (ret < 0)
+        goto fail;
+
+    // No overlay frame for this timestamp: just copy/convert the main frame.
+    if (!input_overlay) {
+        ret = transfer_pixel_buffer(ctx, (CVPixelBufferRef)input_main->data[3], (CVPixelBufferRef)output->data[3]);
+        if (ret < 0)
+            goto fail;
+        return ff_filter_frame(outlink, output);
+    }
+
+    // Pick 8-bit or 16-bit normalised texture formats from the luma component
+    // of the main input's software pixel format.
+    high_bit_depth = (in_main_desc->comp[0].depth + in_main_desc->comp[0].shift) > 8;
+    mtl_format_y  = high_bit_depth ? MTLPixelFormatR16Unorm  : MTLPixelFormatR8Unorm;
+    mtl_format_uv = high_bit_depth ? MTLPixelFormatRG16Unorm : MTLPixelFormatRG8Unorm;
+
+    main_y  = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)input_main->data[3], 0, mtl_format_y);
+    main_uv = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)input_main->data[3], 1, mtl_format_uv);
+    overlay = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)input_overlay->data[3], 0, MTLPixelFormatBGRA8Unorm);
+    dst_y   = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)output->data[3], 0, mtl_format_y);
+    dst_uv  = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)output->data[3], 1, mtl_format_uv);
+
+    if (!overlay || !main_y || !main_uv || !dst_y || !dst_uv) {
+        ret = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    tex_main_y  = CVMetalTextureGetTexture(main_y);
+    tex_main_uv = CVMetalTextureGetTexture(main_uv);
+    tex_overlay = CVMetalTextureGetTexture(overlay);
+    tex_dst_y   = CVMetalTextureGetTexture(dst_y);
+    tex_dst_uv  = CVMetalTextureGetTexture(dst_uv);
+
+    call_kernel_bgra_overlay(avctx,
+                             tex_dst_y, tex_dst_uv,
+                             tex_main_y, tex_main_uv,
+                             tex_overlay,
+                             ctx->x_position, ctx->y_position);
+    CFRelease(main_y);
+    CFRelease(main_uv);
+    CFRelease(overlay);
+    CFRelease(dst_y);
+    CFRelease(dst_uv);
+
+    // ff_filter_frame() takes ownership of output, including on failure.
+    return ff_filter_frame(outlink, output);
+
+fail:
+    // CFRelease() must not be called with NULL, so guard each one.
+    if (main_y)
+        CFRelease(main_y);
+    if (main_uv)
+        CFRelease(main_uv);
+    if (overlay)
+        CFRelease(overlay);
+    if (dst_y)
+        CFRelease(dst_y);
+    if (dst_uv)
+        CFRelease(dst_uv);
+    av_frame_free(&output);
+    return ret;
+}
+
+static av_cold void do_uninit(AVFilterContext *avctx) API_AVAILABLE(macos(10.11), ios(9.0))
+{
+ OverlayVideoToolboxContext *ctx = avctx->priv;
Expand Down Expand Up @@ -824,6 +977,28 @@ new file mode 100644
+ return AVERROR(ENOSYS);
+ }
+
+ // Use fast code path for BGRA overlay
+ if (overlay_frames->sw_format == AV_PIX_FMT_BGRA) {
+ NSError *err = nil;
+ ff_objc_release(&ctx->mtl_pipeline);
+ ff_objc_release(&ctx->mtl_function);
+ ctx->mtl_function = [ctx->mtl_library newFunctionWithName: @"blend_shader_bgra_overlay"];
+ if (!ctx->mtl_function) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to create Metal function!\n");
+ overlay_videotoolbox_uninit(avctx);
+ return AVERROR_EXTERNAL;
+ }
+ ctx->mtl_pipeline = [ctx->mtl_device
+ newComputePipelineStateWithFunction: ctx->mtl_function
+ error: &err];
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String);
+ overlay_videotoolbox_uninit(avctx);
+ return AVERROR_EXTERNAL;
+ }
+ ctx->fs.on_event = &overlay_vt_blend_bgra_overlay;
+ }
+
+ ctx->device_ref = av_buffer_ref(main_frames->device_ref);
+ if (!ctx->device_ref) {
+ av_log(ctx, AV_LOG_ERROR, "A device reference create failed.\n");
Expand Down Expand Up @@ -939,4 +1114,3 @@ new file mode 100644
+ FILTER_OUTPUTS(overlay_videotoolbox_outputs),
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};

Loading