Skip to content

Commit

Permalink
avfilter/tonemap_[opencl,cuda]: use macro
Browse files Browse the repository at this point in the history
  • Loading branch information
gnattu committed Oct 13, 2024
1 parent 669c8de commit 81e35ea
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 41 deletions.
31 changes: 14 additions & 17 deletions debian/patches/0004-add-cuda-tonemap-impl.patch
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ Index: FFmpeg/libavfilter/colorspace.h
===================================================================
--- FFmpeg.orig/libavfilter/colorspace.h
+++ FFmpeg/libavfilter/colorspace.h
@@ -23,10 +23,69 @@
@@ -23,10 +23,66 @@
#include "libavutil/csp.h"
#include "libavutil/frame.h"
#include "libavutil/pixfmt.h"
Expand Down Expand Up @@ -291,17 +291,14 @@ Index: FFmpeg/libavfilter/colorspace.h
+ * Which means the input scale = (2^n - 1) / (219 * 2^(n-8)) and input offset = 2^(n-4) / (219 * 2^(n-8)) for Y' and
+ * 2^(n-1) / (7 * 2^(n-3)) for Cb'Cr'
+ */
+#define INPUT_Y_SCALE_10B 1.1678082192f
+#define INPUT_UV_SCALE_10B 1.1417410714f
+#define INPUT_Y_SCALE_12B 1.1689497717f
+#define INPUT_UV_SCALE_12B 1.1428571429f
+#define INPUT_Y_SCALE(n) ((double)((1 << (n)) - 1) / (219 * (1 << ((n) - 8))))
+#define INPUT_UV_SCALE(n) ((double)((1 << (n)) - 1) / (224 * (1 << ((n) - 8))))
+
+/*
+ * GPU will interpret 10bit and 12bit color as 16bit int
+ * but that will introduce a slight 2^(16-n) / (2^16 - 1) quantization offset which we want to compensate for
+*/
+#define QUANTIZATION_OFFSET_10B 0.0009765774014f
+#define QUANTIZATION_OFFSET_12B 0.0002441443503f
+#define QUANTIZATION_OFFSET(n) ((double)(1 << (16 - (n))) / ((1 << 16) - 1))
+
+// Parsed metadata from the Dolby Vision RPU
+struct DoviMetadata {
Expand All @@ -327,7 +324,7 @@ Index: FFmpeg/libavfilter/colorspace.h
void ff_matrix_mul_3x3(double dst[3][3],
const double src1[3][3], const double src2[3][3]);
void ff_matrix_mul_3x3_vec(double dst[3], const double vec[3], const double mat[3][3]);
@@ -38,4 +97,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC
@@ -38,4 +94,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC
double ff_determine_signal_peak(AVFrame *in);
void ff_update_hdr_metadata(AVFrame *in, double peak);

Expand Down Expand Up @@ -2439,21 +2436,21 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c
+ if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0)
+ return ret;
+
+ if (s->in_desc->comp[0].depth == 10) {
+ input_quantization_offset = QUANTIZATION_OFFSET_10B;
+ input_y_scale = INPUT_Y_SCALE_10B;
+ input_uv_scale = INPUT_UV_SCALE_10B;
+ } else if (s->in_desc->comp[0].depth == 16) {
+ if (s->in_desc->comp[0].depth == 16) {
+ // Assume 16bit is actually 12bit for now, as that is what the hardware decoders produce
+ // and what videos are actually encoded in
+ input_quantization_offset = QUANTIZATION_OFFSET_12B;
+ input_y_scale = INPUT_Y_SCALE_12B;
+ input_uv_scale = INPUT_UV_SCALE_12B;
+ input_quantization_offset = QUANTIZATION_OFFSET(12);
+ input_y_scale = INPUT_Y_SCALE(12);
+ input_uv_scale = INPUT_UV_SCALE(12);
+ } else {
+ input_quantization_offset = QUANTIZATION_OFFSET(s->in_desc->comp[0].depth);
+ input_y_scale = INPUT_Y_SCALE(s->in_desc->comp[0].depth);
+ input_uv_scale = INPUT_UV_SCALE(s->in_desc->comp[0].depth);
+ }
+
+ if (s->out_desc->comp[0].depth == 10) {
+ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters
+ output_quantization_offset = QUANTIZATION_OFFSET_10B;
+ output_quantization_offset = QUANTIZATION_OFFSET(10);
+ }
+
+ if (s->out_desc->comp[0].depth > 8) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1458,7 +1458,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c

switch(ctx->tonemap) {
case TONEMAP_GAMMA:
@@ -139,59 +353,210 @@ static int tonemap_opencl_init(AVFilterC
@@ -139,59 +353,204 @@ static int tonemap_opencl_init(AVFilterC
if (isnan(ctx->param))
ctx->param = 0.3f;
break;
Expand Down Expand Up @@ -1655,36 +1655,30 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
if (ctx->range_out == AVCOL_RANGE_JPEG)
av_bprintf(&header, "#define FULL_RANGE_OUT\n");

+ if (ctx->in_desc->comp[0].depth == 10) {
+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B);
+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_10B);
+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_10B);
+ } else if (ctx->in_desc->comp[0].depth == 16) {
+ if (ctx->in_desc->comp[0].depth == 16) {
+ // Assume 16bit is actually 12bit for now, as that is what the hardware decoders produce
+ // and what videos are actually encoded in
+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_12B);
+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_12B);
+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_12B);
+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(12));
+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(12));
+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(12));
+ } else {
+ // should not happen, but for completeness
+ // once we can tell if the input is real 16bit we can use this branch
+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0f\n");
+ av_bprintf(&header, "#define INPUT_Y_SCALE 1.0f\n");
+ av_bprintf(&header, "#define INPUT_UV_SCALE 1.0f\n");
+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(ctx->in_desc->comp[0].depth));
+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(ctx->in_desc->comp[0].depth));
+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(ctx->in_desc->comp[0].depth));
+ }
+
+ if (ctx->out_desc->comp[0].depth > 8) {
+ av_bprintf(&header, "#define RESCALE_LIMITED_RANGE_OUTPUT\n");
+ }
+
+ if (ctx->out_desc->comp[0].depth == 10)
+ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B);
+ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(10));
+ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters
+
av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc);

if (rgb2rgb_passthrough)
@@ -199,19 +564,41 @@ static int tonemap_opencl_init(AVFilterC
@@ -199,19 +558,41 @@ static int tonemap_opencl_init(AVFilterC
else
ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb);

Expand Down Expand Up @@ -1733,7 +1727,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
ctx->colorspace_out, av_color_space_name(ctx->colorspace_out));
goto fail;
}
@@ -219,24 +606,23 @@ static int tonemap_opencl_init(AVFilterC
@@ -219,24 +600,23 @@ static int tonemap_opencl_init(AVFilterC
ff_fill_rgb2yuv_table(luma_dst, rgb2yuv);
ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv);

Expand Down Expand Up @@ -1773,7 +1767,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c

av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str);
opencl_sources[0] = header.str;
@@ -254,46 +640,171 @@ static int tonemap_opencl_init(AVFilterC
@@ -254,46 +634,171 @@ static int tonemap_opencl_init(AVFilterC
CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
"command queue %d.\n", cle);

Expand Down Expand Up @@ -1963,7 +1957,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
ret = ff_opencl_filter_config_output(outlink);
if (ret < 0)
return ret;
@@ -308,13 +819,46 @@ static int launch_kernel(AVFilterContext
@@ -308,13 +813,46 @@ static int launch_kernel(AVFilterContext
size_t global_work[2];
size_t local_work[2];
cl_int cle;
Expand Down Expand Up @@ -2012,7 +2006,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c

local_work[0] = 16;
local_work[1] = 16;
@@ -338,13 +882,10 @@ static int tonemap_opencl_filter_frame(A
@@ -338,13 +876,10 @@ static int tonemap_opencl_filter_frame(A
AVFilterContext *avctx = inlink->dst;
AVFilterLink *outlink = avctx->outputs[0];
TonemapOpenCLContext *ctx = avctx->priv;
Expand All @@ -2027,7 +2021,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c

av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
av_get_pix_fmt_name(input->format),
@@ -363,8 +904,49 @@ static int tonemap_opencl_filter_frame(A
@@ -363,8 +898,49 @@ static int tonemap_opencl_filter_frame(A
if (err < 0)
goto fail;

Expand Down Expand Up @@ -2079,7 +2073,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c

if (ctx->trc != -1)
output->color_trc = ctx->trc;
@@ -385,72 +967,50 @@ static int tonemap_opencl_filter_frame(A
@@ -385,72 +961,50 @@ static int tonemap_opencl_filter_frame(A
ctx->range_out = output->color_range;
ctx->chroma_loc = output->chroma_location;

Expand Down Expand Up @@ -2175,7 +2169,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
av_frame_free(&input);
av_frame_free(&output);
return err;
@@ -458,24 +1018,9 @@ fail:
@@ -458,24 +1012,9 @@ fail:

static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx)
{
Expand All @@ -2202,7 +2196,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c

ff_opencl_filter_uninit(avctx);
}
@@ -483,37 +1028,50 @@ static av_cold void tonemap_opencl_unini
@@ -483,37 +1022,50 @@ static av_cold void tonemap_opencl_unini
#define OFFSET(x) offsetof(TonemapOpenCLContext, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
static const AVOption tonemap_opencl_options[] = {
Expand Down

0 comments on commit 81e35ea

Please sign in to comment.