From 81e35eae8d6b7bc660e6901d8acb22f75bbad176 Mon Sep 17 00:00:00 2001 From: gnattu Date: Sun, 13 Oct 2024 18:57:20 +0800 Subject: [PATCH] avfilter/tonemap_[opencl,cuda]: use macro --- .../patches/0004-add-cuda-tonemap-impl.patch | 31 +++++++------- ...-and-code-refactor-to-opencl-tonemap.patch | 42 ++++++++----------- 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/debian/patches/0004-add-cuda-tonemap-impl.patch b/debian/patches/0004-add-cuda-tonemap-impl.patch index 1ffe51d421..b480685f55 100644 --- a/debian/patches/0004-add-cuda-tonemap-impl.patch +++ b/debian/patches/0004-add-cuda-tonemap-impl.patch @@ -257,7 +257,7 @@ Index: FFmpeg/libavfilter/colorspace.h =================================================================== --- FFmpeg.orig/libavfilter/colorspace.h +++ FFmpeg/libavfilter/colorspace.h -@@ -23,10 +23,69 @@ +@@ -23,10 +23,66 @@ #include "libavutil/csp.h" #include "libavutil/frame.h" #include "libavutil/pixfmt.h" @@ -291,17 +291,14 @@ Index: FFmpeg/libavfilter/colorspace.h + * Which means the input scale = (2^n - 1) / (219 * 2^(n-8)) and input offset = 2^(n-4)) / (219 * 2^(n-8)) for Y' and + * 2^(n-1)) / (7 * 2^(n-3)) for Cb'Cr' + */ -+#define INPUT_Y_SCALE_10B 1.1678082192f -+#define INPUT_UV_SCALE_10B 1.1417410714f -+#define INPUT_Y_SCALE_12B 1.1689497717f -+#define INPUT_UV_SCALE_12B 1.1428571429f ++#define INPUT_Y_SCALE(n) ((double)((1 << (n)) - 1) / (219 * (1 << ((n) - 8)))) ++#define INPUT_UV_SCALE(n) ((double)((1 << (n)) - 1) / (224 * (1 << ((n) - 8)))) + +/* + * GPU will interpret 10bit and 12bit color as 16bit int + * but that will introduce a slight (2^(16-n))/2^16 quantization offset which we want to compensate for +*/ -+#define QUANTIZATION_OFFSET_10B 0.0009765774014f -+#define QUANTIZATION_OFFSET_12B 0.0002441443503f ++#define QUANTIZATION_OFFSET(n) ((double)(1 << (16 - (n))) / ((1 << 16) - 1)) + +// Parsed metadata from the Dolby Vision RPU +struct DoviMetadata { @@ -327,7 +324,7 @@ Index: FFmpeg/libavfilter/colorspace.h void ff_matrix_mul_3x3(double dst[3][3], const double src1[3][3], const double src2[3][3]); void ff_matrix_mul_3x3_vec(double dst[3], const double vec[3], const double mat[3][3]); -@@ -38,4 +97,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC +@@ -38,4 +94,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC double ff_determine_signal_peak(AVFrame *in); void ff_update_hdr_metadata(AVFrame *in, double peak); @@ -2439,21 +2436,21 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0) + return ret; + -+ if (s->in_desc->comp[0].depth == 10) { -+ input_quantization_offset = QUANTIZATION_OFFSET_10B; -+ input_y_scale = INPUT_Y_SCALE_10B; -+ input_uv_scale = INPUT_UV_SCALE_10B; -+ } else if (s->in_desc->comp[0].depth == 16) { ++ if (s->in_desc->comp[0].depth == 16) { + // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing + // and what videos are actually encoded in -+ input_quantization_offset = QUANTIZATION_OFFSET_12B; -+ input_y_scale = INPUT_Y_SCALE_12B; -+ input_uv_scale = INPUT_UV_SCALE_12B; ++ input_quantization_offset = QUANTIZATION_OFFSET(12); ++ input_y_scale = INPUT_Y_SCALE(12); ++ input_uv_scale = INPUT_UV_SCALE(12); ++ } else { ++ input_quantization_offset = QUANTIZATION_OFFSET(s->in_desc->comp[0].depth); ++ input_y_scale = INPUT_Y_SCALE(s->in_desc->comp[0].depth); ++ input_uv_scale = INPUT_UV_SCALE(s->in_desc->comp[0].depth); + } + + if (s->out_desc->comp[0].depth == 10) { + // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters -+ output_quantization_offset = QUANTIZATION_OFFSET_10B; ++ output_quantization_offset = QUANTIZATION_OFFSET(10); + } + + if (s->out_desc->comp[0].depth > 8) { diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch index 99bb46248f..a59af09f78 100644 --- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch +++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -1458,7 +1458,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c switch(ctx->tonemap) { case TONEMAP_GAMMA: -@@ -139,59 +353,210 @@ static int tonemap_opencl_init(AVFilterC +@@ -139,59 +353,204 @@ static int tonemap_opencl_init(AVFilterC if (isnan(ctx->param)) ctx->param = 0.3f; break; @@ -1655,22 +1655,16 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->range_out == AVCOL_RANGE_JPEG) av_bprintf(&header, "#define FULL_RANGE_OUT\n"); -+ if (ctx->in_desc->comp[0].depth == 10) { -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B); -+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_10B); -+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_10B); -+ } else if (ctx->in_desc->comp[0].depth == 16) { ++ if (ctx->in_desc->comp[0].depth == 16) { + // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing + // and what videos are actually encoded in -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_12B); -+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_12B); -+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_12B); ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(12)); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(12)); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(12)); + } else { -+ // should not happen, but for completeness -+ // once we can tell if the input is real 16bit we can use this branch -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0f\n"); -+ av_bprintf(&header, "#define INPUT_Y_SCALE 1.0f\n"); -+ av_bprintf(&header, "#define INPUT_UV_SCALE 1.0f\n"); ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(ctx->in_desc->comp[0].depth)); + } + + if (ctx->out_desc->comp[0].depth > 8) { @@ -1678,13 +1672,13 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + } + + if (ctx->out_desc->comp[0].depth == 10) -+ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B); ++ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(10)); + // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters + av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc); if (rgb2rgb_passthrough) -@@ -199,19 +564,41 @@ static int tonemap_opencl_init(AVFilterC +@@ -199,19 +558,41 @@ static int tonemap_opencl_init(AVFilterC else ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); @@ -1733,7 +1727,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); goto fail; } -@@ -219,24 +606,23 @@ static int tonemap_opencl_init(AVFilterC +@@ -219,24 +600,23 @@ static int tonemap_opencl_init(AVFilterC ff_fill_rgb2yuv_table(luma_dst, rgb2yuv); ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv); @@ -1773,7 +1767,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); opencl_sources[0] = header.str; -@@ -254,46 +640,171 @@ static int tonemap_opencl_init(AVFilterC +@@ -254,46 +634,171 @@ static int tonemap_opencl_init(AVFilterC CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " "command queue %d.\n", cle); @@ -1963,7 +1957,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ret = ff_opencl_filter_config_output(outlink); if (ret < 0) return ret; -@@ -308,13 +819,46 @@ static int launch_kernel(AVFilterContext +@@ -308,13 +813,46 @@ static int launch_kernel(AVFilterContext size_t global_work[2]; size_t local_work[2]; cl_int cle; @@ -2012,7 +2006,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c local_work[0] = 16; local_work[1] = 16; -@@ -338,13 +882,10 @@ static int tonemap_opencl_filter_frame(A +@@ -338,13 +876,10 @@ static int tonemap_opencl_filter_frame(A AVFilterContext *avctx = inlink->dst; AVFilterLink *outlink = avctx->outputs[0]; TonemapOpenCLContext *ctx = avctx->priv; @@ -2027,7 +2021,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", av_get_pix_fmt_name(input->format), -@@ -363,8 +904,49 @@ static int tonemap_opencl_filter_frame(A +@@ -363,8 +898,49 @@ static int tonemap_opencl_filter_frame(A if (err < 0) goto fail; @@ -2079,7 +2073,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->trc != -1) output->color_trc = ctx->trc; -@@ -385,72 +967,50 @@ static int tonemap_opencl_filter_frame(A +@@ -385,72 +961,50 @@ static int tonemap_opencl_filter_frame(A ctx->range_out = output->color_range; ctx->chroma_loc = output->chroma_location; @@ -2175,7 +2169,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_frame_free(&input); av_frame_free(&output); return err; -@@ -458,24 +1018,9 @@ fail: +@@ -458,24 +1012,9 @@ fail: static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx) { @@ -2202,7 +2196,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ff_opencl_filter_uninit(avctx); } -@@ -483,37 +1028,50 @@ static av_cold void tonemap_opencl_unini +@@ -483,37 +1022,50 @@ static av_cold void tonemap_opencl_unini #define OFFSET(x) offsetof(TonemapOpenCLContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption tonemap_opencl_options[] = {