From 4a02f81771b48280ae44cbbe70e07126c01b8e14 Mon Sep 17 00:00:00 2001 From: gnattu Date: Sat, 12 Oct 2024 16:39:47 +0800 Subject: [PATCH 1/7] avfilter/tonemap_*: fix quantization errors for tv range handling --- .../patches/0004-add-cuda-tonemap-impl.patch | 91 +++++++---- ...-and-code-refactor-to-opencl-tonemap.patch | 141 +++++++++++------- ...2-add-vf-tonemap-videotoolbox-filter.patch | 68 ++++++--- 3 files changed, 199 insertions(+), 101 deletions(-) diff --git a/debian/patches/0004-add-cuda-tonemap-impl.patch b/debian/patches/0004-add-cuda-tonemap-impl.patch index 2cbf84d317..0cfa26fe39 100644 --- a/debian/patches/0004-add-cuda-tonemap-impl.patch +++ b/debian/patches/0004-add-cuda-tonemap-impl.patch @@ -324,7 +324,7 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h =================================================================== --- /dev/null +++ FFmpeg/libavfilter/cuda/colorspace_common.h -@@ -0,0 +1,338 @@ +@@ -0,0 +1,348 @@ +/* + * This file is part of FFmpeg. + * @@ -361,10 +361,6 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h +#define ARIB_B67_B 0.28466892f +#define ARIB_B67_C 0.55991073f + -+#define LIMITED_BLACK 0.06256109482f -+#define LIMITED_WHITE 0.9188660802f -+#define LIMITED_RANGE 0.8563049854f -+ +#define FLOAT_EPS 1e-6f + +extern __constant__ const float ref_white; @@ -379,6 +375,13 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h +extern __constant__ const float yuv_matrix[9], rgb_matrix[9]; +extern __constant__ const float pq_max_lum_div_ref_white; +extern __constant__ const float ref_white_div_pq_max_lum; ++extern __constant__ const float input_quantization_offset; ++extern __constant__ const float output_quantization_offset; ++extern __constant__ const float input_y_scale; ++extern __constant__ const float input_uv_scale; ++extern __constant__ const float output_quantization_factor; ++extern __constant__ const float output_quantization_scale; ++ + +static __inline__ __device__ float get_luma_dst(float3 c, const float3& luma_dst) { + return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z; @@ -501,16 +504,20 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h +} + +static __inline__ __device__ float3 yuv2rgb(float y, float u, float v) { -+ u -= 0.5f; -+ v -= 0.5f; ++ y += y > 0.0f ? input_quantization_offset : 0.0f; ++ u += u > 0.0f ? input_quantization_offset : 0.0f; ++ v += v > 0.0f ? input_quantization_offset : 0.0f; ++ if (range_src == AVCOL_RANGE_MPEG) { ++ y = input_y_scale * y - 0.07305936073f; ++ u = input_uv_scale * u - 0.5714285714f; ++ v = input_uv_scale * v - 0.5714285714f; ++ } else { ++ u -= 0.5f; ++ v -= 0.5f; ++ } + float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; + float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; + float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; -+ if (range_src == AVCOL_RANGE_MPEG) { -+ r = (r - LIMITED_BLACK) / LIMITED_RANGE; -+ g = (g - LIMITED_BLACK) / LIMITED_RANGE; -+ b = (b - LIMITED_BLACK) / LIMITED_RANGE; -+ } + + return make_float3(r, g, b); +} @@ -523,26 +530,29 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h +} + +static __inline__ __device__ float3 rgb2yuv(float r, float g, float b) { -+ if (range_dst == AVCOL_RANGE_MPEG) { -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; -+ } + float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; + float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; + float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; -+ u += 0.5f; -+ v += 0.5f; ++ if (range_dst == AVCOL_RANGE_MPEG) { ++ y = floorf(((219.0f * y + 16.0f) * output_quantization_factor) + 0.5f) / output_quantization_scale; ++ u = floorf(((224.0f * u + 128.0f) * output_quantization_factor) + 0.5f) / output_quantization_scale; ++ v = floorf(((224.0f * v + 128.0f) * output_quantization_factor) + 0.5f) / output_quantization_scale; ++ } else { ++ u += 0.5f; ++ v += 0.5f; ++ } ++ y -= y > 0.0f ? output_quantization_offset : 0.0f; ++ u -= u > 0.0f ? output_quantization_offset : 0.0f; ++ v -= v > 0.0f ? output_quantization_offset : 0.0f; + return make_float3(y, u, v); +} + +static __inline__ __device__ float rgb2y(float r, float g, float b) { ++ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; + if (range_dst == AVCOL_RANGE_MPEG) { -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; ++ y = floorf(((219.0f * y + 16.0f) * output_quantization_factor) + 0.5f) / output_quantization_scale; + } -+ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; ++ y -= y > 0.0f ? output_quantization_offset : 0.0f; + return y; +} + @@ -1775,7 +1785,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c =================================================================== --- /dev/null +++ FFmpeg/libavfilter/vf_tonemap_cuda.c -@@ -0,0 +1,1131 @@ +@@ -0,0 +1,1162 @@ +/* + * This file is part of FFmpeg. + * @@ -2287,6 +2297,12 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + enum AVColorPrimaries in_pri = s->in_pri, out_pri = s->out_pri; + enum AVColorRange in_range = s->in_range, out_range = s->out_range; + int d = s->in_desc->comp[0].depth > s->out_desc->comp[0].depth && s->ditherTex; ++ float input_quantization_offset = 0.0f; ++ float output_quantization_offset = 0.0f; ++ float input_y_scale = 1.0f; ++ float input_uv_scale = 1.0f; ++ float output_quantization_factor = 1.0f; ++ float output_quantization_scale = 255.0f; + char info_log[4096], error_log[4096]; + CUjit_option options[] = { CU_JIT_INFO_LOG_BUFFER, + CU_JIT_ERROR_LOG_BUFFER, @@ -2396,6 +2412,25 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0) + return ret; + ++ if (s->in_desc->comp[0].depth == 10) { ++ input_quantization_offset = 0.0009765774014f; ++ input_y_scale = 1.1678082192f; ++ input_uv_scale = 1.1417410714f; ++ } else if (s->in_desc->comp[0].depth == 16) { ++ input_quantization_offset = 0.0002441443503f; ++ input_y_scale = 1.1689497717f; ++ input_uv_scale = 1.1428571429f; ++ } ++ ++ if (s->out_desc->comp[0].depth == 10) { ++ output_quantization_offset = 0.0009765774014f; ++ } ++ ++ if (s->out_desc->comp[0].depth > 8) { ++ output_quantization_factor = 256.0f; ++ output_quantization_scale = 65535.0f; ++ } ++ + av_bprint_init(&constants, 2048, AV_BPRINT_SIZE_UNLIMITED); + + av_bprintf(&constants, ".version 3.2\n"); @@ -2433,6 +2468,12 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + CONSTANT(".f32 desat_param = %f", s->desat_param); + CONSTANT(".f32 pq_max_lum_div_ref_white = %f", (float)(ST2084_MAX_LUMINANCE / REFERENCE_WHITE_ALT)); + CONSTANT(".f32 ref_white_div_pq_max_lum = %f", (float)(REFERENCE_WHITE_ALT / ST2084_MAX_LUMINANCE)); ++ CONSTANT(".f32 input_quantization_offset = %f", input_quantization_offset); ++ CONSTANT(".f32 input_y_scale = %f", input_y_scale); ++ CONSTANT(".f32 input_uv_scale = %f", input_uv_scale); ++ CONSTANT(".f32 output_quantization_offset = %f", output_quantization_offset); ++ CONSTANT(".f32 output_quantization_factor = %f", output_quantization_factor); ++ CONSTANT(".f32 output_quantization_scale = %f", output_quantization_scale); + CONSTANT_M("rgb_matrix", (s->dovi ? s->dovi->nonlinear : rgb_matrix)); + CONSTANT_M("yuv_matrix", yuv_matrix); + CONSTANT_A(".u8 rgb2rgb_passthrough = %i", 1, in_pri == out_pri); @@ -2862,7 +2903,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + { "enabled", 0, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, FLAGS, .unit = "tradeoff" }, + { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, + { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, -+ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, + { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, + { NULL }, +}; diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch index 98723bbc95..ccef661793 100644 --- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch +++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -65,7 +65,7 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl =================================================================== --- FFmpeg.orig/libavfilter/opencl/colorspace_common.cl +++ FFmpeg/libavfilter/opencl/colorspace_common.cl -@@ -17,7 +17,21 @@ +@@ -17,7 +17,17 @@ */ #define ST2084_MAX_LUMINANCE 10000.0f @@ -80,15 +80,11 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl +#define ARIB_B67_B 0.28466892f +#define ARIB_B67_C 0.55991073f + -+#define LIMITED_BLACK 0.06256109482f -+#define LIMITED_WHITE 0.9188660802f -+#define LIMITED_RANGE 0.8563049854f -+ +#define FLOAT_EPS 1e-6f #if chroma_loc == 1 #define chroma_sample(a,b,c,d) (((a) + (c)) * 0.5f) -@@ -33,92 +47,134 @@ +@@ -33,88 +43,134 @@ #define chroma_sample(a,b,c,d) (((a) + (b) + (c) + (d)) * 0.25f) #endif @@ -268,57 +264,69 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl + float3 yuv2rgb(float y, float u, float v) { -#ifdef FULL_RANGE_IN - u -= 0.5f; v -= 0.5f; --#else +- u -= 0.5f; v -= 0.5f; ++ y += mix(0.0f, INPUT_QUANTIZATION_OFFSET, y > 0.0f); ++ u += mix(0.0f, INPUT_QUANTIZATION_OFFSET, u > 0.0f); ++ v += mix(0.0f, INPUT_QUANTIZATION_OFFSET, v > 0.0f); ++#ifndef FULL_RANGE_IN ++ y = INPUT_Y_SCALE * y - 0.07305936073f; ++ u = INPUT_UV_SCALE * u - 0.5714285714f; ++ v = INPUT_UV_SCALE * v - 0.5714285714f; + #else - y = (y * 255.0f - 16.0f) / 219.0f; - u = (u * 255.0f - 128.0f) / 224.0f; - v = (v * 255.0f - 128.0f) / 224.0f; --#endif ++ u -= 0.5f; v -= 0.5f; + #endif float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; - float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; -+#ifndef FULL_RANGE_IN -+ r = (r - LIMITED_BLACK) / LIMITED_RANGE; -+ g = (g - LIMITED_BLACK) / LIMITED_RANGE; -+ b = (b - LIMITED_BLACK) / LIMITED_RANGE; -+#endif - return (float3)(r, g, b); - } - -@@ -135,22 +191,25 @@ float3 yuv2lrgb(float3 yuv) { - } - - float3 rgb2yuv(float r, float g, float b) { -+#ifndef FULL_RANGE_OUT -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; -+#endif +@@ -138,19 +194,39 @@ float3 rgb2yuv(float r, float g, float b float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; -#ifdef FULL_RANGE_OUT - u += 0.5f; v += 0.5f; --#else +- u += 0.5f; v += 0.5f; ++#ifndef FULL_RANGE_OUT ++ #ifdef RESCALE_LIMITED_RANGE_OUTPUT ++ y = floor(((219.0f * y + 16.0f) * 256.0f) + 0.5f) / 65535.0f; ++ u = floor(((224.0f * u + 128.0f) * 256.0f) + 0.5f) / 65535.0f; ++ v = floor(((224.0f * v + 128.0f) * 256.0f) + 0.5f) / 65535.0f; ++ #else ++ y = floor((219.0f * y + 16.0f) + 0.5f) / 255.0f; ++ u = floor((224.0f * u + 128.0f) + 0.5f) / 255.0f; ++ v = floor((224.0f * v + 128.0f) + 0.5f) / 255.0f; ++ #endif + #else - y = (219.0f * y + 16.0f) / 255.0f; - u = (224.0f * u + 128.0f) / 255.0f; - v = (224.0f * v + 128.0f) / 255.0f; --#endif ++ u += 0.5f; v += 0.5f; ++#endif ++#ifdef OUTPUT_QUANTIZATION_OFFSET ++ y -= mix(0.0f, OUTPUT_QUANTIZATION_OFFSET, y > 0.0f); ++ u -= mix(0.0f, OUTPUT_QUANTIZATION_OFFSET, u > 0.0f); ++ v -= mix(0.0f, OUTPUT_QUANTIZATION_OFFSET, v > 0.0f); + #endif return (float3)(y, u, v); } float rgb2y(float r, float g, float b) { -+#ifndef FULL_RANGE_OUT -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; -+#endif float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; - y = (219.0f * y + 16.0f) / 255.0f; ++#ifndef FULL_RANGE_OUT ++ #ifdef RESCALE_LIMITED_RANGE_OUTPUT ++ y = floor(((219.0f * y + 16.0f) * 256.0f) + 0.5f) / 65535.0f; ++ #else ++ y = floor((219.0f * y + 16.0f) + 0.5f) / 255.0f; ++ #endif ++#endif ++#ifdef OUTPUT_QUANTIZATION_OFFSET ++ y -= mix(0.0f, OUTPUT_QUANTIZATION_OFFSET, y > 0.0f); ++#endif return y; } -@@ -188,18 +247,101 @@ float3 lrgb2lrgb(float3 c) { +@@ -188,18 +264,101 @@ float3 lrgb2lrgb(float3 c) { #endif } @@ -1450,7 +1458,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c switch(ctx->tonemap) { case TONEMAP_GAMMA: -@@ -139,53 +353,178 @@ static int tonemap_opencl_init(AVFilterC +@@ -139,59 +353,207 @@ static int tonemap_opencl_init(AVFilterC if (isnan(ctx->param)) ctx->param = 0.3f; break; @@ -1644,7 +1652,36 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->range_in == AVCOL_RANGE_JPEG) av_bprintf(&header, "#define FULL_RANGE_IN\n"); -@@ -199,19 +538,41 @@ static int tonemap_opencl_init(AVFilterC + if (ctx->range_out == AVCOL_RANGE_JPEG) + av_bprintf(&header, "#define FULL_RANGE_OUT\n"); + ++ if (ctx->in_desc->comp[0].depth == 10) { ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0009765774014f\n"); ++ av_bprintf(&header, "#define INPUT_Y_SCALE 1.1678082192f\n"); ++ av_bprintf(&header, "#define INPUT_UV_SCALE 1.1417410714f\n"); ++ } else if (ctx->in_desc->comp[0].depth == 16) { ++ // 16bit texture is actually 12bit ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0002441443503f\n"); ++ av_bprintf(&header, "#define INPUT_Y_SCALE 1.1689497717f\n"); ++ av_bprintf(&header, "#define INPUT_UV_SCALE 1.1428571429f\n"); ++ } else { ++ // should not happen, but for completeness ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0f\n"); ++ av_bprintf(&header, "#define INPUT_Y_SCALE 1.0f\n"); ++ av_bprintf(&header, "#define INPUT_UV_SCALE 1.0f\n"); ++ } ++ ++ if (ctx->out_desc->comp[0].depth > 8) { ++ av_bprintf(&header, "#define RESCALE_LIMITED_RANGE_OUTPUT\n"); ++ } ++ ++ if (ctx->in_desc->comp[0].depth == 10) ++ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET 0.0009765774014f\n"); ++ + av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc); + + if (rgb2rgb_passthrough) +@@ -199,19 +561,41 @@ static int tonemap_opencl_init(AVFilterC else ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); @@ -1693,7 +1730,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); goto fail; } -@@ -219,24 +580,23 @@ static int tonemap_opencl_init(AVFilterC +@@ -219,24 +603,23 @@ static int tonemap_opencl_init(AVFilterC ff_fill_rgb2yuv_table(luma_dst, rgb2yuv); ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv); @@ -1733,7 +1770,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); opencl_sources[0] = header.str; -@@ -254,46 +614,171 @@ static int tonemap_opencl_init(AVFilterC +@@ -254,46 +637,171 @@ static int tonemap_opencl_init(AVFilterC CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " "command queue %d.\n", cle); @@ -1904,13 +1941,14 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", + av_get_pix_fmt_name(out_format)); + return AVERROR(ENOSYS); - } ++ } + if (in_desc->comp[0].depth != 10 && in_desc->comp[0].depth != 16) { + av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", + in_desc->comp[0].depth); + return AVERROR(ENOSYS); -+ } -+ + } + +- s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? AV_PIX_FMT_NV12 : s->format; + ctx->in_fmt = in_format; + ctx->out_fmt = out_format; + ctx->in_desc = in_desc; @@ -1918,12 +1956,11 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + ctx->in_planes = av_pix_fmt_count_planes(in_format); + ctx->out_planes = av_pix_fmt_count_planes(out_format); + ctx->ocf.output_format = out_format; - -- s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? AV_PIX_FMT_NV12 : s->format; ++ ret = ff_opencl_filter_config_output(outlink); if (ret < 0) return ret; -@@ -308,13 +793,46 @@ static int launch_kernel(AVFilterContext +@@ -308,13 +816,46 @@ static int launch_kernel(AVFilterContext size_t global_work[2]; size_t local_work[2]; cl_int cle; @@ -1972,7 +2009,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c local_work[0] = 16; local_work[1] = 16; -@@ -338,13 +856,10 @@ static int tonemap_opencl_filter_frame(A +@@ -338,13 +879,10 @@ static int tonemap_opencl_filter_frame(A AVFilterContext *avctx = inlink->dst; AVFilterLink *outlink = avctx->outputs[0]; TonemapOpenCLContext *ctx = avctx->priv; @@ -1987,7 +2024,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", av_get_pix_fmt_name(input->format), -@@ -363,8 +878,49 @@ static int tonemap_opencl_filter_frame(A +@@ -363,8 +901,49 @@ static int tonemap_opencl_filter_frame(A if (err < 0) goto fail; @@ -2039,7 +2076,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->trc != -1) output->color_trc = ctx->trc; -@@ -385,72 +941,50 @@ static int tonemap_opencl_filter_frame(A +@@ -385,72 +964,50 @@ static int tonemap_opencl_filter_frame(A ctx->range_out = output->color_range; ctx->chroma_loc = output->chroma_location; @@ -2135,7 +2172,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_frame_free(&input); av_frame_free(&output); return err; -@@ -458,24 +992,9 @@ fail: +@@ -458,24 +1015,9 @@ fail: static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx) { @@ -2162,7 +2199,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ff_opencl_filter_uninit(avctx); } -@@ -483,37 +1002,50 @@ static av_cold void tonemap_opencl_unini +@@ -483,37 +1025,50 @@ static av_cold void tonemap_opencl_unini #define OFFSET(x) offsetof(TonemapOpenCLContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption tonemap_opencl_options[] = { @@ -2239,7 +2276,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + { "enabled", 0, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "tradeoff" }, + { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, + { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, DBL_MIN, DBL_MAX, FLAGS }, -+ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, + { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, { .dbl = 0.2 }, 0, DBL_MAX, FLAGS }, { NULL } }; diff --git a/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch b/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch index 714199449a..2ec57a4921 100644 --- a/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch +++ b/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch @@ -40,7 +40,7 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal =================================================================== --- /dev/null +++ FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal -@@ -0,0 +1,894 @@ +@@ -0,0 +1,914 @@ +/* + * Copyright (c) 2024 Gnattu OC + * @@ -83,10 +83,6 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal + +#define FLOAT_EPS 1e-6f + -+#define LIMITED_BLACK 0.06256109482f -+#define LIMITED_WHITE 0.9188660802f -+#define LIMITED_RANGE 0.8563049854f -+ +constant float ref_white [[function_constant(0)]]; +constant float tone_param [[function_constant(1)]]; +constant float desat_param [[function_constant(2)]]; @@ -274,15 +270,21 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal +// ------------ +// Color conversion +float3 yuv2rgb(float y, float u, float v) { -+ u -= 0.5f; -+ v -= 0.5f; ++ y += mix(0.0f, 0.0009765774014f, y > 0.0f); ++ u += mix(0.0f, 0.0009765774014f, u > 0.0f); ++ v += mix(0.0f, 0.0009765774014f, v > 0.0f); ++ if (is_full_range_in) { ++ u -= 0.5f; ++ v -= 0.5f; ++ } else { ++ y = 1.1678082192f * y - 0.07305936073f; ++ u = 1.1417410714f * u - 0.5714285714f; ++ v = 1.1417410714f * v - 0.5714285714f; ++ } + float r = (y * rgb_matrix_1[0]) + (u * rgb_matrix_1[1]) + (v * rgb_matrix_1[2]); + float g = (y * rgb_matrix_2[0]) + (u * rgb_matrix_2[1]) + (v * rgb_matrix_2[2]); + float b = (y * rgb_matrix_3[0]) + (u * rgb_matrix_3[1]) + (v * rgb_matrix_3[2]); + float3 c = float3(r, g, b); -+ if (!is_full_range_in) { -+ c = (c - LIMITED_BLACK) / LIMITED_RANGE; -+ } + return c; +} + @@ -298,26 +300,44 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal +} + +float3 rgb2yuv(float r, float g, float b) { -+ if (!is_full_range_out) { -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; ++ float y = (r * yuv_matrix_1[0]) + (g * yuv_matrix_1[1]) + (b * yuv_matrix_1[2]); ++ float u = (r * yuv_matrix_2[0]) + (g * yuv_matrix_2[1]) + (b * yuv_matrix_2[2]); ++ float v = (r * yuv_matrix_3[0]) + (g * yuv_matrix_3[1]) + (b * yuv_matrix_3[2]); ++ if (is_full_range_out) { ++ u += 0.5f; ++ v += 0.5f; ++ } else { ++ if (enable_dither) { ++ y = floor((219.0f * y + 16.0f) + 0.5f) / 255.0f; ++ u = floor((224.0f * u + 128.0f) + 0.5f) / 255.0f; ++ v = floor((224.0f * v + 128.0f) + 0.5f) / 255.0f; ++ } else { ++ y = floor(((219.0f * y + 16.0f) * 256.0f) + 0.5f) / 65535.0f; ++ u = floor(((224.0f * u + 128.0f) * 256.0f) + 0.5f) / 65535.0f; ++ v = floor(((224.0f * v + 128.0f) * 256.0f) + 0.5f) / 65535.0f; ++ } ++ } ++ // in rgb2yuv conversion, enable_dither means output is 8bit ++ if (!enable_dither) { ++ y -= mix(0.0f, 0.0009765774014f, y > 0.0f); ++ u -= mix(0.0f, 0.0009765774014f, u > 0.0f); ++ v -= mix(0.0f, 0.0009765774014f, v > 0.0f); + } -+ float y = (r*yuv_matrix_1[0]) + (g*yuv_matrix_1[1]) + (b*yuv_matrix_1[2]); -+ float u = (r*yuv_matrix_2[0]) + (g*yuv_matrix_2[1]) + (b*yuv_matrix_2[2]); -+ float v = (r*yuv_matrix_3[0]) + (g*yuv_matrix_3[1]) + (b*yuv_matrix_3[2]); -+ u += 0.5f; -+ v += 0.5f; + return float3(y, u, v); +} + +float rgb2y(float r, float g, float b) { ++ float y = (r*yuv_matrix_1[0]) + (g*yuv_matrix_1[1]) + (b*yuv_matrix_1[2]); + if (!is_full_range_out) { -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; ++ if (enable_dither) { ++ y = floor((219.0f * y + 16.0f) + 0.5f) / 255.0f; ++ } else { ++ y = floor(((219.0f * y + 16.0f) * 256.0f) + 0.5f) / 65535.0f; ++ } ++ } ++ if (!enable_dither) { ++ y -= mix(0.0f, 0.0009765774014f, y > 0.0f); + } -+ float y = (r*yuv_matrix_1[0]) + (g*yuv_matrix_1[1]) + (b*yuv_matrix_1[2]); + return y; +} + @@ -2062,7 +2082,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_videotoolbox.m + { "apply_dovi", "Apply Dolby Vision metadata if possible", OFFSET(apply_dovi), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, + { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, + { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, DBL_MIN, DBL_MAX, FLAGS }, -+ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, + { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, { .dbl = 0.2 }, 0, DBL_MAX, FLAGS }, + { NULL } +}; From 1dd26f0a4a901274481a12689b86c9599be97284 Mon Sep 17 00:00:00 2001 From: gnattu Date: Sat, 12 Oct 2024 16:40:46 +0800 Subject: [PATCH 2/7] lavc/videotoolboxenc: always set pixel buffer info --- ...et-videotoolboxenc-pixel-buffer-info.patch | 31 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 32 insertions(+) create mode 100644 debian/patches/0076-alway-set-videotoolboxenc-pixel-buffer-info.patch diff --git a/debian/patches/0076-alway-set-videotoolboxenc-pixel-buffer-info.patch b/debian/patches/0076-alway-set-videotoolboxenc-pixel-buffer-info.patch new file mode 100644 index 0000000000..df99d8c8bc --- /dev/null +++ b/debian/patches/0076-alway-set-videotoolboxenc-pixel-buffer-info.patch @@ -0,0 +1,31 @@ +Index: FFmpeg/libavcodec/videotoolboxenc.c +=================================================================== +--- FFmpeg.orig/libavcodec/videotoolboxenc.c ++++ FFmpeg/libavcodec/videotoolboxenc.c +@@ -1034,9 +1034,10 @@ static int create_cv_pixel_buffer_info(A + CFNumberRef width_num = NULL; + CFNumberRef height_num = NULL; + CFMutableDictionaryRef pixel_buffer_info = NULL; ++ enum AVPixelFormat pix_fmt = avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX ? avctx->sw_pix_fmt : avctx->pix_fmt; + int cv_color_format; + int status = get_cv_pixel_format(avctx, +- avctx->pix_fmt, ++ pix_fmt, + avctx->color_range, + &cv_color_format, + NULL); +@@ -1687,11 +1688,9 @@ static int vtenc_configure_encoder(AVCod + kCFBooleanTrue); + } + +- if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) { +- status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info); +- if (status) +- goto init_cleanup; +- } ++ status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info); ++ if (status) ++ goto init_cleanup; + + vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0; + diff --git a/debian/patches/series b/debian/patches/series index 28c2cdfaeb..89de7fae22 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -73,3 +73,4 @@ 0073-add-12bit-decoding-on-videotoolbox.patch 0074-fix-the-sub2video-perf-regressions.patch 0075-allow-vpl-qsv-to-init-with-the-legacy-msdk-path.patch +0076-alway-set-videotoolboxenc-pixel-buffer-info.patch From 669c8de11101a341ead07afffe136fb0be2071c4 Mon Sep 17 00:00:00 2001 From: gnattu Date: Sun, 13 Oct 2024 16:53:37 +0800 Subject: [PATCH 3/7] avfilter/tonemap_*: better documentation for constants --- .../patches/0004-add-cuda-tonemap-impl.patch | 54 ++++++++++++++----- ...-and-code-refactor-to-opencl-tonemap.patch | 41 +++++++------- ...2-add-vf-tonemap-videotoolbox-filter.patch | 6 ++- 3 files changed, 68 insertions(+), 33 deletions(-) diff --git a/debian/patches/0004-add-cuda-tonemap-impl.patch b/debian/patches/0004-add-cuda-tonemap-impl.patch index 0cfa26fe39..1ffe51d421 100644 --- a/debian/patches/0004-add-cuda-tonemap-impl.patch +++ b/debian/patches/0004-add-cuda-tonemap-impl.patch @@ -257,7 +257,7 @@ Index: FFmpeg/libavfilter/colorspace.h =================================================================== --- FFmpeg.orig/libavfilter/colorspace.h +++ FFmpeg/libavfilter/colorspace.h -@@ -23,10 +23,42 @@ +@@ -23,10 +23,69 @@ #include "libavutil/csp.h" #include "libavutil/frame.h" #include "libavutil/pixfmt.h" @@ -276,6 +276,33 @@ Index: FFmpeg/libavfilter/colorspace.h +#define ARIB_B67_C 0.55991073f +#define FLOAT_EPS 1e-6f + ++/* ++ * Pre-calculated constants used for YCbCr narrow to full range scaling ++ * The base formula is the quantization formula derived from BT.2100 Table 9: ++ * Where Y' = Round [(219 * E′ + 16) * 2^(n−8)], ++ * Cb',Cr' = Round [(224 * E′ + 128) * 2^(n−8)] ++ * where E' is the signal value in [0,1] range and n is the bit depth. Round is rounding towards 0. ++ * For inputs, the inverse is used where we are solving for E' for a given Y'Cb'Cr' normalized by GPU ++ * in [0,1] range. The GPU will interpret color as a 16bit int value, and solving for E' becomes: ++ * E' = (Y' - 2^(n-4)) / (219 * 2^(n-8)) ++ * E' = (Cb'Cr' - 2^(n-1)) / (7 * 2^(n-3)) ++ * Y' and Cb'Cr' is in the range of [0, 2^n - 1] in original formula, we need to scale the value normalized to [0,1]: ++ * C = Y'Cb'Cr' * (2^n - 1) ++ * Which means the input scale = (2^n - 1) / (219 * 2^(n-8)) and input offset = 2^(n-4)) / (219 * 2^(n-8)) for Y' and ++ * 2^(n-1)) / (7 * 2^(n-3)) for Cb'Cr' ++ */ ++#define INPUT_Y_SCALE_10B 1.1678082192f ++#define INPUT_UV_SCALE_10B 1.1417410714f ++#define INPUT_Y_SCALE_12B 1.1689497717f ++#define INPUT_UV_SCALE_12B 1.1428571429f ++ ++/* ++ * GPU will interpret 10bit and 12bit color as 16bit int ++ * but that will introduce a slight (2^(16-n))/2^16 quantization offset which we want to compensate for ++*/ ++#define QUANTIZATION_OFFSET_10B 0.0009765774014f ++#define QUANTIZATION_OFFSET_12B 0.0002441443503f ++ +// Parsed metadata from the Dolby Vision RPU +struct DoviMetadata { + float nonlinear_offset[3]; // input offset ("ycc_to_rgb_offset") @@ -300,7 +327,7 @@ Index: FFmpeg/libavfilter/colorspace.h void ff_matrix_mul_3x3(double dst[3][3], const double src1[3][3], const double src2[3][3]); void ff_matrix_mul_3x3_vec(double dst[3], const double vec[3], const double mat[3][3]); -@@ -38,4 +70,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC +@@ -38,4 +97,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC double ff_determine_signal_peak(AVFrame *in); void ff_update_hdr_metadata(AVFrame *in, double peak); @@ -1785,7 +1812,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c =================================================================== --- /dev/null +++ FFmpeg/libavfilter/vf_tonemap_cuda.c -@@ -0,0 +1,1162 @@ +@@ -0,0 +1,1165 @@ +/* + * This file is part of FFmpeg. + * @@ -2413,22 +2440,25 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + return ret; + + if (s->in_desc->comp[0].depth == 10) { -+ input_quantization_offset = 0.0009765774014f; -+ input_y_scale = 1.1678082192f; -+ input_uv_scale = 1.1417410714f; ++ input_quantization_offset = QUANTIZATION_OFFSET_10B; ++ input_y_scale = INPUT_Y_SCALE_10B; ++ input_uv_scale = INPUT_UV_SCALE_10B; + } else if (s->in_desc->comp[0].depth == 16) { -+ input_quantization_offset = 0.0002441443503f; -+ input_y_scale = 1.1689497717f; -+ input_uv_scale = 1.1428571429f; ++ // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing ++ // and what videos are actually encoded in ++ input_quantization_offset = QUANTIZATION_OFFSET_12B; ++ input_y_scale = INPUT_Y_SCALE_12B; ++ input_uv_scale = INPUT_UV_SCALE_12B; + } + + if (s->out_desc->comp[0].depth == 10) { -+ output_quantization_offset = 0.0009765774014f; ++ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters ++ output_quantization_offset = QUANTIZATION_OFFSET_10B; + } + + if (s->out_desc->comp[0].depth > 8) { -+ output_quantization_factor = 256.0f; -+ output_quantization_scale = 65535.0f; ++ output_quantization_factor = 256.0f; // 2^(16-8) ++ output_quantization_scale = 65535.0f; // 2^16 - 1 + } + + av_bprint_init(&constants, 2048, AV_BPRINT_SIZE_UNLIMITED); diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch index ccef661793..99bb46248f 100644 --- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch +++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -1458,7 +1458,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c switch(ctx->tonemap) { case TONEMAP_GAMMA: -@@ -139,59 +353,207 @@ static int tonemap_opencl_init(AVFilterC +@@ -139,59 +353,210 @@ static int tonemap_opencl_init(AVFilterC if (isnan(ctx->param)) ctx->param = 0.3f; break; @@ -1656,16 +1656,18 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_bprintf(&header, "#define FULL_RANGE_OUT\n"); + if (ctx->in_desc->comp[0].depth == 10) { -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0009765774014f\n"); -+ av_bprintf(&header, "#define INPUT_Y_SCALE 1.1678082192f\n"); -+ av_bprintf(&header, "#define INPUT_UV_SCALE 1.1417410714f\n"); ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_10B); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_10B); + } else if (ctx->in_desc->comp[0].depth == 16) { -+ // 16bit texture is actually 12bit -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0002441443503f\n"); -+ av_bprintf(&header, "#define INPUT_Y_SCALE 1.1689497717f\n"); -+ av_bprintf(&header, "#define INPUT_UV_SCALE 1.1428571429f\n"); ++ // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing ++ // and what videos are actually encoded in ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_12B); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_12B); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_12B); + } else { + // should not happen, but for completeness ++ // once we can tell if the input is real 16bit we can use this branch + av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0f\n"); + av_bprintf(&header, "#define INPUT_Y_SCALE 1.0f\n"); + av_bprintf(&header, "#define INPUT_UV_SCALE 1.0f\n"); @@ -1675,13 +1677,14 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + av_bprintf(&header, "#define RESCALE_LIMITED_RANGE_OUTPUT\n"); + } + -+ if (ctx->in_desc->comp[0].depth == 10) -+ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET 0.0009765774014f\n"); ++ if (ctx->out_desc->comp[0].depth == 10) ++ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B); ++ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters + av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc); if (rgb2rgb_passthrough) -@@ -199,19 +561,41 @@ static int tonemap_opencl_init(AVFilterC +@@ -199,19 +564,41 @@ static int tonemap_opencl_init(AVFilterC else ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); @@ -1730,7 +1733,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); goto fail; } -@@ -219,24 +603,23 @@ static int tonemap_opencl_init(AVFilterC +@@ -219,24 +606,23 @@ static int tonemap_opencl_init(AVFilterC ff_fill_rgb2yuv_table(luma_dst, rgb2yuv); ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv); @@ -1770,7 +1773,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); opencl_sources[0] = header.str; -@@ -254,46 +637,171 @@ static int tonemap_opencl_init(AVFilterC +@@ -254,46 +640,171 @@ static int tonemap_opencl_init(AVFilterC CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " "command queue %d.\n", cle); @@ -1960,7 +1963,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ret = ff_opencl_filter_config_output(outlink); if (ret < 0) return ret; -@@ -308,13 +816,46 @@ static int launch_kernel(AVFilterContext +@@ -308,13 +819,46 @@ static int launch_kernel(AVFilterContext size_t global_work[2]; size_t local_work[2]; cl_int cle; @@ -2009,7 +2012,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c local_work[0] = 16; local_work[1] = 16; -@@ -338,13 +879,10 @@ static int tonemap_opencl_filter_frame(A +@@ -338,13 +882,10 @@ static int tonemap_opencl_filter_frame(A AVFilterContext *avctx = inlink->dst; AVFilterLink *outlink = avctx->outputs[0]; TonemapOpenCLContext *ctx = avctx->priv; @@ -2024,7 +2027,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", av_get_pix_fmt_name(input->format), -@@ -363,8 +901,49 @@ static int tonemap_opencl_filter_frame(A +@@ -363,8 +904,49 @@ static int tonemap_opencl_filter_frame(A if (err < 0) goto fail; @@ -2076,7 +2079,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->trc != -1) output->color_trc = ctx->trc; -@@ -385,72 +964,50 @@ static int tonemap_opencl_filter_frame(A +@@ -385,72 +967,50 @@ static int tonemap_opencl_filter_frame(A ctx->range_out = output->color_range; ctx->chroma_loc = output->chroma_location; @@ -2172,7 +2175,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_frame_free(&input); av_frame_free(&output); return err; -@@ -458,24 +1015,9 @@ fail: +@@ -458,24 +1018,9 @@ fail: static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx) { @@ -2199,7 +2202,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ff_opencl_filter_uninit(avctx); } -@@ -483,37 +1025,50 @@ static av_cold void tonemap_opencl_unini +@@ -483,37 +1028,50 @@ static av_cold void tonemap_opencl_unini #define OFFSET(x) offsetof(TonemapOpenCLContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption tonemap_opencl_options[] = { diff --git a/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch b/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch index 2ec57a4921..30e2fcff49 100644 --- a/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch +++ b/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch @@ -40,7 +40,7 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal =================================================================== --- /dev/null +++ FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal -@@ -0,0 +1,914 @@ +@@ -0,0 +1,916 @@ +/* + * Copyright (c) 2024 Gnattu OC + * @@ -269,6 +269,7 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal + +// ------------ +// Color conversion ++// See libavfilter/colorspace.h for derivation of these constants +float3 yuv2rgb(float y, float u, float v) { + y += mix(0.0f, 0.0009765774014f, y > 0.0f); + u += mix(0.0f, 0.0009765774014f, u > 0.0f); @@ -317,7 +318,8 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal + v = floor(((224.0f * v + 128.0f) * 256.0f) + 0.5f) / 65535.0f; + } + } -+ // in rgb2yuv conversion, enable_dither means output is 8bit ++ // in rgb2yuv conversion, enable_dither means output is 8bit in metal pipeline ++ // use this to check if we need the 10bit offset + if (!enable_dither) { + y -= mix(0.0f, 0.0009765774014f, y > 0.0f); + u -= mix(0.0f, 0.0009765774014f, u > 0.0f); From 81e35eae8d6b7bc660e6901d8acb22f75bbad176 Mon Sep 17 00:00:00 2001 From: gnattu Date: Sun, 13 Oct 2024 18:57:20 +0800 Subject: [PATCH 4/7] avfilter/tonemap_[opencl,cuda]: use macro --- .../patches/0004-add-cuda-tonemap-impl.patch | 31 +++++++------- ...-and-code-refactor-to-opencl-tonemap.patch | 42 ++++++++----------- 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/debian/patches/0004-add-cuda-tonemap-impl.patch b/debian/patches/0004-add-cuda-tonemap-impl.patch index 1ffe51d421..b480685f55 100644 --- a/debian/patches/0004-add-cuda-tonemap-impl.patch +++ b/debian/patches/0004-add-cuda-tonemap-impl.patch @@ -257,7 +257,7 @@ Index: FFmpeg/libavfilter/colorspace.h =================================================================== --- FFmpeg.orig/libavfilter/colorspace.h +++ FFmpeg/libavfilter/colorspace.h -@@ -23,10 +23,69 @@ +@@ -23,10 +23,66 @@ #include "libavutil/csp.h" #include "libavutil/frame.h" #include "libavutil/pixfmt.h" @@ -291,17 +291,14 @@ Index: FFmpeg/libavfilter/colorspace.h + * Which means the input scale = (2^n - 1) / (219 * 2^(n-8)) and input offset = 2^(n-4)) / (219 * 2^(n-8)) for Y' and + * 2^(n-1)) / (7 * 2^(n-3)) for Cb'Cr' + */ -+#define INPUT_Y_SCALE_10B 1.1678082192f -+#define INPUT_UV_SCALE_10B 1.1417410714f -+#define INPUT_Y_SCALE_12B 1.1689497717f -+#define INPUT_UV_SCALE_12B 1.1428571429f ++#define INPUT_Y_SCALE(n) ((double)((1 << (n)) - 1) / (219 * (1 << ((n) - 8)))) ++#define INPUT_UV_SCALE(n) ((double)((1 << (n)) - 1) / (224 * (1 << ((n) - 8)))) + +/* + * GPU will interpret 10bit and 12bit color as 16bit int + * but that will introduce a slight (2^(16-n))/2^16 quantization offset which we want to compensate for +*/ -+#define QUANTIZATION_OFFSET_10B 0.0009765774014f -+#define QUANTIZATION_OFFSET_12B 0.0002441443503f ++#define QUANTIZATION_OFFSET(n) ((double)(1 << (16 - (n))) / ((1 << 16) - 1)) + +// Parsed metadata from the Dolby Vision RPU +struct DoviMetadata { @@ -327,7 +324,7 @@ Index: FFmpeg/libavfilter/colorspace.h void ff_matrix_mul_3x3(double dst[3][3], const double src1[3][3], const double src2[3][3]); void ff_matrix_mul_3x3_vec(double dst[3], const double vec[3], const double mat[3][3]); -@@ -38,4 +97,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC +@@ -38,4 +94,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC double ff_determine_signal_peak(AVFrame *in); void ff_update_hdr_metadata(AVFrame *in, double peak); @@ -2439,21 +2436,21 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0) + return ret; + -+ if (s->in_desc->comp[0].depth == 10) { -+ input_quantization_offset = QUANTIZATION_OFFSET_10B; -+ input_y_scale = INPUT_Y_SCALE_10B; -+ input_uv_scale = INPUT_UV_SCALE_10B; -+ } else if (s->in_desc->comp[0].depth == 16) { ++ if (s->in_desc->comp[0].depth == 16) { + // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing + // and what videos are actually encoded in -+ input_quantization_offset = QUANTIZATION_OFFSET_12B; -+ input_y_scale = INPUT_Y_SCALE_12B; -+ input_uv_scale = INPUT_UV_SCALE_12B; ++ input_quantization_offset = QUANTIZATION_OFFSET(12); ++ input_y_scale = INPUT_Y_SCALE(12); ++ input_uv_scale = INPUT_UV_SCALE(12); ++ } else { ++ input_quantization_offset = QUANTIZATION_OFFSET(s->in_desc->comp[0].depth); ++ input_y_scale = INPUT_Y_SCALE(s->in_desc->comp[0].depth); ++ input_uv_scale = INPUT_UV_SCALE(s->in_desc->comp[0].depth); + } + + if (s->out_desc->comp[0].depth == 10) { + // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters -+ output_quantization_offset = QUANTIZATION_OFFSET_10B; ++ output_quantization_offset = QUANTIZATION_OFFSET(10); + } + + if (s->out_desc->comp[0].depth > 8) { diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch index 99bb46248f..a59af09f78 100644 --- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch +++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -1458,7 +1458,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c switch(ctx->tonemap) { case TONEMAP_GAMMA: -@@ -139,59 +353,210 @@ static int tonemap_opencl_init(AVFilterC +@@ -139,59 +353,204 @@ static int tonemap_opencl_init(AVFilterC if (isnan(ctx->param)) ctx->param = 0.3f; break; @@ -1655,22 +1655,16 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->range_out == AVCOL_RANGE_JPEG) av_bprintf(&header, "#define FULL_RANGE_OUT\n"); -+ if (ctx->in_desc->comp[0].depth == 10) { -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B); -+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_10B); -+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_10B); -+ } else if (ctx->in_desc->comp[0].depth == 16) { ++ if (ctx->in_desc->comp[0].depth == 16) { + // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing + // and what videos are actually encoded in -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_12B); -+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_12B); -+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_12B); ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(12)); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(12)); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(12)); + } else { -+ // should not happen, but for completeness -+ // once we can tell if the input is real 16bit we can use this branch -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0f\n"); -+ av_bprintf(&header, "#define INPUT_Y_SCALE 1.0f\n"); -+ av_bprintf(&header, "#define INPUT_UV_SCALE 1.0f\n"); ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(ctx->in_desc->comp[0].depth)); + } + + if (ctx->out_desc->comp[0].depth > 8) { @@ -1678,13 +1672,13 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + } + + if (ctx->out_desc->comp[0].depth == 10) -+ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B); ++ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(10)); + // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters + av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc); if (rgb2rgb_passthrough) -@@ -199,19 +564,41 @@ static int tonemap_opencl_init(AVFilterC +@@ -199,19 +558,41 @@ static int tonemap_opencl_init(AVFilterC else ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); @@ -1733,7 +1727,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); goto fail; } -@@ -219,24 +606,23 @@ static int tonemap_opencl_init(AVFilterC +@@ -219,24 +600,23 @@ static int tonemap_opencl_init(AVFilterC ff_fill_rgb2yuv_table(luma_dst, rgb2yuv); ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv); @@ -1773,7 +1767,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); opencl_sources[0] = header.str; -@@ -254,46 +640,171 @@ static int tonemap_opencl_init(AVFilterC +@@ -254,46 +634,171 @@ static int tonemap_opencl_init(AVFilterC CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " "command queue %d.\n", cle); @@ -1963,7 +1957,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ret = ff_opencl_filter_config_output(outlink); if (ret < 0) return ret; -@@ -308,13 +819,46 @@ static int launch_kernel(AVFilterContext +@@ -308,13 +813,46 @@ static int launch_kernel(AVFilterContext size_t global_work[2]; size_t local_work[2]; cl_int cle; @@ -2012,7 +2006,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c local_work[0] = 16; local_work[1] = 16; -@@ -338,13 +882,10 @@ static int tonemap_opencl_filter_frame(A +@@ -338,13 +876,10 @@ static int tonemap_opencl_filter_frame(A AVFilterContext *avctx = inlink->dst; AVFilterLink *outlink = avctx->outputs[0]; TonemapOpenCLContext *ctx = avctx->priv; @@ -2027,7 +2021,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", av_get_pix_fmt_name(input->format), -@@ -363,8 +904,49 @@ static int tonemap_opencl_filter_frame(A +@@ -363,8 +898,49 @@ static int tonemap_opencl_filter_frame(A if (err < 0) goto fail; @@ -2079,7 +2073,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->trc != -1) output->color_trc = ctx->trc; -@@ -385,72 +967,50 @@ static int tonemap_opencl_filter_frame(A +@@ -385,72 +961,50 @@ static int tonemap_opencl_filter_frame(A ctx->range_out = output->color_range; ctx->chroma_loc = output->chroma_location; @@ -2175,7 +2169,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_frame_free(&input); av_frame_free(&output); return err; -@@ -458,24 +1018,9 @@ fail: +@@ -458,24 +1012,9 @@ fail: static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx) { @@ -2202,7 +2196,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ff_opencl_filter_uninit(avctx); } -@@ -483,37 +1028,50 @@ static av_cold void tonemap_opencl_unini +@@ -483,37 +1022,50 @@ static av_cold void tonemap_opencl_unini #define OFFSET(x) offsetof(TonemapOpenCLContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption tonemap_opencl_options[] = { From 7dd58ae4d382bc4379c08c0730cb5ddd0758a818 Mon Sep 17 00:00:00 2001 From: gnattu Date: Sun, 13 Oct 2024 22:43:34 +0800 Subject: [PATCH 5/7] avfilter/tonemap_opencl: print more digits to header --- ...-eetf-and-code-refactor-to-opencl-tonemap.patch | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch index a59af09f78..959294dac9 100644 --- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch +++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -1658,13 +1658,13 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + if (ctx->in_desc->comp[0].depth == 16) { + // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing + // and what videos are actually encoded in -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(12)); -+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(12)); -+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(12)); ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %.13lff\n", QUANTIZATION_OFFSET(12)); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %.13lff\n", INPUT_Y_SCALE(12)); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %.13lff\n", INPUT_UV_SCALE(12)); + } else { -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(ctx->in_desc->comp[0].depth)); -+ av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(ctx->in_desc->comp[0].depth)); -+ av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %.13lff\n", QUANTIZATION_OFFSET(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %.13lff\n", INPUT_Y_SCALE(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %.13lff\n", INPUT_UV_SCALE(ctx->in_desc->comp[0].depth)); + } + + if (ctx->out_desc->comp[0].depth > 8) { @@ -1672,7 +1672,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + } + + if (ctx->out_desc->comp[0].depth == 10) -+ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(10)); ++ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %.13lff\n", QUANTIZATION_OFFSET(10)); + // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters + av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc); From 4c121420ab0682735831d8b2f4d6520939ca3571 Mon Sep 17 00:00:00 2001 From: gnattu Date: Mon, 14 Oct 2024 07:46:54 +0800 Subject: [PATCH 6/7] avfilter/tonemap_[opencl,cuda]: extend precision for constants --- .../patches/0004-add-cuda-tonemap-impl.patch | 30 ++-- ...-and-code-refactor-to-opencl-tonemap.patch | 135 +++++++++--------- 2 files changed, 83 insertions(+), 82 deletions(-) diff --git a/debian/patches/0004-add-cuda-tonemap-impl.patch b/debian/patches/0004-add-cuda-tonemap-impl.patch index b480685f55..d88e0f85c7 100644 --- a/debian/patches/0004-add-cuda-tonemap-impl.patch +++ b/debian/patches/0004-add-cuda-tonemap-impl.patch @@ -2468,12 +2468,12 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + av_bprintf(&constants, ".visible .const .align " #align " " decl ";\n", __VA_ARGS__) +#define CONSTANT(decl, ...) CONSTANT_A(decl, 4, __VA_ARGS__) +#define CONSTANT_M(a, b) \ -+ CONSTANT(".f32 " a "[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f}", \ ++ CONSTANT(".f32 " a "[] = {%.13lf, %.13lf, %.13lf, %.13lf, %.13lf, %.13lf, %.13lf, %.13lf, %.13lf}", \ + b[0][0], b[0][1], b[0][2], \ + b[1][0], b[1][1], b[1][2], \ + b[2][0], b[2][1], b[2][2]) +#define CONSTANT_C(a, b, c, d) \ -+ CONSTANT(".f32 " a "[] = {%f, %f, %f}", \ ++ CONSTANT(".f32 " a "[] = {%.13lf, %.13lf, %.13lf}", \ + b, c, d) + + CONSTANT(".u32 depth_src = %i", (int)s->in_desc->comp[0].depth); @@ -2488,19 +2488,19 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + CONSTANT(".u32 chroma_loc_dst = %i", (int)s->out_chroma_loc); + CONSTANT(".u32 tonemap_func = %i", (int)s->tonemap); + CONSTANT(".u32 enable_dither = %i", (int)(s->in_desc->comp[0].depth > s->out_desc->comp[0].depth)); -+ CONSTANT(".f32 dither_size = %f", (float)ff_fruit_dither_size); -+ CONSTANT(".f32 dither_quantization = %f", (float)((1 << s->out_desc->comp[0].depth) - 1)); -+ CONSTANT(".f32 ref_white = %f", REFERENCE_WHITE_ALT); -+ CONSTANT(".f32 tone_param = %f", s->param); -+ CONSTANT(".f32 desat_param = %f", s->desat_param); -+ CONSTANT(".f32 pq_max_lum_div_ref_white = %f", (float)(ST2084_MAX_LUMINANCE / REFERENCE_WHITE_ALT)); -+ CONSTANT(".f32 ref_white_div_pq_max_lum = %f", (float)(REFERENCE_WHITE_ALT / ST2084_MAX_LUMINANCE)); -+ CONSTANT(".f32 input_quantization_offset = %f", input_quantization_offset); -+ CONSTANT(".f32 input_y_scale = %f", input_y_scale); -+ CONSTANT(".f32 input_uv_scale = %f", input_uv_scale); -+ CONSTANT(".f32 output_quantization_offset = %f", output_quantization_offset); -+ CONSTANT(".f32 output_quantization_factor = %f", output_quantization_factor); -+ CONSTANT(".f32 output_quantization_scale = %f", output_quantization_scale); ++ CONSTANT(".f32 dither_size = %.1f", (float)ff_fruit_dither_size); ++ CONSTANT(".f32 dither_quantization = %.1f", (float)((1 << s->out_desc->comp[0].depth) - 1)); ++ CONSTANT(".f32 ref_white = %.4f", REFERENCE_WHITE_ALT); ++ CONSTANT(".f32 tone_param = %.4f", s->param); ++ CONSTANT(".f32 desat_param = %.4f", s->desat_param); ++ CONSTANT(".f32 pq_max_lum_div_ref_white = %.13lf", (float)(ST2084_MAX_LUMINANCE / REFERENCE_WHITE_ALT)); ++ CONSTANT(".f32 ref_white_div_pq_max_lum = %.13lf", (float)(REFERENCE_WHITE_ALT / ST2084_MAX_LUMINANCE)); ++ CONSTANT(".f32 input_quantization_offset = %.13lf", input_quantization_offset); ++ CONSTANT(".f32 input_y_scale = %.13lf", input_y_scale); ++ CONSTANT(".f32 input_uv_scale = %.13lf", input_uv_scale); ++ CONSTANT(".f32 output_quantization_offset = %.13lf", output_quantization_offset); ++ CONSTANT(".f32 output_quantization_factor = %.13lf", output_quantization_factor); ++ CONSTANT(".f32 output_quantization_scale = %.13lf", output_quantization_scale); + CONSTANT_M("rgb_matrix", (s->dovi ? s->dovi->nonlinear : rgb_matrix)); + CONSTANT_M("yuv_matrix", yuv_matrix); + CONSTANT_A(".u8 rgb2rgb_passthrough = %i", 1, in_pri == out_pri); diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch index 959294dac9..fedbdc2955 100644 --- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch +++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -16,7 +16,7 @@ Index: FFmpeg/libavfilter/opencl.c for (i = 0; i < 3; i++) { for (j = 0; j < 3; j++) - av_bprintf(buf, " %.5ff,", mat[i][j]); -+ av_bprintf(buf, " %ff,", mat[i][j]); ++ av_bprintf(buf, " %.13lff,", mat[i][j]); av_bprintf(buf, "\n"); } av_bprintf(buf, "};\n"); @@ -183,33 +183,21 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl + float den = 1.0f + ST2084_C3 * xpow; + return native_powr(1.0f + num / den, ST2084_M2); +#endif - } - --float inverse_eotf_bt1886(float c) { -- return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f); ++} ++ +float inverse_eotf_st2084(float x) { + x *= ref_white_div_pq_max_lum; + return inverse_eotf_st2084_common(x); - } - --float oetf_bt709(float c) { -- c = c < 0.0f ? 0.0f : c; -- float r1 = 4.5f * c; -- float r2 = 1.099f * powr(c, 0.45f) - 0.099f; -- return c < 0.018f ? r1 : r2; --} --float inverse_oetf_bt709(float c) { -- float r1 = c / 4.5f; -- float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f); -- return c < 0.081f ? r1 : r2; ++} ++ +float4 eotf_st2084x4(float4 x) { + x.x = eotf_st2084_common(x.x); + x.y = eotf_st2084_common(x.y); + x.z = eotf_st2084_common(x.z); + x.w = eotf_st2084_common(x.w); + return x * pq_max_lum_div_ref_white; - } - ++} ++ +float4 inverse_eotf_st2084x4(float4 x) { + x *= ref_white_div_pq_max_lum; + x.x = inverse_eotf_st2084_common(x.x); @@ -232,20 +220,32 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl + return x <= (1.0f / 12.0f) + ? native_sqrt(3.0f * x) + : (ARIB_B67_A * native_log(12.0f * x - ARIB_B67_B) + ARIB_B67_C); -+} -+ + } + +-float inverse_eotf_bt1886(float c) { +- return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f); +float inverse_oetf_arib_b67(float x) { + x = fmax(x, 0.0f); + return x <= 0.5f + ? (x * x) * (1.0f / 3.0f) + : (native_exp((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); -+} -+ + } + +-float oetf_bt709(float c) { +- c = c < 0.0f ? 0.0f : c; +- float r1 = 4.5f * c; +- float r2 = 1.099f * powr(c, 0.45f) - 0.099f; +- return c < 0.018f ? r1 : r2; +-} +-float inverse_oetf_bt709(float c) { +- float r1 = c / 4.5f; +- float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f); +- return c < 0.081f ? r1 : r2; +// linearizer for HLG/ARIB-B67 +float eotf_arib_b67(float x) { + return ootf_1_2(inverse_oetf_arib_b67(x)) * 5.0f; -+} -+ + } + +// delinearizer for HLG/ARIB-B67 +float inverse_eotf_arib_b67(float x) { + return oetf_arib_b67(inverse_ootf_1_2(x / 5.0f)); @@ -265,13 +265,13 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl float3 yuv2rgb(float y, float u, float v) { -#ifdef FULL_RANGE_IN - u -= 0.5f; v -= 0.5f; -+ y += mix(0.0f, INPUT_QUANTIZATION_OFFSET, y > 0.0f); -+ u += mix(0.0f, INPUT_QUANTIZATION_OFFSET, u > 0.0f); -+ v += mix(0.0f, INPUT_QUANTIZATION_OFFSET, v > 0.0f); ++ y += mix(0.0f, input_quantization_offset, y > 0.0f); ++ u += mix(0.0f, input_quantization_offset, u > 0.0f); ++ v += mix(0.0f, input_quantization_offset, v > 0.0f); +#ifndef FULL_RANGE_IN -+ y = INPUT_Y_SCALE * y - 0.07305936073f; -+ u = INPUT_UV_SCALE * u - 0.5714285714f; -+ v = INPUT_UV_SCALE * v - 0.5714285714f; ++ y = input_y_scale * y - 0.07305936073f; ++ u = input_uv_scale * u - 0.5714285714f; ++ v = input_uv_scale * v - 0.5714285714f; #else - y = (y * 255.0f - 16.0f) / 219.0f; - u = (u * 255.0f - 128.0f) / 224.0f; @@ -280,7 +280,7 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl #endif float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; -@@ -138,19 +194,39 @@ float3 rgb2yuv(float r, float g, float b +@@ -138,19 +194,37 @@ float3 rgb2yuv(float r, float g, float b float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; @@ -301,12 +301,10 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl - u = (224.0f * u + 128.0f) / 255.0f; - v = (224.0f * v + 128.0f) / 255.0f; + u += 0.5f; v += 0.5f; -+#endif -+#ifdef OUTPUT_QUANTIZATION_OFFSET -+ y -= mix(0.0f, OUTPUT_QUANTIZATION_OFFSET, y > 0.0f); -+ u -= mix(0.0f, OUTPUT_QUANTIZATION_OFFSET, u > 0.0f); -+ v -= mix(0.0f, OUTPUT_QUANTIZATION_OFFSET, v > 0.0f); #endif ++ y -= mix(0.0f, output_quantization_offset, y > 0.0f); ++ u -= mix(0.0f, output_quantization_offset, u > 0.0f); ++ v -= mix(0.0f, output_quantization_offset, v > 0.0f); return (float3)(y, u, v); } @@ -326,7 +324,7 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl return y; } -@@ -188,18 +264,101 @@ float3 lrgb2lrgb(float3 c) { +@@ -188,18 +262,101 @@ float3 lrgb2lrgb(float3 c) { #endif } @@ -1290,7 +1288,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + if (ctx->lin_lut) { + av_bprintf(buf, "__constant float lin_lut[%d] = {\n", LUT_SIZE); + for (i = 0; i < LUT_SIZE; i++) -+ av_bprintf(buf, " %ff,", ctx->lin_lut[i]); ++ av_bprintf(buf, " %.13ff,", ctx->lin_lut[i]); + av_bprintf(buf, "};\n"); + } +} @@ -1458,7 +1456,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c switch(ctx->tonemap) { case TONEMAP_GAMMA: -@@ -139,59 +353,204 @@ static int tonemap_opencl_init(AVFilterC +@@ -139,59 +353,207 @@ static int tonemap_opencl_init(AVFilterC if (isnan(ctx->param)) ctx->param = 0.3f; break; @@ -1610,9 +1608,9 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + av_bprintf(&header, "__constant float scene_threshold = %.4ff;\n", ctx->scene_threshold); + -+ av_bprintf(&header, "__constant float pq_max_lum_div_ref_white = %ff;\n", ++ av_bprintf(&header, "__constant float pq_max_lum_div_ref_white = %.13ff;\n", + (ST2084_MAX_LUMINANCE / REFERENCE_WHITE_ALT)); -+ av_bprintf(&header, "__constant float ref_white_div_pq_max_lum = %ff;\n", ++ av_bprintf(&header, "__constant float ref_white_div_pq_max_lum = %.13ff;\n", + (REFERENCE_WHITE_ALT / ST2084_MAX_LUMINANCE)); + av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]); @@ -1658,27 +1656,30 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + if (ctx->in_desc->comp[0].depth == 16) { + // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing + // and what videos are actually encoded in -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %.13lff\n", QUANTIZATION_OFFSET(12)); -+ av_bprintf(&header, "#define INPUT_Y_SCALE %.13lff\n", INPUT_Y_SCALE(12)); -+ av_bprintf(&header, "#define INPUT_UV_SCALE %.13lff\n", INPUT_UV_SCALE(12)); ++ av_bprintf(&header, "__constant float input_quantization_offset = %.13lff;\n", QUANTIZATION_OFFSET(12)); ++ av_bprintf(&header, "__constant float input_y_scale = %.13lff;\n", INPUT_Y_SCALE(12)); ++ av_bprintf(&header, "__constant float input_uv_scale = %.13lff;\n", INPUT_UV_SCALE(12)); + } else { -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %.13lff\n", QUANTIZATION_OFFSET(ctx->in_desc->comp[0].depth)); -+ av_bprintf(&header, "#define INPUT_Y_SCALE %.13lff\n", INPUT_Y_SCALE(ctx->in_desc->comp[0].depth)); -+ av_bprintf(&header, "#define INPUT_UV_SCALE %.13lff\n", INPUT_UV_SCALE(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "__constant float input_quantization_offset = %.13lff;\n", QUANTIZATION_OFFSET(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "__constant float input_y_scale = %.13lff;\n", INPUT_Y_SCALE(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "__constant float input_uv_scale = %.13lff;\n", INPUT_UV_SCALE(ctx->in_desc->comp[0].depth)); + } + + if (ctx->out_desc->comp[0].depth > 8) { + av_bprintf(&header, "#define RESCALE_LIMITED_RANGE_OUTPUT\n"); + } + -+ if (ctx->out_desc->comp[0].depth == 10) -+ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %.13lff\n", QUANTIZATION_OFFSET(10)); -+ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters ++ if (ctx->out_desc->comp[0].depth == 10) { ++ av_bprintf(&header, "__constant float output_quantization_offset = %.13lff;\n", QUANTIZATION_OFFSET(10)); ++ } else { ++ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters ++ av_bprintf(&header, "__constant float output_quantization_offset = 0.0f;\n"); ++ } + av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc); if (rgb2rgb_passthrough) -@@ -199,19 +558,41 @@ static int tonemap_opencl_init(AVFilterC +@@ -199,19 +561,41 @@ static int tonemap_opencl_init(AVFilterC else ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); @@ -1701,7 +1702,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + for (j = 0; j < 3; j++) + ycc2rgb_offset[i] -= ctx->dovi->nonlinear[i][j] * ctx->dovi->nonlinear_offset[j]; + } -+ av_bprintf(&header, "__constant float3 ycc2rgb_offset = {%ff, %ff, %ff};\n", ++ av_bprintf(&header, "__constant float3 ycc2rgb_offset = {%.13ff, %.13ff, %.13ff};\n", + ycc2rgb_offset[0], ycc2rgb_offset[1], ycc2rgb_offset[2]); + ff_matrix_mul_3x3(lms2rgb, dovi_lms2rgb_matrix, ctx->dovi->linear); + ff_opencl_print_const_matrix_3x3(&header, "rgb_matrix", ctx->dovi->nonlinear); //ycc2rgb @@ -1727,7 +1728,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); goto fail; } -@@ -219,24 +600,23 @@ static int tonemap_opencl_init(AVFilterC +@@ -219,24 +603,23 @@ static int tonemap_opencl_init(AVFilterC ff_fill_rgb2yuv_table(luma_dst, rgb2yuv); ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv); @@ -1738,7 +1739,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c - av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n", - av_q2d(luma_src->cr), av_q2d(luma_src->cg), av_q2d(luma_src->cb)); - av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n", -+ av_bprintf(&header, "__constant float3 luma_dst = {%ff, %ff, %ff};\n", ++ av_bprintf(&header, "__constant float3 luma_dst = {%.13ff, %.13ff, %.13ff};\n", av_q2d(luma_dst->cr), av_q2d(luma_dst->cg), av_q2d(luma_dst->cb)); - av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]); @@ -1767,7 +1768,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); opencl_sources[0] = header.str; -@@ -254,46 +634,171 @@ static int tonemap_opencl_init(AVFilterC +@@ -254,46 +637,171 @@ static int tonemap_opencl_init(AVFilterC CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " "command queue %d.\n", cle); @@ -1938,14 +1939,13 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", + av_get_pix_fmt_name(out_format)); + return AVERROR(ENOSYS); -+ } + } + if (in_desc->comp[0].depth != 10 && in_desc->comp[0].depth != 16) { + av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", + in_desc->comp[0].depth); + return AVERROR(ENOSYS); - } - -- s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? AV_PIX_FMT_NV12 : s->format; ++ } ++ + ctx->in_fmt = in_format; + ctx->out_fmt = out_format; + ctx->in_desc = in_desc; @@ -1953,11 +1953,12 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + ctx->in_planes = av_pix_fmt_count_planes(in_format); + ctx->out_planes = av_pix_fmt_count_planes(out_format); + ctx->ocf.output_format = out_format; -+ + +- s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? AV_PIX_FMT_NV12 : s->format; ret = ff_opencl_filter_config_output(outlink); if (ret < 0) return ret; -@@ -308,13 +813,46 @@ static int launch_kernel(AVFilterContext +@@ -308,13 +816,46 @@ static int launch_kernel(AVFilterContext size_t global_work[2]; size_t local_work[2]; cl_int cle; @@ -2006,7 +2007,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c local_work[0] = 16; local_work[1] = 16; -@@ -338,13 +876,10 @@ static int tonemap_opencl_filter_frame(A +@@ -338,13 +879,10 @@ static int tonemap_opencl_filter_frame(A AVFilterContext *avctx = inlink->dst; AVFilterLink *outlink = avctx->outputs[0]; TonemapOpenCLContext *ctx = avctx->priv; @@ -2021,7 +2022,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", av_get_pix_fmt_name(input->format), -@@ -363,8 +898,49 @@ static int tonemap_opencl_filter_frame(A +@@ -363,8 +901,49 @@ static int tonemap_opencl_filter_frame(A if (err < 0) goto fail; @@ -2073,7 +2074,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->trc != -1) output->color_trc = ctx->trc; -@@ -385,72 +961,50 @@ static int tonemap_opencl_filter_frame(A +@@ -385,72 +964,50 @@ static int tonemap_opencl_filter_frame(A ctx->range_out = output->color_range; ctx->chroma_loc = output->chroma_location; @@ -2169,7 +2170,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_frame_free(&input); av_frame_free(&output); return err; -@@ -458,24 +1012,9 @@ fail: +@@ -458,24 +1015,9 @@ fail: static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx) { @@ -2196,7 +2197,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ff_opencl_filter_uninit(avctx); } -@@ -483,37 +1022,50 @@ static av_cold void tonemap_opencl_unini +@@ -483,37 +1025,50 @@ static av_cold void tonemap_opencl_unini #define OFFSET(x) offsetof(TonemapOpenCLContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption tonemap_opencl_options[] = { From 96edc7d0b268991b37041a815c6690c89144851a Mon Sep 17 00:00:00 2001 From: gnattu Date: Mon, 14 Oct 2024 13:49:53 +0800 Subject: [PATCH 7/7] avfilter/tonemap_opencl: cleanup leftover --- ...-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch index fedbdc2955..9595d01ea8 100644 --- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch +++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -280,7 +280,7 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl #endif float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; -@@ -138,19 +194,37 @@ float3 rgb2yuv(float r, float g, float b +@@ -138,19 +194,35 @@ float3 rgb2yuv(float r, float g, float b float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; @@ -318,13 +318,11 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl + y = floor((219.0f * y + 16.0f) + 0.5f) / 255.0f; + #endif +#endif -+#ifdef OUTPUT_QUANTIZATION_OFFSET -+ y -= mix(0.0f, OUTPUT_QUANTIZATION_OFFSET, y > 0.0f); -+#endif ++ y -= mix(0.0f, output_quantization_offset, y > 0.0f); return y; } -@@ -188,18 +262,101 @@ float3 lrgb2lrgb(float3 c) { +@@ -188,18 +260,101 @@ float3 lrgb2lrgb(float3 c) { #endif }