diff --git a/debian/patches/0004-add-cuda-tonemap-impl.patch b/debian/patches/0004-add-cuda-tonemap-impl.patch index 0cfa26fe39..1ffe51d421 100644 --- a/debian/patches/0004-add-cuda-tonemap-impl.patch +++ b/debian/patches/0004-add-cuda-tonemap-impl.patch @@ -257,7 +257,7 @@ Index: FFmpeg/libavfilter/colorspace.h =================================================================== --- FFmpeg.orig/libavfilter/colorspace.h +++ FFmpeg/libavfilter/colorspace.h -@@ -23,10 +23,42 @@ +@@ -23,10 +23,69 @@ #include "libavutil/csp.h" #include "libavutil/frame.h" #include "libavutil/pixfmt.h" @@ -276,6 +276,33 @@ Index: FFmpeg/libavfilter/colorspace.h +#define ARIB_B67_C 0.55991073f +#define FLOAT_EPS 1e-6f + ++/* ++ * Pre-calculated constants used for YCbCr narrow to full range scaling ++ * The base formula is the quantization formula derived from BT.2100 Table 9: ++ * Where Y' = Round [(219 * E′ + 16) * 2^(n−8)], ++ * Cb',Cr' = Round [(224 * E′ + 128) * 2^(n−8)] ++ * where E' is the signal value in [0,1] range and n is the bit depth. Round is rounding to the nearest integer, i.e. Sign(x) * Floor(|x| + 0.5). ++ * For inputs, the inverse is used where we are solving for E' for a given Y'Cb'Cr' normalized by GPU ++ * in [0,1] range. 
The GPU will interpret color as a 16bit int value, and solving for E' becomes: ++ * E' = (Y' - 2^(n-4)) / (219 * 2^(n-8)) ++ * E' = (Cb'Cr' - 2^(n-1)) / (7 * 2^(n-3)) ++ * Y' and Cb'Cr' are in the range of [0, 2^n - 1] in the original formula, so we need to scale the value normalized to [0,1]: ++ * C = Y'Cb'Cr' * (2^n - 1) ++ * Which means the input scale = (2^n - 1) / (219 * 2^(n-8)) and input offset = 2^(n-4) / (219 * 2^(n-8)) for Y' and ++ * 2^(n-1) / (7 * 2^(n-3)) for Cb'Cr'. NOTE(review): the _12B scales below equal 256/219 and 256/224, i.e. 2^n rather than 2^n - 1 in the numerator - confirm this is intended. ++ */ ++#define INPUT_Y_SCALE_10B 1.1678082192f ++#define INPUT_UV_SCALE_10B 1.1417410714f ++#define INPUT_Y_SCALE_12B 1.1689497717f ++#define INPUT_UV_SCALE_12B 1.1428571429f ++ ++/* ++ * GPU will interpret 10bit and 12bit color as 16bit int ++ * but that will introduce a slight 2^(16-n) / (2^16 - 1) quantization offset which we want to compensate for ++*/ ++#define QUANTIZATION_OFFSET_10B 0.0009765774014f ++#define QUANTIZATION_OFFSET_12B 0.0002441443503f ++ +// Parsed metadata from the Dolby Vision RPU +struct DoviMetadata { + float nonlinear_offset[3]; // input offset ("ycc_to_rgb_offset") @@ -300,7 +327,7 @@ Index: FFmpeg/libavfilter/colorspace.h void ff_matrix_mul_3x3(double dst[3][3], const double src1[3][3], const double src2[3][3]); void ff_matrix_mul_3x3_vec(double dst[3], const double vec[3], const double mat[3][3]); -@@ -38,4 +70,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC +@@ -38,4 +97,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC double ff_determine_signal_peak(AVFrame *in); void ff_update_hdr_metadata(AVFrame *in, double peak); @@ -1785,7 +1812,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c =================================================================== --- /dev/null +++ FFmpeg/libavfilter/vf_tonemap_cuda.c -@@ -0,0 +1,1162 @@ +@@ -0,0 +1,1165 @@ +/* + * This file is part of FFmpeg. 
+ * @@ -2413,22 +2440,25 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + return ret; + + if (s->in_desc->comp[0].depth == 10) { -+ input_quantization_offset = 0.0009765774014f; -+ input_y_scale = 1.1678082192f; -+ input_uv_scale = 1.1417410714f; ++ input_quantization_offset = QUANTIZATION_OFFSET_10B; ++ input_y_scale = INPUT_Y_SCALE_10B; ++ input_uv_scale = INPUT_UV_SCALE_10B; + } else if (s->in_desc->comp[0].depth == 16) { -+ input_quantization_offset = 0.0002441443503f; -+ input_y_scale = 1.1689497717f; -+ input_uv_scale = 1.1428571429f; ++ // Assume 16bit is actually 12bit for now as that is what the hardware decoders produce ++ // and what videos are actually encoded in ++ input_quantization_offset = QUANTIZATION_OFFSET_12B; ++ input_y_scale = INPUT_Y_SCALE_12B; ++ input_uv_scale = INPUT_UV_SCALE_12B; + } + + if (s->out_desc->comp[0].depth == 10) { -+ output_quantization_offset = 0.0009765774014f; ++ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters ++ output_quantization_offset = QUANTIZATION_OFFSET_10B; + } + + if (s->out_desc->comp[0].depth > 8) { -+ output_quantization_factor = 256.0f; -+ output_quantization_scale = 65535.0f; ++ output_quantization_factor = 256.0f; // 2^(16-8) ++ output_quantization_scale = 65535.0f; // 2^16 - 1 + } + + av_bprint_init(&constants, 2048, AV_BPRINT_SIZE_UNLIMITED); diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch index ccef661793..99bb46248f 100644 --- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch +++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -1458,7 +1458,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c switch(ctx->tonemap) { case TONEMAP_GAMMA: -@@ -139,59 +353,207 @@ static int tonemap_opencl_init(AVFilterC +@@ -139,59 +353,210 @@ static int 
tonemap_opencl_init(AVFilterC if (isnan(ctx->param)) ctx->param = 0.3f; break; @@ -1656,16 +1656,18 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_bprintf(&header, "#define FULL_RANGE_OUT\n"); + if (ctx->in_desc->comp[0].depth == 10) { -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0009765774014f\n"); -+ av_bprintf(&header, "#define INPUT_Y_SCALE 1.1678082192f\n"); -+ av_bprintf(&header, "#define INPUT_UV_SCALE 1.1417410714f\n"); ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %.8ef\n", QUANTIZATION_OFFSET_10B); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %.8ef\n", INPUT_Y_SCALE_10B); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %.8ef\n", INPUT_UV_SCALE_10B); + } else if (ctx->in_desc->comp[0].depth == 16) { -+ // 16bit texture is actually 12bit -+ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0002441443503f\n"); -+ av_bprintf(&header, "#define INPUT_Y_SCALE 1.1689497717f\n"); -+ av_bprintf(&header, "#define INPUT_UV_SCALE 1.1428571429f\n"); ++ // Assume 16bit is actually 12bit for now as that is what the hardware decoders produce ++ // and what videos are actually encoded in ++ av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %.8ef\n", QUANTIZATION_OFFSET_12B); ++ av_bprintf(&header, "#define INPUT_Y_SCALE %.8ef\n", INPUT_Y_SCALE_12B); ++ av_bprintf(&header, "#define INPUT_UV_SCALE %.8ef\n", INPUT_UV_SCALE_12B); + } else { + // should not happen, but for completeness ++ // once we can tell if the input is real 16bit we can use this branch + av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0f\n"); + av_bprintf(&header, "#define INPUT_Y_SCALE 1.0f\n"); + av_bprintf(&header, "#define INPUT_UV_SCALE 1.0f\n"); @@ -1675,13 +1677,14 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + av_bprintf(&header, "#define RESCALE_LIMITED_RANGE_OUTPUT\n"); + } + -+ if (ctx->in_desc->comp[0].depth == 10) -+ av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET 0.0009765774014f\n"); ++ if (ctx->out_desc->comp[0].depth == 10) ++ 
av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %.8ef\n", QUANTIZATION_OFFSET_10B); ++ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters + av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc); if (rgb2rgb_passthrough) -@@ -199,19 +561,41 @@ static int tonemap_opencl_init(AVFilterC +@@ -199,19 +564,41 @@ static int tonemap_opencl_init(AVFilterC else ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); @@ -1730,7 +1733,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); goto fail; } -@@ -219,24 +603,23 @@ static int tonemap_opencl_init(AVFilterC +@@ -219,24 +606,23 @@ static int tonemap_opencl_init(AVFilterC ff_fill_rgb2yuv_table(luma_dst, rgb2yuv); ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv); @@ -1770,7 +1773,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); opencl_sources[0] = header.str; -@@ -254,46 +637,171 @@ static int tonemap_opencl_init(AVFilterC +@@ -254,46 +640,171 @@ static int tonemap_opencl_init(AVFilterC CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " "command queue %d.\n", cle); @@ -1960,7 +1963,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ret = ff_opencl_filter_config_output(outlink); if (ret < 0) return ret; -@@ -308,13 +816,46 @@ static int launch_kernel(AVFilterContext +@@ -308,13 +819,46 @@ static int launch_kernel(AVFilterContext size_t global_work[2]; size_t local_work[2]; cl_int cle; @@ -2009,7 +2012,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c local_work[0] = 16; local_work[1] = 16; -@@ -338,13 +879,10 @@ static int tonemap_opencl_filter_frame(A +@@ -338,13 +882,10 @@ static int tonemap_opencl_filter_frame(A AVFilterContext *avctx = inlink->dst; AVFilterLink *outlink = avctx->outputs[0]; TonemapOpenCLContext *ctx = avctx->priv; @@ -2024,7 +2027,7 @@ Index: 
FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", av_get_pix_fmt_name(input->format), -@@ -363,8 +901,49 @@ static int tonemap_opencl_filter_frame(A +@@ -363,8 +904,49 @@ static int tonemap_opencl_filter_frame(A if (err < 0) goto fail; @@ -2076,7 +2079,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->trc != -1) output->color_trc = ctx->trc; -@@ -385,72 +964,50 @@ static int tonemap_opencl_filter_frame(A +@@ -385,72 +967,50 @@ static int tonemap_opencl_filter_frame(A ctx->range_out = output->color_range; ctx->chroma_loc = output->chroma_location; @@ -2172,7 +2175,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_frame_free(&input); av_frame_free(&output); return err; -@@ -458,24 +1015,9 @@ fail: +@@ -458,24 +1018,9 @@ fail: static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx) { @@ -2199,7 +2202,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ff_opencl_filter_uninit(avctx); } -@@ -483,37 +1025,50 @@ static av_cold void tonemap_opencl_unini +@@ -483,37 +1028,50 @@ static av_cold void tonemap_opencl_unini #define OFFSET(x) offsetof(TonemapOpenCLContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption tonemap_opencl_options[] = { diff --git a/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch b/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch index 2ec57a4921..30e2fcff49 100644 --- a/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch +++ b/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch @@ -40,7 +40,7 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal =================================================================== --- /dev/null +++ FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal -@@ -0,0 +1,914 @@ +@@ -0,0 +1,916 @@ +/* + * Copyright (c) 2024 Gnattu OC + * @@ -269,6 +269,7 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal + +// ------------ +// Color 
conversion ++// See libavfilter/colorspace.h for derivation of these constants +float3 yuv2rgb(float y, float u, float v) { + y += mix(0.0f, 0.0009765774014f, y > 0.0f); + u += mix(0.0f, 0.0009765774014f, u > 0.0f); @@ -317,7 +318,8 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal + v = floor(((224.0f * v + 128.0f) * 256.0f) + 0.5f) / 65535.0f; + } + } -+ // in rgb2yuv conversion, enable_dither means output is 8bit ++ // in rgb2yuv conversion, enable_dither means output is 8bit in metal pipeline ++ // use this to check if we need the 10bit offset + if (!enable_dither) { + y -= mix(0.0f, 0.0009765774014f, y > 0.0f); + u -= mix(0.0f, 0.0009765774014f, u > 0.0f);