diff --git a/debian/patches/0004-add-cuda-tonemap-impl.patch b/debian/patches/0004-add-cuda-tonemap-impl.patch index 2cbf84d317..d88e0f85c7 100644 --- a/debian/patches/0004-add-cuda-tonemap-impl.patch +++ b/debian/patches/0004-add-cuda-tonemap-impl.patch @@ -257,7 +257,7 @@ Index: FFmpeg/libavfilter/colorspace.h =================================================================== --- FFmpeg.orig/libavfilter/colorspace.h +++ FFmpeg/libavfilter/colorspace.h -@@ -23,10 +23,42 @@ +@@ -23,10 +23,66 @@ #include "libavutil/csp.h" #include "libavutil/frame.h" #include "libavutil/pixfmt.h" @@ -276,6 +276,30 @@ Index: FFmpeg/libavfilter/colorspace.h +#define ARIB_B67_C 0.55991073f +#define FLOAT_EPS 1e-6f + ++/* ++ * Pre-calculated constants used for YCbCr narrow to full range scaling ++ * The base formula is the quantization formula derived from BT.2100 Table 9: ++ * Where Y' = Round [(219 * E′ + 16) * 2^(n−8)], ++ * Cb',Cr' = Round [(224 * E′ + 128) * 2^(n−8)] ++ * where E' is the signal value in [0,1] range and n is the bit depth. Round is rounding towards 0. ++ * For inputs, the inverse is used where we are solving for E' for a given Y'Cb'Cr' normalized by GPU ++ * in [0,1] range. The GPU will interpret color as a 16bit int value, and solving for E' becomes: ++ * E' = (Y' - 2^(n-4)) / (219 * 2^(n-8)) ++ * E' = (Cb'Cr' - 2^(n-1)) / (7 * 2^(n-3)) ++ * Y' and Cb'Cr' is in the range of [0, 2^n - 1] in original formula, we need to scale the value normalized to [0,1]: ++ * C = Y'Cb'Cr' * (2^n - 1) ++ * Which means the input scale = (2^n - 1) / (219 * 2^(n-8)) and input offset = 2^(n-4)) / (219 * 2^(n-8)) for Y' and ++ * 2^(n-1)) / (7 * 2^(n-3)) for Cb'Cr' ++ */ ++#define INPUT_Y_SCALE(n) ((double)((1 << (n)) - 1) / (219 * (1 << ((n) - 8)))) ++#define INPUT_UV_SCALE(n) ((double)((1 << (n)) - 1) / (224 * (1 << ((n) - 8)))) ++ ++/* ++ * GPU will interpret 10bit and 12bit color as 16bit int ++ * but that will introduce a slight (2^(16-n))/2^16 quantization offset which we want to compensate for ++*/ ++#define QUANTIZATION_OFFSET(n) ((double)(1 << (16 - (n))) / ((1 << 16) - 1)) ++ +// Parsed metadata from the Dolby Vision RPU +struct DoviMetadata { + float nonlinear_offset[3]; // input offset ("ycc_to_rgb_offset") @@ -300,7 +324,7 @@ Index: FFmpeg/libavfilter/colorspace.h void ff_matrix_mul_3x3(double dst[3][3], const double src1[3][3], const double src2[3][3]); void ff_matrix_mul_3x3_vec(double dst[3], const double vec[3], const double mat[3][3]); -@@ -38,4 +70,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC +@@ -38,4 +94,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC double ff_determine_signal_peak(AVFrame *in); void ff_update_hdr_metadata(AVFrame *in, double peak); @@ -324,7 +348,7 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h =================================================================== --- /dev/null +++ FFmpeg/libavfilter/cuda/colorspace_common.h -@@ -0,0 +1,338 @@ +@@ -0,0 +1,348 @@ +/* + * This file is part of FFmpeg. + * @@ -361,10 +385,6 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h +#define ARIB_B67_B 0.28466892f +#define ARIB_B67_C 0.55991073f + -+#define LIMITED_BLACK 0.06256109482f -+#define LIMITED_WHITE 0.9188660802f -+#define LIMITED_RANGE 0.8563049854f -+ +#define FLOAT_EPS 1e-6f + +extern __constant__ const float ref_white; @@ -379,6 +399,13 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h +extern __constant__ const float yuv_matrix[9], rgb_matrix[9]; +extern __constant__ const float pq_max_lum_div_ref_white; +extern __constant__ const float ref_white_div_pq_max_lum; ++extern __constant__ const float input_quantization_offset; ++extern __constant__ const float output_quantization_offset; ++extern __constant__ const float input_y_scale; ++extern __constant__ const float input_uv_scale; ++extern __constant__ const float output_quantization_factor; ++extern __constant__ const float output_quantization_scale; ++ + +static __inline__ __device__ float get_luma_dst(float3 c, const float3& luma_dst) { + return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z; @@ -501,16 +528,20 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h +} + +static __inline__ __device__ float3 yuv2rgb(float y, float u, float v) { -+ u -= 0.5f; -+ v -= 0.5f; ++ y += y > 0.0f ? input_quantization_offset : 0.0f; ++ u += u > 0.0f ? input_quantization_offset : 0.0f; ++ v += v > 0.0f ? input_quantization_offset : 0.0f; ++ if (range_src == AVCOL_RANGE_MPEG) { ++ y = input_y_scale * y - 0.07305936073f; ++ u = input_uv_scale * u - 0.5714285714f; ++ v = input_uv_scale * v - 0.5714285714f; ++ } else { ++ u -= 0.5f; ++ v -= 0.5f; ++ } + float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; + float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; + float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; -+ if (range_src == AVCOL_RANGE_MPEG) { -+ r = (r - LIMITED_BLACK) / LIMITED_RANGE; -+ g = (g - LIMITED_BLACK) / LIMITED_RANGE; -+ b = (b - LIMITED_BLACK) / LIMITED_RANGE; -+ } + + return make_float3(r, g, b); +} @@ -523,26 +554,29 @@ Index: FFmpeg/libavfilter/cuda/colorspace_common.h +} + +static __inline__ __device__ float3 rgb2yuv(float r, float g, float b) { -+ if (range_dst == AVCOL_RANGE_MPEG) { -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; -+ } + float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; + float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; + float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; -+ u += 0.5f; -+ v += 0.5f; ++ if (range_dst == AVCOL_RANGE_MPEG) { ++ y = floorf(((219.0f * y + 16.0f) * output_quantization_factor) + 0.5f) / output_quantization_scale; ++ u = floorf(((224.0f * u + 128.0f) * output_quantization_factor) + 0.5f) / output_quantization_scale; ++ v = floorf(((224.0f * v + 128.0f) * output_quantization_factor) + 0.5f) / output_quantization_scale; ++ } else { ++ u += 0.5f; ++ v += 0.5f; ++ } ++ y -= y > 0.0f ? output_quantization_offset : 0.0f; ++ u -= u > 0.0f ? output_quantization_offset : 0.0f; ++ v -= v > 0.0f ? output_quantization_offset : 0.0f; + return make_float3(y, u, v); +} + +static __inline__ __device__ float rgb2y(float r, float g, float b) { ++ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; + if (range_dst == AVCOL_RANGE_MPEG) { -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; ++ y = floorf(((219.0f * y + 16.0f) * output_quantization_factor) + 0.5f) / output_quantization_scale; + } -+ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; ++ y -= y > 0.0f ? output_quantization_offset : 0.0f; + return y; +} + @@ -1775,7 +1809,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c =================================================================== --- /dev/null +++ FFmpeg/libavfilter/vf_tonemap_cuda.c -@@ -0,0 +1,1131 @@ +@@ -0,0 +1,1165 @@ +/* + * This file is part of FFmpeg. + * @@ -2287,6 +2321,12 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + enum AVColorPrimaries in_pri = s->in_pri, out_pri = s->out_pri; + enum AVColorRange in_range = s->in_range, out_range = s->out_range; + int d = s->in_desc->comp[0].depth > s->out_desc->comp[0].depth && s->ditherTex; ++ float input_quantization_offset = 0.0f; ++ float output_quantization_offset = 0.0f; ++ float input_y_scale = 1.0f; ++ float input_uv_scale = 1.0f; ++ float output_quantization_factor = 1.0f; ++ float output_quantization_scale = 255.0f; + char info_log[4096], error_log[4096]; + CUjit_option options[] = { CU_JIT_INFO_LOG_BUFFER, + CU_JIT_ERROR_LOG_BUFFER, @@ -2396,6 +2436,28 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0) + return ret; + ++ if (s->in_desc->comp[0].depth == 16) { ++ // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing ++ // and what videos are actually encoded in ++ input_quantization_offset = QUANTIZATION_OFFSET(12); ++ input_y_scale = INPUT_Y_SCALE(12); ++ input_uv_scale = INPUT_UV_SCALE(12); ++ } else { ++ input_quantization_offset = QUANTIZATION_OFFSET(s->in_desc->comp[0].depth); ++ input_y_scale = INPUT_Y_SCALE(s->in_desc->comp[0].depth); ++ input_uv_scale = INPUT_UV_SCALE(s->in_desc->comp[0].depth); ++ } ++ ++ if (s->out_desc->comp[0].depth == 10) { ++ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters ++ output_quantization_offset = QUANTIZATION_OFFSET(10); ++ } ++ ++ if (s->out_desc->comp[0].depth > 8) { ++ output_quantization_factor = 256.0f; // 2^(16-8) ++ output_quantization_scale = 65535.0f; // 2^16 - 1 ++ } ++ + av_bprint_init(&constants, 2048, AV_BPRINT_SIZE_UNLIMITED); + + av_bprintf(&constants, ".version 3.2\n"); @@ -2406,12 +2468,12 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + av_bprintf(&constants, ".visible .const .align " #align " " decl ";\n", __VA_ARGS__) +#define CONSTANT(decl, ...) CONSTANT_A(decl, 4, __VA_ARGS__) +#define CONSTANT_M(a, b) \ -+ CONSTANT(".f32 " a "[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f}", \ ++ CONSTANT(".f32 " a "[] = {%.13lf, %.13lf, %.13lf, %.13lf, %.13lf, %.13lf, %.13lf, %.13lf, %.13lf}", \ + b[0][0], b[0][1], b[0][2], \ + b[1][0], b[1][1], b[1][2], \ + b[2][0], b[2][1], b[2][2]) +#define CONSTANT_C(a, b, c, d) \ -+ CONSTANT(".f32 " a "[] = {%f, %f, %f}", \ ++ CONSTANT(".f32 " a "[] = {%.13lf, %.13lf, %.13lf}", \ + b, c, d) + + CONSTANT(".u32 depth_src = %i", (int)s->in_desc->comp[0].depth); @@ -2426,13 +2488,19 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + CONSTANT(".u32 chroma_loc_dst = %i", (int)s->out_chroma_loc); + CONSTANT(".u32 tonemap_func = %i", (int)s->tonemap); + CONSTANT(".u32 enable_dither = %i", (int)(s->in_desc->comp[0].depth > s->out_desc->comp[0].depth)); -+ CONSTANT(".f32 dither_size = %f", (float)ff_fruit_dither_size); -+ CONSTANT(".f32 dither_quantization = %f", (float)((1 << s->out_desc->comp[0].depth) - 1)); -+ CONSTANT(".f32 ref_white = %f", REFERENCE_WHITE_ALT); -+ CONSTANT(".f32 tone_param = %f", s->param); -+ CONSTANT(".f32 desat_param = %f", s->desat_param); -+ CONSTANT(".f32 pq_max_lum_div_ref_white = %f", (float)(ST2084_MAX_LUMINANCE / REFERENCE_WHITE_ALT)); -+ CONSTANT(".f32 ref_white_div_pq_max_lum = %f", (float)(REFERENCE_WHITE_ALT / ST2084_MAX_LUMINANCE)); ++ CONSTANT(".f32 dither_size = %.1f", (float)ff_fruit_dither_size); ++ CONSTANT(".f32 dither_quantization = %.1f", (float)((1 << s->out_desc->comp[0].depth) - 1)); ++ CONSTANT(".f32 ref_white = %.4f", REFERENCE_WHITE_ALT); ++ CONSTANT(".f32 tone_param = %.4f", s->param); ++ CONSTANT(".f32 desat_param = %.4f", s->desat_param); ++ CONSTANT(".f32 pq_max_lum_div_ref_white = %.13lf", (float)(ST2084_MAX_LUMINANCE / REFERENCE_WHITE_ALT)); ++ CONSTANT(".f32 ref_white_div_pq_max_lum = %.13lf", (float)(REFERENCE_WHITE_ALT / ST2084_MAX_LUMINANCE)); ++ CONSTANT(".f32 input_quantization_offset = %.13lf", input_quantization_offset); ++ CONSTANT(".f32 input_y_scale = %.13lf", input_y_scale); ++ CONSTANT(".f32 input_uv_scale = %.13lf", input_uv_scale); ++ CONSTANT(".f32 output_quantization_offset = %.13lf", output_quantization_offset); ++ CONSTANT(".f32 output_quantization_factor = %.13lf", output_quantization_factor); ++ CONSTANT(".f32 output_quantization_scale = %.13lf", output_quantization_scale); + CONSTANT_M("rgb_matrix", (s->dovi ? s->dovi->nonlinear : rgb_matrix)); + CONSTANT_M("yuv_matrix", yuv_matrix); + CONSTANT_A(".u8 rgb2rgb_passthrough = %i", 1, in_pri == out_pri); @@ -2862,7 +2930,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c + { "enabled", 0, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, FLAGS, .unit = "tradeoff" }, + { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, + { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, -+ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, + { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, + { NULL }, +}; diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch index 98723bbc95..9595d01ea8 100644 --- a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch +++ b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -16,7 +16,7 @@ Index: FFmpeg/libavfilter/opencl.c for (i = 0; i < 3; i++) { for (j = 0; j < 3; j++) - av_bprintf(buf, " %.5ff,", mat[i][j]); -+ av_bprintf(buf, " %ff,", mat[i][j]); ++ av_bprintf(buf, " %.13lff,", mat[i][j]); av_bprintf(buf, "\n"); } av_bprintf(buf, "};\n"); @@ -65,7 +65,7 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl =================================================================== --- FFmpeg.orig/libavfilter/opencl/colorspace_common.cl +++ FFmpeg/libavfilter/opencl/colorspace_common.cl -@@ -17,7 +17,21 @@ +@@ -17,7 +17,17 @@ */ #define ST2084_MAX_LUMINANCE 10000.0f @@ -80,15 +80,11 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl +#define ARIB_B67_B 0.28466892f +#define ARIB_B67_C 0.55991073f + -+#define LIMITED_BLACK 0.06256109482f -+#define LIMITED_WHITE 0.9188660802f -+#define LIMITED_RANGE 0.8563049854f -+ +#define FLOAT_EPS 1e-6f #if chroma_loc == 1 #define chroma_sample(a,b,c,d) (((a) + (c)) * 0.5f) -@@ -33,92 +47,134 @@ +@@ -33,88 +43,134 @@ #define chroma_sample(a,b,c,d) (((a) + (b) + (c) + (d)) * 0.25f) #endif @@ -187,33 +183,21 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl + float den = 1.0f + ST2084_C3 * xpow; + return native_powr(1.0f + num / den, ST2084_M2); +#endif - } - --float inverse_eotf_bt1886(float c) { -- return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f); ++} ++ +float inverse_eotf_st2084(float x) { + x *= ref_white_div_pq_max_lum; + return inverse_eotf_st2084_common(x); - } - --float oetf_bt709(float c) { -- c = c < 0.0f ? 0.0f : c; -- float r1 = 4.5f * c; -- float r2 = 1.099f * powr(c, 0.45f) - 0.099f; -- return c < 0.018f ? r1 : r2; --} --float inverse_oetf_bt709(float c) { -- float r1 = c / 4.5f; -- float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f); -- return c < 0.081f ? r1 : r2; ++} ++ +float4 eotf_st2084x4(float4 x) { + x.x = eotf_st2084_common(x.x); + x.y = eotf_st2084_common(x.y); + x.z = eotf_st2084_common(x.z); + x.w = eotf_st2084_common(x.w); + return x * pq_max_lum_div_ref_white; - } - ++} ++ +float4 inverse_eotf_st2084x4(float4 x) { + x *= ref_white_div_pq_max_lum; + x.x = inverse_eotf_st2084_common(x.x); @@ -236,20 +220,32 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl + return x <= (1.0f / 12.0f) + ? native_sqrt(3.0f * x) + : (ARIB_B67_A * native_log(12.0f * x - ARIB_B67_B) + ARIB_B67_C); -+} -+ + } + +-float inverse_eotf_bt1886(float c) { +- return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f); +float inverse_oetf_arib_b67(float x) { + x = fmax(x, 0.0f); + return x <= 0.5f + ? (x * x) * (1.0f / 3.0f) + : (native_exp((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); -+} -+ + } + +-float oetf_bt709(float c) { +- c = c < 0.0f ? 0.0f : c; +- float r1 = 4.5f * c; +- float r2 = 1.099f * powr(c, 0.45f) - 0.099f; +- return c < 0.018f ? r1 : r2; +-} +-float inverse_oetf_bt709(float c) { +- float r1 = c / 4.5f; +- float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f); +- return c < 0.081f ? r1 : r2; +// linearizer for HLG/ARIB-B67 +float eotf_arib_b67(float x) { + return ootf_1_2(inverse_oetf_arib_b67(x)) * 5.0f; -+} -+ + } + +// delinearizer for HLG/ARIB-B67 +float inverse_eotf_arib_b67(float x) { + return oetf_arib_b67(inverse_ootf_1_2(x / 5.0f)); @@ -268,57 +264,65 @@ Index: FFmpeg/libavfilter/opencl/colorspace_common.cl + float3 yuv2rgb(float y, float u, float v) { -#ifdef FULL_RANGE_IN - u -= 0.5f; v -= 0.5f; --#else +- u -= 0.5f; v -= 0.5f; ++ y += mix(0.0f, input_quantization_offset, y > 0.0f); ++ u += mix(0.0f, input_quantization_offset, u > 0.0f); ++ v += mix(0.0f, input_quantization_offset, v > 0.0f); ++#ifndef FULL_RANGE_IN ++ y = input_y_scale * y - 0.07305936073f; ++ u = input_uv_scale * u - 0.5714285714f; ++ v = input_uv_scale * v - 0.5714285714f; + #else - y = (y * 255.0f - 16.0f) / 219.0f; - u = (u * 255.0f - 128.0f) / 224.0f; - v = (v * 255.0f - 128.0f) / 224.0f; --#endif ++ u -= 0.5f; v -= 0.5f; + #endif float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; - float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; -+#ifndef FULL_RANGE_IN -+ r = (r - LIMITED_BLACK) / LIMITED_RANGE; -+ g = (g - LIMITED_BLACK) / LIMITED_RANGE; -+ b = (b - LIMITED_BLACK) / LIMITED_RANGE; -+#endif - return (float3)(r, g, b); - } - -@@ -135,22 +191,25 @@ float3 yuv2lrgb(float3 yuv) { - } - - float3 rgb2yuv(float r, float g, float b) { -+#ifndef FULL_RANGE_OUT -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; -+#endif +@@ -138,19 +194,35 @@ float3 rgb2yuv(float r, float g, float b float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; -#ifdef FULL_RANGE_OUT - u += 0.5f; v += 0.5f; --#else +- u += 0.5f; v += 0.5f; ++#ifndef FULL_RANGE_OUT ++ #ifdef RESCALE_LIMITED_RANGE_OUTPUT ++ y = floor(((219.0f * y + 16.0f) * 256.0f) + 0.5f) / 65535.0f; ++ u = floor(((224.0f * u + 128.0f) * 256.0f) + 0.5f) / 65535.0f; ++ v = floor(((224.0f * v + 128.0f) * 256.0f) + 0.5f) / 65535.0f; ++ #else ++ y = floor((219.0f * y + 16.0f) + 0.5f) / 255.0f; ++ u = floor((224.0f * u + 128.0f) + 0.5f) / 255.0f; ++ v = floor((224.0f * v + 128.0f) + 0.5f) / 255.0f; ++ #endif + #else - y = (219.0f * y + 16.0f) / 255.0f; - u = (224.0f * u + 128.0f) / 255.0f; - v = (224.0f * v + 128.0f) / 255.0f; --#endif ++ u += 0.5f; v += 0.5f; + #endif ++ y -= mix(0.0f, output_quantization_offset, y > 0.0f); ++ u -= mix(0.0f, output_quantization_offset, u > 0.0f); ++ v -= mix(0.0f, output_quantization_offset, v > 0.0f); return (float3)(y, u, v); } float rgb2y(float r, float g, float b) { -+#ifndef FULL_RANGE_OUT -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; -+#endif float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; - y = (219.0f * y + 16.0f) / 255.0f; ++#ifndef FULL_RANGE_OUT ++ #ifdef RESCALE_LIMITED_RANGE_OUTPUT ++ y = floor(((219.0f * y + 16.0f) * 256.0f) + 0.5f) / 65535.0f; ++ #else ++ y = floor((219.0f * y + 16.0f) + 0.5f) / 255.0f; ++ #endif ++#endif ++ y -= mix(0.0f, output_quantization_offset, y > 0.0f); return y; } -@@ -188,18 +247,101 @@ float3 lrgb2lrgb(float3 c) { +@@ -188,18 +260,101 @@ float3 lrgb2lrgb(float3 c) { #endif } @@ -1282,7 +1286,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + if (ctx->lin_lut) { + av_bprintf(buf, "__constant float lin_lut[%d] = {\n", LUT_SIZE); + for (i = 0; i < LUT_SIZE; i++) -+ av_bprintf(buf, " %ff,", ctx->lin_lut[i]); ++ av_bprintf(buf, " %.13ff,", ctx->lin_lut[i]); + av_bprintf(buf, "};\n"); + } +} @@ -1450,7 +1454,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c switch(ctx->tonemap) { case TONEMAP_GAMMA: -@@ -139,53 +353,178 @@ static int tonemap_opencl_init(AVFilterC +@@ -139,59 +353,207 @@ static int tonemap_opencl_init(AVFilterC if (isnan(ctx->param)) ctx->param = 0.3f; break; @@ -1602,9 +1606,9 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + av_bprintf(&header, "__constant float scene_threshold = %.4ff;\n", ctx->scene_threshold); + -+ av_bprintf(&header, "__constant float pq_max_lum_div_ref_white = %ff;\n", ++ av_bprintf(&header, "__constant float pq_max_lum_div_ref_white = %.13ff;\n", + (ST2084_MAX_LUMINANCE / REFERENCE_WHITE_ALT)); -+ av_bprintf(&header, "__constant float ref_white_div_pq_max_lum = %ff;\n", ++ av_bprintf(&header, "__constant float ref_white_div_pq_max_lum = %.13ff;\n", + (REFERENCE_WHITE_ALT / ST2084_MAX_LUMINANCE)); + av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]); @@ -1644,7 +1648,36 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->range_in == AVCOL_RANGE_JPEG) av_bprintf(&header, "#define FULL_RANGE_IN\n"); -@@ -199,19 +538,41 @@ static int tonemap_opencl_init(AVFilterC + if (ctx->range_out == AVCOL_RANGE_JPEG) + av_bprintf(&header, "#define FULL_RANGE_OUT\n"); + ++ if (ctx->in_desc->comp[0].depth == 16) { ++ // Assume 16bit is actually 12bit for now as that is what the hardware decoders producing ++ // and what videos are actually encoded in ++ av_bprintf(&header, "__constant float input_quantization_offset = %.13lff;\n", QUANTIZATION_OFFSET(12)); ++ av_bprintf(&header, "__constant float input_y_scale = %.13lff;\n", INPUT_Y_SCALE(12)); ++ av_bprintf(&header, "__constant float input_uv_scale = %.13lff;\n", INPUT_UV_SCALE(12)); ++ } else { ++ av_bprintf(&header, "__constant float input_quantization_offset = %.13lff;\n", QUANTIZATION_OFFSET(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "__constant float input_y_scale = %.13lff;\n", INPUT_Y_SCALE(ctx->in_desc->comp[0].depth)); ++ av_bprintf(&header, "__constant float input_uv_scale = %.13lff;\n", INPUT_UV_SCALE(ctx->in_desc->comp[0].depth)); ++ } ++ ++ if (ctx->out_desc->comp[0].depth > 8) { ++ av_bprintf(&header, "#define RESCALE_LIMITED_RANGE_OUTPUT\n"); ++ } ++ ++ if (ctx->out_desc->comp[0].depth == 10) { ++ av_bprintf(&header, "__constant float output_quantization_offset = %.13lff;\n", QUANTIZATION_OFFSET(10)); ++ } else { ++ // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters ++ av_bprintf(&header, "__constant float output_quantization_offset = 0.0f;\n"); ++ } ++ + av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc); + + if (rgb2rgb_passthrough) +@@ -199,19 +561,41 @@ static int tonemap_opencl_init(AVFilterC else ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); @@ -1667,7 +1700,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + for (j = 0; j < 3; j++) + ycc2rgb_offset[i] -= ctx->dovi->nonlinear[i][j] * ctx->dovi->nonlinear_offset[j]; + } -+ av_bprintf(&header, "__constant float3 ycc2rgb_offset = {%ff, %ff, %ff};\n", ++ av_bprintf(&header, "__constant float3 ycc2rgb_offset = {%.13ff, %.13ff, %.13ff};\n", + ycc2rgb_offset[0], ycc2rgb_offset[1], ycc2rgb_offset[2]); + ff_matrix_mul_3x3(lms2rgb, dovi_lms2rgb_matrix, ctx->dovi->linear); + ff_opencl_print_const_matrix_3x3(&header, "rgb_matrix", ctx->dovi->nonlinear); //ycc2rgb @@ -1693,7 +1726,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); goto fail; } -@@ -219,24 +580,23 @@ static int tonemap_opencl_init(AVFilterC +@@ -219,24 +603,23 @@ static int tonemap_opencl_init(AVFilterC ff_fill_rgb2yuv_table(luma_dst, rgb2yuv); ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv); @@ -1704,7 +1737,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c - av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n", - av_q2d(luma_src->cr), av_q2d(luma_src->cg), av_q2d(luma_src->cb)); - av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n", -+ av_bprintf(&header, "__constant float3 luma_dst = {%ff, %ff, %ff};\n", ++ av_bprintf(&header, "__constant float3 luma_dst = {%.13ff, %.13ff, %.13ff};\n", av_q2d(luma_dst->cr), av_q2d(luma_dst->cg), av_q2d(luma_dst->cb)); - av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]); @@ -1733,7 +1766,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); opencl_sources[0] = header.str; -@@ -254,46 +614,171 @@ static int tonemap_opencl_init(AVFilterC +@@ -254,46 +637,171 @@ static int tonemap_opencl_init(AVFilterC CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " "command queue %d.\n", cle); @@ -1923,7 +1956,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ret = ff_opencl_filter_config_output(outlink); if (ret < 0) return ret; -@@ -308,13 +793,46 @@ static int launch_kernel(AVFilterContext +@@ -308,13 +816,46 @@ static int launch_kernel(AVFilterContext size_t global_work[2]; size_t local_work[2]; cl_int cle; @@ -1972,7 +2005,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c local_work[0] = 16; local_work[1] = 16; -@@ -338,13 +856,10 @@ static int tonemap_opencl_filter_frame(A +@@ -338,13 +879,10 @@ static int tonemap_opencl_filter_frame(A AVFilterContext *avctx = inlink->dst; AVFilterLink *outlink = avctx->outputs[0]; TonemapOpenCLContext *ctx = avctx->priv; @@ -1987,7 +2020,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", av_get_pix_fmt_name(input->format), -@@ -363,8 +878,49 @@ static int tonemap_opencl_filter_frame(A +@@ -363,8 +901,49 @@ static int tonemap_opencl_filter_frame(A if (err < 0) goto fail; @@ -2039,7 +2072,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c if (ctx->trc != -1) output->color_trc = ctx->trc; -@@ -385,72 +941,50 @@ static int tonemap_opencl_filter_frame(A +@@ -385,72 +964,50 @@ static int tonemap_opencl_filter_frame(A ctx->range_out = output->color_range; ctx->chroma_loc = output->chroma_location; @@ -2135,7 +2168,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c av_frame_free(&input); av_frame_free(&output); return err; -@@ -458,24 +992,9 @@ fail: +@@ -458,24 +1015,9 @@ fail: static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx) { @@ -2162,7 +2195,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c ff_opencl_filter_uninit(avctx); } -@@ -483,37 +1002,50 @@ static av_cold void tonemap_opencl_unini +@@ -483,37 +1025,50 @@ static av_cold void tonemap_opencl_unini #define OFFSET(x) offsetof(TonemapOpenCLContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption tonemap_opencl_options[] = { @@ -2239,7 +2272,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c + { "enabled", 0, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "tradeoff" }, + { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, + { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, DBL_MIN, DBL_MAX, FLAGS }, -+ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, + { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, { .dbl = 0.2 }, 0, DBL_MAX, FLAGS }, { NULL } }; diff --git a/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch b/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch index 714199449a..30e2fcff49 100644 --- a/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch +++ b/debian/patches/0052-add-vf-tonemap-videotoolbox-filter.patch @@ -40,7 +40,7 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal =================================================================== --- /dev/null +++ FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal -@@ -0,0 +1,894 @@ +@@ -0,0 +1,916 @@ +/* + * Copyright (c) 2024 Gnattu OC + * @@ -83,10 +83,6 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal + +#define FLOAT_EPS 1e-6f + -+#define LIMITED_BLACK 0.06256109482f -+#define LIMITED_WHITE 0.9188660802f -+#define LIMITED_RANGE 0.8563049854f -+ +constant float ref_white [[function_constant(0)]]; +constant float tone_param [[function_constant(1)]]; +constant float desat_param [[function_constant(2)]]; @@ -273,16 +269,23 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal + +// ------------ +// Color conversion ++// See libavfilter/colorspace.h for derivation of these constants +float3 yuv2rgb(float y, float u, float v) { -+ u -= 0.5f; -+ v -= 0.5f; ++ y += mix(0.0f, 0.0009765774014f, y > 0.0f); ++ u += mix(0.0f, 0.0009765774014f, u > 0.0f); ++ v += mix(0.0f, 0.0009765774014f, v > 0.0f); ++ if (is_full_range_in) { ++ u -= 0.5f; ++ v -= 0.5f; ++ } else { ++ y = 1.1678082192f * y - 0.07305936073f; ++ u = 1.1417410714f * u - 0.5714285714f; ++ v = 1.1417410714f * v - 0.5714285714f; ++ } + float r = (y * rgb_matrix_1[0]) + (u * rgb_matrix_1[1]) + (v * rgb_matrix_1[2]); + float g = (y * rgb_matrix_2[0]) + (u * rgb_matrix_2[1]) + (v * rgb_matrix_2[2]); + float b = (y * rgb_matrix_3[0]) + (u * rgb_matrix_3[1]) + (v * rgb_matrix_3[2]); + float3 c = float3(r, g, b); -+ if (!is_full_range_in) { -+ c = (c - LIMITED_BLACK) / LIMITED_RANGE; -+ } + return c; +} + @@ -298,26 +301,45 @@ Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal +} + +float3 rgb2yuv(float r, float g, float b) { -+ if (!is_full_range_out) { -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; ++ float y = (r * yuv_matrix_1[0]) + (g * yuv_matrix_1[1]) + (b * yuv_matrix_1[2]); ++ float u = (r * yuv_matrix_2[0]) + (g * yuv_matrix_2[1]) + (b * yuv_matrix_2[2]); ++ float v = (r * yuv_matrix_3[0]) + (g * yuv_matrix_3[1]) + (b * yuv_matrix_3[2]); ++ if (is_full_range_out) { ++ u += 0.5f; ++ v += 0.5f; ++ } else { ++ if (enable_dither) { ++ y = floor((219.0f * y + 16.0f) + 0.5f) / 255.0f; ++ u = floor((224.0f * u + 128.0f) + 0.5f) / 255.0f; ++ v = floor((224.0f * v + 128.0f) + 0.5f) / 255.0f; ++ } else { ++ y = floor(((219.0f * y + 16.0f) * 256.0f) + 0.5f) / 65535.0f; ++ u = floor(((224.0f * u + 128.0f) * 256.0f) + 0.5f) / 65535.0f; ++ v = floor(((224.0f * v + 128.0f) * 256.0f) + 0.5f) / 65535.0f; ++ } ++ } ++ // in rgb2yuv conversion, enable_dither means output is 8bit in metal pipeline ++ // use this to check if we need the 10bit offset ++ if (!enable_dither) { ++ y -= mix(0.0f, 0.0009765774014f, y > 0.0f); ++ u -= mix(0.0f, 0.0009765774014f, u > 0.0f); ++ v -= mix(0.0f, 0.0009765774014f, v > 0.0f); + } -+ float y = (r*yuv_matrix_1[0]) + (g*yuv_matrix_1[1]) + (b*yuv_matrix_1[2]); -+ float u = (r*yuv_matrix_2[0]) + (g*yuv_matrix_2[1]) + (b*yuv_matrix_2[2]); -+ float v = (r*yuv_matrix_3[0]) + (g*yuv_matrix_3[1]) + (b*yuv_matrix_3[2]); -+ u += 0.5f; -+ v += 0.5f; + return float3(y, u, v); +} + +float rgb2y(float r, float g, float b) { ++ float y = (r*yuv_matrix_1[0]) + (g*yuv_matrix_1[1]) + (b*yuv_matrix_1[2]); + if (!is_full_range_out) { -+ r = r * LIMITED_RANGE + LIMITED_BLACK; -+ g = g * LIMITED_RANGE + LIMITED_BLACK; -+ b = b * LIMITED_RANGE + LIMITED_BLACK; ++ if (enable_dither) { ++ y = floor((219.0f * y + 16.0f) + 0.5f) / 255.0f; ++ } else { ++ y = floor(((219.0f * y + 16.0f) * 256.0f) + 0.5f) / 65535.0f; ++ } ++ } ++ if (!enable_dither) { ++ y -= mix(0.0f, 0.0009765774014f, y > 0.0f); + } -+ float y = (r*yuv_matrix_1[0]) + (g*yuv_matrix_1[1]) + (b*yuv_matrix_1[2]); + return y; +} + @@ -2062,7 +2084,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_videotoolbox.m + { "apply_dovi", "Apply Dolby Vision metadata if possible", OFFSET(apply_dovi), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, + { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, + { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, DBL_MIN, DBL_MAX, FLAGS }, -+ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, + { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, { .dbl = 0.2 }, 0, DBL_MAX, FLAGS }, + { NULL } +}; diff --git a/debian/patches/0076-alway-set-videotoolboxenc-pixel-buffer-info.patch b/debian/patches/0076-alway-set-videotoolboxenc-pixel-buffer-info.patch new file mode 100644 index 0000000000..df99d8c8bc --- /dev/null +++ b/debian/patches/0076-alway-set-videotoolboxenc-pixel-buffer-info.patch @@ -0,0 +1,31 @@ +Index: FFmpeg/libavcodec/videotoolboxenc.c +=================================================================== +--- FFmpeg.orig/libavcodec/videotoolboxenc.c ++++ FFmpeg/libavcodec/videotoolboxenc.c +@@ -1034,9 +1034,10 @@ static int create_cv_pixel_buffer_info(A + CFNumberRef width_num = NULL; + CFNumberRef height_num = NULL; + CFMutableDictionaryRef pixel_buffer_info = NULL; ++ enum AVPixelFormat pix_fmt = avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX ? avctx->sw_pix_fmt : avctx->pix_fmt; + int cv_color_format; + int status = get_cv_pixel_format(avctx, +- avctx->pix_fmt, ++ pix_fmt, + avctx->color_range, + &cv_color_format, + NULL); +@@ -1687,11 +1688,9 @@ static int vtenc_configure_encoder(AVCod + kCFBooleanTrue); + } + +- if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) { +- status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info); +- if (status) +- goto init_cleanup; +- } ++ status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info); ++ if (status) ++ goto init_cleanup; + + vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0; + diff --git a/debian/patches/series b/debian/patches/series index 28c2cdfaeb..89de7fae22 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -73,3 +73,4 @@ 0073-add-12bit-decoding-on-videotoolbox.patch 0074-fix-the-sub2video-perf-regressions.patch 0075-allow-vpl-qsv-to-init-with-the-legacy-msdk-path.patch +0076-alway-set-videotoolboxenc-pixel-buffer-info.patch