avfilter/tonemap_[opencl,cuda]: use macro

jellyfin · Oct 13, 2024 · 81e35ea · 81e35ea
1 parent 669c8de
commit 81e35ea
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 41 deletions.
diff --git a/debian/patches/0004-add-cuda-tonemap-impl.patch b/debian/patches/0004-add-cuda-tonemap-impl.patch
@@ -257,7 +257,7 @@ Index: FFmpeg/libavfilter/colorspace.h
 ===================================================================
 --- FFmpeg.orig/libavfilter/colorspace.h
 +++ FFmpeg/libavfilter/colorspace.h
-@@ -23,10 +23,69 @@
+@@ -23,10 +23,66 @@
  #include "libavutil/csp.h"
  #include "libavutil/frame.h"
  #include "libavutil/pixfmt.h"
@@ -291,17 +291,14 @@ Index: FFmpeg/libavfilter/colorspace.h
 + * Which means the input scale = (2^n - 1) / (219 * 2^(n-8)) and input offset = (2^(n-4)) / (219 * 2^(n-8)) for Y' and
 + * (2^(n-1)) / (7 * 2^(n-3)) for Cb'Cr'
 + */
-+#define INPUT_Y_SCALE_10B 1.1678082192f
-+#define INPUT_UV_SCALE_10B 1.1417410714f
-+#define INPUT_Y_SCALE_12B 1.1689497717f
-+#define INPUT_UV_SCALE_12B 1.1428571429f
++#define INPUT_Y_SCALE(n)  ((double)((1 << (n)) - 1) / (219 * (1 << ((n) - 8))))
++#define INPUT_UV_SCALE(n) ((double)((1 << (n)) - 1) / (224 * (1 << ((n) - 8))))
 +
 +/*
 + * GPU will interpret 10-bit and 12-bit color as 16-bit int
 + * but that will introduce a slight (2^(16-n)) / (2^16 - 1) quantization offset which we want to compensate for
 +*/
-+#define QUANTIZATION_OFFSET_10B 0.0009765774014f
-+#define QUANTIZATION_OFFSET_12B 0.0002441443503f
++#define QUANTIZATION_OFFSET(n) ((double)(1 << (16 - (n))) / ((1 << 16) - 1))
 +
 +// Parsed metadata from the Dolby Vision RPU
 +struct DoviMetadata {
@@ -327,7 +324,7 @@ Index: FFmpeg/libavfilter/colorspace.h
  void ff_matrix_mul_3x3(double dst[3][3],
                 const double src1[3][3], const double src2[3][3]);
  void ff_matrix_mul_3x3_vec(double dst[3], const double vec[3], const double mat[3][3]);
-@@ -38,4 +97,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC
+@@ -38,4 +94,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC
  double ff_determine_signal_peak(AVFrame *in);
  void ff_update_hdr_metadata(AVFrame *in, double peak);
 
@@ -2439,21 +2436,21 @@ Index: FFmpeg/libavfilter/vf_tonemap_cuda.c
 +    if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0)
 +        return ret;
 +
-+    if (s->in_desc->comp[0].depth == 10) {
-+        input_quantization_offset = QUANTIZATION_OFFSET_10B;
-+        input_y_scale = INPUT_Y_SCALE_10B;
-+        input_uv_scale = INPUT_UV_SCALE_10B;
-+    } else if (s->in_desc->comp[0].depth == 16) {
++    if (s->in_desc->comp[0].depth == 16) {
 +        // Assume 16bit is actually 12bit for now as that is what the hardware decoders produce
 +        // and what videos are actually encoded in
-+        input_quantization_offset = QUANTIZATION_OFFSET_12B;
-+        input_y_scale = INPUT_Y_SCALE_12B;
-+        input_uv_scale = INPUT_UV_SCALE_12B;
++        input_quantization_offset = QUANTIZATION_OFFSET(12);
++        input_y_scale = INPUT_Y_SCALE(12);
++        input_uv_scale = INPUT_UV_SCALE(12);
++    } else {
++        input_quantization_offset = QUANTIZATION_OFFSET(s->in_desc->comp[0].depth);
++        input_y_scale = INPUT_Y_SCALE(s->in_desc->comp[0].depth);
++        input_uv_scale = INPUT_UV_SCALE(s->in_desc->comp[0].depth);
 +    }
 +
 +    if (s->out_desc->comp[0].depth == 10) {
 +        // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters
-+        output_quantization_offset = QUANTIZATION_OFFSET_10B;
++        output_quantization_offset = QUANTIZATION_OFFSET(10);
 +    }
 +
 +    if (s->out_desc->comp[0].depth > 8) {

diff --git a/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
@@ -1458,7 +1458,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 
      switch(ctx->tonemap) {
      case TONEMAP_GAMMA:
-@@ -139,59 +353,210 @@ static int tonemap_opencl_init(AVFilterC
+@@ -139,59 +353,204 @@ static int tonemap_opencl_init(AVFilterC
          if (isnan(ctx->param))
              ctx->param = 0.3f;
          break;
@@ -1655,36 +1655,30 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
      if (ctx->range_out == AVCOL_RANGE_JPEG)
          av_bprintf(&header, "#define FULL_RANGE_OUT\n");
 
-+    if (ctx->in_desc->comp[0].depth == 10) {
-+        av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B);
-+        av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_10B);
-+        av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_10B);
-+    } else if (ctx->in_desc->comp[0].depth == 16) {
++    if (ctx->in_desc->comp[0].depth == 16) {
 +        // Assume 16bit is actually 12bit for now as that is what the hardware decoders produce
 +        // and what videos are actually encoded in
-+        av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_12B);
-+        av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE_12B);
-+        av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE_12B);
++        av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(12));
++        av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(12));
++        av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(12));
 +    } else {
-+        // should not happen, but for completeness
-+        // once we can tell if the input is real 16bit we can use this branch
-+        av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET 0.0f\n");
-+        av_bprintf(&header, "#define INPUT_Y_SCALE 1.0f\n");
-+        av_bprintf(&header, "#define INPUT_UV_SCALE 1.0f\n");
++        av_bprintf(&header, "#define INPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(ctx->in_desc->comp[0].depth));
++        av_bprintf(&header, "#define INPUT_Y_SCALE %ff\n", INPUT_Y_SCALE(ctx->in_desc->comp[0].depth));
++        av_bprintf(&header, "#define INPUT_UV_SCALE %ff\n", INPUT_UV_SCALE(ctx->in_desc->comp[0].depth));
 +    }
 +
 +    if (ctx->out_desc->comp[0].depth > 8) {
 +        av_bprintf(&header, "#define RESCALE_LIMITED_RANGE_OUTPUT\n");
 +    }
 +
 +    if (ctx->out_desc->comp[0].depth == 10)
-+        av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET_10B);
++        av_bprintf(&header, "#define OUTPUT_QUANTIZATION_OFFSET %ff\n", QUANTIZATION_OFFSET(10));
 +    // Don't handle 12b offset for now and assume 16b output is real 16b out to make it consistent with other filters
 +
      av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc);
 
      if (rgb2rgb_passthrough)
-@@ -199,19 +564,41 @@ static int tonemap_opencl_init(AVFilterC
+@@ -199,19 +558,41 @@ static int tonemap_opencl_init(AVFilterC
      else
          ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb);
 
@@ -1733,7 +1727,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
                 ctx->colorspace_out, av_color_space_name(ctx->colorspace_out));
          goto fail;
      }
-@@ -219,24 +606,23 @@ static int tonemap_opencl_init(AVFilterC
+@@ -219,24 +600,23 @@ static int tonemap_opencl_init(AVFilterC
      ff_fill_rgb2yuv_table(luma_dst, rgb2yuv);
      ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv);
 
@@ -1773,7 +1767,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 
      av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str);
      opencl_sources[0] = header.str;
-@@ -254,46 +640,171 @@ static int tonemap_opencl_init(AVFilterC
+@@ -254,46 +634,171 @@ static int tonemap_opencl_init(AVFilterC
      CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
                       "command queue %d.\n", cle);
 
@@ -1963,7 +1957,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
      ret = ff_opencl_filter_config_output(outlink);
      if (ret < 0)
          return ret;
-@@ -308,13 +819,46 @@ static int launch_kernel(AVFilterContext
+@@ -308,13 +813,46 @@ static int launch_kernel(AVFilterContext
      size_t global_work[2];
      size_t local_work[2];
      cl_int cle;
@@ -2012,7 +2006,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 
      local_work[0]  = 16;
      local_work[1]  = 16;
-@@ -338,13 +882,10 @@ static int tonemap_opencl_filter_frame(A
+@@ -338,13 +876,10 @@ static int tonemap_opencl_filter_frame(A
      AVFilterContext    *avctx = inlink->dst;
      AVFilterLink     *outlink = avctx->outputs[0];
      TonemapOpenCLContext *ctx = avctx->priv;
@@ -2027,7 +2021,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 
      av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
             av_get_pix_fmt_name(input->format),
-@@ -363,8 +904,49 @@ static int tonemap_opencl_filter_frame(A
+@@ -363,8 +898,49 @@ static int tonemap_opencl_filter_frame(A
      if (err < 0)
          goto fail;
 
@@ -2079,7 +2073,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 
      if (ctx->trc != -1)
          output->color_trc = ctx->trc;
-@@ -385,72 +967,50 @@ static int tonemap_opencl_filter_frame(A
+@@ -385,72 +961,50 @@ static int tonemap_opencl_filter_frame(A
      ctx->range_out = output->color_range;
      ctx->chroma_loc = output->chroma_location;
 
@@ -2175,7 +2169,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
      av_frame_free(&input);
      av_frame_free(&output);
      return err;
-@@ -458,24 +1018,9 @@ fail:
+@@ -458,24 +1012,9 @@ fail:
 
  static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx)
  {
@@ -2202,7 +2196,7 @@ Index: FFmpeg/libavfilter/vf_tonemap_opencl.c
 
      ff_opencl_filter_uninit(avctx);
  }
-@@ -483,37 +1028,50 @@ static av_cold void tonemap_opencl_unini
+@@ -483,37 +1022,50 @@ static av_cold void tonemap_opencl_unini
  #define OFFSET(x) offsetof(TonemapOpenCLContext, x)
  #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
  static const AVOption tonemap_opencl_options[] = {