Fix CUVID crash on resolution change (#418)

Also adds a bunch of other changes necessary to better support mid-stream resolution changes. Unfortunately, with CUVID there still seems to be a brief flash of green (its duration looks to be the length of the decoder's internal frame buffer), but we can tackle that separately. This PR simply makes the transcoder (1) not crash and (2) correctly encode mid-stream rotations, including with CPUs.
livepeer · Aug 19, 2024 · 20131b6 · 20131b6
2 parents b5181eb + 0e6fd2e
commit 20131b6
Show file tree

Hide file tree

Showing 8 changed files with 400 additions and 18 deletions.
diff --git a/ffmpeg/decoder.c b/ffmpeg/decoder.c
@@ -22,12 +22,13 @@ static int lpms_receive_frame(struct input_ctx *ictx, AVCodecContext *dec, AVFra
     return ret;
 }
 
-static int send_first_pkt(struct input_ctx *ictx)
+static int send_flush_pkt(struct input_ctx *ictx)
 {
   if (ictx->flushed) return 0;
-  if (!ictx->first_pkt) return lpms_ERR_INPUT_NOKF;
+  if (!ictx->flush_pkt) return lpms_ERR_INPUT_NOKF;
 
-  int ret = avcodec_send_packet(ictx->vc, ictx->first_pkt);
+  int ret = avcodec_send_packet(ictx->vc, ictx->flush_pkt);
+  if (ret == AVERROR(EAGAIN)) return ret; // decoder is mid-reset
   ictx->sentinel_count++;
   if (ret < 0) {
     LPMS_ERR(packet_cleanup, "Error sending flush packet");
@@ -68,13 +69,25 @@ int decode_in(struct input_ctx *ictx, AVPacket *pkt, AVFrame *frame, int *stream
     return 0;
   }
 
-  if (!ictx->first_pkt && pkt->flags & AV_PKT_FLAG_KEY && decoder == ictx->vc) {
-    ictx->first_pkt = av_packet_clone(pkt);
-    ictx->first_pkt->pts = -1;
+  // Set up flush packet. Do this every keyframe in case the underlying frame changes
+  if (pkt->flags & AV_PKT_FLAG_KEY && decoder == ictx->vc) {
+    if (!ictx->flush_pkt) ictx->flush_pkt = av_packet_clone(pkt);
+    else {
+      av_packet_unref(ictx->flush_pkt);
+      av_packet_ref(ictx->flush_pkt, pkt);
+    }
+    ictx->flush_pkt->pts = -1;
   }
 
   ret = lpms_send_packet(ictx, decoder, pkt);
-  if (ret < 0) {
+  if (ret == AVERROR(EAGAIN)) {
+    // Usually means the decoder needs to drain itself - block demuxing until then
+    // Seems to happen during mid-stream resolution changes
+    if (ictx->blocked_pkt) LPMS_ERR_RETURN("unexpectedly got multiple blocked packets");
+    ictx->blocked_pkt = av_packet_clone(pkt);
+    if (!ictx->blocked_pkt) LPMS_ERR_RETURN("could not clone packet for blocking");
+    // continue in an attempt to drain the decoder
+  } else if (ret < 0) {
     LPMS_ERR_RETURN("Error sending packet to decoder");
   }
   ret = lpms_receive_frame(ictx, decoder, frame);
@@ -104,8 +117,10 @@ int flush_in(struct input_ctx *ictx, AVFrame *frame, int *stream_index)
   // TODO this is unnecessary for SW decoding! SW process should match audio
   if (ictx->vc && !ictx->flushed && ictx->pkt_diff > 0) {
     ictx->flushing = 1;
-    ret = send_first_pkt(ictx);
-    if (ret < 0) {
+    ret = send_flush_pkt(ictx);
+    if (ret == AVERROR(EAGAIN)) {
+      // do nothing; decoder recently reset and needs to drain so let it
+    } else if (ret < 0) {
       ictx->flushed = 1;
       return ret;
     }
@@ -137,7 +152,10 @@ int process_in(struct input_ctx *ictx, AVFrame *frame, AVPacket *pkt,
   av_packet_unref(pkt);
 
   // Demux next packet
-  ret = demux_in(ictx, pkt);
+  if (ictx->blocked_pkt) {
+    av_packet_move_ref(pkt, ictx->blocked_pkt);
+    av_packet_free(&ictx->blocked_pkt);
+  } else ret = demux_in(ictx, pkt);
   // See if we got anything
   if (ret == AVERROR_EOF) {
     // no more packets, flush the decoder(s)
@@ -376,5 +394,6 @@ void free_input(struct input_ctx *inctx)
   if (inctx->hw_device_ctx) av_buffer_unref(&inctx->hw_device_ctx);
   if (inctx->last_frame_v) av_frame_free(&inctx->last_frame_v);
   if (inctx->last_frame_a) av_frame_free(&inctx->last_frame_a);
+  if (inctx->blocked_pkt) av_packet_free(&inctx->blocked_pkt);
 }
 
diff --git a/ffmpeg/decoder.h b/ffmpeg/decoder.h
@@ -20,7 +20,7 @@ struct input_ctx {
   char *xcoderParams;
 
   // Decoder flush
-  AVPacket *first_pkt;
+  AVPacket *flush_pkt;
   int flushed;
   int flushing;
   // The diff of `packets sent - frames recv` serves as an estimate of
@@ -33,6 +33,9 @@ struct input_ctx {
 #define SENTINEL_MAX 8
   uint16_t sentinel_count;
 
+  // Packet held while decoder is blocked and needs to drain
+  AVPacket *blocked_pkt;
+
   // Filter flush
   AVFrame *last_frame_v, *last_frame_a;
 

diff --git a/ffmpeg/encoder.c b/ffmpeg/encoder.c
@@ -255,7 +255,11 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)
 	if(strcmp(octx->xcoderParams,"")!=0){
 	    av_opt_set(vc->priv_data, "xcoder-params", octx->xcoderParams, 0);
 	}
-    ret = avcodec_open2(vc, codec, &octx->video->opts);
+    // copy codec options and open encoder
+    AVDictionary *opts = NULL;
+    if (octx->video->opts) av_dict_copy(&opts, octx->video->opts, 0);
+    ret = avcodec_open2(vc, codec, &opts);
+    if (opts) av_dict_free(&opts);
     if (ret < 0) LPMS_ERR(open_output_err, "Error opening video encoder");
     octx->hw_type = ictx->hw_type;
   }
@@ -332,12 +336,81 @@ int reopen_output(struct output_ctx *octx, struct input_ctx *ictx)
   return ret;
 }
 
-static int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost)
+int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost)
 {
   int ret = 0;
   AVPacket *pkt = NULL;
 
   if (AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type && frame) {
+    if (encoder->width != frame->width || encoder->height != frame->height) {
+      // Frame dimensions changed so need to re-init encoder
+      const AVCodec *codec = avcodec_find_encoder_by_name(octx->video->name);
+      if (!codec) LPMS_ERR(encode_cleanup, "Unable to find encoder");
+      AVCodecContext *vc = avcodec_alloc_context3(codec);
+      if (!vc) LPMS_ERR(encode_cleanup, "Unable to alloc video encoder");
+      // copy any additional params needed from AVCodecParameters
+      AVCodecParameters *codecpar = avcodec_parameters_alloc();
+      if (!codecpar) LPMS_ERR(encode_cleanup, "Unable to alloc codec params");
+      avcodec_parameters_from_context(codecpar, encoder);
+      avcodec_parameters_to_context(vc, codecpar);
+      avcodec_parameters_free(&codecpar);
+      // manually set some additional fields
+      vc->width = frame->width;
+      vc->height = frame->height;
+      vc->time_base = encoder->time_base;
+      vc->flags = encoder->flags;
+      vc->rc_min_rate = encoder->rc_min_rate;
+      vc->rc_max_rate = encoder->rc_max_rate;
+      vc->bit_rate = encoder->bit_rate;
+      vc->rc_buffer_size = encoder->rc_buffer_size;
+      if (encoder->hw_frames_ctx) {
+        if (octx->vf.active && av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx)) {
+          vc->hw_frames_ctx =
+            av_buffer_ref(av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx));
+          if (!vc->hw_frames_ctx) {
+            LPMS_ERR(encode_cleanup, "Unable to re-alloc encoder hwframes")
+          }
+        } else {
+          vc->hw_frames_ctx = av_buffer_ref(encoder->hw_frames_ctx);
+        }
+      }
+
+      // flush old encoder
+      AVPacket *pkt = av_packet_alloc();
+      if (!pkt) LPMS_ERR(encode_cleanup, "Unable to alloc flush packet");
+      avcodec_send_frame(encoder, NULL);
+      AVRational time_base = encoder->time_base;
+      while (!ret) {
+        av_packet_unref(pkt);
+        ret = avcodec_receive_packet(encoder, pkt);
+        // TODO error handling
+        if (!ret) {
+          if (!octx->fps.den && octx->vf.active) {
+            // adjust timestamps for filter passthrough
+            time_base = octx->vf.time_base;
+            int64_t pts_dts = pkt->pts - pkt->dts;
+            pkt->pts = (int64_t)pkt->opaque; // already in filter timebase
+            pkt->dts = pkt->pts - av_rescale_q(pts_dts, encoder->time_base, time_base);
+          }
+          mux(pkt, time_base, octx, ost);
+        } else if (AVERROR_EOF != ret) {
+          av_packet_free(&pkt);
+          LPMS_ERR(encode_cleanup, "did not get eof");
+        }
+      }
+      av_packet_free(&pkt);
+      avcodec_free_context(&octx->vc);
+
+      // copy codec options and open encoder
+      AVDictionary *opts = NULL;
+      if (octx->video->opts) av_dict_copy(&opts, octx->video->opts, 0);
+      ret = avcodec_open2(vc, codec, &opts);
+      if (opts) av_dict_free(&opts);
+      if (ret < 0) LPMS_ERR(encode_cleanup, "Error opening video encoder");
+      if (octx->gop_pts_len) octx->next_kf_pts = frame->pts + octx->gop_pts_len;
+      octx->vc = vc;
+      encoder = vc;
+    }
     if (!octx->res->frames) {
       frame->pict_type = AV_PICTURE_TYPE_I;
     }

diff --git a/ffmpeg/encoder.h b/ffmpeg/encoder.h
@@ -12,5 +12,6 @@ void free_output(struct output_ctx *octx);
 int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext *encoder, AVStream *ost,
   struct filter_ctx *filter, AVFrame *inf);
 int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost);
+int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost);
 
 #endif // _LPMS_ENCODER_H_