Merge branch 'master' into ai-video-rebase-main

livepeer · Sep 30, 2024 · 4ae995b
2 parents 28406cf + fe5aff1
commit 4ae995b
Show file tree

Hide file tree

Showing 16 changed files with 790 additions and 271 deletions.
diff --git a/data/audio.mp3 b/data/audio.mp3
diff --git a/data/audio.ogg b/data/audio.ogg
diff --git a/ffmpeg/api_test.go b/ffmpeg/api_test.go
@@ -104,11 +104,7 @@ func TestTranscoderAPI_InvalidFile(t *testing.T) {
 	// fail # 1
 	in.Fname = "none"
 	_, err := tc.Transcode(in, out)
-	if err == nil || err.Error() != "TranscoderInvalidVideo" {
-		// Early codec check didn't find video in missing input file so we get `TranscoderInvalidVideo`
-		//  instead of `No such file or directory`
-		t.Error("Expected 'TranscoderInvalidVideo', got ", err)
-	}
+	require.Error(t, err, "No such file or directory")
 
 	// success # 1
 	in.Fname = "../transcoder/test.ts"

diff --git a/ffmpeg/decoder.c b/ffmpeg/decoder.c
@@ -22,12 +22,13 @@ static int lpms_receive_frame(struct input_ctx *ictx, AVCodecContext *dec, AVFra
     return ret;
 }
 
-static int send_first_pkt(struct input_ctx *ictx)
+static int send_flush_pkt(struct input_ctx *ictx)
 {
   if (ictx->flushed) return 0;
-  if (!ictx->first_pkt) return lpms_ERR_INPUT_NOKF;
+  if (!ictx->flush_pkt) return lpms_ERR_INPUT_NOKF;
 
-  int ret = avcodec_send_packet(ictx->vc, ictx->first_pkt);
+  int ret = avcodec_send_packet(ictx->vc, ictx->flush_pkt);
+  if (ret == AVERROR(EAGAIN)) return ret; // decoder is mid-reset
   ictx->sentinel_count++;
   if (ret < 0) {
     LPMS_ERR(packet_cleanup, "Error sending flush packet");
@@ -68,13 +69,25 @@ int decode_in(struct input_ctx *ictx, AVPacket *pkt, AVFrame *frame, int *stream
     return 0;
   }
 
-  if (!ictx->first_pkt && pkt->flags & AV_PKT_FLAG_KEY && decoder == ictx->vc) {
-    ictx->first_pkt = av_packet_clone(pkt);
-    ictx->first_pkt->pts = -1;
+  // Set up flush packet. Do this every keyframe in case the underlying frame changes
+  if (pkt->flags & AV_PKT_FLAG_KEY && decoder == ictx->vc) {
+    if (!ictx->flush_pkt) ictx->flush_pkt = av_packet_clone(pkt);
+    else {
+      av_packet_unref(ictx->flush_pkt);
+      av_packet_ref(ictx->flush_pkt, pkt);
+    }
+    ictx->flush_pkt->pts = -1;
   }
 
   ret = lpms_send_packet(ictx, decoder, pkt);
-  if (ret < 0) {
+  if (ret == AVERROR(EAGAIN)) {
+    // Usually means the decoder needs to drain itself - block demuxing until then
+    // Seems to happen during mid-stream resolution changes
+    if (ictx->blocked_pkt) LPMS_ERR_RETURN("unexpectedly got multiple blocked packets");
+    ictx->blocked_pkt = av_packet_clone(pkt);
+    if (!ictx->blocked_pkt) LPMS_ERR_RETURN("could not clone packet for blocking");
+    // continue in an attempt to drain the decoder
+  } else if (ret < 0) {
     LPMS_ERR_RETURN("Error sending packet to decoder");
   }
   ret = lpms_receive_frame(ictx, decoder, frame);
@@ -104,8 +117,10 @@ int flush_in(struct input_ctx *ictx, AVFrame *frame, int *stream_index)
   // TODO this is unnecessary for SW decoding! SW process should match audio
   if (ictx->vc && !ictx->flushed && ictx->pkt_diff > 0) {
     ictx->flushing = 1;
-    ret = send_first_pkt(ictx);
-    if (ret < 0) {
+    ret = send_flush_pkt(ictx);
+    if (ret == AVERROR(EAGAIN)) {
+      // do nothing; decoder recently reset and needs to drain so let it
+    } else if (ret < 0) {
       ictx->flushed = 1;
       return ret;
     }
@@ -137,7 +152,10 @@ int process_in(struct input_ctx *ictx, AVFrame *frame, AVPacket *pkt,
   av_packet_unref(pkt);
 
   // Demux next packet
-  ret = demux_in(ictx, pkt);
+  if (ictx->blocked_pkt) {
+    av_packet_move_ref(pkt, ictx->blocked_pkt);
+    av_packet_free(&ictx->blocked_pkt);
+  } else ret = demux_in(ictx, pkt);
   // See if we got anything
   if (ret == AVERROR_EOF) {
     // no more packets, flush the decoder(s)
@@ -376,5 +394,6 @@ void free_input(struct input_ctx *inctx)
   if (inctx->hw_device_ctx) av_buffer_unref(&inctx->hw_device_ctx);
   if (inctx->last_frame_v) av_frame_free(&inctx->last_frame_v);
   if (inctx->last_frame_a) av_frame_free(&inctx->last_frame_a);
+  if (inctx->blocked_pkt) av_packet_free(&inctx->blocked_pkt);
 }
 
diff --git a/ffmpeg/decoder.h b/ffmpeg/decoder.h
@@ -20,7 +20,7 @@ struct input_ctx {
   char *xcoderParams;
 
   // Decoder flush
-  AVPacket *first_pkt;
+  AVPacket *flush_pkt;
   int flushed;
   int flushing;
   // The diff of `packets sent - frames recv` serves as an estimate of
@@ -33,6 +33,9 @@ struct input_ctx {
 #define SENTINEL_MAX 8
   uint16_t sentinel_count;
 
+  // Packet held while decoder is blocked and needs to drain
+  AVPacket *blocked_pkt;
+
   // Filter flush
   AVFrame *last_frame_v, *last_frame_a;
 

diff --git a/ffmpeg/encoder.c b/ffmpeg/encoder.c
@@ -255,7 +255,11 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)
 	if(strcmp(octx->xcoderParams,"")!=0){
 	    av_opt_set(vc->priv_data, "xcoder-params", octx->xcoderParams, 0);
 	}
-    ret = avcodec_open2(vc, codec, &octx->video->opts);
+    // copy codec options and open encoder
+    AVDictionary *opts = NULL;
+    if (octx->video->opts) av_dict_copy(&opts, octx->video->opts, 0);
+    ret = avcodec_open2(vc, codec, &opts);
+    if (opts) av_dict_free(&opts);
     if (ret < 0) LPMS_ERR(open_output_err, "Error opening video encoder");
     octx->hw_type = ictx->hw_type;
   }
@@ -282,6 +286,8 @@ int open_output(struct output_ctx *octx, struct input_ctx *ictx)
     if (ret < 0) LPMS_ERR(open_output_err, "Error opening output file");
   }
 
+  if (octx->metadata) av_dict_copy(&oc->metadata, octx->metadata, 0);
+
   ret = avformat_write_header(oc, &octx->muxer->opts);
   if (ret < 0) LPMS_ERR(open_output_err, "Error writing header");
 
@@ -320,6 +326,8 @@ int reopen_output(struct output_ctx *octx, struct input_ctx *ictx)
     ret = avio_open(&octx->oc->pb, octx->fname, AVIO_FLAG_WRITE);
     if (ret < 0) LPMS_ERR(reopen_out_err, "Error re-opening output file");
   }
+
+  if (octx->metadata) av_dict_copy(&octx->oc->metadata, octx->metadata, 0);
   ret = avformat_write_header(octx->oc, &octx->muxer->opts);
   if (ret < 0) LPMS_ERR(reopen_out_err, "Error re-writing header");
 
@@ -332,12 +340,81 @@ int reopen_output(struct output_ctx *octx, struct input_ctx *ictx)
   return ret;
 }
 
-static int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost)
+int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost)
 {
   int ret = 0;
   AVPacket *pkt = NULL;
 
   if (AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type && frame) {
+    if (encoder->width != frame->width || encoder->height != frame->height) {
+      // Frame dimensions changed so need to re-init encoder
+      const AVCodec *codec = avcodec_find_encoder_by_name(octx->video->name);
+      if (!codec) LPMS_ERR(encode_cleanup, "Unable to find encoder");
+      AVCodecContext *vc = avcodec_alloc_context3(codec);
+      if (!vc) LPMS_ERR(encode_cleanup, "Unable to alloc video encoder");
+      // copy any additional params needed from AVCodecParameters
+      AVCodecParameters *codecpar = avcodec_parameters_alloc();
+      if (!codecpar) LPMS_ERR(encode_cleanup, "Unable to alloc codec params");
+      avcodec_parameters_from_context(codecpar, encoder);
+      avcodec_parameters_to_context(vc, codecpar);
+      avcodec_parameters_free(&codecpar);
+      // manually set some additional fields
+      vc->width = frame->width;
+      vc->height = frame->height;
+      vc->time_base = encoder->time_base;
+      vc->flags = encoder->flags;
+      vc->rc_min_rate = encoder->rc_min_rate;
+      vc->rc_max_rate = encoder->rc_max_rate;
+      vc->bit_rate = encoder->bit_rate;
+      vc->rc_buffer_size = encoder->rc_buffer_size;
+      if (encoder->hw_frames_ctx) {
+        if (octx->vf.active && av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx)) {
+          vc->hw_frames_ctx =
+            av_buffer_ref(av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx));
+          if (!vc->hw_frames_ctx) {
+            LPMS_ERR(encode_cleanup, "Unable to re-alloc encoder hwframes")
+          }
+        } else {
+          vc->hw_frames_ctx = av_buffer_ref(encoder->hw_frames_ctx);
+        }
+      }
+
+      // flush old encoder
+      AVPacket *pkt = av_packet_alloc();
+      if (!pkt) LPMS_ERR(encode_cleanup, "Unable to alloc flush packet");
+      avcodec_send_frame(encoder, NULL);
+      AVRational time_base = encoder->time_base;
+      while (!ret) {
+        av_packet_unref(pkt);
+        ret = avcodec_receive_packet(encoder, pkt);
+        // TODO error handling
+        if (!ret) {
+          if (!octx->fps.den && octx->vf.active) {
+            // adjust timestamps for filter passthrough
+            time_base = octx->vf.time_base;
+            int64_t pts_dts = pkt->pts - pkt->dts;
+            pkt->pts = (int64_t)pkt->opaque; // already in filter timebase
+            pkt->dts = pkt->pts - av_rescale_q(pts_dts, encoder->time_base, time_base);
+          }
+          mux(pkt, time_base, octx, ost);
+        } else if (AVERROR_EOF != ret) {
+          av_packet_free(&pkt);
+          LPMS_ERR(encode_cleanup, "did not get eof");
+        }
+      }
+      av_packet_free(&pkt);
+      avcodec_free_context(&octx->vc);
+
+      // copy codec options and open encoder
+      AVDictionary *opts = NULL;
+      if (octx->video->opts) av_dict_copy(&opts, octx->video->opts, 0);
+      ret = avcodec_open2(vc, codec, &opts);
+      if (opts) av_dict_free(&opts);
+      if (ret < 0) LPMS_ERR(encode_cleanup, "Error opening video encoder");
+      if (octx->gop_pts_len) octx->next_kf_pts = frame->pts + octx->gop_pts_len;
+      octx->vc = vc;
+      encoder = vc;
+    }
     if (!octx->res->frames) {
       frame->pict_type = AV_PICTURE_TYPE_I;
     }
@@ -373,8 +450,9 @@ static int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* oc
     if (AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type && !octx->fps.den && octx->vf.active) {
       // try to preserve source timestamps for fps passthrough.
       time_base = octx->vf.time_base;
+      int64_t pts_dts_diff = pkt->pts - pkt->dts;
       pkt->pts = (int64_t)pkt->opaque; // already in filter timebase
-      pkt->dts = av_rescale_q(pkt->dts, encoder->time_base, time_base);
+      pkt->dts = pkt->pts - av_rescale_q(pts_dts_diff, encoder->time_base, time_base);
     }
     ret = mux(pkt, time_base, octx, ost);
     if (ret < 0) goto encode_cleanup;

diff --git a/ffmpeg/encoder.h b/ffmpeg/encoder.h
@@ -12,5 +12,6 @@ void free_output(struct output_ctx *octx);
 int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext *encoder, AVStream *ost,
   struct filter_ctx *filter, AVFrame *inf);
 int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost);
+int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVStream* ost);
 
 #endif // _LPMS_ENCODER_H_
diff --git a/ffmpeg/extras.c b/ffmpeg/extras.c
@@ -168,6 +168,9 @@ int lpms_get_codec_info(char *fname, pcodec_info out)
     out->dur = ic->duration / AV_TIME_BASE;
   }
   // Return
+  if (ic->iformat && ic->iformat->name) {
+    strncpy(out->format_name, ic->iformat->name, MIN(strlen(out->format_name), strlen(ic->iformat->name)) + 1);
+  }
   if (video_present && vc->name) {
       strncpy(out->video_codec, vc->name, MIN(strlen(out->video_codec), strlen(vc->name))+1);
       // If video track is present extract pixel format info
@@ -186,6 +189,7 @@ int lpms_get_codec_info(char *fname, pcodec_info out)
   }
   if (audio_present && ac->name) {
       strncpy(out->audio_codec, ac->name, MIN(strlen(out->audio_codec), strlen(ac->name))+1);
+      out->audio_bit_rate = ic->streams[astream]->codecpar->bit_rate;
   } else {
       // Indicate failure to extract audio codec from given container
       out->audio_codec[0] = 0;

diff --git a/ffmpeg/extras.h b/ffmpeg/extras.h
@@ -2,8 +2,10 @@
 #define _LPMS_EXTRAS_H_
 
 typedef struct s_codec_info {
+  char * format_name;
   char * video_codec;
   char * audio_codec;
+  int    audio_bit_rate;
   int    pixel_format;
   int    width;
   int    height;