[FFmpeg-devel] [PATCH v2 5/7] avcodec/mediacodecenc: remove the strategy to create DTS

Wed Dec 7 11:31:20 EET 2022

From: Zhao Zhili <zhilizhao at tencent.com>

Using the input PTS as DTS has multiple problems:
1. If there is no reordering, it's better to just use the output
PTS as DTS, since the encoder may change the timestamp value (either
on purpose or due to rounding error).

2. If there is reordering, the input PTS should be shifted by a few
frames to serve as DTS, to satisfy the requirement of PTS >= DTS. I
can't find a reliable way to determine how many frames to shift by.
For example, we don't know if the encoder uses hierarchical B frames.
max_num_reorder_frames can be obtained from the VUI, but the VUI is
optional.

3. The encoder dropping frames makes the case worse. Android has a
BITRATE_MODE_CBR_FD option to allow it explicitly.
---
 libavcodec/mediacodecenc.c | 28 +---------------------------
 1 file changed, 1 insertion(+), 27 deletions(-)

diff --git a/libavcodec/mediacodecenc.c b/libavcodec/mediacodecenc.c
index 7f2ae88285..8e28a50e0d 100644
--- a/libavcodec/mediacodecenc.c
+++ b/libavcodec/mediacodecenc.c
@@ -64,18 +64,6 @@ typedef struct MediaCodecEncContext {
 
     uint8_t *extradata;
     int extradata_size;
-
-    // Since MediaCodec doesn't output DTS, use a timestamp queue to save pts
-    // of AVFrame and generate DTS for AVPacket.
-    //
-    // This doesn't work when use Surface as input, in that case frames can be
-    // sent to encoder without our notice. One exception is frames come from
-    // our MediaCodec decoder wrapper, since we can control it's render by
-    // av_mediacodec_release_buffer.
-    int64_t timestamps[32];
-    int ts_head;
-    int ts_tail;
-
     int eof_sent;
 
     AVFrame *frame;
@@ -368,11 +356,6 @@ static int mediacodec_receive(AVCodecContext *avctx,
     }
     memcpy(pkt->data + extradata_size, out_buf + out_info.offset, out_info.size);
     pkt->pts = av_rescale_q(out_info.presentationTimeUs, AV_TIME_BASE_Q, avctx->time_base);
-    if (s->ts_tail != s->ts_head) {
-        pkt->dts = s->timestamps[s->ts_tail];
-        s->ts_tail = (s->ts_tail + 1) % FF_ARRAY_ELEMS(s->timestamps);
-    }
-
     if (out_info.flags & ff_AMediaCodec_getBufferFlagKeyFrame(codec))
         pkt->flags |= AV_PKT_FLAG_KEY;
     ret = 0;
@@ -437,14 +420,8 @@ static int mediacodec_send(AVCodecContext *avctx,
             return ff_AMediaCodec_signalEndOfInputStream(codec);
         }
 
-
-        if (frame->data[3]) {
-            pts = av_rescale_q(frame->pts, avctx->time_base, AV_TIME_BASE_Q);
-            s->timestamps[s->ts_head] = frame->pts;
-            s->ts_head = (s->ts_head + 1) % FF_ARRAY_ELEMS(s->timestamps);
-
+        if (frame->data[3])
             av_mediacodec_release_buffer((AVMediaCodecBuffer *)frame->data[3], 1);
-        }
         return 0;
     }
 
@@ -463,9 +440,6 @@ static int mediacodec_send(AVCodecContext *avctx,
         copy_frame_to_buffer(avctx, frame, input_buf, input_size);
 
         pts = av_rescale_q(frame->pts, avctx->time_base, AV_TIME_BASE_Q);
-
-        s->timestamps[s->ts_head] = frame->pts;
-        s->ts_head = (s->ts_head + 1) % FF_ARRAY_ELEMS(s->timestamps);
     } else {
         flags |= ff_AMediaCodec_getBufferFlagEndOfStream(codec);
         s->eof_sent = 1;
-- 
2.25.1