[FFmpeg-cvslog] avformat/spdifenc: fix handling of large TrueHD frames

Anssi Hannula git at videolan.org
Thu Feb 20 22:25:40 EET 2020


ffmpeg | branch: master | Anssi Hannula <anssi.hannula at iki.fi> | Wed Feb 19 22:46:35 2020 +0200| [36e156bef02566d70cea46cc5e00b3e5d5ed3286] | committer: Anssi Hannula

avformat/spdifenc: fix handling of large TrueHD frames

The TrueHD IEC 61937 encapsulation code uses a very naive method of
always inserting 24 TrueHD frames evenly in a MAT frame. This does not
work for larger frames as they may exceed the size of 1/24th of a MAT
frame.

To fix that, use the input_timing field in the TrueHD frame to determine
the proper position of the TrueHD frame in the MAT frame. That field is
basically a dts field, telling the time to feed this frame to the
decoder in sample count units.

This can cause a TrueHD frame to be split between two MAT frames, so a
second concatenation hd_buf is added, alternating with the first buffer.

Large frames are preceded by smaller frames that have input_timing
values that cause the frames to be sent out faster than the nominal rate
(i.e. increasing decoder latency, long decoder buffer), allowing the
larger frames to then be sent out slower than the nominal rate as the
decoder has enough data buffered to keep it busy.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=36e156bef02566d70cea46cc5e00b3e5d5ed3286
---

 libavformat/spdifenc.c | 215 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 168 insertions(+), 47 deletions(-)

diff --git a/libavformat/spdifenc.c b/libavformat/spdifenc.c
index 6b6b1dd3b1..dcfab87cc4 100644
--- a/libavformat/spdifenc.c
+++ b/libavformat/spdifenc.c
@@ -1,7 +1,7 @@
 /*
  * IEC 61937 muxer
  * Copyright (c) 2009 Bartlomiej Wolowiec
- * Copyright (c) 2010 Anssi Hannula
+ * Copyright (c) 2010, 2020 Anssi Hannula
  * Copyright (c) 2010 Carl Eugen Hoyos
  *
  * This file is part of FFmpeg.
@@ -69,13 +69,18 @@ typedef struct IEC61937Context {
     int use_preamble;               ///< preamble enabled (disabled for exactly pre-padded DTS)
     int extra_bswap;                ///< extra bswap for payload (for LE DTS => standard BE DTS)
 
-    uint8_t *hd_buf[1];             ///< allocated buffer to concatenate hd audio frames
+    uint8_t *hd_buf[2];             ///< allocated buffers to concatenate hd audio frames
     int hd_buf_size;                ///< size of the hd audio buffer (eac3, dts4)
-    int hd_buf_count;               ///< number of frames in the hd audio buffer (eac3, truehd)
-    int hd_buf_filled;              ///< amount of bytes in the hd audio buffer (eac3)
+    int hd_buf_count;               ///< number of frames in the hd audio buffer (eac3)
+    int hd_buf_filled;              ///< amount of bytes in the hd audio buffer (eac3, truehd)
+    int hd_buf_idx;                 ///< active hd buffer index (truehd)
 
     int dtshd_skip;                 ///< counter used for skipping DTS-HD frames
 
+    uint16_t truehd_prev_time;      ///< input_timing from the last frame
+    int truehd_prev_size;           ///< previous frame size in bytes, including any MAT codes
+    int truehd_samples_per_frame;   ///< samples per frame for padding calculation
+
     /* AVOptions: */
     int dtshd_rate;
     int dtshd_fallback;
@@ -384,62 +389,175 @@ static int spdif_header_aac(AVFormatContext *s, AVPacket *pkt)
 /*
  * It seems Dolby TrueHD frames have to be encapsulated in MAT frames before
  * they can be encapsulated in IEC 61937.
- * Here we encapsulate 24 TrueHD frames in a single MAT frame, padding them
- * to achieve constant rate.
- * The actual format of a MAT frame is unknown, but the below seems to work.
- * However, it seems it is not actually necessary for the 24 TrueHD frames to
- * be in an exact alignment with the MAT frame.
  */
+#define MAT_PKT_OFFSET          61440
 #define MAT_FRAME_SIZE          61424
-#define TRUEHD_FRAME_OFFSET     2560
-#define MAT_MIDDLE_CODE_OFFSET  -4
+
+static const uint8_t mat_start_code[20] = {
+    0x07, 0x9E, 0x00, 0x03, 0x84, 0x01, 0x01, 0x01, 0x80, 0x00, 0x56, 0xA5, 0x3B, 0xF4, 0x81, 0x83,
+    0x49, 0x80, 0x77, 0xE0,
+};
+static const uint8_t mat_middle_code[12] = {
+    0xC3, 0xC1, 0x42, 0x49, 0x3B, 0xFA, 0x82, 0x83, 0x49, 0x80, 0x77, 0xE0,
+};
+static const uint8_t mat_end_code[16] = {
+    0xC3, 0xC2, 0xC0, 0xC4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x97, 0x11,
+};
+
+#define MAT_CODE(position, data) { .pos = position, .code = data, .len = sizeof(data) }
+
+static const struct {
+    unsigned int pos;       ///< byte offset in the MAT frame buffer at which the code is copied
+    const uint8_t *code;    ///< bytes of the code to copy
+    unsigned int len;       ///< number of bytes in code
+} mat_codes[] = {
+    MAT_CODE(0, mat_start_code),
+    MAT_CODE(30708, mat_middle_code),
+    MAT_CODE(MAT_FRAME_SIZE - sizeof(mat_end_code), mat_end_code),
+};
 
 static int spdif_header_truehd(AVFormatContext *s, AVPacket *pkt)
 {
     IEC61937Context *ctx = s->priv_data;
-    int mat_code_length = 0;
-    static const char mat_end_code[16] = { 0xC3, 0xC2, 0xC0, 0xC4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x97, 0x11 };
-
-    if (!ctx->hd_buf_count) {
-        static const char mat_start_code[20] = { 0x07, 0x9E, 0x00, 0x03, 0x84, 0x01, 0x01, 0x01, 0x80, 0x00, 0x56, 0xA5, 0x3B, 0xF4, 0x81, 0x83, 0x49, 0x80, 0x77, 0xE0 };
-        mat_code_length = sizeof(mat_start_code) + BURST_HEADER_SIZE;
-        memcpy(ctx->hd_buf[0], mat_start_code, sizeof(mat_start_code));
-
-    } else if (ctx->hd_buf_count == 12) {
-        static const char mat_middle_code[12] = { 0xC3, 0xC1, 0x42, 0x49, 0x3B, 0xFA, 0x82, 0x83, 0x49, 0x80, 0x77, 0xE0 };
-        mat_code_length = sizeof(mat_middle_code) + MAT_MIDDLE_CODE_OFFSET;
-        memcpy(&ctx->hd_buf[0][12 * TRUEHD_FRAME_OFFSET - BURST_HEADER_SIZE + MAT_MIDDLE_CODE_OFFSET],
-               mat_middle_code, sizeof(mat_middle_code));
+    uint8_t *hd_buf = ctx->hd_buf[ctx->hd_buf_idx];
+    int ratebits;
+    int padding_remaining = 0;
+    uint16_t input_timing;
+    int total_frame_size = pkt->size;
+    const uint8_t *dataptr = pkt->data;
+    int data_remaining = pkt->size;
+    int have_pkt = 0;
+    int next_code_idx;
+
+    if (pkt->size < 10)
+        return AVERROR_INVALIDDATA;
+
+    if (AV_RB24(pkt->data + 4) == 0xf8726f) {
+        /* major sync unit, fetch sample rate (upper nibble of the rate byte) */
+        if (pkt->data[7] == 0xba)
+            ratebits = pkt->data[8] >> 4;
+        else if (pkt->data[7] == 0xbb)
+            ratebits = pkt->data[9] >> 4;
+        else
+            return AVERROR_INVALIDDATA;
+
+        ctx->truehd_samples_per_frame = 40 << (ratebits & 3);
+        av_log(s, AV_LOG_TRACE, "TrueHD samples per frame: %d\n",
+               ctx->truehd_samples_per_frame);
     }
 
-    if (pkt->size > TRUEHD_FRAME_OFFSET - mat_code_length) {
-        /* if such frames exist, we'd need some more complex logic to
-         * distribute the TrueHD frames in the MAT frame */
-        avpriv_request_sample(s, "Too large TrueHD frame of %d bytes",
-                              pkt->size);
-        return AVERROR_PATCHWELCOME;
+    if (!ctx->truehd_samples_per_frame)
+        return AVERROR_INVALIDDATA;
+
+    input_timing = AV_RB16(pkt->data + 2);
+    if (ctx->truehd_prev_size) {
+        uint16_t delta_samples = input_timing - ctx->truehd_prev_time;
+        /*
+         * One multiple-of-48kHz frame is 1/1200 sec and the IEC 61937 rate
+         * is 768kHz = 768000*4 bytes/sec.
+         * The nominal space per frame is therefore
+         * (768000*4 bytes/sec) * (1/1200 sec) = 2560 bytes.
+         * For multiple-of-44.1kHz frames: 1/1102.5 sec, 705.6kHz, 2560 bytes.
+         *
+         * 2560 is divisible by truehd_samples_per_frame.
+         */
+        int delta_bytes = delta_samples * 2560 / ctx->truehd_samples_per_frame;
+
+        /* padding needed before this frame */
+        padding_remaining = delta_bytes - ctx->truehd_prev_size;
+
+        av_log(s, AV_LOG_TRACE, "delta_samples: %"PRIu16", delta_bytes: %d\n",
+               delta_samples, delta_bytes);
+
+        /* sanity check */
+        if (padding_remaining < 0 || padding_remaining >= MAT_FRAME_SIZE / 2) {
+            avpriv_request_sample(s, "Unusual frame timing: %"PRIu16" => %"PRIu16", %d samples/frame",
+                                  ctx->truehd_prev_time, input_timing, ctx->truehd_samples_per_frame);
+            padding_remaining = 0;
+        }
     }
 
-    memcpy(&ctx->hd_buf[0][ctx->hd_buf_count * TRUEHD_FRAME_OFFSET - BURST_HEADER_SIZE + mat_code_length],
-           pkt->data, pkt->size);
-    if (ctx->hd_buf_count < 23) {
-        memset(&ctx->hd_buf[0][ctx->hd_buf_count * TRUEHD_FRAME_OFFSET - BURST_HEADER_SIZE + mat_code_length + pkt->size],
-               0, TRUEHD_FRAME_OFFSET - pkt->size - mat_code_length);
-    } else {
-        size_t padding = MAT_FRAME_SIZE - (ctx->hd_buf_count * TRUEHD_FRAME_OFFSET - BURST_HEADER_SIZE + pkt->size);
-        memset(&ctx->hd_buf[0][MAT_FRAME_SIZE - padding], 0, padding);
+    for (next_code_idx = 0; next_code_idx < FF_ARRAY_ELEMS(mat_codes); next_code_idx++)
+        if (ctx->hd_buf_filled <= mat_codes[next_code_idx].pos)
+            break;
+
+    if (next_code_idx >= FF_ARRAY_ELEMS(mat_codes))
+        return AVERROR_BUG;
+
+    while (padding_remaining || data_remaining ||
+           mat_codes[next_code_idx].pos == ctx->hd_buf_filled) {
+
+        if (mat_codes[next_code_idx].pos == ctx->hd_buf_filled) {
+            /* time to insert MAT code */
+            int code_len = mat_codes[next_code_idx].len;
+            int code_len_remaining = code_len;
+            memcpy(hd_buf + mat_codes[next_code_idx].pos,
+                   mat_codes[next_code_idx].code, code_len);
+            ctx->hd_buf_filled += code_len;
+
+            next_code_idx++;
+            if (next_code_idx == FF_ARRAY_ELEMS(mat_codes)) {
+                next_code_idx = 0;
+
+                /* this was the last code, move to the next MAT frame */
+                have_pkt = 1;
+                ctx->out_buf = hd_buf;
+                ctx->hd_buf_idx ^= 1;
+                hd_buf = ctx->hd_buf[ctx->hd_buf_idx];
+                ctx->hd_buf_filled = 0;
+
+                /* inter-frame gap has to be counted as well, add it */
+                code_len_remaining += MAT_PKT_OFFSET - MAT_FRAME_SIZE;
+            }
+
+            if (padding_remaining) {
+                /* consider the MAT code as padding */
+                int counted_as_padding = FFMIN(padding_remaining,
+                                               code_len_remaining);
+                padding_remaining -= counted_as_padding;
+                code_len_remaining -= counted_as_padding;
+            }
+            /* count the remainder of the code as part of frame size */
+            if (code_len_remaining)
+                total_frame_size += code_len_remaining;
+        }
+
+        if (padding_remaining) {
+            int padding_to_insert = FFMIN(mat_codes[next_code_idx].pos - ctx->hd_buf_filled,
+                                          padding_remaining);
+
+            memset(hd_buf + ctx->hd_buf_filled, 0, padding_to_insert);
+            ctx->hd_buf_filled += padding_to_insert;
+            padding_remaining -= padding_to_insert;
+
+            if (padding_remaining)
+                continue; /* time to insert MAT code */
+        }
+
+        if (data_remaining) {
+            int data_to_insert = FFMIN(mat_codes[next_code_idx].pos - ctx->hd_buf_filled,
+                                       data_remaining);
+
+            memcpy(hd_buf + ctx->hd_buf_filled, dataptr, data_to_insert);
+            ctx->hd_buf_filled += data_to_insert;
+            dataptr += data_to_insert;
+            data_remaining -= data_to_insert;
+        }
     }
 
-    if (++ctx->hd_buf_count < 24){
+    ctx->truehd_prev_size = total_frame_size;
+    ctx->truehd_prev_time = input_timing;
+
+    av_log(s, AV_LOG_TRACE, "TrueHD frame inserted, total size %d, buffer position %d\n",
+           total_frame_size, ctx->hd_buf_filled);
+
+    if (!have_pkt) {
         ctx->pkt_offset = 0;
         return 0;
     }
-    memcpy(&ctx->hd_buf[0][MAT_FRAME_SIZE - sizeof(mat_end_code)], mat_end_code, sizeof(mat_end_code));
-    ctx->hd_buf_count = 0;
 
     ctx->data_type   = IEC61937_TRUEHD;
-    ctx->pkt_offset  = 61440;
-    ctx->out_buf     = ctx->hd_buf[0];
+    ctx->pkt_offset  = MAT_PKT_OFFSET;
     ctx->out_bytes   = MAT_FRAME_SIZE;
     ctx->length_code = MAT_FRAME_SIZE;
     return 0;
@@ -470,9 +588,11 @@ static int spdif_write_header(AVFormatContext *s)
     case AV_CODEC_ID_TRUEHD:
     case AV_CODEC_ID_MLP:
         ctx->header_info = spdif_header_truehd;
-        ctx->hd_buf[0] = av_malloc(MAT_FRAME_SIZE);
-        if (!ctx->hd_buf[0])
-            return AVERROR(ENOMEM);
+        for (int i = 0; i < FF_ARRAY_ELEMS(ctx->hd_buf); i++) {
+            ctx->hd_buf[i] = av_malloc(MAT_FRAME_SIZE);
+            if (!ctx->hd_buf[i])
+                return AVERROR(ENOMEM);
+        }
         break;
     default:
         avpriv_report_missing_feature(s, "Codec %d",
@@ -486,7 +606,8 @@ static void spdif_deinit(AVFormatContext *s)
 {
     IEC61937Context *ctx = s->priv_data;
     av_freep(&ctx->buffer);
-    av_freep(&ctx->hd_buf[0]);
+    for (int i = 0; i < FF_ARRAY_ELEMS(ctx->hd_buf); i++)
+        av_freep(&ctx->hd_buf[i]);
 }
 
 static av_always_inline void spdif_put_16(IEC61937Context *ctx,



More information about the ffmpeg-cvslog mailing list