[FFmpeg-devel] [PATCH] avformat/mp3dec: fix gapless audio support

Sat Sep 20 11:10:19 CEST 2014

The code already had skipping of initial padding, but discarding
trailing frame padding was missing.

This is somewhat questionable, because it will make the decoder discard
any data after the declared file size in the LAME header. But note that
skipping full frames at the end of the stream is required. Encoders
actually create such files.

---
By the way, can someone explain to me why mp3dec adjusts st->start_time?
It looks like this will be propagated to AVFormatContext.start_time. But
I would expect that the start time of mp3s is always 0, and that
libavcodec does _not_ change the timestamp of the first packet.
---
 libavformat/avformat.h |  8 ++++++++
 libavformat/mp3dec.c   |  1 +
 libavformat/utils.c    | 17 ++++++++++++++++-
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index b915148..2370cb0 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1031,6 +1031,14 @@ typedef struct AVStream {
     int skip_samples;
 
     /**
+     * If not 0, the first audio sample that should be discarded from the stream.
+     * This is broken by design (needs global sample count), but can't be
+     * avoided for broken by design formats such as mp3 with ad-hoc gapless
+     * audio support.
+     */
+    int64_t end_discard_sample;
+
+    /**
      * Number of internally decoded frames, used internally in libavformat, do not access
      * its lifetime differs from info which is why it is not in that structure.
      */
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index 688f235..25a2c1b 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -219,6 +219,7 @@ static void mp3_parse_info_tag(AVFormatContext *s, AVStream *st,
         mp3->start_pad = v>>12;
         mp3->  end_pad = v&4095;
         st->skip_samples = mp3->start_pad + 528 + 1;
+        st->end_discard_sample = -mp3->end_pad  + 528 + 1 + mp3->frames * (int64_t)spf;
         if (!st->start_time)
             st->start_time = av_rescale_q(st->skip_samples,
                                             (AVRational){1, c->sample_rate},
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 6e828f7..f8015cc 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1255,6 +1255,11 @@ static int read_from_packet_buffer(AVPacketList **pkt_buffer,
     return 0;
 }
 
+static int64_t ts_to_samples(AVStream *st, int64_t ts)
+{
+    return av_rescale(ts, st->time_base.num * st->codec->sample_rate, st->time_base.den);
+}
+
 static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
 {
     int ret = 0, i, got_packet = 0;
@@ -1352,10 +1357,20 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
 
     if (ret >= 0) {
         AVStream *st = s->streams[pkt->stream_index];
-        if (st->skip_samples) {
+        int discard_padding = 0;
+        if (st->end_discard_sample && pkt->pts != AV_NOPTS_VALUE) {
+            int64_t pts = pkt->pts - (is_relative(pkt->pts) ? RELATIVE_TS_BASE : 0);
+            int64_t sample = ts_to_samples(st, pts);
+            int duration = ts_to_samples(st, pkt->duration);
+            int64_t end_sample = sample + duration;
+            if (duration > 0 && end_sample >= st->end_discard_sample)
+                discard_padding = FFMIN(end_sample - st->end_discard_sample, duration);
+        }
+        if (st->skip_samples || discard_padding) {
             uint8_t *p = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
             if (p) {
                 AV_WL32(p, st->skip_samples);
+                AV_WL32(p + 4, discard_padding);
                 av_log(s, AV_LOG_DEBUG, "demuxer injecting skip %d\n", st->skip_samples);
             }
             st->skip_samples = 0;
-- 
2.1.0