[FFmpeg-devel] [PATCH] avformat/mp3dec: avoid early EOF with concatenated gapless mp3s

Sun Sep 21 12:51:41 CEST 2014

Consider a file created with something like:

    cat file1.mp3 file2.mp3 > result.mp3

Then if file2.mp3 has gapless information, result.mp3 would stop playing
somewhere in the middle. This happens because the gapless info directs
the decoder to discard all samples after a certain position. To make
matters worse, the gapless info of file2.mp3 will be used when playing
the file1.mp3 part, because the gapless info is located at the end of
the file.

While handling concatenated gapless files correctly would be insane and
a lot of effort (especially without scanning the whole file on opening),
it's easy to at least prevent early EOF. Playback will happen to work,
even if it's slightly broken.
---
 libavformat/avformat.h | 9 ++++++++-
 libavformat/mp3dec.c   | 6 ++++--
 libavformat/utils.c    | 7 ++++---
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 2370cb0..78054de 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1036,7 +1036,14 @@ typedef struct AVStream {
      * avoided for broken by design formats such as mp3 with ad-hoc gapless
      * audio support.
      */
-    int64_t end_discard_sample;
+    int64_t first_discard_sample;
+
+    /**
+     * The sample after the last sample that is intended to be discarded after
+     * first_discard_sample. Works on frame boundaries only. Used to prevent
+     * early EOF if the gapless info is broken (consider concatenated mp3s).
+     */
+    int64_t last_discard_sample;
 
     /**
      * Number of internally decoded frames, used internally in libavformat, do not access
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index ade1677..cc8c096 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -219,8 +219,10 @@ static void mp3_parse_info_tag(AVFormatContext *s, AVStream *st,
         mp3->start_pad = v>>12;
         mp3->  end_pad = v&4095;
         st->skip_samples = mp3->start_pad + 528 + 1;
-        if (mp3->frames)
-            st->end_discard_sample = -mp3->end_pad + 528 + 1 + mp3->frames * (int64_t)spf;
+        if (mp3->frames) {
+            st->first_discard_sample = -mp3->end_pad + 528 + 1 + mp3->frames * (int64_t)spf;
+            st->last_discard_sample = mp3->frames * (int64_t)spf;
+        }
         if (!st->start_time)
             st->start_time = av_rescale_q(st->skip_samples,
                                             (AVRational){1, c->sample_rate},
diff --git a/libavformat/utils.c b/libavformat/utils.c
index f8015cc..a149f40 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1358,13 +1358,14 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
     if (ret >= 0) {
         AVStream *st = s->streams[pkt->stream_index];
         int discard_padding = 0;
-        if (st->end_discard_sample && pkt->pts != AV_NOPTS_VALUE) {
+        if (st->first_discard_sample && pkt->pts != AV_NOPTS_VALUE) {
             int64_t pts = pkt->pts - (is_relative(pkt->pts) ? RELATIVE_TS_BASE : 0);
             int64_t sample = ts_to_samples(st, pts);
             int duration = ts_to_samples(st, pkt->duration);
             int64_t end_sample = sample + duration;
-            if (duration > 0 && end_sample >= st->end_discard_sample)
-                discard_padding = FFMIN(end_sample - st->end_discard_sample, duration);
+            if (duration > 0 && end_sample >= st->first_discard_sample &&
+                sample < st->last_discard_sample)
+                discard_padding = FFMIN(end_sample - st->first_discard_sample, duration);
         }
         if (st->skip_samples || discard_padding) {
             uint8_t *p = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
-- 
2.1.0