[FFmpeg-cvslog] opusdec: properly handle mismatching configurations in multichannel streams

Thu Aug 20 14:29:32 CEST 2015

ffmpeg | branch: release/2.4 | Anton Khirnov <anton at khirnov.net> | Mon Jul 27 11:13:53 2015 +0200| [c49b88b93bca53c04f18d78c27dbf1dc6daea909] | committer: Anton Khirnov

opusdec: properly handle mismatching configurations in multichannel streams

The substreams can have different resampling delays, so an additional
level of buffering is needed to synchronize them.

Bug-Id: 876

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c49b88b93bca53c04f18d78c27dbf1dc6daea909
---

 libavcodec/opus.h    |   10 +++++
 libavcodec/opusdec.c |  103 +++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 99 insertions(+), 14 deletions(-)

diff --git a/libavcodec/opus.h b/libavcodec/opus.h
index c2fac06..accb998 100644
--- a/libavcodec/opus.h
+++ b/libavcodec/opus.h
@@ -164,6 +164,16 @@ typedef struct ChannelMap {
 
 typedef struct OpusContext {
     OpusStreamContext *streams;
+
+    /* current output buffers for each stream */
+    float **out;
+    int   *out_size;
+    /* Buffers for synchronizing the streams when they have different
+     * resampling delays */
+    AVAudioFifo **sync_buffers;
+    /* number of decoded samples for each stream */
+    int         *decoded_samples;
+
     int             nb_streams;
     int      nb_stereo_streams;
 
diff --git a/libavcodec/opusdec.c b/libavcodec/opusdec.c
index 771922e..80a80b4 100644
--- a/libavcodec/opusdec.c
+++ b/libavcodec/opusdec.c
@@ -367,12 +367,17 @@ static int opus_decode_frame(OpusStreamContext *s, const uint8_t *data, int size
 
 static int opus_decode_subpacket(OpusStreamContext *s,
                                  const uint8_t *buf, int buf_size,
+                                 float **out, int out_size,
                                  int nb_samples)
 {
     int output_samples = 0;
     int flush_needed   = 0;
     int i, j, ret;
 
+    s->out[0]   = out[0];
+    s->out[1]   = out[1];
+    s->out_size = out_size;
+
     /* check if we need to flush the resampler */
     if (avresample_is_open(s->avr)) {
         if (buf) {
@@ -450,9 +455,16 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
     const uint8_t *buf  = avpkt->data;
     int buf_size        = avpkt->size;
     int coded_samples   = 0;
-    int decoded_samples = 0;
+    int decoded_samples = INT_MAX;
+    int delayed_samples = 0;
     int i, ret;
 
+    /* calculate the number of delayed samples */
+    for (i = 0; i < c->nb_streams; i++) {
+        delayed_samples = FFMAX(delayed_samples,
+                                c->streams[i].delayed_samples + av_audio_fifo_size(c->sync_buffers[i]));
+    }
+
     /* decode the header of the first sub-packet to find out the sample count */
     if (buf) {
         OpusPacket *pkt = &c->streams[0].packet;
@@ -465,7 +477,7 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
         c->streams[0].silk_samplerate = get_silk_samplerate(pkt->config);
     }
 
-    frame->nb_samples = coded_samples + c->streams[0].delayed_samples;
+    frame->nb_samples = coded_samples + delayed_samples;
 
     /* no input or buffered data => nothing to do */
     if (!frame->nb_samples) {
@@ -481,14 +493,43 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
     }
     frame->nb_samples = 0;
 
+    memset(c->out, 0, c->nb_streams * 2 * sizeof(*c->out));
     for (i = 0; i < avctx->channels; i++) {
         ChannelMap *map = &c->channel_maps[i];
         if (!map->copy)
-            c->streams[map->stream_idx].out[map->channel_idx] = (float*)frame->extended_data[i];
+            c->out[2 * map->stream_idx + map->channel_idx] = (float*)frame->extended_data[i];
     }
 
-    for (i = 0; i < c->nb_streams; i++)
-        c->streams[i].out_size = frame->linesize[0];
+    /* read the data from the sync buffers */
+    for (i = 0; i < c->nb_streams; i++) {
+        float          **out = c->out + 2 * i;
+        int sync_size = av_audio_fifo_size(c->sync_buffers[i]);
+
+        float sync_dummy[32];
+        int out_dummy = (!out[0]) | ((!out[1]) << 1);
+
+        if (!out[0])
+            out[0] = sync_dummy;
+        if (!out[1])
+            out[1] = sync_dummy;
+        if (out_dummy && sync_size > FF_ARRAY_ELEMS(sync_dummy))
+            return AVERROR_BUG;
+
+        ret = av_audio_fifo_read(c->sync_buffers[i], (void**)out, sync_size);
+        if (ret < 0)
+            return ret;
+
+        if (out_dummy & 1)
+            out[0] = NULL;
+        else
+            out[0] += ret;
+        if (out_dummy & 2)
+            out[1] = NULL;
+        else
+            out[1] += ret;
+
+        c->out_size[i] = frame->linesize[0] - ret * sizeof(float);
+    }
 
     /* decode each sub-packet */
     for (i = 0; i < c->nb_streams; i++) {
@@ -509,20 +550,31 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
             s->silk_samplerate = get_silk_samplerate(s->packet.config);
         }
 
-        ret = opus_decode_subpacket(&c->streams[i], buf,
-                                    s->packet.data_size, coded_samples);
+        ret = opus_decode_subpacket(&c->streams[i], buf, s->packet.data_size,
+                                    c->out + 2 * i, c->out_size[i], coded_samples);
         if (ret < 0)
             return ret;
-        if (decoded_samples && ret != decoded_samples) {
-            av_log(avctx, AV_LOG_ERROR, "Different numbers of decoded samples "
-                   "in a multi-channel stream\n");
-            return AVERROR_INVALIDDATA;
-        }
-        decoded_samples = ret;
+        c->decoded_samples[i] = ret;
+        decoded_samples       = FFMIN(decoded_samples, ret);
+
         buf      += s->packet.packet_size;
         buf_size -= s->packet.packet_size;
     }
 
+    /* buffer the extra samples */
+    for (i = 0; i < c->nb_streams; i++) {
+        int buffer_samples = c->decoded_samples[i] - decoded_samples;
+        if (buffer_samples) {
+            float *buf[2] = { c->out[2 * i + 0] ? c->out[2 * i + 0] : (float*)frame->extended_data[0],
+                              c->out[2 * i + 1] ? c->out[2 * i + 1] : (float*)frame->extended_data[0] };
+            buf[0] += buffer_samples;
+            buf[1] += buffer_samples;
+            ret = av_audio_fifo_write(c->sync_buffers[i], (void**)buf, buffer_samples);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
     for (i = 0; i < avctx->channels; i++) {
         ChannelMap *map = &c->channel_maps[i];
 
@@ -563,6 +615,8 @@ static av_cold void opus_decode_flush(AVCodecContext *ctx)
             av_audio_fifo_drain(s->celt_delay, av_audio_fifo_size(s->celt_delay));
         avresample_close(s->avr);
 
+        av_audio_fifo_drain(c->sync_buffers[i], av_audio_fifo_size(c->sync_buffers[i]));
+
         ff_silk_flush(s->silk);
         ff_celt_flush(s->celt);
     }
@@ -587,6 +641,16 @@ static av_cold int opus_decode_close(AVCodecContext *avctx)
     }
 
     av_freep(&c->streams);
+
+    if (c->sync_buffers) {
+        for (i = 0; i < c->nb_streams; i++)
+            av_audio_fifo_free(c->sync_buffers[i]);
+    }
+    av_freep(&c->sync_buffers);
+    av_freep(&c->decoded_samples);
+    av_freep(&c->out);
+    av_freep(&c->out_size);
+
     c->nb_streams = 0;
 
     av_freep(&c->channel_maps);
@@ -611,7 +675,11 @@ static av_cold int opus_decode_init(AVCodecContext *avctx)
 
     /* allocate and init each independent decoder */
     c->streams = av_mallocz_array(c->nb_streams, sizeof(*c->streams));
-    if (!c->streams) {
+    c->out             = av_mallocz_array(c->nb_streams, 2 * sizeof(*c->out));
+    c->out_size        = av_mallocz_array(c->nb_streams, sizeof(*c->out_size));
+    c->sync_buffers    = av_mallocz_array(c->nb_streams, sizeof(*c->sync_buffers));
+    c->decoded_samples = av_mallocz_array(c->nb_streams, sizeof(*c->decoded_samples));
+    if (!c->streams || !c->sync_buffers || !c->decoded_samples || !c->out || !c->out_size) {
         c->nb_streams = 0;
         ret = AVERROR(ENOMEM);
         goto fail;
@@ -658,6 +726,13 @@ static av_cold int opus_decode_init(AVCodecContext *avctx)
             ret = AVERROR(ENOMEM);
             goto fail;
         }
+
+        c->sync_buffers[i] = av_audio_fifo_alloc(avctx->sample_fmt,
+                                                 s->output_channels, 32);
+        if (!c->sync_buffers[i]) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
     }
 
     return 0;