[FFmpeg-devel] [PATCH v3] lavc/audiotoolboxenc: fix noise in encoded audio

Wed Jan 3 04:02:20 EET 2018

On 1/2/2018 1:24 PM, zhangjiejun1992 at gmail.com wrote:
> From: Jiejun Zhang <zhangjiejun1992 at gmail.com>
> 
> This fixes #6940
> 
> Although undocumented, AudioToolbox seems to require the data supplied
> by the callback (i.e. ffat_encode_callback) being unchanged until the
> next time the callback is called. In the old implementation, the
> AVBuffer backing the frame is recycled after the frame is freed, and
> somebody else (maybe the decoder) will write into the AVBuffer and
> change the data. AudioToolbox then encodes some wrong data and noise
> is produced. Retaining a frame reference solves this problem.
> ---
>  libavcodec/audiotoolboxenc.c | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c
> index 71885d1530..5d3a746348 100644
> --- a/libavcodec/audiotoolboxenc.c
> +++ b/libavcodec/audiotoolboxenc.c
> @@ -48,6 +48,8 @@ typedef struct ATDecodeContext {
>      AudioFrameQueue afq;
>      int eof;
>      int frame_size;
> +
> +    AVFrame* encoding_frame;
>  } ATDecodeContext;
>  
>  static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
> @@ -442,6 +444,10 @@ static av_cold int ffat_init_encoder(AVCodecContext *avctx)
>  
>      ff_af_queue_init(avctx, &at->afq);
>  
> +    at->encoding_frame = av_frame_alloc();
> +    if (!at->encoding_frame)
> +        return AVERROR(ENOMEM);
> +
>      return 0;
>  }
>  
> @@ -453,6 +459,7 @@ static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
>      AVCodecContext *avctx = inctx;
>      ATDecodeContext *at = avctx->priv_data;
>      AVFrame *frame;
> +    int ret;
>  
>      if (!at->frame_queue.available) {
>          if (at->eof) {
> @@ -475,6 +482,10 @@ static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
>      if (*nb_packets > frame->nb_samples)
>          *nb_packets = frame->nb_samples;
>  
> +    av_frame_unref(at->encoding_frame);
> +    if ((ret = av_frame_ref(at->encoding_frame, frame)) < 0)
> +        return ret;

Wouldn't you have to set nb_packets to 0 in case of failure? And for a
non libav* callback function maybe this shouldn't return an AVERROR(),
but just 1 instead.

Also, look at audiotoolboxdec.c, where the reference (packet there
instead of frame) is created right before calling
AudioConverterFillComplexBuffer(), as it can fail. Maybe you can do
something like that instead.

> +
>      ff_bufqueue_add(avctx, &at->used_frame_queue, frame);
>  
>      return 0;
> @@ -565,6 +576,7 @@ static av_cold int ffat_close_encoder(AVCodecContext *avctx)
>      ff_bufqueue_discard_all(&at->frame_queue);
>      ff_bufqueue_discard_all(&at->used_frame_queue);
>      ff_af_queue_close(&at->afq);
> +    av_frame_free(&at->encoding_frame);
>      return 0;
>  }
>  
>