[Libav-user] Missing a channel in Dolby 5:1

Wed May 29 16:54:02 CEST 2013

I am using ffmpeg and libswresample to play back movie trailers. For 
stereo playback everything is good, but for Dolby 5.1, I get correct 
playback of all ambient noises but the audio track for voices is missing 
or too low of a sound, like a missing channel.  My only weird 
implementation if that the audio tracks are split into frames before 
playback.
I wonder if someone can suggest what it is I am doing wrong or missing.
Here is some sample code:

#if defined(_WIN32) || defined(_WIN64)
mrv::AudioEngine::AudioFormat kIntSampleFormat = mrv::AudioEngine::kS16LSB;
AVSampleFormat kInternalSampleFormat = AV_SAMPLE_FMT_S16;
unsigned kFormatSize = (unsigned) sizeof(int16_t);
#else
mrv::AudioEngine::AudioFormat kIntSampleFormat = 
mrv::AudioEngine::kFloatLSB;
AVSampleFormat kInternalSampleFormat = AV_SAMPLE_FMT_FLT;
unsigned kFormatSize = (unsigned) sizeof(float);
#endif

...

int CMedia::decode_audio3(AVCodecContext *avctx, int16_t *samples,
               int *frame_size_ptr,
               AVPacket *avpkt)
{
    AVFrame frame = { { 0 } };
    int ret, got_frame = 0;

     ret = avcodec_decode_audio4(avctx, &frame, &got_frame, avpkt);

     if (ret >= 0 && got_frame) {
        int plane_size;
        int planar    = av_sample_fmt_is_planar(avctx->sample_fmt);
        int data_size = av_samples_get_buffer_size(&plane_size, 
avctx->channels,
                           frame.nb_samples,
                           avctx->sample_fmt, 1);
         if (*frame_size_ptr < data_size) {
        IMG_ERROR( "decode_audio3 - Output buffer size is too small for "
               "the current frame ("
               << *frame_size_ptr << " < " << data_size << ")" );
        return AVERROR(EINVAL);
         }

     if ( avctx->sample_fmt != kInternalSampleFormat )
     {
        if (!forw_ctx)
        {
           char buf[256];

           uint64_t  in_ch_layout =
get_valid_channel_layout(avctx->channel_layout, avctx->channels);

           if ( in_ch_layout == 0 ) in_ch_layout = AV_CH_LAYOUT_STEREO;

           av_get_channel_layout_string( buf, 256, avctx->channels,
                         in_ch_layout );

           IMG_INFO("Create audio conversion from " << buf
                << ", channels " << avctx->channels << ", " );
           IMG_INFO( "format "
                << av_get_sample_fmt_name( avctx->sample_fmt )
             << ", sample rate " << avctx->sample_rate << " to" );

           uint64_t out_ch_layout = in_ch_layout;

           av_get_channel_layout_string( buf, 256, avctx->channels,
                         out_ch_layout );
           AVSampleFormat  out_sample_fmt = kInternalSampleFormat;
           AVSampleFormat  in_sample_fmt = avctx->sample_fmt;
           int in_sample_rate = avctx->sample_rate;
           int out_sample_rate = in_sample_rate;
           IMG_INFO( buf << ", channels " << avctx->channels << ", format "
             << av_get_sample_fmt_name( out_sample_fmt )
             << ", sample rate "
             << out_sample_rate);

           forw_ctx  = swr_alloc_set_opts(NULL, out_ch_layout,
                          out_sample_fmt,  out_sample_rate,
                          in_ch_layout,  in_sample_fmt,
                          in_sample_rate,
                          0, NULL);
           if(!forw_ctx) {
          LOG_ERROR("Failed to alloc swresample library");
          return 0;
           }
           if(swr_init(forw_ctx) < 0)
           {
          char buf[256];
          av_get_channel_layout_string(buf, 256, -1, in_ch_layout);
          LOG_ERROR( "Failed to init swresample library with "
                 << buf << " "
                 << av_get_sample_fmt_name(in_sample_fmt)
                 << " frequency: " << in_sample_rate );
          return 0;
           }
        }

        swr_convert(forw_ctx, (uint8_t**)&samples,
                data_size / sizeof(int16_t),
                (const uint8_t **)frame.extended_data,
                frame.nb_samples );

     // If S16, data is half the size after conversion
        if ( kInternalSampleFormat == AV_SAMPLE_FMT_S16 )
           data_size /= 2;

     }
     else
     {
        memcpy(samples, frame.extended_data[0], data_size);

        if (planar && avctx->channels > 1) {
           uint8_t *out = ((uint8_t *)samples) + plane_size;
           for (int ch = 1; ch < avctx->channels; ch++) {
          memcpy(out, frame.extended_data[ch], plane_size);
          out += plane_size;
           }
        }
     }

         *frame_size_ptr = data_size;
     } else {
         *frame_size_ptr = 0;
     }
     return ret;
}

...
CMedia::DecodeStatus
CMedia::decode_audio_packet( boost::int64_t& ptsframe,
                  const boost::int64_t frame,
                  const AVPacket& pkt )
{

   AVStream* stream = get_audio_stream();
   if ( !stream ) return kDecodeNoStream;

   // Get the audio codec context
   AVCodecContext* ctx = stream->codec;

   assert( !_audio_packets.is_seek( pkt ) );
   assert( !_audio_packets.is_flush( pkt ) );
   assert( !_audio_packets.is_preroll( pkt ) );
   assert( !_audio_packets.is_loop_end( pkt ) );
   assert( !_audio_packets.is_loop_start( pkt ) );

   ptsframe = get_frame( stream, pkt );

   // Make sure audio frames are continous during playback to
   // accomodate weird sample rates not evenly divisable by frame rate
   if ( _audio_buf_used != 0 && (!_audio.empty()) )
     {
        ptsframe = _audio_last_frame + 1;
       // assert( ptsframe <= last_frame() );
     }

#ifdef DEBUG
   if ( _audio_buf_used + pkt.size >= _audio_max )
     {
       IMG_ERROR( _("Too much audio used:") << _audio_buf_used  );
     }
#endif

   AVPacket pkt_temp;
   av_init_packet(&pkt_temp);
   pkt_temp.data = pkt.data;
   pkt_temp.size = pkt.size;

   assert( _audio_buf != NULL );
   assert( pkt.size + _audio_buf_used < _audio_max );

   int audio_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
   assert( pkt_temp.size <= audio_size );

   while ( pkt_temp.size > 0 || pkt_temp.data == NULL )
     {
        // Decode the audio into the buffer
        assert( _audio_buf_used + pkt_temp.size <= _audio_max );
        assert( audio_size > 0 );
        int ret = decode_audio3( ctx,
                                ( int16_t * )( (char*)_audio_buf +
_audio_buf_used ),
                                &audio_size, &pkt_temp );

       // If no samples are returned, then break now
       if ( ret <= 0 )
     {
        pkt_temp.size = 0;
        IMG_ERROR( _("Audio missed for frame: ") << ptsframe
               << _(" ret: ") << ret
               << _(" audio max: ")  << _audio_max
               << _(" audio used: ") << _audio_buf_used
                );

       return kDecodeMissingSamples;
     }

       assert( audio_size > 0 );
       assert( audio_size + _audio_buf_used <= _audio_max );

       // Decrement the length by the number of bytes parsed
       pkt_temp.data += ret;
       pkt_temp.size -= ret;

       if ( audio_size <= 0 ) break;

       _audio_buf_used += audio_size;
     }

   if ( pkt_temp.size == 0 ) return kDecodeOK;

   return kDecodeMissingSamples;
}

bool CMedia::open_audio( const short channels,
              const unsigned nSamplesPerSec )
{
   close_audio();

   _samples_per_sec = nSamplesPerSec;
   return _audio_engine->open( channels, nSamplesPerSec,
                   kIntSampleFormat, kFormatSize*8);
}

I can show the implementation of _audio_engine and how I split the audio 
into frames if needed, too.