<div dir="ltr">Hi,<br><br>I have written a small C++ program that converts WebM (Vorbis) audio to AAC using the FFmpeg libraries (on Windows, with the 32-bit Zeranoe FFmpeg builds). The program sometimes converts files as expected, but at other times it produces an output file with a longer duration than the input, and the audio playback is broken or distorted as well. <br>


<br>This code appears to work fine for MP3 input, which is also decoded to the FLTP sample format (the same as Vorbis), so technically the two cases should look the same to the program.<br><br>Please see the sample code I am using below:<br><br>    ////////////////////////////////////////////////<br>


    #include "stdafx.h"<br>    <br>    #include <iostream><br>    #include <fstream><br>    <br>    #include <string><br>    #include <vector><br>    #include <map><br>    <br>    #include <deque><br>


    #include <queue><br>    <br>    #include <math.h><br>    #include <stdlib.h><br>    #include <stdio.h><br>    #include <conio.h><br>    <br>    extern "C"<br>    {<br>    #include "libavcodec/avcodec.h"<br>


    #include "libavformat/avformat.h"<br>    #include "libavdevice/avdevice.h"<br>    #include "libswscale/swscale.h"<br>    #include "libavutil/dict.h"<br>    #include "libavutil/error.h"<br>


    #include "libavutil/opt.h"<br>    #include <libavutil/fifo.h><br>    #include <libavutil/imgutils.h><br>    #include <libavutil/samplefmt.h><br>    #include <libswresample/swresample.h><br>


    }<br>    <br>    AVFormatContext*    fmt_ctx= NULL; /* input (demuxer) context, opened by open_audio_input() */<br>    int                    audio_stream_index = -1; /* index of the first audio stream in fmt_ctx, -1 until one is found */<br>    AVCodecContext *    codec_ctx_audio = NULL; /* decoder context of the selected input audio stream */<br>    AVCodec*            codec_audio = NULL; /* decoder looked up for codec_ctx_audio->codec_id */<br>    AVFrame*            decoded_frame = NULL; /* frame filled by avcodec_decode_audio4() in decode_packet() */<br>


    uint8_t**            audio_dst_data = NULL; /* plane-pointer array for the samples produced by swr_convert() */<br>    int                    got_frame = 0; /* reset by decode_frame(), set by the decoder when a frame was produced */<br>    int                    audiobufsize = 0; /* only reset to 0 in decode_frame(); not otherwise used in this file */<br>    AVPacket            input_packet; /* packet most recently read by av_read_frame() */<br>    int                    audio_dst_linesize = 0; /* line size reported by av_samples_alloc() for audio_dst_data */<br>


    int                    audio_dst_bufsize = 0; /* result of av_samples_get_buffer_size(); passed to write_audio_frame() */<br>    SwrContext *        swr = NULL; /* sample-format converter; configured to output AV_SAMPLE_FMT_S16 */<br>    <br>    AVOutputFormat *    output_format = NULL ; /* output_fmt_ctx->oformat, cached by open_encoder() */<br>    AVFormatContext *    output_fmt_ctx= NULL; /* output (muxer) context, allocated by open_encoder() */<br>    AVStream *            audio_st = NULL; /* output audio stream created by add_audio_stream() */<br>


    AVCodec *            audio_codec = NULL; /* encoder selected for the output format's default audio codec */<br>    double                audio_pts = 0.0; /* progress value computed and printed in main() */<br>    AVFrame *            out_frame = avcodec_alloc_frame(); /* frame handed to the encoder in write_audio_frame(); allocated during static initialisation */<br>    <br>    int                    audio_input_frame_size = 0; /* samples per encoder frame, set by open_audio() */<br>


    <br>    uint8_t *            audio_data_buf = NULL; /* not referenced by the code in this file */<br>    uint8_t *            audio_out = NULL; /* not referenced by the code in this file */<br>    int                    audio_bit_rate; /* encoder bit rate; copied from the decoder context in main() */<br>    int                    audio_sample_rate; /* encoder sample rate; copied from the decoder context in main() */<br>    int                    audio_channels; /* encoder channel count; copied from the decoder context in main() */<br>


    <br>    /* Input side: open the source, then read and decode one packet per call. */<br>    int decode_packet();<br>    int open_audio_input(char* src_filename);<br>    int decode_frame();<br>    <br>    /* Output side: create the muxer and encoder, encode frames, close the encoder. */<br>    int open_encoder(char* output_filename);<br>    AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec,<br>


        enum AVCodecID codec_id);<br>    int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st);<br>    void close_audio(AVFormatContext *oc, AVStream *st);<br>    void write_audio_frame(uint8_t ** audio_src_data, int audio_src_bufsize);<br>


    <br>    int open_audio_input(char* src_filename)<br>    {<br>        int i =0;<br>        /* open input file, and allocate format context */<br>        if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0)<br>


        {<br>            fprintf(stderr, "Could not open source file %s\n", src_filename);<br>            exit(1);<br>        }<br>    <br>        // Retrieve stream information<br>        if(avformat_find_stream_info(fmt_ctx, NULL)<0)<br>


            return -1; // Couldn't find stream information<br>    <br>        // Dump information about file onto standard error<br>        av_dump_format(fmt_ctx, 0, src_filename, 0);<br>    <br>        // Find the first video stream<br>


        for(i=0; i<fmt_ctx->nb_streams; i++)<br>        {<br>            if(fmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO)<br>            {<br>                audio_stream_index=i;<br>                break;<br>


            }<br>        }<br>        if ( audio_stream_index != -1 )<br>        {<br>            // Get a pointer to the codec context for the audio stream<br>            codec_ctx_audio=fmt_ctx->streams[audio_stream_index]->codec;<br>


    <br>            // Find the decoder for the video stream<br>            codec_audio=avcodec_find_decoder(codec_ctx_audio->codec_id);<br>            if(codec_audio==NULL) {<br>                fprintf(stderr, "Unsupported audio codec!\n");<br>


                return -1; // Codec not found<br>            }<br>    <br>            // Open codec<br>            AVDictionary *codecDictOptions = NULL;<br>            if(avcodec_open2(codec_ctx_audio, codec_audio, &codecDictOptions)<0)<br>


                return -1; // Could not open codec<br>    <br>            // Set up SWR context once you've got codec information<br>            swr = swr_alloc();<br>            av_opt_set_int(swr, "in_channel_layout",  codec_ctx_audio->channel_layout, 0);<br>


            av_opt_set_int(swr, "out_channel_layout", codec_ctx_audio->channel_layout,  0);<br>            av_opt_set_int(swr, "in_sample_rate",     codec_ctx_audio->sample_rate, 0);<br>            av_opt_set_int(swr, "out_sample_rate",    codec_ctx_audio->sample_rate, 0);<br>


            av_opt_set_sample_fmt(swr, "in_sample_fmt",  codec_ctx_audio->sample_fmt, 0);<br>            av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16,  0);<br>            swr_init(swr);<br>


    <br>            // Allocate audio frame<br>            if ( decoded_frame == NULL ) decoded_frame = avcodec_alloc_frame();<br>            int nb_planes = 0;<br>            AVStream* audio_stream = fmt_ctx->streams[audio_stream_index];<br>


            nb_planes = av_sample_fmt_is_planar(codec_ctx_audio->sample_fmt) ? codec_ctx_audio->channels : 1;<br>            int tempSize =  sizeof(uint8_t *) * nb_planes;<br>            audio_dst_data = (uint8_t**)av_mallocz(tempSize);<br>


            if (!audio_dst_data)<br>            {<br>                fprintf(stderr, "Could not allocate audio data buffers\n");<br>            }<br>            else<br>            {<br>                for ( int i = 0 ; i < nb_planes ; i ++ )<br>


                {<br>                    audio_dst_data[i] = NULL;<br>                }<br>            }<br>        }<br>    }<br>    <br>    <br>    int decode_frame()<br>    {<br>        int rv = 0;<br>        got_frame = 0;<br>


        if ( fmt_ctx == NULL  )<br>        {<br>            return rv;<br>        }<br>        int ret = 0;<br>        audiobufsize = 0;<br>        rv = av_read_frame(fmt_ctx, &input_packet);<br>        if ( rv < 0 )<br>


        {<br>            return rv;<br>        }<br>        rv = decode_packet();<br>        // Free the input_packet that was allocated by av_read_frame<br>        av_free_packet(&input_packet);<br>        return rv;<br>


    }<br>    <br>    int decode_packet()<br>    {<br>        int rv = 0;<br>        int ret = 0;<br>    <br>        //audio stream?<br>        if(input_packet.stream_index == audio_stream_index)<br>        {<br>            /* decode audio frame */<br>


            rv = avcodec_decode_audio4(codec_ctx_audio, decoded_frame, &got_frame, &input_packet);<br>            if (rv < 0)<br>            {<br>                fprintf(stderr, "Error decoding audio frame\n");<br>


                //return ret;<br>            }<br>            else<br>            {<br>                if (got_frame)<br>                {<br>                    if ( audio_dst_data[0] == NULL )<br>                    {<br>


                         ret = av_samples_alloc(audio_dst_data, &audio_dst_linesize, decoded_frame->channels,<br>                            decoded_frame->nb_samples, (AVSampleFormat)decoded_frame->format, 1);<br>


                        if (ret < 0)<br>                        {<br>                            fprintf(stderr, "Could not allocate audio buffer\n");<br>                            return AVERROR(ENOMEM);<br>


                        }<br>                        /* TODO: extend return code of the av_samples_* functions so that this call is not needed */<br>                        audio_dst_bufsize = av_samples_get_buffer_size(NULL, audio_st->codec->channels,<br>


                            decoded_frame->nb_samples, (AVSampleFormat)decoded_frame->format, 1);<br>    <br>                        //int16_t* outputBuffer = ...;<br>                        swr_convert(swr, audio_dst_data, out_frame->nb_samples,<br>


                                    (const uint8_t **)(decoded_frame->data), decoded_frame->nb_samples);<br>                        //swr_convert( swr, audio_dst_data, out_frame->nb_samples, (const uint8_t**) decoded_frame->extended_data, decoded_frame->nb_samples );<br>


                    }<br>                    /* copy audio data to destination buffer:<br>                    * this is required since rawaudio expects non aligned data */<br>                    //av_samples_copy(audio_dst_data, decoded_frame->data, 0, 0,<br>


                    //    decoded_frame->nb_samples, decoded_frame->channels, (AVSampleFormat)decoded_frame->format);<br>                }<br>            }<br>        }<br>        return rv;<br>    }<br>    <br>    <br>


    int open_encoder(char* output_filename )<br>    {<br>        int rv = 0;<br>    <br>        /* allocate the output media context */<br>        AVOutputFormat *opfmt = NULL;<br>    <br>        avformat_alloc_output_context2(&output_fmt_ctx, opfmt, NULL, output_filename);<br>


        if (!output_fmt_ctx) {<br>            printf("Could not deduce output format from file extension: using MPEG.\n");<br>            avformat_alloc_output_context2(&output_fmt_ctx, NULL, "mpeg", output_filename);<br>


        }<br>        if (!output_fmt_ctx) {<br>            rv = -1;<br>        }<br>        else<br>        {<br>            output_format = output_fmt_ctx->oformat;<br>        }<br>    <br>        /* Add the audio stream using the default format codecs<br>


        * and initialize the codecs. */<br>        audio_st = NULL;<br>    <br>        if ( output_fmt_ctx )<br>        {<br>            if (output_format->audio_codec != AV_CODEC_ID_NONE)<br>            {<br>                audio_st = add_audio_stream(output_fmt_ctx, &audio_codec, output_format->audio_codec);<br>


            }<br>    <br>            /* Now that all the parameters are set, we can open the audio and<br>            * video codecs and allocate the necessary encode buffers. */<br>            if (audio_st)<br>            {<br>


                rv = open_audio(output_fmt_ctx, audio_codec, audio_st);<br>                if ( rv < 0 ) return rv;<br>            }<br>    <br>            av_dump_format(output_fmt_ctx, 0, output_filename, 1);<br>            /* open the output file, if needed */<br>


            if (!(output_format->flags & AVFMT_NOFILE))<br>            {<br>                if (avio_open(&output_fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE) < 0) {<br>                    fprintf(stderr, "Could not open '%s'\n", output_filename);<br>


                    rv = -1;<br>                }<br>                else<br>                {<br>                    /* Write the stream header, if any. */<br>                    if (avformat_write_header(output_fmt_ctx, NULL) < 0)<br>


                    {<br>                        fprintf(stderr, "Error occurred when opening output file\n");<br>                        rv = -1;<br>                    }<br>                }<br>            }<br>


        }<br>    <br>        return rv;<br>    }<br>    <br>    AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec,<br>        enum AVCodecID codec_id)<br>    {<br>        AVCodecContext *c;<br>        AVStream *st;<br>


    <br>        /* find the audio encoder */<br>        *codec = avcodec_find_encoder(codec_id);<br>        if (!(*codec)) {<br>            fprintf(stderr, "Could not find codec\n");<br>            exit(1);<br>        }<br>


    <br>        st = avformat_new_stream(oc, *codec);<br>        if (!st) {<br>            fprintf(stderr, "Could not allocate stream\n");<br>            exit(1);<br>        }<br>        st->id = 1;<br>    <br>


        c = st->codec;<br>    <br>        /* put sample parameters */<br>        c->sample_fmt  = AV_SAMPLE_FMT_S16;<br>        c->bit_rate    = audio_bit_rate;<br>        c->sample_rate = audio_sample_rate;<br>


        c->channels    = audio_channels;<br>    <br>        // some formats want stream headers to be separate<br>        if (oc->oformat->flags & AVFMT_GLOBALHEADER)<br>            c->flags |= CODEC_FLAG_GLOBAL_HEADER;<br>


    <br>        return st;<br>    }<br>    <br>    int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st)<br>    {<br>        int ret=0;<br>        AVCodecContext *c;<br>    <br>        st->duration = fmt_ctx->duration;<br>


        c = st->codec;<br>    <br>        /* open it */<br>        ret = avcodec_open2(c, codec, NULL) ;<br>        if ( ret < 0)<br>        {<br>            fprintf(stderr, "could not open codec\n");<br>            return -1;<br>


            //exit(1);<br>        }<br>    <br>        if (c->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)<br>            audio_input_frame_size = 10000;<br>        else<br>            audio_input_frame_size = c->frame_size;<br>


        int tempSize = audio_input_frame_size *<br>            av_get_bytes_per_sample(c->sample_fmt) *<br>            c->channels;<br>        return ret;<br>    }<br>    <br>    void close_audio(AVFormatContext *oc, AVStream *st)<br>


    {<br>        avcodec_close(st->codec);<br>    }<br>    <br>    void write_audio_frame(uint8_t ** audio_src_data, int audio_src_bufsize)<br>    {<br>        AVFormatContext *oc = output_fmt_ctx;<br>        AVStream *st = audio_st;<br>


        if ( oc == NULL || st == NULL ) return;<br>        AVCodecContext *c;<br>        AVPacket pkt = { 0 }; // data and size must be 0;<br>        int got_packet;<br>    <br>        av_init_packet(&pkt);<br>        c = st->codec;<br>


    <br>        out_frame->nb_samples = audio_input_frame_size;<br>        int buf_size =         audio_src_bufsize *<br>            av_get_bytes_per_sample(c->sample_fmt) *<br>            c->channels;<br>        avcodec_fill_audio_frame(out_frame, c->channels, c->sample_fmt,<br>


            (uint8_t *) *audio_src_data,<br>            buf_size, 1);<br>        avcodec_encode_audio2(c, &pkt, out_frame, &got_packet);<br>        if (!got_packet)<br>        {<br>        }<br>        else<br>        {<br>


            if (pkt.pts != AV_NOPTS_VALUE)<br>                pkt.pts =  av_rescale_q(pkt.pts, st->codec->time_base, st->time_base);<br>            if (pkt.dts != AV_NOPTS_VALUE)<br>                pkt.dts = av_rescale_q(pkt.dts, st->codec->time_base, st->time_base);<br>


            if ( c && c->coded_frame && c->coded_frame->key_frame)<br>                pkt.flags |= AV_PKT_FLAG_KEY;<br>    <br>             pkt.stream_index = st->index;<br>            pkt.flags |= AV_PKT_FLAG_KEY;<br>


            /* Write the compressed frame to the media file. */<br>            if (av_interleaved_write_frame(oc, &pkt) != 0)<br>            {<br>                fprintf(stderr, "Error while writing audio frame\n");<br>


                exit(1);<br>            }<br>        }<br>        av_free_packet(&pkt);<br>    }<br>    <br>    <br>    void write_delayed_frames(AVFormatContext *oc, AVStream *st)<br>    {<br>        AVCodecContext *c = st->codec;<br>


        int got_output = 0;<br>        int ret = 0;<br>        AVPacket pkt;<br>        pkt.data = NULL;<br>        pkt.size = 0;<br>        av_init_packet(&pkt);<br>        int i = 0;<br>        for (got_output = 1; got_output; i++)<br>


        {<br>            ret = avcodec_encode_audio2(c, &pkt, NULL, &got_output);<br>            if (ret < 0)<br>            {<br>                fprintf(stderr, "error encoding frame\n");<br>                exit(1);<br>


            }<br>            static int64_t tempPts = 0;<br>            static int64_t tempDts = 0;<br>            /* If size is zero, it means the image was buffered. */<br>            if (got_output)<br>            {<br>


                if (pkt.pts != AV_NOPTS_VALUE)<br>                    pkt.pts =  av_rescale_q(pkt.pts, st->codec->time_base, st->time_base);<br>                if (pkt.dts != AV_NOPTS_VALUE)<br>                    pkt.dts = av_rescale_q(pkt.dts, st->codec->time_base, st->time_base);<br>


                if ( c && c->coded_frame && c->coded_frame->key_frame)<br>                    pkt.flags |= AV_PKT_FLAG_KEY;<br>    <br>                pkt.stream_index = st->index;<br>                /* Write the compressed frame to the media file. */<br>


                ret = av_interleaved_write_frame(oc, &pkt);<br>            }<br>            else<br>            {<br>                ret = 0;<br>            }<br>            av_free_packet(&pkt);<br>        }<br>    }<br>


    <br>    int main(int argc, char **argv)<br>    {<br>        /* register all formats and codecs */<br>        av_register_all();<br>        avcodec_register_all();<br>        avformat_network_init();<br>        avdevice_register_all();<br>


        int i =0;<br>        int ret=0;<br>        char src_filename[90] = "test_a.webm";<br>        char dst_filename[90] = "output.aac";<br>        open_audio_input(src_filename);<br>        if ( codec_ctx_audio->bit_rate == 0 ) codec_ctx_audio->bit_rate = 112000;<br>


        audio_bit_rate        = codec_ctx_audio->bit_rate;<br>        audio_sample_rate    = codec_ctx_audio->sample_rate;<br>        audio_channels        = codec_ctx_audio->channels;<br>        open_encoder( dst_filename );<br>


        while(1)<br>        {<br>            int rv = decode_frame();<br>            if ( rv < 0 )<br>            {<br>                break;<br>            }<br>    <br>            if (audio_st)<br>            {<br>                audio_pts = (double)audio_st->pts.val * audio_st->time_base.num /<br>


                    audio_st->time_base.den;<br>            }<br>            else<br>            {<br>                audio_pts = 0.0;<br>            }<br>            if ( codec_ctx_audio )<br>            {<br>                if ( got_frame)<br>


                {<br>                    write_audio_frame( audio_dst_data, audio_dst_bufsize );<br>                }<br>            }<br>            if ( audio_dst_data[0] )<br>            {<br>                av_freep(&audio_dst_data[0]);<br>


                audio_dst_data[0] = NULL;<br>            }<br>            printf("\naudio_pts: %.3f", audio_pts);<br>        }<br>        while(1)<br>        {<br>            if ( audio_dst_data && audio_dst_data[0] )<br>


            {<br>                av_freep(&audio_dst_data[0]);<br>                audio_dst_data[0] = NULL;<br>            }<br>            ret = av_samples_alloc(audio_dst_data, NULL, codec_ctx_audio->channels,<br>


                decoded_frame->nb_samples, AV_SAMPLE_FMT_S16, 0);<br>            ret = swr_convert(swr, audio_dst_data, out_frame->nb_samples,NULL, 0);<br>            if ( ret <= 0 ) break;<br>            write_audio_frame( audio_dst_data, audio_dst_bufsize );<br>


        }<br>        write_delayed_frames( output_fmt_ctx, audio_st );<br>        av_write_trailer(output_fmt_ctx);<br>        close_audio( output_fmt_ctx, audio_st);<br>        swr_free(&swr);<br>        avcodec_free_frame(&out_frame);<br>


        getch();<br>        return 0;<br>    }<br><br>"test_a.webm" input file results in longer duration (40 second output), and if I change it to "jet.webm", it is converted fine.<br><br>Both input files are approximately 18 second duration.<br>


<br>For reference, these files can be downloaded from links below:<br><br><a href="http://www.filedropper.com/testa">http://www.filedropper.com/testa</a> ,<br><a href="http://www.filedropper.com/jet">http://www.filedropper.com/jet</a><br>


<br>Alternatively, they are zipped and uploaded elsewhere as well:<br><br><a href="http://www.files.com/shared/52c3eefe990ea/test_audio_files.zip">http://www.files.com/shared/52c3eefe990ea/test_audio_files.zip</a><br><br>


Could someone kindly advise me on what I am doing wrong here?<br><br>Thanks in advance...<br><br>P.S. These files were taken/extracted from different online sources/demos. I have also posted this question on SO: <a href="http://stackoverflow.com/questions/20867959/ffmpeg-library-webm-vorbis-audio-to-aac-conversion">http://stackoverflow.com/questions/20867959/ffmpeg-library-webm-vorbis-audio-to-aac-conversion</a><br>


</div>