<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body text="#000000" bgcolor="#FFFFFF">
<br>
<div class="moz-forward-container"><br>
<br>
-------- Исходное сообщение --------
<table class="moz-email-headers-table" border="0" cellpadding="0"
cellspacing="0">
<tbody>
<tr>
<th nowrap="nowrap" valign="BASELINE" align="RIGHT">Тема: </th>
<td>Audio transcoding problem: lost audio in the end</td>
</tr>
<tr>
<th nowrap="nowrap" valign="BASELINE" align="RIGHT">Дата: </th>
<td>Thu, 14 Nov 2013 20:40:43 +0200</td>
</tr>
<tr>
<th nowrap="nowrap" valign="BASELINE" align="RIGHT">От: </th>
<td>Andrew Sherepenko <a class="moz-txt-link-rfc2396E" href="mailto:andrew.sherepenko@gmail.com"><andrew.sherepenko@gmail.com></a></td>
</tr>
<tr>
<th nowrap="nowrap" valign="BASELINE" align="RIGHT">Кому: </th>
<td><a class="moz-txt-link-abbreviated" href="mailto:libav-user@ffmpeg.org">libav-user@ffmpeg.org</a></td>
</tr>
</tbody>
</table>
<br>
<br>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<br>
Hello libav and ffmpeg users.<br>
<br>
I am new to ffmpeg and I have a problem with audio transcoding.
I've tried to follow almost <br>
all the tutorials I could find, but it still doesn't work correctly.<br>
<br>
I have an mpeg-ts container with h264 video and ac3 audio codecs.<br>
<br>
I need to change the container from mpeg-ts to mp4, copying the video
and changing the audio format from ac3 to aac or mp3.<br>
<br>
I've already changed the container and copied the video, but my audio stream
loses the last few seconds of audio after transcoding,<br>
and I don't understand why.<br>
<br>
For example: if I try to convert a 17 s video, I lose 3 seconds
of audio at the end for mp3 and 5 s for aac.<br>
<br>
If somebody has had this kind of problem, please advise what I
did wrong. Or maybe someone has a complete example of<br>
transcoding audio frame by frame. It would be very useful for me.<br>
<br>
And this is my code: <br>
<br>
#include <stdexcept><br>
#include <iostream> <br>
#include <sstream><br>
#include <cstring><br>
#include <cstdlib><br>
#include <cassert><br>
<br>
extern "C" {<br>
#include <libavcodec/avcodec.h><br>
#include <libavformat/avformat.h><br>
#include <libavutil/audio_fifo.h><br>
#include <libavutil/avutil.h><br>
#include <libavutil/error.h><br>
#include <libavutil/opt.h><br>
#include <libavutil/samplefmt.h><br>
#include <libavutil/rational.h><br>
#include <libavutil/mathematics.h><br>
#include <libavutil/samplefmt.h><br>
#include <libswresample/swresample.h><br>
<br>
}<br>
<br>
void usage(char *argv[]) {<br>
std::cout << "Changes container of media file from any
format (mostly mpeg-ts) to mp4" << std::endl;<br>
std::cout << "usage: " << argv[0] << "
INFILE [OUTFILE]" << std::endl;<br>
}<br>
<br>
// Converts a packet's timing fields from inTimeBase to outTimeBase.<br>
// pts and dts are first shifted by startTime and are left untouched when they<br>
// are AV_NOPTS_VALUE; duration is converted only when it is positive.<br>
void rescalePacket(AVPacket &pkt, int64_t startTime, AVRational inTimeBase, AVRational outTimeBase) {<br>
    const bool hasPts = (pkt.pts != AV_NOPTS_VALUE);<br>
    const bool hasDts = (pkt.dts != AV_NOPTS_VALUE);<br>
    const bool hasDuration = (pkt.duration > 0);<br>
<br>
    if (hasPts) {<br>
        const int64_t shiftedPts = pkt.pts - startTime;<br>
        pkt.pts = av_rescale_q(shiftedPts, inTimeBase, outTimeBase);<br>
    }<br>
<br>
    if (hasDts) {<br>
        const int64_t shiftedDts = pkt.dts - startTime;<br>
        pkt.dts = av_rescale_q(shiftedDts, inTimeBase, outTimeBase);<br>
    }<br>
<br>
    if (hasDuration) {<br>
        pkt.duration = av_rescale_q(pkt.duration, inTimeBase, outTimeBase);<br>
    }<br>
}<br>
<br>
// Opens fileName for demuxing and probes its streams.<br>
// Returns the opened context, or NULL (after logging to stdout) when the file<br>
// cannot be opened or its stream information cannot be read.<br>
AVFormatContext* initInputContext(const std::string& fileName) {<br>
    assert(!fileName.empty());<br>
    AVFormatContext* context = NULL;<br>
<br>
    if (avformat_open_input(&context, fileName.c_str(), NULL, NULL) < 0) {<br>
        std::cout << "Unable to get AVFormatContext from " << fileName << std::endl;<br>
        return NULL;<br>
    }<br>
<br>
    if (avformat_find_stream_info(context, NULL) < 0) {<br>
        std::cout << "Could not find stream information" << std::endl;<br>
        // The input was opened successfully above, so it has to be closed here;<br>
        // returning NULL without closing it would leak the context.<br>
        avformat_close_input(&context);<br>
        return NULL;<br>
    }<br>
<br>
    return context;<br>
}<br>
<br>
// Allocates a muxer context for fileName.<br>
// The container is first guessed from the file name; if that call fails, an<br>
// "mp4" muxer is requested explicitly. Returns NULL (after logging) when no<br>
// context could be allocated.<br>
AVFormatContext* initOutputContext(const std::string& fileName) {<br>
    assert(!fileName.empty());<br>
    AVFormatContext* formatContext = NULL;<br>
<br>
    const bool guessedFromName = avformat_alloc_output_context2(&formatContext, NULL, NULL, fileName.c_str()) >= 0;<br>
<br>
    if (!guessedFromName) {<br>
        const bool forcedMp4 = avformat_alloc_output_context2(&formatContext, NULL, "mp4", NULL) >= 0;<br>
<br>
        if (!forcedMp4) {<br>
            std::cout << "Could not deduce output format: " << fileName << std::endl;<br>
        }<br>
    }<br>
<br>
    if (formatContext) {<br>
        return formatContext;<br>
    }<br>
<br>
    std::cout << "Unable to init format context" << std::endl;<br>
    return NULL;<br>
}<br>
<br>
// Returns the stream av_find_best_stream() selects for mediaType, or NULL<br>
// (after logging) when there is none.<br>
// For an audio stream the matching decoder is also looked up and opened on the<br>
// stream's codec context; a failure there is only logged and the stream is<br>
// still returned.<br>
AVStream* getStream(AVFormatContext* formatContext, AVMediaType mediaType) {<br>
    assert(formatContext != NULL);<br>
<br>
    const int bestIndex = av_find_best_stream(formatContext, mediaType, -1, -1, NULL, 0);<br>
<br>
    if (bestIndex < 0) {<br>
        std::cout << "Stream index not found" << std::endl;<br>
        return NULL;<br>
    }<br>
<br>
    AVStream* const found = formatContext->streams[bestIndex];<br>
    AVCodecContext* const codecContext = found->codec;<br>
<br>
    // Only audio is decoded by this program, so other stream types are returned as-is.<br>
    if (codecContext->codec_type != AVMEDIA_TYPE_AUDIO) {<br>
        return found;<br>
    }<br>
<br>
    AVCodec* const decoder = avcodec_find_decoder(codecContext->codec_id);<br>
<br>
    if (!decoder) {<br>
        std::cout << "Could not find stream decoder with ID: " << codecContext->codec_id << std::endl;<br>
    } else if (avcodec_open2(codecContext, decoder, NULL) < 0) {<br>
        std::cout << "Could not open codec: " << codecContext->codec_id << std::endl;<br>
    }<br>
<br>
    return found;<br>
}<br>
<br>
// Resampler shared by the audio helpers; created by initEncoder().<br>
SwrContext* swrContext = NULL;<br>
// Sample buffer allocated by initEncoder() via av_samples_alloc().<br>
uint8_t* rawData = NULL;<br>
// Line size reported by av_samples_alloc() for rawData.<br>
int rawDataSize = 0;<br>
<br>
// Prepares the global resampler (swrContext) and sample buffer (rawData) for<br>
// converting audio from inStream's codec parameters to outStream's.<br>
// context and codecId are accepted for interface compatibility and are not used.<br>
// Returns false when the resampler cannot be created or initialised, or when<br>
// the sample buffer cannot be allocated.<br>
bool initEncoder(AVFormatContext* context, AVStream* inStream, AVStream* outStream, CodecID codecId) {<br>
    AVCodecContext* const source = inStream->codec;<br>
    AVCodecContext* const target = outStream->codec;<br>
<br>
    // A codec context may carry a channel count but no channel layout (0).<br>
    // Derive the default layout for that count instead of handing 0 to the resampler.<br>
    int64_t sourceLayout = static_cast<int64_t>(source->channel_layout);<br>
    if (!sourceLayout) {<br>
        sourceLayout = av_get_default_channel_layout(source->channels);<br>
    }<br>
<br>
    int64_t targetLayout = static_cast<int64_t>(target->channel_layout);<br>
    if (!targetLayout) {<br>
        targetLayout = av_get_default_channel_layout(target->channels);<br>
    }<br>
<br>
    swrContext = swr_alloc_set_opts(NULL,<br>
                                    targetLayout, target->sample_fmt, target->sample_rate,<br>
                                    sourceLayout, source->sample_fmt, source->sample_rate,<br>
                                    0, NULL);<br>
<br>
    if (!swrContext) {<br>
        return false;<br>
    }<br>
<br>
    if (swr_init(swrContext) < 0) {<br>
        // Do not leave an uninitialised resampler behind in the global.<br>
        swr_free(&swrContext);<br>
        return false;<br>
    }<br>
<br>
    if (av_samples_alloc(&rawData, &rawDataSize, target->channels, target->frame_size, target->sample_fmt, 1) < 0) {<br>
        return false;<br>
    }<br>
<br>
    return true;<br>
}<br>
<br>
// Adds a new stream for codecId to formatContext.<br>
// When inStream is given and the new stream is audio, the encoder is also<br>
// configured (192 kbit/s, 44.1 kHz, stereo, AV_SAMPLE_FMT_FLT) and opened, and<br>
// frame-rate/duration fields are copied from inStream.<br>
// Returns NULL (after logging) when the encoder is not found or the stream<br>
// cannot be created; a failure to open the encoder is only logged.<br>
AVStream* createStream(AVFormatContext* formatContext, CodecID codecId, AVStream* inStream = NULL) {<br>
    AVCodec* encoder = avcodec_find_encoder(codecId);<br>
<br>
    if (!encoder) {<br>
        std::cout << "Could not find stream encoder with ID: " << codecId;<br>
        return NULL;<br>
    }<br>
<br>
    AVStream* stream = avformat_new_stream(formatContext, encoder);<br>
<br>
    if (!stream) {<br>
        std::cout << "Could not create output stream" << std::endl;<br>
        return NULL;<br>
    }<br>
<br>
    if (inStream && stream->codec->codec_type == AVMEDIA_TYPE_AUDIO) {<br>
        stream->id = 1;<br>
        stream->codec->bit_rate = 192000;<br>
        stream->codec->sample_rate = 44100;<br>
        stream->codec->sample_fmt = AV_SAMPLE_FMT_FLT;<br>
        stream->codec->channel_layout = AV_CH_LAYOUT_STEREO;<br>
        stream->codec->channels = av_get_channel_layout_nb_channels(stream->codec->channel_layout);<br>
        stream->r_frame_rate = inStream->r_frame_rate;<br>
        stream->avg_frame_rate = inStream->avg_frame_rate;<br>
        stream->duration = inStream->duration;<br>
<br>
        // The global-header flag must be set before the encoder is opened;<br>
        // setting it afterwards does not affect an already opened encoder.<br>
        if (formatContext->oformat && (formatContext->oformat->flags & AVFMT_GLOBALHEADER)) {<br>
            stream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;<br>
        }<br>
<br>
        AVDictionary *options = NULL;<br>
        av_dict_set(&options, "strict", "experimental", 0);<br>
<br>
        if (avcodec_open2(stream->codec, encoder, &options) < 0) {<br>
            std::cout << "Could not open codec: " << stream->codec->codec_id << std::endl;<br>
        }<br>
<br>
        // Release the dictionary (including any entries the encoder left unconsumed).<br>
        av_dict_free(&options);<br>
    }<br>
<br>
    return stream;<br>
}<br>
<br>
// Copies inStream's codec parameters, frame rates, duration and metadata onto<br>
// outStream so its packets can be written without re-encoding.<br>
// The copied codec tag is cleared and the codec time base is set to the output<br>
// stream's time base. Returns false (after logging) if the codec context<br>
// cannot be copied.<br>
bool copyStreamContext(AVStream* outStream, AVStream* inStream) {<br>
    const int copyStatus = avcodec_copy_context(outStream->codec, inStream->codec);<br>
<br>
    if (copyStatus < 0) {<br>
        std::cout << "Could not copy codec context" << std::endl;<br>
        return false;<br>
    }<br>
<br>
    AVCodecContext* const outCodec = outStream->codec;<br>
    outCodec->codec_tag = 0;<br>
    outCodec->time_base = outStream->time_base;<br>
<br>
    outStream->sample_aspect_ratio = outCodec->sample_aspect_ratio;<br>
    outStream->r_frame_rate = inStream->r_frame_rate;<br>
    outStream->avg_frame_rate = inStream->avg_frame_rate;<br>
    outStream->duration = inStream->duration;<br>
<br>
    av_dict_copy(&outStream->metadata, inStream->metadata, 0);<br>
    return true;<br>
}<br>
<br>
// Packet counters reported by processVideo().<br>
// ra / rv: packets read from the input and routed to the audio / video branch.<br>
int ra = 0;<br>
// wa / wv: packets written; writeFrame() counts stream index 1 as audio and<br>
// everything else as video.<br>
int wa = 0;<br>
int rv = 0;<br>
int wv = 0;<br>
<br>
// Decodes audio from the front of `packet` into `frame`.<br>
// Allocates `frame` when it is NULL and resets it to defaults before decoding.<br>
// On success the packet's data/size are advanced by the value returned from<br>
// avcodec_decode_audio4(); on a decode error the packet is marked empty<br>
// (size 0, data NULL), which ends a caller loop that tests packet->size.<br>
// Returns the decoder's got-frame flag, or 0 on error.<br>
<b>int decodeFromPacket(AVCodecContext* codecContext, AVPacket*
packet, AVFrame*& frame) {</b><b><br>
</b><b> if (!frame) {</b><b><br>
</b><b> frame = avcodec_alloc_frame();</b><b><br>
</b><b> }</b><b><br>
</b><b><br>
</b><b> avcodec_get_frame_defaults(frame);</b><b><br>
</b><b><br>
</b><b> int gotFrame = 0;</b><b><br>
</b><b> int result = avcodec_decode_audio4(codecContext, frame,
&gotFrame, packet);</b><b><br>
</b><b><br>
</b><b> if (result < 0) {</b><b><br>
</b><b> std::cout << "Could not decode audio frame"
<< std::endl;</b><b><br>
// Drop the rest of the packet so the caller does not retry the same bytes.<br>
</b><b> packet->size = 0;</b><b><br>
</b><b> packet->data = NULL;</b><b><br>
</b><b><br>
</b><b> return 0;</b><b><br>
</b><b> }</b><b><br>
</b><b><br>
// NOTE(review): if the decoder returns 0 without producing a frame, size does<br>
// not shrink and a caller looping on packet->size would not terminate - confirm.<br>
</b><b> packet->size -= result;</b><b><br>
</b><b> packet->data += result;</b><b><br>
</b><b><br>
</b><b> return gotFrame;</b><b><br>
</b><b>}</b><b><br>
</b><b><br>
// Resamples the decoded samples in `frame` into the global rawData buffer and<br>
// then re-points `frame` at rawData so it can be passed to the encoder.<br>
// Returns 1 on success, 0 (after logging) if conversion or frame setup fails.<br>
// NOTE(review): swr_convert() is asked for at most codecContext->frame_size<br>
// output samples per call, regardless of frame->nb_samples. When a decoded<br>
// frame yields more than that, the surplus presumably stays buffered inside<br>
// swrContext and is never fetched here - a likely cause of audio missing at<br>
// the end of the output; confirm against the swr_convert() documentation.<br>
</b><b>int convertFrame(AVCodecContext* codecContext,
AVFrame*& frame) {</b><b><br>
</b><b> int result = swr_convert(swrContext, &rawData,
codecContext->frame_size, (const
uint8_t**)frame->extended_data, frame->nb_samples);</b><b><br>
</b><b><br>
</b><b> if (result < 0) {</b><b><br>
</b><b> std::cout << "Could not convert frame
content: " << std::endl;</b><b><br>
</b><b> return 0;</b><b><br>
</b><b> }</b><b><br>
</b><b><br>
// The frame is reused as the encoder input; nb_samples is set to the encoder<br>
// frame size even if swr_convert() returned fewer samples than that.<br>
</b><b> avcodec_get_frame_defaults(frame);</b><b><br>
</b><b> frame->nb_samples = codecContext->frame_size;</b><b><br>
</b><b> frame->format = codecContext->sample_fmt;</b><b><br>
</b><b><br>
</b><b> result = avcodec_fill_audio_frame(frame,
codecContext->channels, codecContext->sample_fmt, (uint8_t
*)rawData, rawDataSize, 1);</b><b><br>
</b><b><br>
</b><b> if (result < 0) {</b><b><br>
</b><b> std::cout << "Could not fill output frame"
<< std::endl;</b><b><br>
</b><b> return 0;</b><b><br>
</b><b> }</b><b><br>
</b><b><br>
</b><b> return 1;</b><b><br>
</b><b>}</b><b><br>
</b><b><br>
</b><b>// Feeds `frame` to the audio encoder and stores any produced output in `packet`.<br>
// Returns the encoder's got-packet flag, or 0 (after logging) when encoding fails.<br>
int encodeToPacket(AVCodecContext* codecContext, AVPacket* packet, AVFrame*& frame) {<br>
    int packetReady = 0;<br>
    const int status = avcodec_encode_audio2(codecContext, packet, frame, &packetReady);<br>
<br>
    if (status >= 0) {<br>
        return packetReady;<br>
    }<br>
<br>
    std::cout << "Could not encode audio frame" << std::endl;<br>
    return 0;<br>
}</b><b><br>
</b><b><br>
</b><b>// Tags `packet` with streamIndex and hands it to the interleaving muxer.<br>
// A write failure is only logged. The written-packet counter is bumped either<br>
// way: wa when streamIndex is 1, wv otherwise.<br>
void writeFrame (AVFormatContext* formatContext, AVPacket* packet, int streamIndex) {<br>
    packet->stream_index = streamIndex;<br>
<br>
    const int writeStatus = av_interleaved_write_frame(formatContext, packet);<br>
<br>
    if (writeStatus < 0) {<br>
        std::cout << "Could not write audio frame" << std::endl;<br>
    }<br>
<br>
    int& writtenCounter = (streamIndex == 1) ? wa : wv;<br>
    ++writtenCounter;<br>
}</b><br>
<br>
int64_t time_base = 0;<br>
<br>
<b>// Converted samples (already in the encoder's format) waiting to be grouped<br>
// into encoder-sized frames. Created on first use by transcodeAudioPacket()<br>
// and kept for the rest of the process.<br>
static AVAudioFifo* audioFifo = NULL;<br>
<br>
// Takes sampleCount samples out of audioFifo, encodes them as one frame and<br>
// writes the resulting packet, if any. Returns false when the samples could<br>
// not be fetched or wrapped in a frame.<br>
static bool encodeFifoFrame(AVFormatContext* formatContext, AVStream* outStream, AVFrame*& frame, int sampleCount) {<br>
    AVCodecContext* const encoder = outStream->codec;<br>
    uint8_t* planes[AV_NUM_DATA_POINTERS] = {0};<br>
    int lineSize = 0;<br>
<br>
    if (av_samples_alloc(planes, &lineSize, encoder->channels, sampleCount, encoder->sample_fmt, 0) < 0) {<br>
        std::cout << "Could not allocate output samples" << std::endl;<br>
        return false;<br>
    }<br>
<br>
    bool handled = false;<br>
<br>
    if (av_audio_fifo_read(audioFifo, reinterpret_cast<void**>(planes), sampleCount) < sampleCount) {<br>
        std::cout << "Could not read samples from FIFO" << std::endl;<br>
    } else {<br>
        const int bufferSize = av_samples_get_buffer_size(NULL, encoder->channels, sampleCount, encoder->sample_fmt, 0);<br>
<br>
        avcodec_get_frame_defaults(frame);<br>
        frame->nb_samples = sampleCount;<br>
        frame->format = encoder->sample_fmt;<br>
<br>
        if (avcodec_fill_audio_frame(frame, encoder->channels, encoder->sample_fmt, planes[0], bufferSize, 0) < 0) {<br>
            std::cout << "Could not fill output frame" << std::endl;<br>
        } else {<br>
            AVPacket outPacket = {0};<br>
            av_init_packet(&outPacket);<br>
<br>
            if (encodeToPacket(encoder, &outPacket, frame)) {<br>
                writeFrame(formatContext, &outPacket, outStream->index);<br>
            }<br>
<br>
            av_free_packet(&outPacket);<br>
            handled = true;<br>
        }<br>
    }<br>
<br>
    // The frame must not keep pointing into the buffer that is released below.<br>
    avcodec_get_frame_defaults(frame);<br>
    av_freep(&planes[0]);<br>
    return handled;<br>
}<br>
<br>
// Decodes every frame contained in inPacket, resamples it to the encoder's<br>
// format and writes the encoded result to formatContext.<br>
//<br>
// A decoded frame generally does not contain the number of samples the encoder<br>
// wants per frame. Requesting only one encoder frame from the resampler per<br>
// decoded frame leaves the surplus buffered inside the resampler, where it<br>
// keeps growing and never reaches the encoder. Here every sample the<br>
// resampler can deliver is converted, queued in a FIFO, and the FIFO is then<br>
// emptied in encoder-sized frames.<br>
//<br>
// Samples still queued when the input ends (fewer than one encoder frame, plus<br>
// whatever the resampler itself holds back) are not emitted by this function.<br>
// Frame timestamps are left unset.<br>
void transcodeAudioPacket(AVFormatContext* formatContext, AVStream* inStream, AVStream* outStream, AVPacket* inPacket, AVFrame*& frame) {<br>
    AVCodecContext* const decoder = inStream->codec;<br>
    AVCodecContext* const encoder = outStream->codec;<br>
    const int frameSize = encoder->frame_size;<br>
<br>
    if (!audioFifo) {<br>
        audioFifo = av_audio_fifo_alloc(encoder->sample_fmt, encoder->channels, frameSize > 0 ? frameSize : 1);<br>
<br>
        if (!audioFifo) {<br>
            std::cout << "Could not allocate audio FIFO" << std::endl;<br>
            return;<br>
        }<br>
    }<br>
<br>
    while (inPacket->size > 0) {<br>
        if (!decodeFromPacket(decoder, inPacket, frame)) {<br>
            continue;<br>
        }<br>
<br>
        // Upper bound on what the resampler can output now: samples it already<br>
        // holds plus this frame, expressed at the output sample rate.<br>
        const int maxSamples = static_cast<int>(av_rescale_rnd(<br>
            swr_get_delay(swrContext, decoder->sample_rate) + frame->nb_samples,<br>
            encoder->sample_rate, decoder->sample_rate, AV_ROUND_UP));<br>
<br>
        if (maxSamples <= 0) {<br>
            continue;<br>
        }<br>
<br>
        uint8_t* converted[AV_NUM_DATA_POINTERS] = {0};<br>
        int lineSize = 0;<br>
<br>
        if (av_samples_alloc(converted, &lineSize, encoder->channels, maxSamples, encoder->sample_fmt, 0) < 0) {<br>
            std::cout << "Could not allocate output samples" << std::endl;<br>
            continue;<br>
        }<br>
<br>
        const int produced = swr_convert(swrContext, converted, maxSamples,<br>
                                         const_cast<const uint8_t**>(frame->extended_data), frame->nb_samples);<br>
<br>
        if (produced < 0) {<br>
            std::cout << "Could not convert frame content: " << std::endl;<br>
        } else if (produced > 0 && av_audio_fifo_write(audioFifo, reinterpret_cast<void**>(converted), produced) < produced) {<br>
            std::cout << "Could not write samples to FIFO" << std::endl;<br>
        }<br>
<br>
        av_freep(&converted[0]);<br>
<br>
        // Encode as many full frames as are queued. An encoder that reports no<br>
        // fixed frame size (0) gets everything queued in a single frame.<br>
        for (;;) {<br>
            const int queued = av_audio_fifo_size(audioFifo);<br>
            const int chunk = frameSize > 0 ? frameSize : queued;<br>
<br>
            if (chunk <= 0 || queued < chunk) {<br>
                break;<br>
            }<br>
<br>
            if (!encodeFifoFrame(formatContext, outStream, frame, chunk)) {<br>
                break;<br>
            }<br>
        }<br>
    }<br>
}</b><br>
<br>
// Remuxes inputFile into outputFile: video packets are written through<br>
// unchanged, audio packets are decoded, resampled and re-encoded.<br>
// Progress counters are printed to stdout; errors are logged, and only a<br>
// failure to open the output file or to write the trailer aborts the function.<br>
void processVideo(const std::string& inputFile, const
std::string& outputFile) {<br>
AVFormatContext* inFormatContext =
initInputContext(inputFile);<br>
AVFormatContext* outFormatContext =
initOutputContext(outputFile);<br>
// NOTE(review): both init functions can return NULL, and so can getStream()<br>
// and createStream() below; none of the results is checked before use.<br>
<br>
outFormatContext->duration =
inFormatContext->duration;<br>
outFormatContext->bit_rate =
inFormatContext->bit_rate;<br>
outFormatContext->start_time =
inFormatContext->start_time;<br>
<br>
AVStream* inVideoStream = getStream(inFormatContext,
AVMEDIA_TYPE_VIDEO);<br>
AVStream* inAudioStream = getStream(inFormatContext,
AVMEDIA_TYPE_AUDIO);<br>
<br>
AVOutputFormat* outFormat = outFormatContext->oformat;<br>
// Clears the muxer's codec tag table on the shared output-format object.<br>
outFormat->codec_tag = NULL;<br>
<br>
// Output streams are created video first, then audio.<br>
AVStream* outVideoStream = createStream(outFormatContext,
outFormat->video_codec);<br>
AVStream* outAudioStream = createStream(outFormatContext,
outFormat->audio_codec, inAudioStream);<br>
<br>
copyStreamContext(outVideoStream, inVideoStream);<br>
av_dict_copy(&outAudioStream->metadata,
inAudioStream->metadata, 0);<br>
<br>
initEncoder(outFormatContext, inAudioStream, outAudioStream,
outFormat->audio_codec);<br>
<br>
if (!(outFormat->flags & AVFMT_NOFILE)) {<br>
if (avio_open(&outFormatContext->pb,
outputFile.c_str(), AVIO_FLAG_WRITE) < 0) {<br>
std::cout << "Could not open output file: "
<< outputFile << std::endl;<br>
return;<br>
}<br>
}<br>
<br>
// NOTE(review): createStream() has already opened the audio encoder at this<br>
// point, so setting the global-header flag here is presumably too late to<br>
// affect it - confirm.<br>
if (outFormat->flags & AVFMT_GLOBALHEADER) {<br>
outVideoStream->codec->flags |=
CODEC_FLAG_GLOBAL_HEADER;<br>
outAudioStream->codec->flags |=
CODEC_FLAG_GLOBAL_HEADER;<br>
}<br>
<br>
av_dump_format(inFormatContext, 0, inputFile.c_str(), 0);<br>
av_dump_format(outFormatContext, 0, outputFile.c_str(), 1);<br>
<br>
AVDictionary* options = NULL;<br>
av_dict_set(&options, "movflags", "frag_keyframe", 0);<br>
<br>
if (avformat_write_header(outFormatContext, &options) <
0) {<br>
std::cout << "Unable to write headers " <<
std::endl;<br>
}<br>
<br>
av_dict_free(&options);<br>
<br>
AVPacket packet = {0};<br>
AVFrame* frame = avcodec_alloc_frame();<br>
int result = 0;<br>
int gotPacket = 0;<br>
<br>
<b> // rewrite video and audio packets</b><b><br>
</b><b> while (av_read_frame(inFormatContext, &packet)
>= 0) {</b><b><br>
// NOTE(review): packet.stream_index is an index into the INPUT file's streams<br>
// but is compared with the OUTPUT streams' indices; this only routes packets<br>
// correctly when both files number video and audio the same way.<br>
</b><b> if (packet.stream_index ==
outAudioStream->index) {</b><b><br>
</b><b> ra++;</b><b><br>
</b><b> //rescalePacket(packet,
inFormatContext->start_time, inAudioStream->time_base,
outAudioStream->time_base);</b><b><br>
</b><b> transcodeAudioPacket(outFormatContext,
inAudioStream, outAudioStream, &packet, frame);</b><b><br>
</b><b> } else if (packet.stream_index ==
outVideoStream->index) {</b><b><br>
</b><b> rv++;</b><b><br>
</b><b> //rescalePacket(packet,
inFormatContext->start_time, inVideoStream->time_base,
outVideoStream->time_base);</b><b><br>
</b><b> writeFrame(outFormatContext, &packet,
outVideoStream->index);</b><b><br>
</b><b> }</b><b><br>
</b><b><br>
</b><b> av_free_packet(&packet);</b><b><br>
</b><b> }</b><b><br>
</b><br>
// Seed the flag so the encoder-drain loop below runs at least once.<br>
gotPacket = 1;<br>
<br>
std::cout << "Video => read = " << rv <<
"; wrote = " << wv << std::endl;<br>
std::cout << "Audio => read = " << ra <<
"; wrote = " << wa << std::endl;<br>
<br>
packet.size = 0;<br>
packet.data = NULL;<br>
<br>
// Drain the audio encoder: a NULL frame is passed until no packet comes back.<br>
// The final iteration (no packet returned) also takes the else branch and<br>
// prints the "Could not encode audio frame" message.<br>
while (gotPacket) {<br>
result = avcodec_encode_audio2(outAudioStream->codec,
&packet, NULL, &gotPacket);<br>
<br>
if (result >= 0 && gotPacket) {<br>
packet.stream_index = outAudioStream->index;<br>
<br>
if (av_interleaved_write_frame(outFormatContext,
&packet) < 0) {<br>
std::cout << "Could not write audio frame"
<< std::endl;<br>
} else {<br>
wa++;<br>
}<br>
} else {<br>
std::cout << "Could not encode audio frame"
<< std::endl;<br>
}<br>
<br>
av_free_packet(&packet);<br>
}<br>
<br>
std::cout << "Video => read = " << rv <<
"; wrote = " << wv << std::endl;<br>
std::cout << "Audio => read = " << ra <<
"; wrote = " << wa << std::endl;<br>
<br>
av_free(frame);<br>
av_free(rawData);<br>
swr_free(&swrContext);<br>
<br>
// NOTE(review): on failure this returns before the codecs, the input and the<br>
// output context below are closed.<br>
if (av_write_trailer(outFormatContext) < 0) {<br>
std::cout << "Unable to write trailer" <<
std::endl;<br>
return;<br>
}<br>
<br>
avcodec_close(inAudioStream->codec);<br>
avcodec_close(outAudioStream->codec);<br>
<br>
// close input<br>
avformat_close_input(&inFormatContext);<br>
inFormatContext = NULL;<br>
<br>
// close output<br>
if (!(outFormat->flags & AVFMT_NOFILE)) {<br>
avio_close(outFormatContext->pb);<br>
}<br>
<br>
avformat_free_context(outFormatContext);<br>
outFormatContext = NULL;<br>
}<br>
<br>
int main(int argc, char *argv[]) {<br>
std::string inpFile, outFile;<br>
<br>
if (argc < 2) {<br>
std::cout << "error: you should specify input file
for transcoding" << std::endl;<br>
usage(argv);<br>
return 1;<br>
}<br>
<br>
inpFile = argv[1];<br>
if (argc == 3) {<br>
outFile = argv[2];<br>
} else {<br>
outFile = "out.mp4";<br>
} <br>
<br>
// register all muxers and demuxers<br>
avcodec_register_all();<br>
av_register_all();<br>
<br>
processVideo(inpFile, outFile);<br>
<br>
return 0;<br>
}<br>
<br>
<br>
</div>
<br>
</body>
</html>