<div dir="ltr">Hi all,<div><br></div><div>I'm trying to write a small tool which reads PCM16 audio from files, converts it to WebM/Ogg format in a buffer, and passes it to another program through a socket (currently it just dumps the result to disk). I have it working with one file, but when I start to encode a second file the avcodec_encode_audio2 function never sets the got_packet variable to 1 (see the code below). I suppose the reason is that AVFormatContext holds some state that needs to be reset before it can be used again.</div><div><br></div><div>For the sake of brevity the code sample here isn't complete, but it should give a pretty good overview of how I'm encoding audio. The my_iocontext class is just a holder for an AVIOContext* and an internal buffer, and it implements the write and seek functions for the AVIOContext (allocated with the avio_alloc_context function).</div><div><br></div><div>Any help is greatly appreciated.</div><div><br></div><div>With Regards,</div><div>Ragnar</div><div><br></div><div>//--------------------------------------------------------------</div><div><br></div><div><div>class audio_encoder&nbsp;</div><div>{</div><div>&nbsp; &nbsp; AVFormatContext * formatContext;</div><div>&nbsp; &nbsp; AVOutputFormat * outputFormat;</div><div>&nbsp; &nbsp; AVStream * stream;</div><div>&nbsp; &nbsp; AVCodecContext * codecContext;</div><div>&nbsp; &nbsp; SwrContext * swr_ctx;</div><div>public:</div><div>&nbsp; &nbsp; audio_encoder() {}</div><div>&nbsp; &nbsp; ~audio_encoder() {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; avcodec_close(codecContext);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; swr_free(&swr_ctx);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; avformat_free_context(formatContext);</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp; bool initialize();</div><div>&nbsp; &nbsp; std::vector&lt;unsigned char&gt; encode( std::vector&lt;unsigned char&gt; audio );</div><div>};</div><div><br></div><div>bool audio_encoder::initialize()</div><div>{</div><div>&nbsp; &nbsp; avformat_alloc_output_context2(&formatContext, NULL, NULL, ".webm");</div><div>&nbsp; &nbsp; if (!formatContext) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return false;</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; 
outputFormat = formatContext->oformat;</div><div>&nbsp; &nbsp; if ( outputFormat->audio_codec == AV_CODEC_ID_NONE ) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Audio codec not found" << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return false;</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp; outputFormat->video_codec = AV_CODEC_ID_NONE;</div><div>&nbsp; &nbsp; outputFormat->subtitle_codec = AV_CODEC_ID_NONE;</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; AVCodec * codec = avcodec_find_encoder(outputFormat->audio_codec);</div><div>&nbsp; &nbsp; if (!codec) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Could not find encoder for: " << avcodec_get_name(outputFormat->audio_codec) << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return false;</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; stream = avformat_new_stream(formatContext, codec);</div><div>&nbsp; &nbsp; if (!stream) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Could not allocate stream" << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return false;</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp; stream->id = formatContext->nb_streams-1;</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; codecContext = stream->codec;</div><div>&nbsp; &nbsp; codecContext->sample_fmt &nbsp;= codec->sample_fmts ? 
codec->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;</div><div>&nbsp; &nbsp; codecContext->bit_rate &nbsp; &nbsp;= 64000;</div><div>&nbsp; &nbsp; codecContext->sample_rate = 16000;</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; codecContext->channels &nbsp; &nbsp; &nbsp; &nbsp;= av_get_channel_layout_nb_channels(codecContext->channel_layout);</div><div>&nbsp; &nbsp; codecContext->channel_layout = AV_CH_LAYOUT_MONO;</div><div>&nbsp; &nbsp; codecContext->channels &nbsp; &nbsp; &nbsp; &nbsp;= av_get_channel_layout_nb_channels(codecContext->channel_layout);</div><div>&nbsp; &nbsp; stream->time_base = av_make_q( 1, codecContext->sample_rate );</div><div><br></div><div>&nbsp; &nbsp; if (formatContext->oformat->flags & AVFMT_GLOBALHEADER) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; codecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;</div><div>&nbsp; &nbsp; }</div><div><br></div><div>&nbsp; &nbsp; int ret = avcodec_open2(codecContext, codec, nullptr );</div><div>&nbsp; &nbsp; if (ret < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Could not open audio codec: " << av_err2str(ret) << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return false;</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; swr_ctx = swr_alloc();</div><div>&nbsp; &nbsp; if (!swr_ctx) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Could not allocate resampler context" << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return false;</div><div>&nbsp; &nbsp; }</div><div><br></div><div>&nbsp; &nbsp; av_opt_set_int &nbsp; &nbsp; &nbsp; (swr_ctx, "in_channel_count", &nbsp; codecContext->channels, &nbsp; &nbsp; &nbsp; 0);</div><div>&nbsp; &nbsp; av_opt_set_int &nbsp; &nbsp; &nbsp; (swr_ctx, "in_sample_rate", &nbsp; &nbsp; codecContext->sample_rate, &nbsp; &nbsp;0);</div><div>&nbsp; &nbsp; av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", &nbsp; &nbsp; &nbsp;AV_SAMPLE_FMT_S16, 0);</div><div>&nbsp; &nbsp; av_opt_set_int &nbsp; &nbsp; &nbsp; (swr_ctx, "out_channel_count", &nbsp;codecContext->channels, &nbsp; &nbsp; &nbsp; 0);</div><div>&nbsp; &nbsp; av_opt_set_int &nbsp; &nbsp; &nbsp; (swr_ctx, "out_sample_rate", &nbsp; &nbsp;codecContext->sample_rate, &nbsp; &nbsp;0);</div><div>&nbsp; &nbsp; av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", &nbsp; &nbsp; codecContext->sample_fmt, &nbsp; &nbsp; 0);</div><div><br></div><div>&nbsp; &nbsp; if ((ret = swr_init(swr_ctx)) < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Failed to 
initialize the resampling context" << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return false;</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; return true;</div><div>}</div><div><br></div><div><br></div><div>std::vector&lt;unsigned char&gt; audio_encoder::encode( std::vector&lt;unsigned char&gt; pcm_audio )</div><div>{</div><div>&nbsp; &nbsp;&nbsp;<br></div><div>&nbsp; &nbsp; my_iocontext io_ctx;&nbsp;</div><div>&nbsp; &nbsp; formatContext->pb = io_ctx.get_avio();</div><div><br></div><div>&nbsp; &nbsp; int ret = avformat_write_header(formatContext, nullptr);</div><div>&nbsp; &nbsp; if (ret < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Error occurred when opening output file: " << av_err2str(ret) << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return {};</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; AVFrame * frame = av_frame_alloc();</div><div>&nbsp; &nbsp; if (!frame) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Error allocating an audio frame" << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return {};</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp; frame->format = codecContext->sample_fmt;</div><div>&nbsp; &nbsp; frame->channel_layout = codecContext->channel_layout;</div><div>&nbsp; &nbsp; frame->sample_rate = codecContext->sample_rate;</div><div>&nbsp; &nbsp; frame->nb_samples = codecContext->frame_size;</div><div>&nbsp; &nbsp; if (frame->nb_samples) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; if (av_frame_get_buffer(frame, 0) < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Error allocating an audio buffer" << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; return {};</div><div>&nbsp; &nbsp; &nbsp; &nbsp; }</div><div>&nbsp; &nbsp; }</div><div><br></div><div>&nbsp; &nbsp; int samples_count = 0;</div><div>&nbsp; &nbsp; unsigned int size_in_bytes = (codecContext->frame_size*2);</div><div>&nbsp; &nbsp; unsigned int cycles = (pcm_audio.size() / size_in_bytes );</div><div>&nbsp; &nbsp;&nbsp;<br></div><div>&nbsp; &nbsp; for( int x = 0; x < cycles; x++) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; int start_idx = (x*size_in_bytes);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::vector&lt;unsigned char&gt; chunk( pcm_audio.begin()+start_idx, &nbsp;pcm_audio.begin()+start_idx + size_in_bytes );</div><div>&nbsp; &nbsp; &nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; &nbsp; 
&nbsp;&nbsp;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; int dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, codecContext->sample_rate) + codecContext->frame_size,</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; codecContext->sample_rate, codecContext->sample_rate, AV_ROUND_UP);</div><div><br></div><div>&nbsp; &nbsp; &nbsp; &nbsp; int ret = av_frame_make_writable(frame);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; if (ret < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; break;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; }</div><div>&nbsp; &nbsp; &nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; ret = swr_convert(swr_ctx, frame->data, dst_nb_samples, (const uint8_t **)&chunk, codecContext->frame_size); //in_frame->data</div><div>&nbsp; &nbsp; &nbsp; &nbsp; if (ret < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Error while converting" << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; break;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; }</div><div>&nbsp; &nbsp; &nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; frame->pts = av_rescale_q(samples_count, av_make_q( 1, codecContext->sample_rate ), codecContext->time_base);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; samples_count += dst_nb_samples;</div><div>&nbsp; &nbsp; &nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; int got_packet = 0;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; AVPacket pkt = { 0 };</div><div>&nbsp; &nbsp; &nbsp; &nbsp; av_init_packet( &pkt );</div><div>&nbsp; &nbsp; &nbsp; &nbsp; ret = avcodec_encode_audio2(codecContext, &pkt, frame, &got_packet);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; if (ret < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Error encoding audio frame: " << av_err2str(ret) << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; break;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; }</div><div><br></div><div>&nbsp; &nbsp; &nbsp; &nbsp; if (got_packet) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; ret = write_frame(formatContext, &codecContext->time_base, stream, &pkt);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; if (ret < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Error while writing audio frame: " << av_err2str(ret) << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; break;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; }</div><div>&nbsp; &nbsp; &nbsp; &nbsp; }</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; int got_packet = 0;</div><div>&nbsp; &nbsp; do {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; AVPacket pkt 
= { 0 };</div><div>&nbsp; &nbsp; &nbsp; &nbsp; av_init_packet(&pkt);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; int ret = avcodec_encode_audio2(codecContext, &pkt, nullptr, &got_packet);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; if (ret < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Error encoding audio frame: " << av_err2str(ret) << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; break;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; }</div><div><br></div><div>&nbsp; &nbsp; &nbsp; &nbsp; if (got_packet) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; ret = write_frame(formatContext, &codecContext->time_base, stream, &pkt);</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; if (ret < 0) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Error while writing audio frame: " << &nbsp;av_err2str(ret) << std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; break;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; }</div><div>&nbsp; &nbsp; &nbsp; &nbsp; }</div><div>&nbsp; &nbsp; } while ( got_packet );</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; av_write_trailer(formatContext);</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; av_frame_free( &frame );</div><div><br></div><div>&nbsp; &nbsp; return io_ctx.get_data();</div><div>}</div><div><br></div><div><br></div><div>int main(int argc, char **argv)</div><div>{</div><div><br></div><div>&nbsp; &nbsp; std::ifstream input_file;</div><div>&nbsp; &nbsp; input_file.open( "/tmp/input.wav", std::ios_base::in | std::ios_base::binary );</div><div>&nbsp; &nbsp; if ( !input_file.is_open() ) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; printf("Unable to open input file.\n");</div><div>&nbsp; &nbsp; &nbsp; &nbsp; return 1;</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp; std::vector&lt;unsigned char&gt; pcm_audio( ( std::istreambuf_iterator&lt;char&gt;( input_file ) ), ( std::istreambuf_iterator&lt;char&gt;() ) );</div><div>&nbsp; &nbsp;&nbsp;<br></div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; av_register_all();</div><div>&nbsp; &nbsp; av_log_set_level(AV_LOG_DEBUG);</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; audio_encoder encoder;</div><div>&nbsp; &nbsp; if (!encoder.initialize()) {</div><div>&nbsp; &nbsp; &nbsp; &nbsp; std::cout << "Error initializing ..." 
<< std::endl;</div><div>&nbsp; &nbsp; &nbsp; &nbsp; exit(1);</div><div>&nbsp; &nbsp; }</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; std::vector&lt;unsigned char&gt; bufr = encoder.encode( pcm_audio );</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; std::ofstream cmp( "/tmp/result.webm", std::ios_base::out | std::ios_base::binary );</div><div>&nbsp; &nbsp; cmp.write( ( char * )&bufr[0], bufr.size() );</div><div>&nbsp; &nbsp; cmp.close();</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; std::vector&lt;unsigned char&gt; bufr2 = encoder.encode( pcm_audio );</div><div>&nbsp; &nbsp;&nbsp;</div><div>&nbsp; &nbsp; std::ofstream cmp2( "/tmp/result2.webm", std::ios_base::out | std::ios_base::binary );</div><div>&nbsp; &nbsp; cmp2.write( ( char * )&bufr2[0], bufr2.size() );</div><div>&nbsp; &nbsp; cmp2.close();</div><div><br></div><div>&nbsp; &nbsp; return 0;</div><div>}</div></div><div><br></div></div>