FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
aacenc.c
Go to the documentation of this file.
1 /*
2  * AAC encoder
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder
25  */
26 
27 /***********************************
28  * TODOs:
29  * add sane pulse detection
30  * add temporal noise shaping
31  ***********************************/
32 
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "put_bits.h"
37 #include "internal.h"
38 #include "mpeg4audio.h"
39 #include "kbdwin.h"
40 #include "sinewin.h"
41 
42 #include "aac.h"
43 #include "aactab.h"
44 #include "aacenc.h"
45 
46 #include "psymodel.h"
47 
48 #define AAC_MAX_CHANNELS 6
49 
/**
 * Log an error and return AVERROR(EINVAL) from the enclosing function when
 * cond is true.
 *
 * A variable named avctx must be in scope where the macro is used; it is
 * passed to av_log() as the logging context. The remaining arguments are the
 * av_log() format string and its parameters.
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement and can be used safely in an unbraced if/else.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)
55 
/**
 * Log a warning when cond is true; execution continues either way.
 *
 * A variable named avctx must be in scope where the macro is used; it is
 * passed to av_log() as the logging context. The remaining arguments are the
 * av_log() format string and its parameters.
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement and can be used safely in an unbraced if/else.
 */
#define WARN_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
        } \
    } while (0)
60 
/* Companion table to ff_aac_pow2sf_tab: the encoder init code fills all 428
 * entries with sqrt(t * sqrt(t)), i.e. t^(3/4), where t is the matching
 * ff_aac_pow2sf_tab entry. Non-static, so visible to other translation units. */
61 float ff_aac_pow34sf_tab[428];
62 
/*
 * Scalefactor band widths for the long (1024-coefficient) transform.
 * Each table lists the width, in spectral coefficients, of consecutive
 * scalefactor bands; the entries of every table below add up to 1024.
 * The numeric suffix presumably names the sample-rate class (kHz) a table
 * serves -- confirm against the sample-rate index ordering.
 */
63 static const uint8_t swb_size_1024_96[] = {
64  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
65  12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
66  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
67 };
68 
69 static const uint8_t swb_size_1024_64[] = {
70  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
71  12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
72  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
73 };
74 
75 static const uint8_t swb_size_1024_48[] = {
76  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
77  12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
78  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
79  96
80 };
81 
82 static const uint8_t swb_size_1024_32[] = {
83  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
84  12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
85  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
86 };
87 
88 static const uint8_t swb_size_1024_24[] = {
89  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
90  12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
91  32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
92 };
93 
94 static const uint8_t swb_size_1024_16[] = {
95  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
96  12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
97  32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
98 };
99 
100 static const uint8_t swb_size_1024_8[] = {
101  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
102  16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
103  32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
104 };
105 
/*
 * Lookup of the long-window tables above; the encoder init code indexes it
 * with the sample-rate index and bounds-checks that index against this
 * array's element count.
 * NOTE(review): the embedded listing numbers jump from 106 to 111, so the
 * leading rows of this initializer are absent from this listing.
 */
106 static const uint8_t *swb_size_1024[] = {
111  swb_size_1024_8
112 };
113 
/*
 * Scalefactor band widths for the short (128-coefficient) transform.
 * Each table lists the width, in spectral coefficients, of consecutive
 * scalefactor bands; the entries of every table below add up to 128.
 * The numeric suffix presumably names the sample-rate class (kHz) a table
 * serves -- confirm against the sample-rate index ordering.
 */
114 static const uint8_t swb_size_128_96[] = {
115  4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
116 };
117 
118 static const uint8_t swb_size_128_48[] = {
119  4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
120 };
121 
122 static const uint8_t swb_size_128_24[] = {
123  4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
124 };
125 
126 static const uint8_t swb_size_128_16[] = {
127  4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
128 };
129 
130 static const uint8_t swb_size_128_8[] = {
131  4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
132 };
133 
/*
 * Lookup of the short-window tables above; the encoder init code indexes it
 * with the sample-rate index and bounds-checks that index against this
 * array's element count.
 * NOTE(review): the embedded listing numbers jump from 136 to 141, so the
 * leading rows of this initializer are absent from this listing.
 */
134 static const uint8_t *swb_size_128[] = {
135  /* the last entry on the following row is swb_size_128_64 but is a
136  duplicate of swb_size_128_96 */
141  swb_size_128_8
142 };
143 
144 /** default channel configurations */
/*
 * One row per channel count (row n-1 serves n channels).
 * The first byte of a row is the number of syntactic elements; the following
 * bytes are the element types (TYPE_SCE / TYPE_CPE / TYPE_LFE) in the order
 * listed. Unlisted trailing bytes of a row are zero-initialized.
 * NOTE(review): presumably this is what the encoder's chan_map points at
 * (chan_map[0] is used as an element count, chan_map[i + 1] as an element
 * type) -- the assignment is not visible in this listing.
 */
145 static const uint8_t aac_chan_configs[6][5] = {
146  {1, TYPE_SCE}, // 1 channel - single channel element
147  {1, TYPE_CPE}, // 2 channels - channel pair
148  {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
149  {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
150  {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
151  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
152 };
153 
154 /**
155  * Table to remap channels from libavcodec's default order to AAC order.
156  */
/*
 * NOTE(review): the declaration line of this table (listing line 157) is
 * missing from this listing; only the initializer rows are shown.
 * The sample-copy code selects row (channel count - 1) of aac_chan_maps and
 * uses entry ch as the index of the input plane that feeds encoder channel ch.
 */
158  { 0 },
159  { 0, 1 },
160  { 2, 0, 1 },
161  { 2, 0, 1, 3 },
162  { 2, 0, 1, 3, 4 },
163  { 2, 0, 1, 4, 5, 3 },
164 };
165 
166 /**
167  * Make AAC audio config object.
168  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
169  */
/*
 * NOTE(review): the function header (listing line 170) is missing from this
 * listing; the body relies on a variable named avctx.
 *
 * Writes the configuration into avctx->extradata using a local bit writer.
 * The fields below total 5+4+4+1+1+1+11+5+1 = 33 bits, which fits the 5-byte
 * extradata_size set by the encoder init code.
 */
171 {
172  PutBitContext pb;
173  AACEncContext *s = avctx->priv_data;
174 
175  init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
176  put_bits(&pb, 5, 2); //object type - AAC-LC
177  put_bits(&pb, 4, s->samplerate_index); //sample rate index
178  put_bits(&pb, 4, s->channels);
179  //GASpecificConfig
180  put_bits(&pb, 1, 0); //frame length - 1024 samples
181  put_bits(&pb, 1, 0); //does not depend on core coder
182  put_bits(&pb, 1, 0); //is not extension
183 
184  //Explicitly Mark SBR absent
185  put_bits(&pb, 11, 0x2b7); //sync extension
186  put_bits(&pb, 5, AOT_SBR);
187  put_bits(&pb, 1, 0);
 /* push any bits still held by the writer out to extradata */
188  flush_put_bits(&pb);
189 }
190 
/*
 * Emit the signature of a windowing function named apply_<type>_window.
 * All windowing functions share the parameter list (fdsp, sce, audio) so that
 * they can be stored in one function-pointer table; the body follows the
 * macro invocation.
 */
191 #define WINDOW_FUNC(type) \
192 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
193  SingleChannelElement *sce, \
194  const float *audio)
195 
196 WINDOW_FUNC(only_long)
197 {
198  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
199  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
200  float *out = sce->ret_buf;
201 
202  fdsp->vector_fmul (out, audio, lwindow, 1024);
203  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
204 }
205 
206 WINDOW_FUNC(long_start)
207 {
208  const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
209  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
210  float *out = sce->ret_buf;
211 
212  fdsp->vector_fmul(out, audio, lwindow, 1024);
213  memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
214  fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
215  memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
216 }
217 
218 WINDOW_FUNC(long_stop)
219 {
220  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
221  const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
222  float *out = sce->ret_buf;
223 
224  memset(out, 0, sizeof(out[0]) * 448);
225  fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
226  memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
227  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
228 }
229 
230 WINDOW_FUNC(eight_short)
231 {
232  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
233  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
234  const float *in = audio + 448;
235  float *out = sce->ret_buf;
236  int w;
237 
238  for (w = 0; w < 8; w++) {
239  fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
240  out += 128;
241  in += 128;
242  fdsp->vector_fmul_reverse(out, in, swindow, 128);
243  out += 128;
244  }
245 }
246 
/*
 * Windowing function for each window sequence, indexed by the window
 * sequence value of the current frame.
 * NOTE(review): listing line 248 (the middle parameter of the pointer type)
 * is missing from this listing.
 */
247 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
249  const float *audio) = {
250  [ONLY_LONG_SEQUENCE] = apply_only_long_window,
251  [LONG_START_SEQUENCE] = apply_long_start_window,
252  [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
253  [LONG_STOP_SEQUENCE] = apply_long_stop_window
254 };
255 
/*
 * Window one channel's samples and transform them into sce->coeffs.
 * NOTE(review): the first line of the function header (listing line 256) and
 * the condition that selects the 1024-point transform (listing line 264) are
 * missing from this listing; the dangling "else" below belongs to that
 * missing "if".
 */
257  float *audio)
258 {
259  int i;
260  float *output = sce->ret_buf;
261 
 /* windowed samples are written to sce->ret_buf (aliased by output) */
262  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
263 
265  s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
266  else
 /* eight 128-coefficient transforms, each reading 256 windowed samples */
267  for (i = 0; i < 1024; i += 128)
268  s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
 /* move samples [1024, 2048) of audio down to [0, 1024) */
269  memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
 /* keep an untouched copy of the coefficients in pcoeffs */
270  memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
271 }
272 
273 /**
274  * Encode ics_info element.
275  * @see Table 4.6 (syntax of ics_info)
276  */
/*
 * NOTE(review): the function header (listing line 277) is missing from this
 * listing; the body uses an encoder context s and a stream description info.
 */
278 {
279  int w;
280 
281  put_bits(&s->pb, 1, 0); // ics_reserved bit
282  put_bits(&s->pb, 2, info->window_sequence[0]);
283  put_bits(&s->pb, 1, info->use_kb_window[0]);
284  if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
 /* long-type windows: max_sfb is a 6-bit field */
285  put_bits(&s->pb, 6, info->max_sfb);
286  put_bits(&s->pb, 1, 0); // no prediction
287  } else {
 /* eight-short: max_sfb is a 4-bit field, followed by one bit for each
  * of windows 1..7 that is set when that window's group_len is 0 */
288  put_bits(&s->pb, 4, info->max_sfb);
289  for (w = 1; w < 8; w++)
290  put_bits(&s->pb, 1, !info->group_len[w]);
291  }
292 }
293 
294 /**
295  * Encode MS data.
296  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
297  */
/*
 * NOTE(review): the function header (listing line 298) is missing from this
 * listing; the body uses a bit writer pb and a channel element cpe.
 *
 * Writes the 2-bit ms_mode; only for ms_mode == 1 it additionally writes one
 * mask bit per (window group, band below max_sfb), taken from channel 0's
 * grouping. ms_mask is addressed with a stride of 16 entries per window.
 */
299 {
300  int i, w;
301 
302  put_bits(pb, 2, cpe->ms_mode);
303  if (cpe->ms_mode == 1)
304  for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
305  for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
306  put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
307 }
308 
309 /**
310  * Produce integer coefficients from scalefactors provided by the model.
311  */
312 static void adjust_frame_information(ChannelElement *cpe, int chans)
313 {
314  int i, w, w2, g, ch;
315  int start, maxsfb, cmaxsfb;
316 
317  for (ch = 0; ch < chans; ch++) {
318  IndividualChannelStream *ics = &cpe->ch[ch].ics;
319  start = 0;
320  maxsfb = 0;
321  cpe->ch[ch].pulse.num_pulse = 0;
322  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
323  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
324  start = (w+w2) * 128;
325  for (g = 0; g < ics->num_swb; g++) {
326  //apply M/S
327  if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
328  for (i = 0; i < ics->swb_sizes[g]; i++) {
329  cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
330  cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
331  }
332  }
333  start += ics->swb_sizes[g];
334  }
335  for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
336  ;
337  maxsfb = FFMAX(maxsfb, cmaxsfb);
338  }
339  }
340  ics->max_sfb = maxsfb;
341 
342  //adjust zero bands for window groups
343  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
344  for (g = 0; g < ics->max_sfb; g++) {
345  i = 1;
346  for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
347  if (!cpe->ch[ch].zeroes[w2*16 + g]) {
348  i = 0;
349  break;
350  }
351  }
352  cpe->ch[ch].zeroes[w*16 + g] = i;
353  }
354  }
355  }
356 
357  if (chans > 1 && cpe->common_window) {
358  IndividualChannelStream *ics0 = &cpe->ch[0].ics;
359  IndividualChannelStream *ics1 = &cpe->ch[1].ics;
360  int msc = 0;
361  ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
362  ics1->max_sfb = ics0->max_sfb;
363  for (w = 0; w < ics0->num_windows*16; w += 16)
364  for (i = 0; i < ics0->max_sfb; i++)
365  if (cpe->ms_mask[w+i])
366  msc++;
367  if (msc == 0 || ics0->max_sfb == 0)
368  cpe->ms_mode = 0;
369  else
370  cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
371  }
372 }
373 
374 /**
375  * Encode scalefactor band coding type.
376  */
/*
 * NOTE(review): the function header (listing line 377) is missing from this
 * listing; the body uses an encoder context s and a channel sce.
 * Delegates to the selected coder once per window group, passing the group's
 * first window, its length and the current lambda.
 */
378 {
379  int w;
380 
381  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
382  s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
383 }
384 
385 /**
386  * Encode scalefactors.
387  */
/*
 * NOTE(review): the function header (listing lines 388-389) and the statement
 * at listing line 411 are missing from this listing. As listed, nothing is
 * written after the range assertion; presumably the missing statement emits
 * the code for diff -- confirm against the original file.
 *
 * Scalefactors are coded differentially for every non-zero band below
 * max_sfb. Noise bands and regular bands keep separate running predictors
 * (off_pns and off_sf).
 */
390 {
391  int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
392  int noise_flag = 1;
393  int i, w;
394 
395  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
396  for (i = 0; i < sce->ics.max_sfb; i++) {
397  if (!sce->zeroes[w*16 + i]) {
398  if (sce->band_type[w*16 + i] == NOISE_BT) {
399  diff = sce->sf_idx[w*16 + i] - off_pns;
400  off_pns = sce->sf_idx[w*16 + i];
 /* only the first noise band is written directly as a fixed-width
  * value; noise_flag is positive just once */
401  if (noise_flag-- > 0) {
402  put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
403  continue;
404  }
405  } else {
406  diff = sce->sf_idx[w*16 + i] - off_sf;
407  off_sf = sce->sf_idx[w*16 + i];
408  }
 /* bias the difference and require it to lie in 0..120 */
409  diff += SCALE_DIFF_ZERO;
410  av_assert0(diff >= 0 && diff <= 120);
412  }
413  }
414  }
415 }
416 
417 /**
418  * Encode pulse data.
419  */
420 static void encode_pulses(AACEncContext *s, Pulse *pulse)
421 {
422  int i;
423 
424  put_bits(&s->pb, 1, !!pulse->num_pulse);
425  if (!pulse->num_pulse)
426  return;
427 
428  put_bits(&s->pb, 2, pulse->num_pulse - 1);
429  put_bits(&s->pb, 6, pulse->start);
430  for (i = 0; i < pulse->num_pulse; i++) {
431  put_bits(&s->pb, 5, pulse->pos[i]);
432  put_bits(&s->pb, 4, pulse->amp[i]);
433  }
434 }
435 
436 /**
437  * Encode spectral coefficients processed by psychoacoustic model.
438  */
/*
 * NOTE(review): the function header (listing line 439) is missing from this
 * listing; the body uses an encoder context s and a channel sce.
 *
 * For each window group and each band below max_sfb that is not marked zero,
 * the selected coder quantizes and writes the band once per window of the
 * group. Scalefactor index and band type are taken from the group's first
 * window; each window's coefficients start at a multiple of 128.
 */
440 {
441  int start, i, w, w2;
442 
443  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
444  start = 0;
445  for (i = 0; i < sce->ics.max_sfb; i++) {
446  if (sce->zeroes[w*16 + i]) {
 /* skipped band: still advance the coefficient offset */
447  start += sce->ics.swb_sizes[i];
448  continue;
449  }
450  for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
451  s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
452  sce->ics.swb_sizes[i],
453  sce->sf_idx[w*16 + i],
454  sce->band_type[w*16 + i],
455  s->lambda);
456  start += sce->ics.swb_sizes[i];
457  }
458  }
459 }
460 
461 /**
462  * Encode one channel of audio data.
463  */
/*
 * NOTE(review): the first lines of the function header (listing lines
 * 464-465) are missing from this listing; the body uses avctx, an encoder
 * context s and a channel sce in addition to common_window.
 *
 * Write order: 8-bit sf_idx[0], ics_info (only when the element has no common
 * window), band info, scalefactors, pulse data, two zero flag bits (TNS and
 * SSR, neither used), then the spectral coefficients. Always returns 0.
 */
466  int common_window)
467 {
468  put_bits(&s->pb, 8, sce->sf_idx[0]);
469  if (!common_window)
470  put_ics_info(s, &sce->ics);
471  encode_band_info(s, sce);
472  encode_scale_factors(avctx, s, sce);
473  encode_pulses(s, &sce->pulse);
474  put_bits(&s->pb, 1, 0); //tns
475  put_bits(&s->pb, 1, 0); //ssr
476  encode_spectral_coeffs(s, sce);
477  return 0;
478 }
479 
480 /**
481  * Write some auxiliary information about the created AAC file.
482  */
/*
 * Emits a TYPE_FIL element whose payload is the given name.
 * NOTE(review): the statement at listing line 494 is missing from this
 * listing; since padbits is computed just before it and subtracted from the
 * trailing padding, it presumably byte-aligns the writer -- confirm against
 * the original file.
 */
483 static void put_bitstream_info(AACEncContext *s, const char *name)
484 {
485  int i, namelen, padbits;
486 
487  namelen = strlen(name) + 2;
488  put_bits(&s->pb, 3, TYPE_FIL);
 /* 4-bit count saturates at 15; larger sizes add an 8-bit escape below */
489  put_bits(&s->pb, 4, FFMIN(namelen, 15));
490  if (namelen >= 15)
491  put_bits(&s->pb, 8, namelen - 14);
492  put_bits(&s->pb, 4, 0); //extension type - filler
 /* number of bits needed to reach the next byte boundary (0..7) */
493  padbits = -put_bits_count(&s->pb) & 7;
495  for (i = 0; i < namelen - 2; i++)
496  put_bits(&s->pb, 8, name[i]);
497  put_bits(&s->pb, 12 - padbits, 0);
498 }
499 
500 /*
501  * Copy input samples.
502  * Channels are reordered from libavcodec's default order to AAC order.
503  */
/*
 * NOTE(review): the function header (listing line 504) is missing from this
 * listing; the body uses an encoder context s and an input frame that may be
 * NULL.
 *
 * Each channel buffer is treated as 3072 samples: [1024, 2048) receives what
 * was at [2048, 3072) on the previous call, [2048, 2048 + nb_samples)
 * receives the new input, and the rest up to 3072 is zeroed. With a NULL
 * frame the whole [2048, 3072) range is zeroed.
 */
505 {
506  int ch;
507  int end = 2048 + (frame ? frame->nb_samples : 0);
508  const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
509 
510  /* copy and remap input samples */
511  for (ch = 0; ch < s->channels; ch++) {
512  /* copy last 1024 samples of previous frame to the start of the current frame */
513  memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
514 
515  /* copy new samples and zero any remaining samples */
516  if (frame) {
517  memcpy(&s->planar_samples[ch][2048],
518  frame->extended_data[channel_map[ch]],
519  frame->nb_samples * sizeof(s->planar_samples[0][0]));
520  }
521  memset(&s->planar_samples[ch][end], 0,
522  (3072 - end) * sizeof(s->planar_samples[0][0]));
523  }
524 }
525 
/*
 * Encode one frame of audio into avpkt.
 *
 * frame may be NULL, in which case zeros are fed in place of new samples.
 * Returns 0 without setting *got_packet_ptr once last_frame has reached 2,
 * and also while avctx->frame_number is still 0. Otherwise returns 0 with
 * *got_packet_ptr set to 1, or a negative error code.
 *
 * NOTE(review): listing lines 534, 547 and 607 are missing from this listing:
 *  - the declaration of the "windows" array used below,
 *  - the statement governed by "if (s->psypp)",
 *  - the statement governed by the "(avctx->frame_number & 0xFF)==1" test.
 * As listed, those two "if"s would bind to the statements that follow them.
 */
526 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
527  const AVFrame *frame, int *got_packet_ptr)
528 {
529  AACEncContext *s = avctx->priv_data;
530  float **samples = s->planar_samples, *samples2, *la, *overlap;
531  ChannelElement *cpe;
532  int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
533  int chan_el_counter[4];
535 
536  if (s->last_frame == 2)
537  return 0;
538 
539  /* add current frame to queue */
540  if (frame) {
541  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
542  return ret;
543  }
544 
545  copy_input_samples(s, frame);
546  if (s->psypp)
548 
549  if (!avctx->frame_number)
550  return 0;
551 
 /* pass 1: choose a window for every channel and run the transform */
552  start_ch = 0;
553  for (i = 0; i < s->chan_map[0]; i++) {
554  FFPsyWindowInfo* wi = windows + start_ch;
555  tag = s->chan_map[i+1];
556  chans = tag == TYPE_CPE ? 2 : 1;
557  cpe = &s->cpe[i];
558  for (ch = 0; ch < chans; ch++) {
559  IndividualChannelStream *ics = &cpe->ch[ch].ics;
560  int cur_channel = start_ch + ch;
561  overlap = &samples[cur_channel][0];
562  samples2 = overlap + 1024;
563  la = samples2 + (448+64);
 /* no new input: the window decision gets no lookahead pointer */
564  if (!frame)
565  la = NULL;
566  if (tag == TYPE_LFE) {
567  wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
568  wi[ch].window_shape = 0;
569  wi[ch].num_windows = 1;
570  wi[ch].grouping[0] = 1;
571 
572  /* Only the lowest 12 coefficients are used in a LFE channel.
573  * The expression below results in only the bottom 8 coefficients
574  * being used for 11.025kHz to 16kHz sample rates.
575  */
576  ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
577  } else {
578  wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
579  ics->window_sequence[0]);
580  }
 /* index 1 keeps the previous frame's value, index 0 the new one */
581  ics->window_sequence[1] = ics->window_sequence[0];
582  ics->window_sequence[0] = wi[ch].window_type[0];
583  ics->use_kb_window[1] = ics->use_kb_window[0];
584  ics->use_kb_window[0] = wi[ch].window_shape;
585  ics->num_windows = wi[ch].num_windows;
586  ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
587  ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
588  for (w = 0; w < ics->num_windows; w++)
589  ics->group_len[w] = wi[ch].grouping[w];
590 
591  apply_window_and_mdct(s, &cpe->ch[ch], overlap);
 /* NOTE(review): cpe->ch->coeffs[0] always reads channel 0 of the element,
  * even while ch == 1; cpe->ch[ch] may have been intended -- confirm. */
592  if (isnan(cpe->ch->coeffs[0])) {
593  av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
594  return AVERROR(EINVAL);
595  }
596  }
597  start_ch += chans;
598  }
599  if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
600  return ret;
 /* pass 2: write all elements; repeat with a rescaled lambda until the
  * frame fits the bit limit checked below */
601  do {
602  int frame_bits;
603 
604  init_put_bits(&s->pb, avpkt->data, avpkt->size);
605 
606  if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
608  start_ch = 0;
609  memset(chan_el_counter, 0, sizeof(chan_el_counter));
610  for (i = 0; i < s->chan_map[0]; i++) {
611  FFPsyWindowInfo* wi = windows + start_ch;
612  const float *coeffs[2];
613  tag = s->chan_map[i+1];
614  chans = tag == TYPE_CPE ? 2 : 1;
615  cpe = &s->cpe[i];
 /* element header: 3-bit type, then a 4-bit running index per type */
616  put_bits(&s->pb, 3, tag);
617  put_bits(&s->pb, 4, chan_el_counter[tag]++);
618  for (ch = 0; ch < chans; ch++)
619  coeffs[ch] = cpe->ch[ch].coeffs;
620  s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
621  for (ch = 0; ch < chans; ch++) {
622  s->cur_channel = start_ch + ch;
623  s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
624  }
 /* a pair shares its window info only when window type, window shape
  * and grouping agree for both channels */
625  cpe->common_window = 0;
626  if (chans > 1
627  && wi[0].window_type[0] == wi[1].window_type[0]
628  && wi[0].window_shape == wi[1].window_shape) {
629 
630  cpe->common_window = 1;
631  for (w = 0; w < wi[0].num_windows; w++) {
632  if (wi[0].grouping[w] != wi[1].grouping[w]) {
633  cpe->common_window = 0;
634  break;
635  }
636  }
637  }
638  s->cur_channel = start_ch;
639  if (s->options.stereo_mode && cpe->common_window) {
 /* positive stereo_mode: flag every band; otherwise let the coder
  * choose, if it provides a search function */
640  if (s->options.stereo_mode > 0) {
641  IndividualChannelStream *ics = &cpe->ch[0].ics;
642  for (w = 0; w < ics->num_windows; w += ics->group_len[w])
643  for (g = 0; g < ics->num_swb; g++)
644  cpe->ms_mask[w*16+g] = 1;
645  } else if (s->coder->search_for_ms) {
646  s->coder->search_for_ms(s, cpe, s->lambda);
647  }
648  }
649  adjust_frame_information(cpe, chans);
650  if (chans == 2) {
651  put_bits(&s->pb, 1, cpe->common_window);
652  if (cpe->common_window) {
653  put_ics_info(s, &cpe->ch[0].ics);
654  encode_ms_info(&s->pb, cpe);
 /* remember that coefficients were rewritten and need restoring
  * if this pass is repeated */
655  if (cpe->ms_mode) ms_mode = 1;
656  }
657  }
658  for (ch = 0; ch < chans; ch++) {
659  s->cur_channel = start_ch + ch;
660  encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
661  }
662  start_ch += chans;
663  }
664 
665  frame_bits = put_bits_count(&s->pb);
 /* accept the frame if it fits; the 3 bits presumably reserve room for
  * the 3-bit TYPE_END written after the loop */
666  if (frame_bits <= 6144 * s->channels - 3) {
667  s->psy.bitres.bits = frame_bits / s->channels;
668  break;
669  }
670  if (ms_mode) {
671  for (i = 0; i < s->chan_map[0]; i++) {
672  // Must restore coeffs
 /* NOTE(review): tag is not reloaded here and still holds the type of
  * the last element written above, so chans is the same for every i;
  * s->chan_map[i+1] may have been intended -- confirm. */
673  chans = tag == TYPE_CPE ? 2 : 1;
674  cpe = &s->cpe[i];
675  for (ch = 0; ch < chans; ch++)
676  memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
677  }
678  }
679 
 /* scale lambda by (bit_rate * 1024 / sample_rate) / bits just used */
680  s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
681 
682  } while (1);
683 
684  put_bits(&s->pb, 3, TYPE_END);
685  flush_put_bits(&s->pb);
686  avctx->frame_bits = put_bits_count(&s->pb);
687 
688  // rate control stuff
689  if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
690  float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
691  s->lambda *= ratio;
692  s->lambda = FFMIN(s->lambda, 65536.f);
693  }
694 
 /* count calls made without input; the check at the top of the function
  * stops producing packets once this reaches 2 */
695  if (!frame)
696  s->last_frame++;
697 
698  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
699  &avpkt->duration);
700 
 /* bit count to byte count */
701  avpkt->size = put_bits_count(&s->pb) >> 3;
702  *got_packet_ptr = 1;
703  return 0;
704 }
705 
/*
 * Release everything owned by the encoder context; always returns 0.
 * NOTE(review): the function header (listing line 706) and the statement
 * governed by "if (s->psypp)" (listing line 714) are missing from this
 * listing; as listed, that "if" would bind to the av_freep() call after it.
 */
707 {
708  AACEncContext *s = avctx->priv_data;
709 
710  ff_mdct_end(&s->mdct1024);
711  ff_mdct_end(&s->mdct128);
712  ff_psy_end(&s->psy);
713  if (s->psypp)
715  av_freep(&s->buffer.samples);
716  av_freep(&s->cpe);
717  av_freep(&s->fdsp);
718  ff_af_queue_close(&s->afq);
719  return 0;
720 }
721 
/*
 * Set up the DSP helpers and the two transforms used by the encoder.
 * Returns 0 on success, AVERROR(ENOMEM) when s->fdsp is NULL, or the error
 * reported by ff_mdct_init().
 * NOTE(review): the function header (listing line 722), the statement that
 * sets s->fdsp (line 726) and the window initialization (lines 731-734) are
 * missing from this listing.
 */
723 {
724  int ret = 0;
725 
727  if (!s->fdsp)
728  return AVERROR(ENOMEM);
729 
730  // window init
735 
736  if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
737  return ret;
738  if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0)
739  return ret;
740 
741  return 0;
742 }
743 
/*
 * Allocate the sample buffer, the channel element array and the extradata.
 * Returns 0 on success or AVERROR(ENOMEM); nothing is freed here on failure,
 * and the visible caller invokes the encoder's close function on error.
 * NOTE(review): the function header (listing line 744) is missing from this
 * listing; the body uses avctx and an encoder context s.
 */
745 {
746  int ch;
747  FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
748  FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
749  FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
750 
 /* each channel gets a 3 * 1024-sample slice of the shared buffer */
751  for(ch = 0; ch < s->channels; ch++)
752  s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
753 
754  return 0;
755 alloc_fail:
756  return AVERROR(ENOMEM);
757 }
758 
/*
 * Encoder initialization: validates the configuration, clamps the bit rate,
 * allocates buffers and sets up the psychoacoustic model.
 * Returns 0 on success. Validation failures return AVERROR(EINVAL) directly;
 * later failures go through the "fail" label, which calls aac_encode_end().
 * NOTE(review): listing lines 759 (function header), 770, 779, 781, 792, 801,
 * 813, 816 and 820 are missing from this listing, so several statements
 * below are incomplete: the sample-rate comparison, two ERROR_IF conditions,
 * and the statements that followed the gaps.
 */
760 {
761  AACEncContext *s = avctx->priv_data;
762  int i, ret = 0;
763  const uint8_t *sizes[2];
764  uint8_t grouping[AAC_MAX_CHANNELS];
765  int lengths[2];
766 
767  avctx->frame_size = 1024;
768 
 /* search for the sample-rate index; the loop condition is on the
  * missing line 770 */
769  for (i = 0; i < 16; i++)
771  break;
772 
773  s->channels = avctx->channels;
774 
 /* reject indices that were not found or have no band table */
775  ERROR_IF(i == 16
776  || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
777  || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
778  "Unsupported sample rate %d\n", avctx->sample_rate);
780  "Unsupported number of channels: %d\n", s->channels);
782  "Unsupported profile %d\n", avctx->profile);
783  WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
784  "Too many bits per frame requested, clamping to max\n");
785 
 /* cap the bit rate at 6144 bits per channel per 1024-sample frame */
786  avctx->bit_rate = (int)FFMIN(
787  6144 * s->channels / 1024.0 * avctx->sample_rate,
788  avctx->bit_rate);
789 
790  s->samplerate_index = i;
791 
793 
794  if ((ret = dsp_init(avctx, s)) < 0)
795  goto fail;
796 
797  if ((ret = alloc_buffers(avctx, s)) < 0)
798  goto fail;
799 
800  avctx->extradata_size = 5;
802 
 /* band layouts for long (index 0) and short (index 1) windows */
803  sizes[0] = swb_size_1024[i];
804  sizes[1] = swb_size_128[i];
805  lengths[0] = ff_aac_num_swb_1024[i];
806  lengths[1] = ff_aac_num_swb_128[i];
 /* one flag per element: set when the element is a channel pair */
807  for (i = 0; i < s->chan_map[0]; i++)
808  grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
809  if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
810  s->chan_map[0], grouping)) < 0)
811  goto fail;
812  s->psypp = ff_psy_preprocess_init(avctx);
814 
815  if (HAVE_MIPSDSPR1)
817 
 /* starting lambda: global_quality when positive, otherwise 120 */
818  s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
819 
821 
 /* fill the t^(3/4) table from ff_aac_pow2sf_tab */
822  for (i = 0; i < 428; i++)
823  ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
824 
825  avctx->initial_padding = 1024;
826  ff_af_queue_init(avctx, &s->afq);
827 
828  return 0;
829 fail:
830  aac_encode_end(avctx);
831  return ret;
832 }
833 
/* Option flags shared by every entry of the encoder's option table.
 * Parenthesized so the expansion stays a single operand wherever it is used. */
#define AACENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
/*
 * Private options of the encoder.
 * Three integer options are declared (stereo_mode, aac_coder, aac_pns); each
 * is followed by the named constants that share its unit string in the last
 * column. Defaults: stereo_mode 0, aac_coder AAC_CODER_TWOLOOP, aac_pns 0.
 * The table ends with a NULL entry.
 */
835 static const AVOption aacenc_options[] = {
836  {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
837  {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
838  {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
839  {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
840  {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
841  {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
842  {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
843  {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
844  {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
845  {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
846  {"disable", "Disable PNS", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
847  {"enable", "Enable PNS (Proof of concept)", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
848  {NULL}
849 };
850 
/*
 * AVClass of the encoder; referenced by the codec descriptor's priv_class.
 * NOTE(review): listing lines 853-855 (the remaining initializers) are
 * missing from this listing; only the class name string is shown.
 */
851 static const AVClass aacenc_class = {
852  "AAC encoder",
856 };
857 
858 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
859  * failures */
/* Only 13 rates are listed for the 16 slots; the last three entries are
 * therefore zero-initialized. */
860 static const int mpeg4audio_sample_rates[16] = {
861  96000, 88200, 64000, 48000, 44100, 32000,
862  24000, 22050, 16000, 12000, 11025, 8000, 7350
863 };
864 
/*
 * Codec descriptor that registers this encoder under the name "aac".
 * NOTE(review): the declaration line (listing line 865) and listing lines
 * 871, 874, 876 and 878 are missing from this listing, so the initializer is
 * incomplete: the capabilities and sample_fmts expressions are cut off.
 */
866  .name = "aac",
867  .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
868  .type = AVMEDIA_TYPE_AUDIO,
869  .id = AV_CODEC_ID_AAC,
870  .priv_data_size = sizeof(AACEncContext),
872  .encode2 = aac_encode_frame,
873  .close = aac_encode_end,
875  .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
877  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
879  .priv_class = &aacenc_class,
880 };
float, planar
Definition: samplefmt.h:70
#define NULL
Definition: coverity.c:32
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size)
Check AVPacket size and/or allocate data.
Definition: utils.c:1736
static const uint8_t aac_chan_configs[6][5]
default channel configurations
Definition: aacenc.c:145
const char * s
Definition: avisynth_c.h:631
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda)
Definition: aacenc.h:57
uint8_t use_kb_window[2]
If set, use Kaiser-Bessel window, otherwise use a sine window.
Definition: aac.h:164
AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB]
Definition: aaccoder.c:1175
This structure describes decoded (raw) audio or video data.
Definition: frame.h:171
#define FF_ALLOCZ_ARRAY_OR_GOTO(ctx, p, nelem, elsize, label)
Definition: internal.h:156
static const uint8_t swb_size_1024_64[]
Definition: aacenc.c:69
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:69
AVOption.
Definition: opt.h:255
uint8_t ** bands
scalefactor band sizes for possible frame sizes
Definition: psymodel.h:84
Definition: aac.h:207
void(* mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:109
static const AVClass aacenc_class
Definition: aacenc.c:851
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:160
av_cold void ff_kbd_window_init(float *window, float alpha, int n)
Generate a Kaiser-Bessel Derived Window.
Definition: kbdwin.c:26
float pcoeffs[1024]
coefficients for IMDCT, pristine
Definition: aac.h:240
#define LIBAVUTIL_VERSION_INT
Definition: version.h:62
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
Definition: aac.h:142
Definition: aac.h:56
const char * g
Definition: vf_curves.c:108
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
Definition: aac.h:49
Definition: aac.h:50
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)
Cleanup audio preprocessing module.
Definition: psymodel.c:150
int size
Definition: avcodec.h:1163
AACCoefficientsEncoder * coder
Definition: aacenc.h:81
void avpriv_align_put_bits(PutBitContext *s)
Pad the bitstream with zeros up to the next byte boundary.
Definition: bitstream.c:48
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
Encode ics_info element.
Definition: aacenc.c:277
#define AAC_MAX_CHANNELS
Definition: aacenc.c:48
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:255
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, const uint8_t **bands, const int *num_bands, int num_groups, const uint8_t *group_map)
Initialize psychoacoustic model.
Definition: psymodel.c:31
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aac.h:257
static const uint8_t swb_size_128_8[]
Definition: aacenc.c:130
float lambda
Definition: aacenc.h:84
#define NOISE_PRE
preamble for NOISE_BT, put in bitstream with the first noise band
Definition: aac.h:144
int profile
profile
Definition: avcodec.h:2835
AVCodec.
Definition: avcodec.h:3181
static const uint8_t swb_size_1024_8[]
Definition: aacenc.c:100
static const uint8_t swb_size_128_96[]
Definition: aacenc.c:114
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
Encode spectral coefficients processed by psychoacoustic model.
Definition: aacenc.c:439
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:79
int * num_bands
number of scalefactor bands for possible frame sizes
Definition: psymodel.h:85
const uint8_t ff_aac_num_swb_128[]
Definition: aactab.c:51
static const int mpeg4audio_sample_rates[16]
Definition: aacenc.c:860
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
supported_samplerates
AACEncOptions options
encoding options
Definition: aacenc.h:67
AAC encoder context.
Definition: aacenc.h:65
if()
Definition: avfilter.c:975
uint8_t
#define av_cold
Definition: attributes.h:74
AVOptions.
#define WINDOW_FUNC(type)
Definition: aacenc.c:191
static av_always_inline av_const int isnan(float x)
Definition: libm.h:96
void ff_aac_coder_init_mips(AACEncContext *c)
void(* quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, int size, int scale_idx, int cb, const float lambda)
Definition: aacenc.h:55
SingleChannelElement ch[2]
Definition: aac.h:259
int samplerate_index
MPEG-4 samplerate index.
Definition: aacenc.h:74
Definition: aac.h:52
av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
Initialize AudioFrameQueue.
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:67
const uint8_t * chan_map
channel configuration map
Definition: aacenc.h:76
#define FF_PROFILE_UNKNOWN
Definition: avcodec.h:2836
const uint8_t ff_aac_scalefactor_bits[121]
Definition: aactab.c:78
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:1355
AudioFrameQueue afq
Definition: aacenc.h:85
static AVFrame * frame
static const uint8_t swb_size_1024_48[]
Definition: aacenc.c:75
uint8_t * data
Definition: avcodec.h:1162
uint32_t tag
Definition: movenc.c:1333
#define CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:759
int duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: avcodec.h:1180
const OptionDef options[]
Definition: ffserver.c:3798
static void adjust_frame_information(ChannelElement *cpe, int chans)
Produce integer coefficients from scalefactors provided by the model.
Definition: aacenc.c:312
#define av_log(a,...)
static const AVOption aacenc_options[]
Definition: aacenc.c:835
static const uint8_t swb_size_1024_24[]
Definition: aacenc.c:88
float coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:241
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
#define CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and correct output.
Definition: avcodec.h:824
av_default_item_name
static const int sizes[][2]
Definition: img2dec.c:48
#define AVERROR(e)
Definition: error.h:43
const uint8_t ff_aac_num_swb_1024[]
Definition: aactab.c:39
int last_frame
Definition: aacenc.h:83
#define CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: avcodec.h:829
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:175
int stereo_mode
Definition: aacenc.h:43
int initial_padding
Audio only.
Definition: avcodec.h:3015
float ff_aac_kbd_long_1024[1024]
Definition: aactab.c:36
struct FFPsyContext::@77 bitres
int flags
CODEC_FLAG_*.
Definition: avcodec.h:1335
int amp[4]
Definition: aac.h:211
#define CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:712
const char * name
Name of the codec implementation.
Definition: avcodec.h:3188
int num_windows
number of windows in a frame
Definition: psymodel.h:68
static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
Definition: aacenc.c:504
uint8_t max_sfb
number of scalefactor bands per group
Definition: aac.h:162
#define ff_mdct_init
Definition: fft.h:167
Definition: aac.h:55
int num_swb
number of scalefactor window bands
Definition: aac.h:170
int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
Add a frame to the queue.
#define FFMAX(a, b)
Definition: common.h:64
Libavcodec external API header.
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
Definition: aacenc.h:51
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:85
#define FF_INPUT_BUFFER_PADDING_SIZE
Required number of additionally allocated bytes at the end of the input bitstream for decoding.
Definition: avcodec.h:630
#define AACENC_FLAGS
Definition: aacenc.c:834
int bit_rate
the average bitrate
Definition: avcodec.h:1305
enum WindowSequence window_sequence[2]
Definition: aac.h:163
int cur_channel
Definition: aacenc.h:82
#define FFMIN(a, b)
Definition: common.h:66
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: aacenc.c:526
ret
Definition: avfilter.c:974
void(* analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.
Definition: psymodel.h:124
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS]
Table to remap channels from libavcodec's default order to AAC order.
Definition: aacenc.c:157
int pos[4]
Definition: aac.h:210
int channels
channel count
Definition: aacenc.h:75
AAC definitions and structures.
FFTContext mdct128
short (128 samples) frame transform context
Definition: aacenc.h:70
PutBitContext pb
Definition: aacenc.h:68
static void(*const apply_window[4])(AVFloatDSPContext *fdsp, SingleChannelElement *sce, const float *audio)
Definition: aacenc.c:247
float ff_aac_pow34sf_tab[428]
Definition: aacenc.c:61
static const uint8_t swb_size_128_48[]
Definition: aacenc.c:118
static const uint8_t swb_size_128_24[]
Definition: aacenc.c:122
AVFloatDSPContext * fdsp
Definition: aacenc.h:71
static const uint8_t swb_size_1024_16[]
Definition: aacenc.c:94
static av_cold int aac_encode_end(AVCodecContext *avctx)
Definition: aacenc.c:706
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:2005
static const uint8_t swb_size_1024_32[]
Definition: aacenc.c:82
#define WARN_IF(cond,...)
Definition: aacenc.c:56
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:59
static void put_audio_specific_config(AVCodecContext *avctx)
Make AAC audio config object.
Definition: aacenc.c:170
int sample_rate
samples per second
Definition: avcodec.h:1985
float ff_aac_kbd_short_128[128]
Definition: aactab.c:37
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
Encode MS data.
Definition: aacenc.c:298
FFPsyWindowInfo(* window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Suggest window sequence for channel.
Definition: psymodel.h:114
int frame_bits
number of bits used for the previously encoded frame
Definition: avcodec.h:2487
main external API structure.
Definition: avcodec.h:1241
int bits
number of bits used in the bit reservoir
Definition: psymodel.h:90
#define NOISE_PRE_BITS
length of preamble
Definition: aac.h:145
#define FF_PROFILE_AAC_LOW
Definition: avcodec.h:2840
IndividualChannelStream ics
Definition: aac.h:232
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
void(* encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
Definition: aacenc.h:53
int extradata_size
Definition: avcodec.h:1356
uint8_t group_len[8]
Definition: aac.h:166
Describe the class of an AVClass context structure.
Definition: log.h:67
static void put_bitstream_info(AACEncContext *s, const char *name)
Write some auxiliary information about the created AAC file.
Definition: aacenc.c:483
static const uint8_t swb_size_1024_96[]
Definition: aacenc.c:63
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:67
static void encode_pulses(AACEncContext *s, Pulse *pulse)
Encode pulse data.
Definition: aacenc.c:420
static const uint8_t swb_size_128_16[]
Definition: aacenc.c:126
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:169
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:143
FFPsyContext psy
Definition: aacenc.h:79
const uint32_t ff_aac_scalefactor_code[121]
Definition: aactab.c:59
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:744
const struct FFPsyModel * model
encoder-specific model functions
Definition: psymodel.h:78
struct AACEncContext::@28 buffer
int ms_mode
Signals mid/side stereo flags coding mode (used by encoder)
Definition: aac.h:256
static const uint8_t * swb_size_1024[]
Definition: aacenc.c:106
struct FFPsyPreprocessContext * psypp
Definition: aacenc.h:80
#define NOISE_OFFSET
subtracted from global gain, used as offset for the preamble
Definition: aac.h:146
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1321
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:239
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:238
AVCodec ff_aac_encoder
Definition: aacenc.c:865
const int avpriv_mpeg4audio_sample_rates[16]
Definition: mpeg4audio.c:57
int aac_coder
Definition: aacenc.h:44
av_cold void ff_aac_tableinit(void)
Definition: aac_tablegen.h:34
Y Spectral Band Replication.
Definition: mpeg4audio.h:65
float * samples
Definition: aacenc.h:90
static av_cold int aac_encode_init(AVCodecContext *avctx)
Definition: aacenc.c:759
common internal api header.
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:101
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:231
windowing related information
Definition: psymodel.h:65
#define ff_mdct_end
Definition: fft.h:168
av_cold struct FFPsyPreprocessContext * ff_psy_preprocess_init(AVCodecContext *avctx)
psychoacoustic model audio preprocessing initialization
Definition: psymodel.c:102
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels)
Preprocess several channels in an audio frame in order to compress it better.
Definition: psymodel.c:137
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce)
Encode scalefactors.
Definition: aacenc.c:388
#define CODEC_CAP_EXPERIMENTAL
Codec is experimental and is thus avoided in favor of non experimental encoders.
Definition: avcodec.h:852
ChannelElement * cpe
channel elements
Definition: aacenc.h:78
Individual Channel Stream.
Definition: aac.h:161
float ff_aac_pow2sf_tab[428]
Definition: aac_tablegen.h:32
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:48
static const uint8_t * swb_size_128[]
Definition: aacenc.c:134
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:252
void * priv_data
Definition: avcodec.h:1283
int start
Definition: aac.h:209
FFTContext mdct1024
long (1024 samples) frame transform context
Definition: aacenc.h:69
static av_always_inline int diff(const uint32_t a, const uint32_t b)
#define ERROR_IF(cond,...)
Definition: aacenc.c:50
void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts, int *duration)
Remove frame(s) from the queue.
int channels
number of audio channels
Definition: avcodec.h:1986
int num_pulse
Definition: aac.h:208
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
Encode scalefactor band coding type.
Definition: aacenc.c:377
void ff_af_queue_close(AudioFrameQueue *afq)
Close AudioFrameQueue.
enum BandType band_type[128]
band types
Definition: aac.h:235
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> out
#define LIBAVCODEC_IDENT
Definition: version.h:43
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:701
int frame_number
Frame counter, set by libavcodec.
Definition: avcodec.h:2016
float ret_buf[2048]
PCM output buffer.
Definition: aac.h:243
#define av_freep(p)
void INT64 start
Definition: avisynth_c.h:553
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, int common_window)
Encode one channel of audio data.
Definition: aacenc.c:464
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce, float *audio)
Definition: aacenc.c:256
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:215
AAC data declarations.
av_cold void ff_psy_end(FFPsyContext *ctx)
Cleanup model context at the end.
Definition: psymodel.c:82
This structure stores compressed data.
Definition: avcodec.h:1139
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:66
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:225
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:722
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will be presented to the user.
Definition: avcodec.h:1155
void ff_init_ff_sine_windows(int index)
initialize the specified entry of ff_sine_windows
#define FF_ALLOCZ_OR_GOTO(ctx, p, size, label)
Definition: internal.h:138
float * planar_samples[6]
saved preprocessed input
Definition: aacenc.h:72
const char * name
Definition: opengl_enc.c:103
bitstream writer API