FFmpeg
aacenc.c
1 /*
2  * AAC encoder
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder
25  */
26 
27 /***********************************
28  * TODOs:
29  * add sane pulse detection
30  ***********************************/
31 
32 #include "libavutil/libm.h"
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "put_bits.h"
37 #include "internal.h"
38 #include "mpeg4audio.h"
39 #include "sinewin.h"
40 #include "profiles.h"
41 
42 #include "aac.h"
43 #include "aactab.h"
44 #include "aacenc.h"
45 #include "aacenctab.h"
46 #include "aacenc_utils.h"
47 
48 #include "psymodel.h"
49 
50 static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
51 {
52  int i, j;
53  AACEncContext *s = avctx->priv_data;
54  AACPCEInfo *pce = &s->pce;
55  const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT;
56  const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT;
57 
58  put_bits(pb, 4, 0);
59 
60  put_bits(pb, 2, avctx->profile);
61  put_bits(pb, 4, s->samplerate_index);
62 
63  put_bits(pb, 4, pce->num_ele[0]); /* Front */
64  put_bits(pb, 4, pce->num_ele[1]); /* Side */
65  put_bits(pb, 4, pce->num_ele[2]); /* Back */
66  put_bits(pb, 2, pce->num_ele[3]); /* LFE */
67  put_bits(pb, 3, 0); /* Assoc data */
68  put_bits(pb, 4, 0); /* CCs */
69 
70  put_bits(pb, 1, 0); /* Stereo mixdown */
71  put_bits(pb, 1, 0); /* Mono mixdown */
72  put_bits(pb, 1, 0); /* Matrix mixdown */
73 
74  for (i = 0; i < 4; i++) {
75  for (j = 0; j < pce->num_ele[i]; j++) {
76  if (i < 3)
77  put_bits(pb, 1, pce->pairing[i][j]);
78  put_bits(pb, 4, pce->index[i][j]);
79  }
80  }
81 
82  align_put_bits(pb);
83  put_bits(pb, 8, strlen(aux_data));
84  ff_put_string(pb, aux_data, 0);
85 }
86 
87 /**
88  * Make AAC audio config object.
89  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
90  */
92 {
93  PutBitContext pb;
94  AACEncContext *s = avctx->priv_data;
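    /* channel_config is written as 0 when a PCE follows; the 8-channel (7.1)
     * layout maps to MPEG-4 channel configuration 7, hence the adjustment below. */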
95  int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
96  const int max_size = 32;
97 
98  avctx->extradata = av_mallocz(max_size);
99  if (!avctx->extradata)
100  return AVERROR(ENOMEM);
101 
102  init_put_bits(&pb, avctx->extradata, max_size);
103  put_bits(&pb, 5, s->profile+1); //profile
104  put_bits(&pb, 4, s->samplerate_index); //sample rate index
105  put_bits(&pb, 4, channels);
106  //GASpecificConfig
107  put_bits(&pb, 1, 0); //frame length - 1024 samples
108  put_bits(&pb, 1, 0); //does not depend on core coder
109  put_bits(&pb, 1, 0); //is not extension
110  if (s->needs_pce)
111  put_pce(&pb, avctx);
112 
113  //Explicitly Mark SBR absent
114  put_bits(&pb, 11, 0x2b7); //sync extension
115  put_bits(&pb, 5, AOT_SBR);
116  put_bits(&pb, 1, 0);
117  flush_put_bits(&pb);
118  avctx->extradata_size = put_bits_count(&pb) >> 3;
119 
120  return 0;
121 }
122 
123 void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
124 {
125  ++s->quantize_band_cost_cache_generation;
126  if (s->quantize_band_cost_cache_generation == 0) {
127  memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache));
128  s->quantize_band_cost_cache_generation = 1;
129  }
130 }
131 
132 #define WINDOW_FUNC(type) \
133 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
134  SingleChannelElement *sce, \
135  const float *audio)
136 
137 WINDOW_FUNC(only_long)
138 {
139  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
140  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
141  float *out = sce->ret_buf;
142 
143  fdsp->vector_fmul (out, audio, lwindow, 1024);
144  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
145 }
146 
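/* A long-start window rises over 1024 samples, stays flat for 448, falls with a
 * 128-sample short window and ends with 448 zeros, preparing the transition to
 * eight short windows. */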
147 WINDOW_FUNC(long_start)
148 {
149  const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
150  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
151  float *out = sce->ret_buf;
152 
153  fdsp->vector_fmul(out, audio, lwindow, 1024);
154  memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
155  fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
156  memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
157 }
158 
159 WINDOW_FUNC(long_stop)
160 {
161  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
162  const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
163  float *out = sce->ret_buf;
164 
165  memset(out, 0, sizeof(out[0]) * 448);
166  fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
167  memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
168  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
169 }
170 
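/* Eight overlapping 128-sample short windows covering the centre 1152 samples
 * (offsets 448..1599) of the 2048-sample input buffer. */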
171 WINDOW_FUNC(eight_short)
172 {
173  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
174  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
175  const float *in = audio + 448;
176  float *out = sce->ret_buf;
177  int w;
178 
179  for (w = 0; w < 8; w++) {
180  fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
181  out += 128;
182  in += 128;
183  fdsp->vector_fmul_reverse(out, in, swindow, 128);
184  out += 128;
185  }
186 }
187 
188 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
189  SingleChannelElement *sce,
190  const float *audio) = {
191  [ONLY_LONG_SEQUENCE] = apply_only_long_window,
192  [LONG_START_SEQUENCE] = apply_long_start_window,
193  [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
194  [LONG_STOP_SEQUENCE] = apply_long_stop_window
195 };
196 
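/* Window the 2048-sample input and transform it with either one 1024-point MDCT
 * or eight 128-point MDCTs, depending on the window sequence chosen by the
 * psychoacoustic model. */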
197 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
198  float *audio)
199 {
200  int i;
201  const float *output = sce->ret_buf;
202 
203  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
204 
205  if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
206  s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
207  else
208  for (i = 0; i < 1024; i += 128)
209  s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2);
210  memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
211  memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
212 }
213 
214 /**
215  * Encode ics_info element.
216  * @see Table 4.6 (syntax of ics_info)
217  */
218 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
219 {
220  int w;
221 
222  put_bits(&s->pb, 1, 0); // ics_reserved bit
223  put_bits(&s->pb, 2, info->window_sequence[0]);
224  put_bits(&s->pb, 1, info->use_kb_window[0]);
225  if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
226  put_bits(&s->pb, 6, info->max_sfb);
227  put_bits(&s->pb, 1, !!info->predictor_present);
228  } else {
229  put_bits(&s->pb, 4, info->max_sfb);
230  for (w = 1; w < 8; w++)
231  put_bits(&s->pb, 1, !info->group_len[w]);
232  }
233 }
234 
235 /**
236  * Encode MS data.
237  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
238  */
239 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
240 {
241  int i, w;
242 
243  put_bits(pb, 2, cpe->ms_mode);
244  if (cpe->ms_mode == 1)
245  for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
246  for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
247  put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
248 }
249 
250 /**
251  * Produce integer coefficients from scalefactors provided by the model.
252  */
253 static void adjust_frame_information(ChannelElement *cpe, int chans)
254 {
255  int i, w, w2, g, ch;
256  int maxsfb, cmaxsfb;
257 
258  for (ch = 0; ch < chans; ch++) {
259  IndividualChannelStream *ics = &cpe->ch[ch].ics;
260  maxsfb = 0;
261  cpe->ch[ch].pulse.num_pulse = 0;
262  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
263  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
264  for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
265  ;
266  maxsfb = FFMAX(maxsfb, cmaxsfb);
267  }
268  }
269  ics->max_sfb = maxsfb;
270 
271  //adjust zero bands for window groups
272  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
273  for (g = 0; g < ics->max_sfb; g++) {
274  i = 1;
275  for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
276  if (!cpe->ch[ch].zeroes[w2*16 + g]) {
277  i = 0;
278  break;
279  }
280  }
281  cpe->ch[ch].zeroes[w*16 + g] = i;
282  }
283  }
284  }
285 
286  if (chans > 1 && cpe->common_window) {
287  IndividualChannelStream *ics0 = &cpe->ch[0].ics;
288  IndividualChannelStream *ics1 = &cpe->ch[1].ics;
289  int msc = 0;
290  ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
291  ics1->max_sfb = ics0->max_sfb;
292  for (w = 0; w < ics0->num_windows*16; w += 16)
293  for (i = 0; i < ics0->max_sfb; i++)
294  if (cpe->ms_mask[w+i])
295  msc++;
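        /* ms_mode: 0 = no M/S, 1 = a per-band ms_mask is signalled, 2 = M/S on all bands */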
296  if (msc == 0 || ics0->max_sfb == 0)
297  cpe->ms_mode = 0;
298  else
299  cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
300  }
301 }
302 
303 static void apply_intensity_stereo(ChannelElement *cpe)
304 {
305  int w, w2, g, i;
306  IndividualChannelStream *ics = &cpe->ch[0].ics;
307  if (!cpe->common_window)
308  return;
309  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
310  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
311  int start = (w+w2) * 128;
312  for (g = 0; g < ics->num_swb; g++) {
313  int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
314  float scale = cpe->ch[0].is_ener[w*16+g];
315  if (!cpe->is_mask[w*16 + g]) {
316  start += ics->swb_sizes[g];
317  continue;
318  }
319  if (cpe->ms_mask[w*16 + g])
320  p *= -1;
321  for (i = 0; i < ics->swb_sizes[g]; i++) {
322  float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
323  cpe->ch[0].coeffs[start+i] = sum;
324  cpe->ch[1].coeffs[start+i] = 0.0f;
325  }
326  start += ics->swb_sizes[g];
327  }
328  }
329  }
330 }
331 
332 static void apply_mid_side_stereo(ChannelElement *cpe)
333 {
334  int w, w2, g, i;
335  IndividualChannelStream *ics = &cpe->ch[0].ics;
336  if (!cpe->common_window)
337  return;
338  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
339  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
340  int start = (w+w2) * 128;
341  for (g = 0; g < ics->num_swb; g++) {
342  /* ms_mask can be used for other purposes in PNS and I/S,
343  * so must not apply M/S if any band uses either, even if
344  * ms_mask is set.
345  */
346  if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
347  || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
348  || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
349  start += ics->swb_sizes[g];
350  continue;
351  }
352  for (i = 0; i < ics->swb_sizes[g]; i++) {
353  float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
354  float R = L - cpe->ch[1].coeffs[start+i];
355  cpe->ch[0].coeffs[start+i] = L;
356  cpe->ch[1].coeffs[start+i] = R;
357  }
358  start += ics->swb_sizes[g];
359  }
360  }
361  }
362 }
363 
364 /**
365  * Encode scalefactor band coding type.
366  */
367 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
368 {
369  int w;
370 
371  if (s->coder->set_special_band_scalefactors)
372  s->coder->set_special_band_scalefactors(s, sce);
373 
374  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
375  s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
376 }
377 
378 /**
379  * Encode scalefactors.
380  */
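/* Scalefactor indices are coded as Huffman-coded differences from the previous
 * value; ordinary, PNS and intensity bands each keep their own running offset. */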
381 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
382  SingleChannelElement *sce)
383 {
384  int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
385  int off_is = 0, noise_flag = 1;
386  int i, w;
387 
388  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
389  for (i = 0; i < sce->ics.max_sfb; i++) {
390  if (!sce->zeroes[w*16 + i]) {
391  if (sce->band_type[w*16 + i] == NOISE_BT) {
392  diff = sce->sf_idx[w*16 + i] - off_pns;
393  off_pns = sce->sf_idx[w*16 + i];
394  if (noise_flag-- > 0) {
395  put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
396  continue;
397  }
398  } else if (sce->band_type[w*16 + i] == INTENSITY_BT ||
399  sce->band_type[w*16 + i] == INTENSITY_BT2) {
400  diff = sce->sf_idx[w*16 + i] - off_is;
401  off_is = sce->sf_idx[w*16 + i];
402  } else {
403  diff = sce->sf_idx[w*16 + i] - off_sf;
404  off_sf = sce->sf_idx[w*16 + i];
405  }
406  diff += SCALE_DIFF_ZERO;
407  av_assert0(diff >= 0 && diff <= 120);
408  put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
409  }
410  }
411  }
412 }
413 
414 /**
415  * Encode pulse data.
416  */
417 static void encode_pulses(AACEncContext *s, Pulse *pulse)
418 {
419  int i;
420 
421  put_bits(&s->pb, 1, !!pulse->num_pulse);
422  if (!pulse->num_pulse)
423  return;
424 
425  put_bits(&s->pb, 2, pulse->num_pulse - 1);
426  put_bits(&s->pb, 6, pulse->start);
427  for (i = 0; i < pulse->num_pulse; i++) {
428  put_bits(&s->pb, 5, pulse->pos[i]);
429  put_bits(&s->pb, 4, pulse->amp[i]);
430  }
431 }
432 
433 /**
434  * Encode spectral coefficients processed by psychoacoustic model.
435  */
436 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
437 {
438  int start, i, w, w2;
439 
440  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
441  start = 0;
442  for (i = 0; i < sce->ics.max_sfb; i++) {
443  if (sce->zeroes[w*16 + i]) {
444  start += sce->ics.swb_sizes[i];
445  continue;
446  }
447  for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
448  s->coder->quantize_and_encode_band(s, &s->pb,
449  &sce->coeffs[start + w2*128],
450  NULL, sce->ics.swb_sizes[i],
451  sce->sf_idx[w*16 + i],
452  sce->band_type[w*16 + i],
453  s->lambda,
454  sce->ics.window_clipping[w]);
455  }
456  start += sce->ics.swb_sizes[i];
457  }
458  }
459 }
460 
461 /**
462  * Downscale spectral coefficients for near-clipping windows to avoid artifacts
463  */
464 static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
465 {
466  int start, i, j, w;
467 
468  if (sce->ics.clip_avoidance_factor < 1.0f) {
469  for (w = 0; w < sce->ics.num_windows; w++) {
470  start = 0;
471  for (i = 0; i < sce->ics.max_sfb; i++) {
472  float *swb_coeffs = &sce->coeffs[start + w*128];
473  for (j = 0; j < sce->ics.swb_sizes[i]; j++)
474  swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
475  start += sce->ics.swb_sizes[i];
476  }
477  }
478  }
479 }
480 
481 /**
482  * Encode one channel of audio data.
483  */
484 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
485  SingleChannelElement *sce,
486  int common_window)
487 {
488  put_bits(&s->pb, 8, sce->sf_idx[0]);
489  if (!common_window) {
490  put_ics_info(s, &sce->ics);
491  if (s->coder->encode_main_pred)
492  s->coder->encode_main_pred(s, sce);
493  if (s->coder->encode_ltp_info)
494  s->coder->encode_ltp_info(s, sce, 0);
495  }
496  encode_band_info(s, sce);
497  encode_scale_factors(avctx, s, sce);
498  encode_pulses(s, &sce->pulse);
499  put_bits(&s->pb, 1, !!sce->tns.present);
500  if (s->coder->encode_tns_info)
501  s->coder->encode_tns_info(s, sce);
502  put_bits(&s->pb, 1, 0); //ssr
503  encode_spectral_coeffs(s, sce);
504  return 0;
505 }
506 
507 /**
508  * Write some auxiliary information about the created AAC file.
509  */
510 static void put_bitstream_info(AACEncContext *s, const char *name)
511 {
512  int i, namelen, padbits;
513 
514  namelen = strlen(name) + 2;
515  put_bits(&s->pb, 3, TYPE_FIL);
516  put_bits(&s->pb, 4, FFMIN(namelen, 15));
517  if (namelen >= 15)
518  put_bits(&s->pb, 8, namelen - 14);
519  put_bits(&s->pb, 4, 0); //extension type - filler
520  padbits = -put_bits_count(&s->pb) & 7;
521  align_put_bits(&s->pb);
522  for (i = 0; i < namelen - 2; i++)
523  put_bits(&s->pb, 8, name[i]);
524  put_bits(&s->pb, 12 - padbits, 0);
525 }
526 
527 /*
528  * Copy input samples.
529  * Channels are reordered from libavcodec's default order to AAC order.
530  */
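/* planar_samples[ch] holds 3 * 1024 floats per channel: the frame preceding the
 * one being coded (MDCT overlap), the frame being coded, and the newest input
 * kept as psychoacoustic lookahead. */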
531 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
532 {
533  int ch;
534  int end = 2048 + (frame ? frame->nb_samples : 0);
535  const uint8_t *channel_map = s->reorder_map;
536 
537  /* copy and remap input samples */
538  for (ch = 0; ch < s->channels; ch++) {
539  /* copy last 1024 samples of previous frame to the start of the current frame */
540  memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
541 
542  /* copy new samples and zero any remaining samples */
543  if (frame) {
544  memcpy(&s->planar_samples[ch][2048],
545  frame->extended_data[channel_map[ch]],
546  frame->nb_samples * sizeof(s->planar_samples[0][0]));
547  }
548  memset(&s->planar_samples[ch][end], 0,
549  (3072 - end) * sizeof(s->planar_samples[0][0]));
550  }
551 }
552 
553 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
554  const AVFrame *frame, int *got_packet_ptr)
555 {
556  AACEncContext *s = avctx->priv_data;
557  float **samples = s->planar_samples, *samples2, *la, *overlap;
558  ChannelElement *cpe;
559  SingleChannelElement *sce;
560  IndividualChannelStream *ics;
561  int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
562  int target_bits, rate_bits, too_many_bits, too_few_bits;
563  int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
564  int chan_el_counter[4];
565  FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
566 
567  /* add current frame to queue */
568  if (frame) {
569  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
570  return ret;
571  } else {
572  if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
573  return 0;
574  }
575 
576  copy_input_samples(s, frame);
577  if (s->psypp)
578  ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
579 
580  if (!avctx->frame_number)
581  return 0;
582 
583  start_ch = 0;
584  for (i = 0; i < s->chan_map[0]; i++) {
585  FFPsyWindowInfo* wi = windows + start_ch;
586  tag = s->chan_map[i+1];
587  chans = tag == TYPE_CPE ? 2 : 1;
588  cpe = &s->cpe[i];
589  for (ch = 0; ch < chans; ch++) {
590  int k;
591  float clip_avoidance_factor;
592  sce = &cpe->ch[ch];
593  ics = &sce->ics;
594  s->cur_channel = start_ch + ch;
595  overlap = &samples[s->cur_channel][0];
596  samples2 = overlap + 1024;
597  la = samples2 + (448+64);
598  if (!frame)
599  la = NULL;
600  if (tag == TYPE_LFE) {
601  wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
602  wi[ch].window_shape = 0;
603  wi[ch].num_windows = 1;
604  wi[ch].grouping[0] = 1;
605  wi[ch].clipping[0] = 0;
606 
607  /* Only the lowest 12 coefficients are used in an LFE channel.
608  * The expression below results in only the bottom 8 coefficients
609  * being used for 11.025kHz to 16kHz sample rates.
610  */
611  ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
612  } else {
613  wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
614  ics->window_sequence[0]);
615  }
616  ics->window_sequence[1] = ics->window_sequence[0];
617  ics->window_sequence[0] = wi[ch].window_type[0];
618  ics->use_kb_window[1] = ics->use_kb_window[0];
619  ics->use_kb_window[0] = wi[ch].window_shape;
620  ics->num_windows = wi[ch].num_windows;
621  ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
622  ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
623  ics->max_sfb = FFMIN(ics->max_sfb, ics->num_swb);
624  ics->swb_offset = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
625  ff_swb_offset_128 [s->samplerate_index]:
626  ff_swb_offset_1024[s->samplerate_index];
627  ics->tns_max_bands = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
628  ff_tns_max_bands_128 [s->samplerate_index]:
629  ff_tns_max_bands_1024[s->samplerate_index];
630 
631  for (w = 0; w < ics->num_windows; w++)
632  ics->group_len[w] = wi[ch].grouping[w];
633 
634  /* Calculate input sample maximums and evaluate clipping risk */
635  clip_avoidance_factor = 0.0f;
636  for (w = 0; w < ics->num_windows; w++) {
637  const float *wbuf = overlap + w * 128;
638  const int wlen = 2048 / ics->num_windows;
639  float max = 0;
640  int j;
641  /* mdct input is 2 * output */
642  for (j = 0; j < wlen; j++)
643  max = FFMAX(max, fabsf(wbuf[j]));
644  wi[ch].clipping[w] = max;
645  }
646  for (w = 0; w < ics->num_windows; w++) {
647  if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
648  ics->window_clipping[w] = 1;
649  clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
650  } else {
651  ics->window_clipping[w] = 0;
652  }
653  }
654  if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
655  ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
656  } else {
657  ics->clip_avoidance_factor = 1.0f;
658  }
659 
660  apply_window_and_mdct(s, sce, overlap);
661 
662  if (s->options.ltp && s->coder->update_ltp) {
663  s->coder->update_ltp(s, sce);
664  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
665  s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf);
666  }
667 
668  for (k = 0; k < 1024; k++) {
669  if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation
670  av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n");
671  return AVERROR(EINVAL);
672  }
673  }
674  avoid_clipping(s, sce);
675  }
676  start_ch += chans;
677  }
678  if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
679  return ret;
680  frame_bits = its = 0;
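    /* Encode the frame, then rescale lambda and re-encode until the bit count
     * falls between too_few_bits and too_many_bits or the iteration limit is hit. */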
681  do {
682  init_put_bits(&s->pb, avpkt->data, avpkt->size);
683 
684  if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
685  put_bitstream_info(s, LIBAVCODEC_IDENT);
686  start_ch = 0;
687  target_bits = 0;
688  memset(chan_el_counter, 0, sizeof(chan_el_counter));
689  for (i = 0; i < s->chan_map[0]; i++) {
690  FFPsyWindowInfo* wi = windows + start_ch;
691  const float *coeffs[2];
692  tag = s->chan_map[i+1];
693  chans = tag == TYPE_CPE ? 2 : 1;
694  cpe = &s->cpe[i];
695  cpe->common_window = 0;
696  memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
697  memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
698  put_bits(&s->pb, 3, tag);
699  put_bits(&s->pb, 4, chan_el_counter[tag]++);
700  for (ch = 0; ch < chans; ch++) {
701  sce = &cpe->ch[ch];
702  coeffs[ch] = sce->coeffs;
703  sce->ics.predictor_present = 0;
704  sce->ics.ltp.present = 0;
705  memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
706  memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
707  memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
708  for (w = 0; w < 128; w++)
709  if (sce->band_type[w] > RESERVED_BT)
710  sce->band_type[w] = 0;
711  }
712  s->psy.bitres.alloc = -1;
713  s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
714  s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
715  if (s->psy.bitres.alloc > 0) {
716  /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
717  target_bits += s->psy.bitres.alloc
718  * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
719  s->psy.bitres.alloc /= chans;
720  }
721  s->cur_type = tag;
722  for (ch = 0; ch < chans; ch++) {
723  s->cur_channel = start_ch + ch;
724  if (s->options.pns && s->coder->mark_pns)
725  s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
726  s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
727  }
728  if (chans > 1
729  && wi[0].window_type[0] == wi[1].window_type[0]
730  && wi[0].window_shape == wi[1].window_shape) {
731 
732  cpe->common_window = 1;
733  for (w = 0; w < wi[0].num_windows; w++) {
734  if (wi[0].grouping[w] != wi[1].grouping[w]) {
735  cpe->common_window = 0;
736  break;
737  }
738  }
739  }
740  for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
741  sce = &cpe->ch[ch];
742  s->cur_channel = start_ch + ch;
743  if (s->options.tns && s->coder->search_for_tns)
744  s->coder->search_for_tns(s, sce);
745  if (s->options.tns && s->coder->apply_tns_filt)
746  s->coder->apply_tns_filt(s, sce);
747  if (sce->tns.present)
748  tns_mode = 1;
749  if (s->options.pns && s->coder->search_for_pns)
750  s->coder->search_for_pns(s, avctx, sce);
751  }
752  s->cur_channel = start_ch;
753  if (s->options.intensity_stereo) { /* Intensity Stereo */
754  if (s->coder->search_for_is)
755  s->coder->search_for_is(s, avctx, cpe);
756  if (cpe->is_mode) is_mode = 1;
757  apply_intensity_stereo(cpe);
758  }
759  if (s->options.pred) { /* Prediction */
760  for (ch = 0; ch < chans; ch++) {
761  sce = &cpe->ch[ch];
762  s->cur_channel = start_ch + ch;
763  if (s->options.pred && s->coder->search_for_pred)
764  s->coder->search_for_pred(s, sce);
765  if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
766  }
767  if (s->coder->adjust_common_pred)
768  s->coder->adjust_common_pred(s, cpe);
769  for (ch = 0; ch < chans; ch++) {
770  sce = &cpe->ch[ch];
771  s->cur_channel = start_ch + ch;
772  if (s->options.pred && s->coder->apply_main_pred)
773  s->coder->apply_main_pred(s, sce);
774  }
775  s->cur_channel = start_ch;
776  }
777  if (s->options.mid_side) { /* Mid/Side stereo */
778  if (s->options.mid_side == -1 && s->coder->search_for_ms)
779  s->coder->search_for_ms(s, cpe);
780  else if (cpe->common_window)
781  memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
782  apply_mid_side_stereo(cpe);
783  }
784  adjust_frame_information(cpe, chans);
785  if (s->options.ltp) { /* LTP */
786  for (ch = 0; ch < chans; ch++) {
787  sce = &cpe->ch[ch];
788  s->cur_channel = start_ch + ch;
789  if (s->coder->search_for_ltp)
790  s->coder->search_for_ltp(s, sce, cpe->common_window);
791  if (sce->ics.ltp.present) pred_mode = 1;
792  }
793  s->cur_channel = start_ch;
794  if (s->coder->adjust_common_ltp)
795  s->coder->adjust_common_ltp(s, cpe);
796  }
797  if (chans == 2) {
798  put_bits(&s->pb, 1, cpe->common_window);
799  if (cpe->common_window) {
800  put_ics_info(s, &cpe->ch[0].ics);
801  if (s->coder->encode_main_pred)
802  s->coder->encode_main_pred(s, &cpe->ch[0]);
803  if (s->coder->encode_ltp_info)
804  s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
805  encode_ms_info(&s->pb, cpe);
806  if (cpe->ms_mode) ms_mode = 1;
807  }
808  }
809  for (ch = 0; ch < chans; ch++) {
810  s->cur_channel = start_ch + ch;
811  encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
812  }
813  start_ch += chans;
814  }
815 
816  if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
817  /* When using a constant Q-scale, don't mess with lambda */
818  break;
819  }
820 
821  /* rate control stuff
822  * allow between the nominal bitrate, and what psy's bit reservoir says to target
823  * but drift towards the nominal bitrate always
824  */
825  frame_bits = put_bits_count(&s->pb);
826  rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
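    /* 6144 bits per channel is the largest frame the AAC bit reservoir allows;
     * 3 bits are kept back for the TYPE_END marker written at the end. */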
827  rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
828  too_many_bits = FFMAX(target_bits, rate_bits);
829  too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
830  too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
831 
832  /* When using ABR, be strict (but only for increasing) */
833  too_few_bits = too_few_bits - too_few_bits/8;
834  too_many_bits = too_many_bits + too_many_bits/2;
835 
836  if ( its == 0 /* for steady-state Q-scale tracking */
837  || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
838  || frame_bits >= 6144 * s->channels - 3 )
839  {
840  float ratio = ((float)rate_bits) / frame_bits;
841 
842  if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
843  /*
844  * This path is for steady-state Q-scale tracking
845  * When frame bits fall within the stable range, we still need to adjust
846  * lambda to maintain it like so in a stable fashion (large jumps in lambda
847  * create artifacts and should be avoided), but slowly
848  */
849  ratio = sqrtf(sqrtf(ratio));
850  ratio = av_clipf(ratio, 0.9f, 1.1f);
851  } else {
852  /* Not so fast though */
853  ratio = sqrtf(ratio);
854  }
855  s->lambda = FFMIN(s->lambda * ratio, 65536.f);
856 
857  /* Keep iterating if we must reduce and lambda is in the sky */
858  if (ratio > 0.9f && ratio < 1.1f) {
859  break;
860  } else {
861  if (is_mode || ms_mode || tns_mode || pred_mode) {
862  for (i = 0; i < s->chan_map[0]; i++) {
863  // Must restore coeffs
864  chans = tag == TYPE_CPE ? 2 : 1;
865  cpe = &s->cpe[i];
866  for (ch = 0; ch < chans; ch++)
867  memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
868  }
869  }
870  its++;
871  }
872  } else {
873  break;
874  }
875  } while (1);
876 
877  if (s->options.ltp && s->coder->ltp_insert_new_frame)
878  s->coder->ltp_insert_new_frame(s);
879 
880  put_bits(&s->pb, 3, TYPE_END);
881  flush_put_bits(&s->pb);
882 
883  s->last_frame_pb_count = put_bits_count(&s->pb);
884 
885  s->lambda_sum += s->lambda;
886  s->lambda_count++;
887 
888  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
889  &avpkt->duration);
890 
891  avpkt->size = put_bits_count(&s->pb) >> 3;
892  *got_packet_ptr = 1;
893  return 0;
894 }
895 
896 static av_cold int aac_encode_end(AVCodecContext *avctx)
897 {
898  AACEncContext *s = avctx->priv_data;
899 
900  av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
901 
902  ff_mdct_end(&s->mdct1024);
903  ff_mdct_end(&s->mdct128);
904  ff_psy_end(&s->psy);
905  ff_lpc_end(&s->lpc);
906  if (s->psypp)
907  ff_psy_preprocess_end(s->psypp);
908  av_freep(&s->buffer.samples);
909  av_freep(&s->cpe);
910  av_freep(&s->fdsp);
911  ff_af_queue_close(&s->afq);
912  return 0;
913 }
914 
915 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
916 {
917  int ret = 0;
918 
919  s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
920  if (!s->fdsp)
921  return AVERROR(ENOMEM);
922 
923  // window init
924  ff_aac_float_common_init();
925 
926  if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
927  return ret;
928  if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0)
929  return ret;
930 
931  return 0;
932 }
933 
934 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
935 {
936  int ch;
937  if (!FF_ALLOCZ_TYPED_ARRAY(s->buffer.samples, s->channels * 3 * 1024) ||
938  !FF_ALLOCZ_TYPED_ARRAY(s->cpe, s->chan_map[0]))
939  return AVERROR(ENOMEM);
940 
941  for(ch = 0; ch < s->channels; ch++)
942  s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
943 
944  return 0;
945 }
946 
947 static av_cold int aac_encode_init(AVCodecContext *avctx)
948 {
949  AACEncContext *s = avctx->priv_data;
950  int i, ret = 0;
951  const uint8_t *sizes[2];
952  uint8_t grouping[AAC_MAX_CHANNELS];
953  int lengths[2];
954 
955  /* Constants */
956  s->last_frame_pb_count = 0;
957  avctx->frame_size = 1024;
958  avctx->initial_padding = 1024;
959  s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
960 
961  /* Channel map and unspecified bitrate guessing */
962  s->channels = avctx->channels;
963 
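    /* A PCE (Program Config Element) is written only for layouts that have no
     * standard MPEG-4 channel configuration, or when it is explicitly requested
     * via the aac_pce option. */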
964  s->needs_pce = 1;
965  for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
966  if (avctx->channel_layout == aac_normal_chan_layouts[i]) {
967  s->needs_pce = s->options.pce;
968  break;
969  }
970  }
971 
972  if (s->needs_pce) {
973  char buf[64];
974  for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
975  if (avctx->channel_layout == aac_pce_configs[i].layout)
976  break;
977  av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
978  ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout \"%s\"\n", buf);
979  av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout \"%s\"\n", buf);
980  s->pce = aac_pce_configs[i];
981  s->reorder_map = s->pce.reorder_map;
982  s->chan_map = s->pce.config_map;
983  } else {
984  s->reorder_map = aac_chan_maps[s->channels - 1];
985  s->chan_map = aac_chan_configs[s->channels - 1];
986  }
987 
988  if (!avctx->bit_rate) {
989  for (i = 1; i <= s->chan_map[0]; i++) {
990  avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
991  s->chan_map[i] == TYPE_LFE ? 16000 : /* LFE */
992  69000 ; /* SCE */
993  }
994  }
995 
996  /* Samplerate */
997  for (i = 0; i < 16; i++)
998  if (avctx->sample_rate == mpeg4audio_sample_rates[i])
999  break;
1000  s->samplerate_index = i;
1001  ERROR_IF(s->samplerate_index == 16 ||
1002  s->samplerate_index >= ff_aac_swb_size_1024_len ||
1003  s->samplerate_index >= ff_aac_swb_size_128_len,
1004  "Unsupported sample rate %d\n", avctx->sample_rate);
1005 
1006  /* Bitrate limiting */
1007  WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
1008  "Too many bits %f > %d per frame requested, clamping to max\n",
1009  1024.0 * avctx->bit_rate / avctx->sample_rate,
1010  6144 * s->channels);
1011  avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
1012  avctx->bit_rate);
1013 
1014  /* Profile and option setting */
1015  avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
1016  avctx->profile;
1017  for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
1018  if (avctx->profile == aacenc_profiles[i])
1019  break;
1020  if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) {
1021  avctx->profile = FF_PROFILE_AAC_LOW;
1022  ERROR_IF(s->options.pred,
1023  "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1024  ERROR_IF(s->options.ltp,
1025  "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1026  WARN_IF(s->options.pns,
1027  "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
1028  s->options.pns = 0;
1029  } else if (avctx->profile == FF_PROFILE_AAC_LTP) {
1030  s->options.ltp = 1;
1031  ERROR_IF(s->options.pred,
1032  "Main prediction unavailable in the \"aac_ltp\" profile\n");
1033  } else if (avctx->profile == FF_PROFILE_AAC_MAIN) {
1034  s->options.pred = 1;
1035  ERROR_IF(s->options.ltp,
1036  "LTP prediction unavailable in the \"aac_main\" profile\n");
1037  } else if (s->options.ltp) {
1038  avctx->profile = FF_PROFILE_AAC_LTP;
1039  WARN_IF(1,
1040  "Chainging profile to \"aac_ltp\"\n");
1041  ERROR_IF(s->options.pred,
1042  "Main prediction unavailable in the \"aac_ltp\" profile\n");
1043  } else if (s->options.pred) {
1044  avctx->profile = FF_PROFILE_AAC_MAIN;
1045  WARN_IF(1,
1046  "Chainging profile to \"aac_main\"\n");
1047  ERROR_IF(s->options.ltp,
1048  "LTP prediction unavailable in the \"aac_main\" profile\n");
1049  }
1050  s->profile = avctx->profile;
1051 
1052  /* Coder limitations */
1053  s->coder = &ff_aac_coders[s->options.coder];
1054  if (s->options.coder == AAC_CODER_ANMR) {
1055  ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
1056  "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
1057  s->options.intensity_stereo = 0;
1058  s->options.pns = 0;
1059  }
1060  ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
1061  "The LTP profile requires experimental compliance, add -strict -2 to enable!\n");
1062 
1063  /* M/S introduces horrible artifacts with multichannel files, this is temporary */
1064  if (s->channels > 3)
1065  s->options.mid_side = 0;
1066 
1067  if ((ret = dsp_init(avctx, s)) < 0)
1068  return ret;
1069 
1070  if ((ret = alloc_buffers(avctx, s)) < 0)
1071  return ret;
1072 
1073  if ((ret = put_audio_specific_config(avctx)))
1074  return ret;
1075 
1076  sizes[0] = ff_aac_swb_size_1024[s->samplerate_index];
1077  sizes[1] = ff_aac_swb_size_128[s->samplerate_index];
1078  lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
1079  lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
1080  for (i = 0; i < s->chan_map[0]; i++)
1081  grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
1082  if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
1083  s->chan_map[0], grouping)) < 0)
1084  return ret;
1085  s->psypp = ff_psy_preprocess_init(avctx);
1086  ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
1087  s->random_state = 0x1f2e3d4c;
1088 
1089  s->abs_pow34 = abs_pow34_v;
1090  s->quant_bands = quantize_bands;
1091 
1092  if (ARCH_X86)
1093  ff_aac_dsp_init_x86(s);
1094 
1095  if (HAVE_MIPSDSP)
1096  ff_aac_coder_init_mips(s);
1097 
1098  ff_af_queue_init(avctx, &s->afq);
1099  ff_aac_tableinit();
1100 
1101  return 0;
1102 }
1103 
1104 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
1105 static const AVOption aacenc_options[] = {
1106  {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_FAST}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
1107  {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1108  {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1109  {"fast", "Default fast search", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1110  {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
1111  {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1112  {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1113  {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1114  {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1115  {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1116  {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1118  {NULL}
1119 };
1120 
1121 static const AVClass aacenc_class = {
1122  .class_name = "AAC encoder",
1123  .item_name = av_default_item_name,
1124  .option = aacenc_options,
1125  .version = LIBAVUTIL_VERSION_INT,
1126 };
1127 
1129  { "b", "0" },
1130  { NULL }
1131 };
1132 
1134  .name = "aac",
1135  .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
1136  .type = AVMEDIA_TYPE_AUDIO,
1137  .id = AV_CODEC_ID_AAC,
1138  .priv_data_size = sizeof(AACEncContext),
1139  .init = aac_encode_init,
1140  .encode2 = aac_encode_frame,
1141  .close = aac_encode_end,
1142  .defaults = aac_encode_defaults,
1143  .supported_samplerates = mpeg4audio_sample_rates,
1144  .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1145  .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
1146  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
1147  AV_SAMPLE_FMT_NONE },
1148  .priv_class = &aacenc_class,
1149 };
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: internal.h:48
#define FF_COMPLIANCE_EXPERIMENTAL
Allow nonstandardized experimental things.
Definition: avcodec.h:1601
float, planar
Definition: samplefmt.h:69
void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts, int64_t *duration)
Remove frame(s) from the queue.
void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
Definition: aacenc.c:123
#define NULL
Definition: coverity.c:32
static void align_put_bits(PutBitContext *s)
Pad the bitstream with zeros up to the next byte boundary.
Definition: put_bits.h:393
const AACCoefficientsEncoder * coder
Definition: aacenc.h:399
Band types following are encoded differently from others.
Definition: aac.h:87
static const uint8_t aac_chan_configs[AAC_MAX_CHANNELS][6]
default channel configurations
Definition: aacenctab.h:58
uint8_t use_kb_window[2]
If set, use Kaiser-Bessel window, otherwise use a sine window.
Definition: aac.h:178
int coder
Definition: aacenc.h:46
This structure describes decoded (raw) audio or video data.
Definition: frame.h:314
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:79
AVOption.
Definition: opt.h:248
void ff_aac_tableinit(void)
Definition: aactab.c:3347
enum RawDataBlockType cur_type
channel group type cur_channel belongs to
Definition: aacenc.h:406
uint8_t ** bands
scalefactor band sizes for possible frame sizes
Definition: psymodel.h:98
Definition: aac.h:225
AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128]
memoization area for quantize_band_cost
Definition: aacenc.h:413
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacenc_utils.h:40
static const AVClass aacenc_class
Definition: aacenc.c:1121
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:218
int64_t bit_rate
the average bitrate
Definition: avcodec.h:581
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
Definition: aac.h:153
uint8_t window_clipping[8]
set if a certain window is near clipping
Definition: aac.h:192
Definition: aac.h:64
const char * g
Definition: vf_curves.c:115
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
Definition: aac.h:58
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)
Cleanup audio preprocessing module.
Definition: psymodel.c:152
#define WARN_IF(cond,...)
Definition: aacenc_utils.h:274
int size
Definition: packet.h:364
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
const int ff_aac_swb_size_1024_len
Definition: aacenctab.c:108
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
Encode ics_info element.
Definition: aacenc.c:218
void(* search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:77
int common_window
Set if channels share a common &#39;IndividualChannelStream&#39; in bitstream.
Definition: aac.h:279
int alloc
number of bits allocated by the psy, or -1 if no allocation was done
Definition: psymodel.h:105
#define FF_PROFILE_AAC_MAIN
Definition: avcodec.h:1855
int lambda_count
count(lambda), for Qvg reporting
Definition: aacenc.h:405
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, const uint8_t **bands, const int *num_bands, int num_groups, const uint8_t *group_map)
Initialize psychoacoustic model.
Definition: psymodel.c:31
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aac.h:282
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
float lambda
Definition: aacenc.h:402
#define NOISE_PRE
preamble for NOISE_BT, put in bitstream with the first noise band
Definition: aac.h:157
int profile
profile
Definition: avcodec.h:1851
AVCodec.
Definition: codec.h:190
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
Encode spectral coefficients processed by psychoacoustic model.
Definition: aacenc.c:436
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:88
int * num_bands
number of scalefactor bands for possible frame sizes
Definition: psymodel.h:99
static int put_audio_specific_config(AVCodecContext *avctx)
Make AAC audio config object.
Definition: aacenc.c:91
struct AACEncContext::@6 buffer
void(* apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:71
const uint8_t ff_aac_num_swb_128[]
Definition: aactab.c:80
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
Definition: aacenc.h:59
INTFLOAT pcoeffs[1024]
coefficients for IMDCT, pristine
Definition: aac.h:262
const uint16_t * swb_offset
table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular wind...
Definition: aac.h:182
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:72
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: codec.h:75
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:33
AACEncOptions options
encoding options
Definition: aacenc.h:380
#define FF_CODEC_CAP_INIT_THREADSAFE
The codec does not modify any global variables in the init function, allowing to call the init functi...
Definition: internal.h:40
AAC encoder context.
Definition: aacenc.h:378
int num_ele[4]
front, side, back, lfe
Definition: aacenc.h:97
uint8_t
#define av_cold
Definition: attributes.h:88
void(* search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
Definition: aacenc.h:75
AVOptions.
int intensity_stereo
Definition: aacenc.h:53
#define WINDOW_FUNC(type)
Definition: aacenc.c:132
void(* update_ltp)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:72
LPCContext lpc
used by TNS
Definition: aacenc.h:390
void ff_aac_coder_init_mips(AACEncContext *c)
SingleChannelElement ch[2]
Definition: aac.h:285
int samplerate_index
MPEG-4 samplerate index.
Definition: aacenc.h:391
#define f(width, name)
Definition: cbs_vp9.c:255
Definition: aac.h:60
av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
Initialize AudioFrameQueue.
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:92
int64_t duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: packet.h:381
const uint8_t * chan_map
channel configuration map
Definition: aacenc.h:394
TemporalNoiseShaping tns
Definition: aac.h:251
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
const uint8_t ff_aac_scalefactor_bits[121]
Definition: aactab.c:111
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:632
AudioFrameQueue afq
Definition: aacenc.h:408
const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB]
Definition: aaccoder.c:897
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS]
Table to remap channels from libavcodec&#39;s default order to AAC order.
Definition: aacenctab.h:72
#define FF_PROFILE_AAC_LTP
Definition: avcodec.h:1858
uint8_t * data
Definition: packet.h:363
static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
Definition: aacenc.c:50
uint32_t tag
Definition: movenc.c:1597
Scalefactor data are intensity stereo positions (in phase).
Definition: aac.h:90
#define max(a, b)
Definition: cuda_runtime.h:33
int profile
copied from avctx
Definition: aacenc.h:388
void ff_aac_float_common_init(void)
channels
Definition: aptx.h:33
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
uint8_t reorder_map[16]
maps channels from lavc to aac order
Definition: aacenc.h:101
static void adjust_frame_information(ChannelElement *cpe, int chans)
Produce integer coefficients from scalefactors provided by the model.
Definition: aacenc.c:253
#define av_log(a,...)
static const AVOption aacenc_options[]
Definition: aacenc.c:1105
struct FFPsyContext::@113 bitres
int64_t layout
Definition: aacenc.h:96
float ff_aac_kbd_long_1024[1024]
const uint8_t * reorder_map
lavc to aac reorder map
Definition: aacenc.h:393
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
#define R
Definition: huffyuvdsp.h:34
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
void(* encode_ltp_info)(struct AACEncContext *s, SingleChannelElement *sce, int common_window)
Definition: aacenc.h:66
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
static const int sizes[][2]
Definition: img2dec.c:53
const uint8_t ff_aac_num_swb_1024[]
Definition: aactab.c:64
#define FF_PROFILE_MPEG2_AAC_LOW
Definition: avcodec.h:1863
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:115
#define FF_AAC_PROFILE_OPTS
Definition: profiles.h:28
float is_ener[128]
Intensity stereo pos (used by encoder)
Definition: aac.h:260
int initial_padding
Audio only.
Definition: avcodec.h:2055
static const AACPCEInfo aac_pce_configs[]
List of PCE (Program Configuration Element) for the channel layouts listed in channel_layout.h.
Definition: aacenc.h:139
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:611
void(* mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:104
static const int mpeg4audio_sample_rates[16]
Definition: aacenctab.h:85
int amp[4]
Definition: aac.h:229
const char * name
Name of the codec implementation.
Definition: codec.h:197
int num_windows
number of windows in a frame
Definition: psymodel.h:80
static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
Definition: aacenc.c:531
uint8_t max_sfb
number of scalefactor bands per group
Definition: aac.h:176
static const AVCodecDefault defaults[]
Definition: amfenc_h264.c:361
void(* adjust_common_ltp)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:69
#define ff_mdct_init
Definition: fft.h:161
Definition: aac.h:63
int num_swb
number of scalefactor window bands
Definition: aac.h:184
int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
Add a frame to the queue.
#define FFMAX(a, b)
Definition: common.h:102
void(* mark_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
Definition: aacenc.h:76
int index[4][8]
front, side, back, lfe
Definition: aacenc.h:99
uint64_t channel_layout
Audio channel layout.
Definition: avcodec.h:1242
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:83
#define AACENC_FLAGS
Definition: aacenc.c:1104
INTFLOAT ret_buf[2048]
PCM output buffer.
Definition: aac.h:265
void(* set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:74
MIPS optimizations info
Definition: mips.txt:2
enum WindowSequence window_sequence[2]
Definition: aac.h:177
INTFLOAT ltp_state[3072]
time signal for LTP
Definition: aac.h:266
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:333
av_cold void ff_lpc_end(LPCContext *s)
Uninitialize LPCContext.
Definition: lpc.c:323
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:275
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: codec.h:80
int cur_channel
current channel for coder context
Definition: aacenc.h:400
int last_frame_pb_count
number of bits for the previous frame
Definition: aacenc.h:403
#define FFMIN(a, b)
Definition: common.h:104
static void apply_intensity_stereo(ChannelElement *cpe)
Definition: aacenc.c:303
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: aacenc.c:553
void(* quant_bands)(int *out, const float *in, const float *scaled, int size, int is_signed, int maxval, const float Q34, const float rounding)
Definition: aacenc.h:416
uint8_t w
Definition: llviddspenc.c:39
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define FF_PROFILE_AAC_LOW
Definition: avcodec.h:1856
static const AVCodecDefault aac_encode_defaults[]
Definition: aacenc.c:1128
#define FF_PROFILE_UNKNOWN
Definition: avcodec.h:1852
int pos[4]
Definition: aac.h:228
int channels
channel count
Definition: aacenc.h:392
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
#define s(width, name)
Definition: cbs_vp9.c:257
AAC definitions and structures.
const uint8_t ff_tns_max_bands_1024[]
Definition: aactab.c:1413
static void quantize_bands(int *out, const float *in, const float *scaled, int size, int is_signed, int maxval, const float Q34, const float rounding)
Definition: aacenc_utils.h:65
FFTContext mdct128
short (128 samples) frame transform context
Definition: aacenc.h:383
PutBitContext pb
Definition: aacenc.h:381
static void(*const apply_window[4])(AVFloatDSPContext *fdsp, SingleChannelElement *sce, const float *audio)
Definition: aacenc.c:188
#define L(x)
Definition: vp56_arith.h:36
AVFloatDSPContext * fdsp
Definition: aacenc.h:384
int mid_side
Definition: aacenc.h:52
#define FF_ARRAY_ELEMS(a)
if(ret)
void av_get_channel_layout_string(char *buf, int buf_size, int nb_channels, uint64_t channel_layout)
Return a description of a channel layout.
static av_cold int aac_encode_end(AVCodecContext *avctx)
Definition: aacenc.c:896
void(* search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
Definition: aacenc.h:80
void ff_aac_dsp_init_x86(AACEncContext *s)
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1211
void(* search_for_ltp)(struct AACEncContext *s, SingleChannelElement *sce, int common_window)
Definition: aacenc.h:78
#define AV_LOG_INFO
Standard information.
Definition: log.h:205
#define CLIP_AVOIDANCE_FACTOR
Definition: aac.h:54
Libavcodec external API header.
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
Temporal Noise Shaping.
Definition: aac.h:199
int sample_rate
samples per second
Definition: avcodec.h:1191
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
Encode MS data.
Definition: aacenc.c:239
void(* ltp_insert_new_frame)(struct AACEncContext *s)
Definition: aacenc.h:73
void(* search_for_pred)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:81
main external API structure.
Definition: avcodec.h:531
int pairing[3][8]
front, side, back
Definition: aacenc.h:98
int bits
number of bits used in the bitresevoir
Definition: psymodel.h:104
#define NOISE_PRE_BITS
length of preamble
Definition: aac.h:158
Levinson-Durbin recursion.
Definition: lpc.h:47
void(* apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:70
IndividualChannelStream ics
Definition: aac.h:250
int extradata_size
Definition: avcodec.h:633
uint8_t group_len[8]
Definition: aac.h:180
Replacements for frequently missing libm functions.
float lambda_sum
sum(lambda), for Qvg reporting
Definition: aacenc.h:404
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
const uint8_t *const ff_aac_swb_size_1024[]
Definition: aacenctab.c:99
Describe the class of an AVClass context structure.
Definition: log.h:67
static void put_bitstream_info(AACEncContext *s, const char *name)
Write some auxiliary information about the created AAC file.
Definition: aacenc.c:510
const int ff_aac_swb_size_128_len
Definition: aacenctab.c:107
void(* encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:67
void(* adjust_common_pred)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:68
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
static void encode_pulses(AACEncContext *s, Pulse *pulse)
Encode pulse data.
Definition: aacenc.c:417
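For orientation, the pulse_data() element that encode_pulses() emits is small; the sketch below follows the ISO/IEC 14496-3 field widths and the Pulse struct from aac.h, and is an illustrative reconstruction rather than a verbatim copy of aacenc.c:417.
#include "put_bits.h"
#include "aac.h"
/* Sketch: write pulse_data() for one channel stream; field widths per the spec. */
static void write_pulse_data(PutBitContext *pb, const Pulse *pulse)
{
    int i;
    put_bits(pb, 1, !!pulse->num_pulse);    /* pulse_data_present */
    if (!pulse->num_pulse)
        return;
    put_bits(pb, 2, pulse->num_pulse - 1);  /* number_pulse */
    put_bits(pb, 6, pulse->start);          /* pulse_start_sfb */
    for (i = 0; i < pulse->num_pulse; i++) {
        put_bits(pb, 5, pulse->pos[i]);     /* pulse_offset */
        put_bits(pb, 4, pulse->amp[i]);     /* pulse_amp */
    }
}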
uint16_t quantize_band_cost_cache_generation
Definition: aacenc.h:412
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:183
#define TNS_MAX_ORDER
Definition: aac.h:51
FFPsyContext psy
Definition: aacenc.h:397
const uint32_t ff_aac_scalefactor_code[121]
Definition: aactab.c:92
LongTermPrediction ltp
Definition: aac.h:181
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:934
const struct FFPsyModel * model
encoder-specific model functions
Definition: psymodel.h:91
av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order, enum FFLPCType lpc_type)
Initialize LPCContext.
Definition: lpc.c:301
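The encoder needs LPC only for TNS coefficient estimation. A plausible initialization call is sketched below; the block size and the choice of Levinson-Durbin recursion are assumptions for illustration, not read from aacenc.c.
#include "avcodec.h"
#include "lpc.h"
#include "aac.h"      /* TNS_MAX_ORDER */
#include "aacenc.h"
/* Sketch: set up the LPC context used for TNS analysis (parameters assumed). */
static int init_tns_lpc(AACEncContext *s, AVCodecContext *avctx)
{
    return ff_lpc_init(&s->lpc, 2 * avctx->frame_size, TNS_MAX_ORDER,
                       FF_LPC_TYPE_LEVINSON);
}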
#define AAC_MAX_CHANNELS
Definition: aacenctab.h:39
int needs_pce
flag for non-standard layout
Definition: aacenc.h:389
FFPsyWindowInfo(* window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Suggest window sequence for channel.
Definition: psymodel.h:129
int ms_mode
Signals mid/side stereo flags coding mode (used by encoder)
Definition: aac.h:280
AAC encoder data.
const uint8_t ff_tns_max_bands_128[]
Definition: aactab.c:1425
struct FFPsyPreprocessContext * psypp
Definition: aacenc.h:398
#define NOISE_OFFSET
subtracted from global gain, used as offset for the preamble
Definition: aac.h:159
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:597
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:258
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:257
AVCodec ff_aac_encoder
Definition: aacenc.c:1133
uint8_t is_mode
Set if any bands have been encoded using intensity stereo (used by encoder)
Definition: aac.h:281
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:263
const int avpriv_mpeg4audio_sample_rates[16]
Definition: mpeg4audio.c:62
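The samplerate_index written into the AudioSpecificConfig and PCE is simply the position of the codec's sample rate in this table; a minimal lookup sketch (the helper name is hypothetical):
#include "mpeg4audio.h"
/* Sketch: map a sample rate to its MPEG-4 samplingFrequencyIndex (0..15),
 * or return a negative value if the rate is not in the table. */
static int find_samplerate_index(int sample_rate)
{
    int i;
    for (i = 0; i < 16; i++)
        if (avpriv_mpeg4audio_sample_rates[i] == sample_rate)
            return i;
    return -1;
}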
float ff_aac_kbd_short_128[128]
void(* quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
Definition: aacenc.h:63
Scalefactor data are intensity stereo positions (out of phase).
Definition: aac.h:89
Spectral Band Replication.
Definition: mpeg4audio.h:94
void ff_put_string(PutBitContext *pb, const char *string, int terminate_string)
Put the given string in the bitstream.
Definition: bitstream.c:59
const OptionDef options[]
Definition: ffmpeg_opt.c:3407
float * samples
Definition: aacenc.h:421
uint8_t prediction_used[41]
Definition: aac.h:191
static av_cold int aac_encode_init(AVCodecContext *avctx)
Definition: aacenc.c:947
common internal api header.
AACPCEInfo pce
PCE data, if needed.
Definition: aacenc.h:385
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:117
AAC encoder utilities.
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:249
windowing related information
Definition: psymodel.h:77
#define ff_mdct_end
Definition: fft.h:162
av_cold struct FFPsyPreprocessContext * ff_psy_preprocess_init(AVCodecContext *avctx)
psychoacoustic model audio preprocessing initialization
Definition: psymodel.c:103
const uint16_t *const ff_swb_offset_1024[]
Definition: aactab.c:1355
uint8_t config_map[16]
configures the encoder's channel-specific settings
Definition: aacenc.h:100
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels)
Preprocess several channels of an audio frame in order to compress it better.
Definition: psymodel.c:139
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce)
Encode scalefactors.
Definition: aacenc.c:381
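Scalefactors are coded as differences against the previous value and Huffman-coded with ff_aac_scalefactor_code/bits. The inner step might look like the simplified sketch below; the PNS and intensity-stereo paths of aacenc.c:381 are deliberately omitted, and the helper name is hypothetical.
#include "put_bits.h"
#include "aac.h"      /* SCALE_DIFF_ZERO */
#include "aactab.h"   /* ff_aac_scalefactor_code / ff_aac_scalefactor_bits */
/* Sketch: differentially Huffman-code one scalefactor against the previous one. */
static void put_scalefactor(PutBitContext *pb, int sf, int *prev_sf)
{
    int diff = sf - *prev_sf + SCALE_DIFF_ZERO;  /* bias the difference into 0..120 */
    *prev_sf = sf;
    put_bits(pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
}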
float * planar_samples[16]
saved preprocessed input
Definition: aacenc.h:386
ChannelElement * cpe
channel elements
Definition: aacenc.h:396
Individual Channel Stream.
Definition: aac.h:175
float clip_avoidance_factor
attenuation factor needed to avoid clipping; set when any window is near clipping
Definition: aac.h:193
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:64
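All of the syntax elements above are serialized through this small bitstream writer. A minimal standalone usage sketch follows; the buffer size and field values are arbitrary.
#include "put_bits.h"
/* Sketch: attach a PutBitContext to a byte buffer, write a few fields,
 * then flush so the partial last byte is zero-padded. */
static int write_some_fields(uint8_t *buf, int buf_size)
{
    PutBitContext pb;
    init_put_bits(&pb, buf, buf_size);
    put_bits(&pb, 5, 2);              /* e.g. a 5-bit object-type field */
    put_bits(&pb, 4, 3);              /* e.g. a 4-bit sampling index    */
    flush_put_bits(&pb);
    return put_bits_count(&pb) >> 3;  /* bytes written */
}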
#define ERROR_IF(cond,...)
Definition: aacenc_utils.h:268
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:276
void * priv_data
Definition: avcodec.h:558
int start
Definition: aac.h:227
FFTContext mdct1024
long (1024 samples) frame transform context
Definition: aacenc.h:382
int random_state
Definition: aacenc.h:401
static av_always_inline int diff(const uint32_t a, const uint32_t b)
int channels
number of audio channels
Definition: avcodec.h:1192
int num_pulse
Definition: aac.h:226
AAC_FLOAT lcoeffs[1024]
MDCT of LTP coefficients (used by encoder)
Definition: aac.h:267
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
Encode scalefactor band coding type.
Definition: aacenc.c:367
void(* analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels...
Definition: psymodel.h:139
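Per frame, the encoder first asks the model for a window decision and, after windowing and the MDCT, hands the coefficients back for analysis. A hedged sketch of that call order is below; the function name is hypothetical and the coefficient buffers come from the surrounding encode loop.
#include "aacenc.h"
#include "psymodel.h"
/* Sketch of the per-channel psy-model call sequence. */
static void psy_pass_sketch(AACEncContext *s, int ch,
                            const float *samples, const float *lookahead,
                            const float **coeffs, int prev_window_type)
{
    FFPsyWindowInfo wi = s->psy.model->window(&s->psy, samples, lookahead,
                                              ch, prev_window_type);
    /* ... windowing + MDCT would run here, filling the coeffs buffers ... */
    s->psy.model->analyze(&s->psy, ch, coeffs, &wi);
}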
static void apply_mid_side_stereo(ChannelElement *cpe)
Definition: aacenc.c:332
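The mid/side transform itself is a simple butterfly applied to the left/right coefficients of the bands flagged in ms_mask; a minimal sketch for one coefficient pair (the real routine walks windows and bands):
/* Sketch: convert one L/R coefficient pair to mid/side. */
static void ms_butterfly(float *l, float *r)
{
    float mid  = (*l + *r) * 0.5f;
    float side = (*l - *r) * 0.5f;
    *l = mid;
    *r = side;
}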
static const int64_t aac_normal_chan_layouts[7]
Definition: aacenctab.h:47
void ff_af_queue_close(AudioFrameQueue *afq)
Close AudioFrameQueue.
enum BandType band_type[128]
band types
Definition: aac.h:253
#define LIBAVCODEC_IDENT
Definition: version.h:42
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:940
int frame_number
Frame counter, set by libavcodec.
Definition: avcodec.h:1222
FILE * out
Definition: movenc.c:54
In the filter documentation, the word “frame” indicates either a video frame or a group of audio samples.
#define av_freep(p)
void(* encode_tns_info)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:65
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, int common_window)
Encode one channel of audio data.
Definition: aacenc.c:484
#define FF_ALLOCZ_TYPED_ARRAY(p, nelem)
Definition: internal.h:103
int8_t used[MAX_LTP_LONG_SFB]
Definition: aac.h:169
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce, float *audio)
Definition: aacenc.c:197
const uint16_t *const ff_swb_offset_128[]
Definition: aactab.c:1387
int8_t present
Definition: aac.h:165
uint8_t is_mask[128]
Set if intensity stereo is used (used by encoder)
Definition: aac.h:283
(libavfilter format negotiation, condensed from the linked documentation:) For each input and each output, query_formats lists the supported formats — pixel formats for video, channel layouts and sample formats for audio — as references to shared lists. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection; when a single format is eventually chosen for a link, the remaining references are updated. A filter that requires its input and output to have the same format therefore only has to use a reference to the same list of formats. query_formats may also leave some formats unset and return AVERROR(EAGAIN) to have negotiation run again later, which lets filters with complex requirements use the format negotiated on one link to set the formats supported on another.
static const int aacenc_profiles[]
Definition: aacenctab.h:132
void(* abs_pow34)(float *out, const float *in, const int size)
Definition: aacenc.h:415
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:361
AAC data declarations.
av_cold void ff_psy_end(FFPsyContext *ctx)
Cleanup model context at the end.
Definition: psymodel.c:83
This structure stores compressed data.
Definition: packet.h:340
static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
Downscale spectral coefficients for near-clipping windows to avoid artifacts.
Definition: aacenc.c:464
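When analysis marks a window as near clipping (clip_avoidance_factor below 1.0), the remedy is simply to scale that window's spectrum down before quantization. A simplified sketch over a single window's coefficients (the real routine iterates windows and scalefactor bands):
/* Sketch: attenuate one window's spectral coefficients by the factor the
 * psy model computed to keep the reconstruction below full scale. */
static void scale_window(float *coeffs, int num_coeffs, float clip_avoidance_factor)
{
    int i;
    if (clip_avoidance_factor < 1.0f)
        for (i = 0; i < num_coeffs; i++)
            coeffs[i] *= clip_avoidance_factor;
}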
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:380
int strict_std_compliance
strictly follow the standard (MPEG-4, ...).
Definition: avcodec.h:1596
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:915
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will be presented to the user.
Definition: packet.h:356
for (j = 16; j > 0; --j)
int pred
Definition: aacenc.h:51
int i
Definition: input.c:407
float clipping[8]
maximum absolute normalized intensity in the given window for clip avoidance
Definition: psymodel.h:82
const uint8_t *const ff_aac_swb_size_128[]
Definition: aacenctab.c:91
const char * name
Definition: opengl_enc.c:102
void(* encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
Definition: aacenc.h:61
bitstream writer API