FFmpeg
aacenc.c
1 /*
2  * AAC encoder
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder
25  */
26 
27 /***********************************
28  * TODOs:
29  * add sane pulse detection
30  ***********************************/
31 
32 #include "libavutil/libm.h"
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "put_bits.h"
37 #include "internal.h"
38 #include "mpeg4audio.h"
39 #include "kbdwin.h"
40 #include "sinewin.h"
41 #include "profiles.h"
42 
43 #include "aac.h"
44 #include "aactab.h"
45 #include "aacenc.h"
46 #include "aacenctab.h"
47 #include "aacenc_utils.h"
48 
49 #include "psymodel.h"
50 
51 static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
52 {
53  int i, j;
54  AACEncContext *s = avctx->priv_data;
55  AACPCEInfo *pce = &s->pce;
56  const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT;
57  const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT;
58 
59  put_bits(pb, 4, 0);
60 
61  put_bits(pb, 2, avctx->profile);
62  put_bits(pb, 4, s->samplerate_index);
63 
64  put_bits(pb, 4, pce->num_ele[0]); /* Front */
65  put_bits(pb, 4, pce->num_ele[1]); /* Side */
66  put_bits(pb, 4, pce->num_ele[2]); /* Back */
67  put_bits(pb, 2, pce->num_ele[3]); /* LFE */
68  put_bits(pb, 3, 0); /* Assoc data */
69  put_bits(pb, 4, 0); /* CCs */
70 
71  put_bits(pb, 1, 0); /* Mono mixdown */
72  put_bits(pb, 1, 0); /* Stereo mixdown */
73  put_bits(pb, 1, 0); /* Matrix mixdown */
74 
75  for (i = 0; i < 4; i++) {
76  for (j = 0; j < pce->num_ele[i]; j++) {
77  if (i < 3)
78  put_bits(pb, 1, pce->pairing[i][j]);
79  put_bits(pb, 4, pce->index[i][j]);
80  }
81  }
82 
83  align_put_bits(pb);
84  put_bits(pb, 8, strlen(aux_data));
85  ff_put_string(pb, aux_data, 0);
86 }
87 
88 /**
89  * Make AAC audio config object.
90  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
91  */
92 static int put_audio_specific_config(AVCodecContext *avctx)
93 {
94  PutBitContext pb;
95  AACEncContext *s = avctx->priv_data;
96  int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
97  const int max_size = 32;
98 
99  avctx->extradata = av_mallocz(max_size);
100  if (!avctx->extradata)
101  return AVERROR(ENOMEM);
102 
103  init_put_bits(&pb, avctx->extradata, max_size);
104  put_bits(&pb, 5, s->profile+1); //profile
105  put_bits(&pb, 4, s->samplerate_index); //sample rate index
106  put_bits(&pb, 4, channels);
107  //GASpecificConfig
108  put_bits(&pb, 1, 0); //frame length - 1024 samples
109  put_bits(&pb, 1, 0); //does not depend on core coder
110  put_bits(&pb, 1, 0); //is not extension
111  if (s->needs_pce)
112  put_pce(&pb, avctx);
113 
114  //Explicitly Mark SBR absent
115  put_bits(&pb, 11, 0x2b7); //sync extension
116  put_bits(&pb, 5, AOT_SBR);
117  put_bits(&pb, 1, 0);
118  flush_put_bits(&pb);
119  avctx->extradata_size = put_bits_count(&pb) >> 3;
120 
121  return 0;
122 }
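
/*
 * Worked example (illustration; the values follow from the put_bits() calls
 * above): for 44.1 kHz stereo AAC-LC without a PCE the config is
 *
 *     audioObjectType          5 bits = 2   (profile+1, AAC LC)
 *     samplingFrequencyIndex   4 bits = 4   (44100 Hz)
 *     channelConfiguration     4 bits = 2   (stereo)
 *     GASpecificConfig         3 bits = 0   (1024-sample frames, no core coder,
 *                                            no extension)
 *
 * which packs into the familiar two-byte extradata 0x12 0x10. The trailing
 * 0x2b7 sync extension plus AOT_SBR and a zero bit explicitly signals that no
 * SBR data follows.
 */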
123 
124 void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
125 {
126  ++s->quantize_band_cost_cache_generation;
127  if (s->quantize_band_cost_cache_generation == 0) {
128  memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache));
129  s->quantize_band_cost_cache_generation = 1;
130  }
131 }
132 
133 #define WINDOW_FUNC(type) \
134 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
135  SingleChannelElement *sce, \
136  const float *audio)
137 
138 WINDOW_FUNC(only_long)
139 {
140  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
141  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
142  float *out = sce->ret_buf;
143 
144  fdsp->vector_fmul (out, audio, lwindow, 1024);
145  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
146 }
147 
148 WINDOW_FUNC(long_start)
149 {
150  const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
151  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
152  float *out = sce->ret_buf;
153 
154  fdsp->vector_fmul(out, audio, lwindow, 1024);
155  memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
156  fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
157  memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
158 }
159 
160 WINDOW_FUNC(long_stop)
161 {
162  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
163  const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
164  float *out = sce->ret_buf;
165 
166  memset(out, 0, sizeof(out[0]) * 448);
167  fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
168  memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
169  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
170 }
171 
172 WINDOW_FUNC(eight_short)
173 {
174  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
175  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
176  const float *in = audio + 448;
177  float *out = sce->ret_buf;
178  int w;
179 
180  for (w = 0; w < 8; w++) {
181  fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
182  out += 128;
183  in += 128;
184  fdsp->vector_fmul_reverse(out, in, swindow, 128);
185  out += 128;
186  }
187 }
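
/*
 * Note (summary of the loop above): the eight short windows start at input
 * offset 448 and each spans 256 samples; `in` advances by 128 per iteration
 * while `out` advances by 256, so consecutive windows overlap by 50% and
 * ret_buf receives 8 blocks of 256 windowed samples, one per short MDCT.
 */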
188 
189 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
190  SingleChannelElement *sce,
191  const float *audio) = {
192  [ONLY_LONG_SEQUENCE] = apply_only_long_window,
193  [LONG_START_SEQUENCE] = apply_long_start_window,
194  [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
195  [LONG_STOP_SEQUENCE] = apply_long_stop_window
196 };
197 
198 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
199  float *audio)
200 {
201  int i;
202  const float *output = sce->ret_buf;
203 
204  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
205 
206  if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
207  s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
208  else
209  for (i = 0; i < 1024; i += 128)
210  s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2);
211  memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
212  memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
213 }
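
/*
 * Note: in the eight-short branch each 128-coefficient MDCT consumes 256
 * windowed samples, hence the `output + i*2` input pointer. The trailing
 * memcpy() slides the input buffer forward by 1024 samples for the next
 * frame's overlap, and pcoeffs keeps an untouched copy of the coefficients so
 * they can be restored later (see the "Must restore coeffs" path in
 * aac_encode_frame()).
 */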
214 
215 /**
216  * Encode ics_info element.
217  * @see Table 4.6 (syntax of ics_info)
218  */
219 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
220 {
221  int w;
222 
223  put_bits(&s->pb, 1, 0); // ics_reserved bit
224  put_bits(&s->pb, 2, info->window_sequence[0]);
225  put_bits(&s->pb, 1, info->use_kb_window[0]);
226  if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
227  put_bits(&s->pb, 6, info->max_sfb);
228  put_bits(&s->pb, 1, !!info->predictor_present);
229  } else {
230  put_bits(&s->pb, 4, info->max_sfb);
231  for (w = 1; w < 8; w++)
232  put_bits(&s->pb, 1, !info->group_len[w]);
233  }
234 }
235 
236 /**
237  * Encode MS data.
238  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
239  */
240 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
241 {
242  int i, w;
243 
244  put_bits(pb, 2, cpe->ms_mode);
245  if (cpe->ms_mode == 1)
246  for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
247  for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
248  put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
249 }
250 
251 /**
252  * Set per-window-group max_sfb and zero-band flags, and decide how mid/side signalling is coded.
253  */
254 static void adjust_frame_information(ChannelElement *cpe, int chans)
255 {
256  int i, w, w2, g, ch;
257  int maxsfb, cmaxsfb;
258 
259  for (ch = 0; ch < chans; ch++) {
260  IndividualChannelStream *ics = &cpe->ch[ch].ics;
261  maxsfb = 0;
262  cpe->ch[ch].pulse.num_pulse = 0;
263  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
264  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
265  for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
266  ;
267  maxsfb = FFMAX(maxsfb, cmaxsfb);
268  }
269  }
270  ics->max_sfb = maxsfb;
271 
272  //adjust zero bands for window groups
273  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
274  for (g = 0; g < ics->max_sfb; g++) {
275  i = 1;
276  for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
277  if (!cpe->ch[ch].zeroes[w2*16 + g]) {
278  i = 0;
279  break;
280  }
281  }
282  cpe->ch[ch].zeroes[w*16 + g] = i;
283  }
284  }
285  }
286 
287  if (chans > 1 && cpe->common_window) {
288  IndividualChannelStream *ics0 = &cpe->ch[0].ics;
289  IndividualChannelStream *ics1 = &cpe->ch[1].ics;
290  int msc = 0;
291  ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
292  ics1->max_sfb = ics0->max_sfb;
293  for (w = 0; w < ics0->num_windows*16; w += 16)
294  for (i = 0; i < ics0->max_sfb; i++)
295  if (cpe->ms_mask[w+i])
296  msc++;
297  if (msc == 0 || ics0->max_sfb == 0)
298  cpe->ms_mode = 0;
299  else
300  cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
301  }
302 }
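
/*
 * Note on ms_mode as chosen above and consumed by encode_ms_info():
 *   0 - M/S disabled, no per-band flags are written
 *   1 - per-band ms_used flags are written (only some bands use M/S)
 *   2 - M/S applies to every band, so no per-band flags are needed
 * The ternary above picks 1 while fewer than max_sfb * num_windows bands are
 * flagged and 2 once all of them are.
 */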
303 
304 static void apply_intensity_stereo(ChannelElement *cpe)
305 {
306  int w, w2, g, i;
307  IndividualChannelStream *ics = &cpe->ch[0].ics;
308  if (!cpe->common_window)
309  return;
310  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
311  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
312  int start = (w+w2) * 128;
313  for (g = 0; g < ics->num_swb; g++) {
314  int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
315  float scale = cpe->ch[0].is_ener[w*16+g];
316  if (!cpe->is_mask[w*16 + g]) {
317  start += ics->swb_sizes[g];
318  continue;
319  }
320  if (cpe->ms_mask[w*16 + g])
321  p *= -1;
322  for (i = 0; i < ics->swb_sizes[g]; i++) {
323  float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
324  cpe->ch[0].coeffs[start+i] = sum;
325  cpe->ch[1].coeffs[start+i] = 0.0f;
326  }
327  start += ics->swb_sizes[g];
328  }
329  }
330  }
331 }
332 
333 static void apply_mid_side_stereo(ChannelElement *cpe)
334 {
335  int w, w2, g, i;
336  IndividualChannelStream *ics = &cpe->ch[0].ics;
337  if (!cpe->common_window)
338  return;
339  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
340  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
341  int start = (w+w2) * 128;
342  for (g = 0; g < ics->num_swb; g++) {
343  /* ms_mask can be used for other purposes in PNS and I/S,
344  * so must not apply M/S if any band uses either, even if
345  * ms_mask is set.
346  */
347  if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
348  || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
349  || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
350  start += ics->swb_sizes[g];
351  continue;
352  }
353  for (i = 0; i < ics->swb_sizes[g]; i++) {
354  float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
355  float R = L - cpe->ch[1].coeffs[start+i];
356  cpe->ch[0].coeffs[start+i] = L;
357  cpe->ch[1].coeffs[start+i] = R;
358  }
359  start += ics->swb_sizes[g];
360  }
361  }
362  }
363 }
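
/*
 * Worked example (illustration): for a band sample with left = 1.0 and
 * right = 0.5 the code above stores
 *     L (mid)  = (1.0 + 0.5) * 0.5 = 0.75
 *     R (side) =  0.75 - 0.5       = 0.25
 * and the decoder reconstructs left = mid + side = 1.0 and
 * right = mid - side = 0.5.
 */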
364 
365 /**
366  * Encode scalefactor band coding type.
367  */
368 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
369 {
370  int w;
371 
372  if (s->coder->set_special_band_scalefactors)
373  s->coder->set_special_band_scalefactors(s, sce);
374 
375  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
376  s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
377 }
378 
379 /**
380  * Encode scalefactors.
381  */
382 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
383  SingleChannelElement *sce)
384 {
385  int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
386  int off_is = 0, noise_flag = 1;
387  int i, w;
388 
389  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
390  for (i = 0; i < sce->ics.max_sfb; i++) {
391  if (!sce->zeroes[w*16 + i]) {
392  if (sce->band_type[w*16 + i] == NOISE_BT) {
393  diff = sce->sf_idx[w*16 + i] - off_pns;
394  off_pns = sce->sf_idx[w*16 + i];
395  if (noise_flag-- > 0) {
396  put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
397  continue;
398  }
399  } else if (sce->band_type[w*16 + i] == INTENSITY_BT ||
400  sce->band_type[w*16 + i] == INTENSITY_BT2) {
401  diff = sce->sf_idx[w*16 + i] - off_is;
402  off_is = sce->sf_idx[w*16 + i];
403  } else {
404  diff = sce->sf_idx[w*16 + i] - off_sf;
405  off_sf = sce->sf_idx[w*16 + i];
406  }
407  diff += SCALE_DIFF_ZERO;
408  av_assert0(diff >= 0 && diff <= 120);
409  put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
410  }
411  }
412  }
413 }
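
/*
 * Worked example (illustration, assuming SCALE_DIFF_ZERO == 60 as in aac.h):
 * scalefactors are coded as differences through a 121-entry Huffman table, so
 * a band with sf_idx 104 following one with 100 gives diff = 4 and
 * diff + SCALE_DIFF_ZERO = 64 indexes ff_aac_scalefactor_code[] /
 * ff_aac_scalefactor_bits[]; the assert above keeps the index in 0..120.
 */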
414 
415 /**
416  * Encode pulse data.
417  */
418 static void encode_pulses(AACEncContext *s, Pulse *pulse)
419 {
420  int i;
421 
422  put_bits(&s->pb, 1, !!pulse->num_pulse);
423  if (!pulse->num_pulse)
424  return;
425 
426  put_bits(&s->pb, 2, pulse->num_pulse - 1);
427  put_bits(&s->pb, 6, pulse->start);
428  for (i = 0; i < pulse->num_pulse; i++) {
429  put_bits(&s->pb, 5, pulse->pos[i]);
430  put_bits(&s->pb, 4, pulse->amp[i]);
431  }
432 }
433 
434 /**
435  * Encode spectral coefficients processed by psychoacoustic model.
436  */
437 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
438 {
439  int start, i, w, w2;
440 
441  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
442  start = 0;
443  for (i = 0; i < sce->ics.max_sfb; i++) {
444  if (sce->zeroes[w*16 + i]) {
445  start += sce->ics.swb_sizes[i];
446  continue;
447  }
448  for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
449  s->coder->quantize_and_encode_band(s, &s->pb,
450  &sce->coeffs[start + w2*128],
451  NULL, sce->ics.swb_sizes[i],
452  sce->sf_idx[w*16 + i],
453  sce->band_type[w*16 + i],
454  s->lambda,
455  sce->ics.window_clipping[w]);
456  }
457  start += sce->ics.swb_sizes[i];
458  }
459  }
460 }
461 
462 /**
463  * Downscale spectral coefficients for near-clipping windows to avoid artifacts
464  */
465 static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
466 {
467  int start, i, j, w;
468 
469  if (sce->ics.clip_avoidance_factor < 1.0f) {
470  for (w = 0; w < sce->ics.num_windows; w++) {
471  start = 0;
472  for (i = 0; i < sce->ics.max_sfb; i++) {
473  float *swb_coeffs = &sce->coeffs[start + w*128];
474  for (j = 0; j < sce->ics.swb_sizes[i]; j++)
475  swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
476  start += sce->ics.swb_sizes[i];
477  }
478  }
479  }
480 }
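
/*
 * Worked example (illustration, assuming CLIP_AVOIDANCE_FACTOR == 0.95f as in
 * aac.h): if the windowed input of a frame peaks at 1.2, aac_encode_frame()
 * sets clip_avoidance_factor = 0.95 / 1.2 ~= 0.79 and every coefficient below
 * max_sfb is scaled by that factor here before quantization.
 */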
481 
482 /**
483  * Encode one channel of audio data.
484  */
485 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
486  SingleChannelElement *sce,
487  int common_window)
488 {
489  put_bits(&s->pb, 8, sce->sf_idx[0]);
490  if (!common_window) {
491  put_ics_info(s, &sce->ics);
492  if (s->coder->encode_main_pred)
493  s->coder->encode_main_pred(s, sce);
494  if (s->coder->encode_ltp_info)
495  s->coder->encode_ltp_info(s, sce, 0);
496  }
497  encode_band_info(s, sce);
498  encode_scale_factors(avctx, s, sce);
499  encode_pulses(s, &sce->pulse);
500  put_bits(&s->pb, 1, !!sce->tns.present);
501  if (s->coder->encode_tns_info)
502  s->coder->encode_tns_info(s, sce);
503  put_bits(&s->pb, 1, 0); //ssr
504  encode_spectral_coeffs(s, sce);
505  return 0;
506 }
507 
508 /**
509  * Write some auxiliary information about the created AAC file.
510  */
511 static void put_bitstream_info(AACEncContext *s, const char *name)
512 {
513  int i, namelen, padbits;
514 
515  namelen = strlen(name) + 2;
516  put_bits(&s->pb, 3, TYPE_FIL);
517  put_bits(&s->pb, 4, FFMIN(namelen, 15));
518  if (namelen >= 15)
519  put_bits(&s->pb, 8, namelen - 14);
520  put_bits(&s->pb, 4, 0); //extension type - filler
521  padbits = -put_bits_count(&s->pb) & 7;
522  align_put_bits(&s->pb);
523  for (i = 0; i < namelen - 2; i++)
524  put_bits(&s->pb, 8, name[i]);
525  put_bits(&s->pb, 12 - padbits, 0);
526 }
527 
528 /*
529  * Copy input samples.
530  * Channels are reordered from libavcodec's default order to AAC order.
531  */
532 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
533 {
534  int ch;
535  int end = 2048 + (frame ? frame->nb_samples : 0);
536  const uint8_t *channel_map = s->reorder_map;
537 
538  /* copy and remap input samples */
539  for (ch = 0; ch < s->channels; ch++) {
540  /* copy last 1024 samples of previous frame to the start of the current frame */
541  memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
542 
543  /* copy new samples and zero any remaining samples */
544  if (frame) {
545  memcpy(&s->planar_samples[ch][2048],
546  frame->extended_data[channel_map[ch]],
547  frame->nb_samples * sizeof(s->planar_samples[0][0]));
548  }
549  memset(&s->planar_samples[ch][end], 0,
550  (3072 - end) * sizeof(s->planar_samples[0][0]));
551  }
552 }
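
/*
 * Note on the buffer layout used above: planar_samples holds 3 * 1024 floats
 * per channel. New input always lands at offset 2048, the previous frame's
 * input is moved down to offset 1024 (needed for the 50% MDCT overlap), and
 * everything past the last real sample is zeroed so a final short frame is
 * padded with silence.
 */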
553 
554 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
555  const AVFrame *frame, int *got_packet_ptr)
556 {
557  AACEncContext *s = avctx->priv_data;
558  float **samples = s->planar_samples, *samples2, *la, *overlap;
559  ChannelElement *cpe;
560  SingleChannelElement *sce;
561  IndividualChannelStream *ics;
562  int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
563  int target_bits, rate_bits, too_many_bits, too_few_bits;
564  int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
565  int chan_el_counter[4];
566  FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
567 
568  /* add current frame to queue */
569  if (frame) {
570  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
571  return ret;
572  } else {
573  if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
574  return 0;
575  }
576 
577  copy_input_samples(s, frame);
578  if (s->psypp)
579  ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
580 
581  if (!avctx->frame_number)
582  return 0;
583 
584  start_ch = 0;
585  for (i = 0; i < s->chan_map[0]; i++) {
586  FFPsyWindowInfo* wi = windows + start_ch;
587  tag = s->chan_map[i+1];
588  chans = tag == TYPE_CPE ? 2 : 1;
589  cpe = &s->cpe[i];
590  for (ch = 0; ch < chans; ch++) {
591  int k;
592  float clip_avoidance_factor;
593  sce = &cpe->ch[ch];
594  ics = &sce->ics;
595  s->cur_channel = start_ch + ch;
596  overlap = &samples[s->cur_channel][0];
597  samples2 = overlap + 1024;
598  la = samples2 + (448+64);
599  if (!frame)
600  la = NULL;
601  if (tag == TYPE_LFE) {
602  wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
603  wi[ch].window_shape = 0;
604  wi[ch].num_windows = 1;
605  wi[ch].grouping[0] = 1;
606  wi[ch].clipping[0] = 0;
607 
608  /* Only the lowest 12 coefficients are used in a LFE channel.
609  * The expression below results in only the bottom 8 coefficients
610  * being used for 11.025kHz to 16kHz sample rates.
611  */
612  ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
613  } else {
614  wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
615  ics->window_sequence[0]);
616  }
617  ics->window_sequence[1] = ics->window_sequence[0];
618  ics->window_sequence[0] = wi[ch].window_type[0];
619  ics->use_kb_window[1] = ics->use_kb_window[0];
620  ics->use_kb_window[0] = wi[ch].window_shape;
621  ics->num_windows = wi[ch].num_windows;
622  ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
623  ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
624  ics->max_sfb = FFMIN(ics->max_sfb, ics->num_swb);
625  ics->swb_offset = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
626  ff_swb_offset_128 [s->samplerate_index]:
627  ff_swb_offset_1024[s->samplerate_index];
628  ics->tns_max_bands = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
629  ff_tns_max_bands_128 [s->samplerate_index]:
630  ff_tns_max_bands_1024[s->samplerate_index];
631 
632  for (w = 0; w < ics->num_windows; w++)
633  ics->group_len[w] = wi[ch].grouping[w];
634 
635  /* Calculate input sample maximums and evaluate clipping risk */
636  clip_avoidance_factor = 0.0f;
637  for (w = 0; w < ics->num_windows; w++) {
638  const float *wbuf = overlap + w * 128;
639  const int wlen = 2048 / ics->num_windows;
640  float max = 0;
641  int j;
642  /* mdct input is 2 * output */
643  for (j = 0; j < wlen; j++)
644  max = FFMAX(max, fabsf(wbuf[j]));
645  wi[ch].clipping[w] = max;
646  }
647  for (w = 0; w < ics->num_windows; w++) {
648  if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
649  ics->window_clipping[w] = 1;
650  clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
651  } else {
652  ics->window_clipping[w] = 0;
653  }
654  }
655  if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
656  ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
657  } else {
658  ics->clip_avoidance_factor = 1.0f;
659  }
660 
661  apply_window_and_mdct(s, sce, overlap);
662 
663  if (s->options.ltp && s->coder->update_ltp) {
664  s->coder->update_ltp(s, sce);
665  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
666  s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf);
667  }
668 
669  for (k = 0; k < 1024; k++) {
670  if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation
671  av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n");
672  return AVERROR(EINVAL);
673  }
674  }
675  avoid_clipping(s, sce);
676  }
677  start_ch += chans;
678  }
679  if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
680  return ret;
681  frame_bits = its = 0;
682  do {
683  init_put_bits(&s->pb, avpkt->data, avpkt->size);
684 
685  if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
686  put_bitstream_info(s, LIBAVCODEC_IDENT);
687  start_ch = 0;
688  target_bits = 0;
689  memset(chan_el_counter, 0, sizeof(chan_el_counter));
690  for (i = 0; i < s->chan_map[0]; i++) {
691  FFPsyWindowInfo* wi = windows + start_ch;
692  const float *coeffs[2];
693  tag = s->chan_map[i+1];
694  chans = tag == TYPE_CPE ? 2 : 1;
695  cpe = &s->cpe[i];
696  cpe->common_window = 0;
697  memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
698  memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
699  put_bits(&s->pb, 3, tag);
700  put_bits(&s->pb, 4, chan_el_counter[tag]++);
701  for (ch = 0; ch < chans; ch++) {
702  sce = &cpe->ch[ch];
703  coeffs[ch] = sce->coeffs;
704  sce->ics.predictor_present = 0;
705  sce->ics.ltp.present = 0;
706  memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
707  memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
708  memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
709  for (w = 0; w < 128; w++)
710  if (sce->band_type[w] > RESERVED_BT)
711  sce->band_type[w] = 0;
712  }
713  s->psy.bitres.alloc = -1;
714  s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
715  s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
716  if (s->psy.bitres.alloc > 0) {
717  /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
718  target_bits += s->psy.bitres.alloc
719  * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
720  s->psy.bitres.alloc /= chans;
721  }
722  s->cur_type = tag;
723  for (ch = 0; ch < chans; ch++) {
724  s->cur_channel = start_ch + ch;
725  if (s->options.pns && s->coder->mark_pns)
726  s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
727  s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
728  }
729  if (chans > 1
730  && wi[0].window_type[0] == wi[1].window_type[0]
731  && wi[0].window_shape == wi[1].window_shape) {
732 
733  cpe->common_window = 1;
734  for (w = 0; w < wi[0].num_windows; w++) {
735  if (wi[0].grouping[w] != wi[1].grouping[w]) {
736  cpe->common_window = 0;
737  break;
738  }
739  }
740  }
741  for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
742  sce = &cpe->ch[ch];
743  s->cur_channel = start_ch + ch;
744  if (s->options.tns && s->coder->search_for_tns)
745  s->coder->search_for_tns(s, sce);
746  if (s->options.tns && s->coder->apply_tns_filt)
747  s->coder->apply_tns_filt(s, sce);
748  if (sce->tns.present)
749  tns_mode = 1;
750  if (s->options.pns && s->coder->search_for_pns)
751  s->coder->search_for_pns(s, avctx, sce);
752  }
753  s->cur_channel = start_ch;
754  if (s->options.intensity_stereo) { /* Intensity Stereo */
755  if (s->coder->search_for_is)
756  s->coder->search_for_is(s, avctx, cpe);
757  if (cpe->is_mode) is_mode = 1;
758  apply_intensity_stereo(cpe);
759  }
760  if (s->options.pred) { /* Prediction */
761  for (ch = 0; ch < chans; ch++) {
762  sce = &cpe->ch[ch];
763  s->cur_channel = start_ch + ch;
764  if (s->options.pred && s->coder->search_for_pred)
765  s->coder->search_for_pred(s, sce);
766  if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
767  }
768  if (s->coder->adjust_common_pred)
769  s->coder->adjust_common_pred(s, cpe);
770  for (ch = 0; ch < chans; ch++) {
771  sce = &cpe->ch[ch];
772  s->cur_channel = start_ch + ch;
773  if (s->options.pred && s->coder->apply_main_pred)
774  s->coder->apply_main_pred(s, sce);
775  }
776  s->cur_channel = start_ch;
777  }
778  if (s->options.mid_side) { /* Mid/Side stereo */
779  if (s->options.mid_side == -1 && s->coder->search_for_ms)
780  s->coder->search_for_ms(s, cpe);
781  else if (cpe->common_window)
782  memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
783  apply_mid_side_stereo(cpe);
784  }
785  adjust_frame_information(cpe, chans);
786  if (s->options.ltp) { /* LTP */
787  for (ch = 0; ch < chans; ch++) {
788  sce = &cpe->ch[ch];
789  s->cur_channel = start_ch + ch;
790  if (s->coder->search_for_ltp)
791  s->coder->search_for_ltp(s, sce, cpe->common_window);
792  if (sce->ics.ltp.present) pred_mode = 1;
793  }
794  s->cur_channel = start_ch;
795  if (s->coder->adjust_common_ltp)
796  s->coder->adjust_common_ltp(s, cpe);
797  }
798  if (chans == 2) {
799  put_bits(&s->pb, 1, cpe->common_window);
800  if (cpe->common_window) {
801  put_ics_info(s, &cpe->ch[0].ics);
802  if (s->coder->encode_main_pred)
803  s->coder->encode_main_pred(s, &cpe->ch[0]);
804  if (s->coder->encode_ltp_info)
805  s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
806  encode_ms_info(&s->pb, cpe);
807  if (cpe->ms_mode) ms_mode = 1;
808  }
809  }
810  for (ch = 0; ch < chans; ch++) {
811  s->cur_channel = start_ch + ch;
812  encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
813  }
814  start_ch += chans;
815  }
816 
817  if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
818  /* When using a constant Q-scale, don't mess with lambda */
819  break;
820  }
821 
822  /* rate control stuff
823  * allow anywhere between the nominal bitrate and what psy's bit reservoir says to target,
824  * but always drift towards the nominal bitrate
825  */
826  frame_bits = put_bits_count(&s->pb);
827  rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
828  rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
829  too_many_bits = FFMAX(target_bits, rate_bits);
830  too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
831  too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
832 
833  /* When using ABR, be strict (but only for increasing) */
834  too_few_bits = too_few_bits - too_few_bits/8;
835  too_many_bits = too_many_bits + too_many_bits/2;
836 
837  if ( its == 0 /* for steady-state Q-scale tracking */
838  || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
839  || frame_bits >= 6144 * s->channels - 3 )
840  {
841  float ratio = ((float)rate_bits) / frame_bits;
842 
843  if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
844  /*
845  * This path is for steady-state Q-scale tracking.
846  * When frame bits fall within the stable range, lambda still needs small
847  * adjustments to keep them there, but they must be made slowly and in a
848  * stable fashion (large jumps in lambda create artifacts and should be avoided)
849  */
850  ratio = sqrtf(sqrtf(ratio));
851  ratio = av_clipf(ratio, 0.9f, 1.1f);
852  } else {
853  /* Not so fast though */
854  ratio = sqrtf(ratio);
855  }
856  s->lambda = FFMIN(s->lambda * ratio, 65536.f);
857 
858  /* Keep iterating if we must reduce and lambda is in the sky */
859  if (ratio > 0.9f && ratio < 1.1f) {
860  break;
861  } else {
862  if (is_mode || ms_mode || tns_mode || pred_mode) {
863  for (i = 0; i < s->chan_map[0]; i++) {
864  // Must restore coeffs; use this element's tag, not the stale loop variable
865  chans = s->chan_map[i+1] == TYPE_CPE ? 2 : 1;
866  cpe = &s->cpe[i];
867  for (ch = 0; ch < chans; ch++)
868  memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
869  }
870  }
871  its++;
872  }
873  } else {
874  break;
875  }
876  } while (1);
877 
878  if (s->options.ltp && s->coder->ltp_insert_new_frame)
879  s->coder->ltp_insert_new_frame(s);
880 
881  put_bits(&s->pb, 3, TYPE_END);
882  flush_put_bits(&s->pb);
883 
884  s->last_frame_pb_count = put_bits_count(&s->pb);
885 
886  s->lambda_sum += s->lambda;
887  s->lambda_count++;
888 
889  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
890  &avpkt->duration);
891 
892  avpkt->size = put_bits_count(&s->pb) >> 3;
893  *got_packet_ptr = 1;
894  return 0;
895 }
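
/*
 * Worked example of the rate-control arithmetic above (illustrative numbers):
 * at 128 kb/s and 44100 Hz, rate_bits = 128000 * 1024 / 44100 ~= 2972 bits per
 * 1024-sample frame. A frame landing outside [too_few_bits, too_many_bits]
 * rescales lambda by sqrt(rate_bits / frame_bits); a frame inside the band is
 * only nudged by sqrt(sqrt(ratio)) clipped to [0.9, 1.1]. Iteration stops once
 * the applied ratio falls inside (0.9, 1.1) or the pass limit is reached; with
 * AV_CODEC_FLAG_QSCALE the loop body runs once and lambda is left alone.
 */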
896 
897 static av_cold int aac_encode_end(AVCodecContext *avctx)
898 {
899  AACEncContext *s = avctx->priv_data;
900 
901  av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
902 
903  ff_mdct_end(&s->mdct1024);
904  ff_mdct_end(&s->mdct128);
905  ff_psy_end(&s->psy);
906  ff_lpc_end(&s->lpc);
907  if (s->psypp)
908  ff_psy_preprocess_end(s->psypp);
909  av_freep(&s->buffer.samples);
910  av_freep(&s->cpe);
911  av_freep(&s->fdsp);
912  ff_af_queue_close(&s->afq);
913  return 0;
914 }
915 
916 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
917 {
918  int ret = 0;
919 
920  s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
921  if (!s->fdsp)
922  return AVERROR(ENOMEM);
923 
924  // window init
925  ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
926  ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
927  ff_init_ff_sine_windows(10);
928  ff_init_ff_sine_windows(7);
929 
930  if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
931  return ret;
932  if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0)
933  return ret;
934 
935  return 0;
936 }
937 
938 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
939 {
940  int ch;
941  if (!FF_ALLOCZ_TYPED_ARRAY(s->buffer.samples, s->channels * 3 * 1024) ||
942  !FF_ALLOCZ_TYPED_ARRAY(s->cpe, s->chan_map[0]))
943  return AVERROR(ENOMEM);
944 
945  for(ch = 0; ch < s->channels; ch++)
946  s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
947 
948  return 0;
949 }
950 
951 static av_cold int aac_encode_init(AVCodecContext *avctx)
952 {
953  AACEncContext *s = avctx->priv_data;
954  int i, ret = 0;
955  const uint8_t *sizes[2];
956  uint8_t grouping[AAC_MAX_CHANNELS];
957  int lengths[2];
958 
959  /* Constants */
960  s->last_frame_pb_count = 0;
961  avctx->frame_size = 1024;
962  avctx->initial_padding = 1024;
963  s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
964 
965  /* Channel map and unspecified bitrate guessing */
966  s->channels = avctx->channels;
967 
968  s->needs_pce = 1;
969  for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
970  if (avctx->channel_layout == aac_normal_chan_layouts[i]) {
971  s->needs_pce = s->options.pce;
972  break;
973  }
974  }
975 
976  if (s->needs_pce) {
977  char buf[64];
978  for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
979  if (avctx->channel_layout == aac_pce_configs[i].layout)
980  break;
981  av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
982  ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout \"%s\"\n", buf);
983  av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout \"%s\"\n", buf);
984  s->pce = aac_pce_configs[i];
985  s->reorder_map = s->pce.reorder_map;
986  s->chan_map = s->pce.config_map;
987  } else {
988  s->reorder_map = aac_chan_maps[s->channels - 1];
989  s->chan_map = aac_chan_configs[s->channels - 1];
990  }
991 
992  if (!avctx->bit_rate) {
993  for (i = 1; i <= s->chan_map[0]; i++) {
994  avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
995  s->chan_map[i] == TYPE_LFE ? 16000 : /* LFE */
996  69000 ; /* SCE */
997  }
998  }
999 
1000  /* Samplerate */
1001  for (i = 0; i < 16; i++)
1002  if (avctx->sample_rate == mpeg4audio_sample_rates[i])
1003  break;
1004  s->samplerate_index = i;
1005  ERROR_IF(s->samplerate_index == 16 ||
1006  s->samplerate_index >= ff_aac_swb_size_1024_len ||
1007  s->samplerate_index >= ff_aac_swb_size_128_len,
1008  "Unsupported sample rate %d\n", avctx->sample_rate);
1009 
1010  /* Bitrate limiting */
1011  WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
1012  "Too many bits %f > %d per frame requested, clamping to max\n",
1013  1024.0 * avctx->bit_rate / avctx->sample_rate,
1014  6144 * s->channels);
1015  avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
1016  avctx->bit_rate);
1017 
1018  /* Profile and option setting */
1019  avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
1020  avctx->profile;
1021  for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
1022  if (avctx->profile == aacenc_profiles[i])
1023  break;
1024  if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) {
1025  avctx->profile = FF_PROFILE_AAC_LOW;
1026  ERROR_IF(s->options.pred,
1027  "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1028  ERROR_IF(s->options.ltp,
1029  "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1030  WARN_IF(s->options.pns,
1031  "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
1032  s->options.pns = 0;
1033  } else if (avctx->profile == FF_PROFILE_AAC_LTP) {
1034  s->options.ltp = 1;
1035  ERROR_IF(s->options.pred,
1036  "Main prediction unavailable in the \"aac_ltp\" profile\n");
1037  } else if (avctx->profile == FF_PROFILE_AAC_MAIN) {
1038  s->options.pred = 1;
1039  ERROR_IF(s->options.ltp,
1040  "LTP prediction unavailable in the \"aac_main\" profile\n");
1041  } else if (s->options.ltp) {
1042  avctx->profile = FF_PROFILE_AAC_LTP;
1043  WARN_IF(1,
1044  "Changing profile to \"aac_ltp\"\n");
1045  ERROR_IF(s->options.pred,
1046  "Main prediction unavailable in the \"aac_ltp\" profile\n");
1047  } else if (s->options.pred) {
1048  avctx->profile = FF_PROFILE_AAC_MAIN;
1049  WARN_IF(1,
1050  "Changing profile to \"aac_main\"\n");
1051  ERROR_IF(s->options.ltp,
1052  "LTP prediction unavailable in the \"aac_main\" profile\n");
1053  }
1054  s->profile = avctx->profile;
1055 
1056  /* Coder limitations */
1057  s->coder = &ff_aac_coders[s->options.coder];
1058  if (s->options.coder == AAC_CODER_ANMR) {
1059  ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
1060  "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
1061  s->options.intensity_stereo = 0;
1062  s->options.pns = 0;
1063  }
1064  ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
1065  "The LTP profile requires experimental compliance, add -strict -2 to enable!\n");
1066 
1067  /* M/S introduces horrible artifacts with multichannel files, this is temporary */
1068  if (s->channels > 3)
1069  s->options.mid_side = 0;
1070 
1071  if ((ret = dsp_init(avctx, s)) < 0)
1072  return ret;
1073 
1074  if ((ret = alloc_buffers(avctx, s)) < 0)
1075  return ret;
1076 
1077  if ((ret = put_audio_specific_config(avctx)))
1078  return ret;
1079 
1080  sizes[0] = ff_aac_swb_size_1024[s->samplerate_index];
1081  sizes[1] = ff_aac_swb_size_128[s->samplerate_index];
1082  lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
1083  lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
1084  for (i = 0; i < s->chan_map[0]; i++)
1085  grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
1086  if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
1087  s->chan_map[0], grouping)) < 0)
1088  return ret;
1089  s->psypp = ff_psy_preprocess_init(avctx);
1090  ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
1091  s->random_state = 0x1f2e3d4c;
1092 
1093  s->abs_pow34 = abs_pow34_v;
1094  s->quant_bands = quantize_bands;
1095 
1096  if (ARCH_X86)
1097  ff_aac_dsp_init_x86(s);
1098 
1099  if (HAVE_MIPSDSP)
1100  ff_aac_coder_init_mips(s);
1101 
1102  ff_af_queue_init(avctx, &s->afq);
1103  ff_aac_tableinit();
1104 
1105  return 0;
1106 }
1107 
1108 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
1109 static const AVOption aacenc_options[] = {
1110  {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_FAST}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
1111  {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1112  {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1113  {"fast", "Default fast search", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1114  {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
1115  {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1116  {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1117  {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1118  {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1119  {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1120  {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1121  FF_AAC_PROFILE_OPTS
1122  {NULL}
1123 };
1124 
1125 static const AVClass aacenc_class = {
1126  .class_name = "AAC encoder",
1127  .item_name = av_default_item_name,
1128  .option = aacenc_options,
1129  .version = LIBAVUTIL_VERSION_INT,
1130 };
1131 
1132 static const AVCodecDefault aac_encode_defaults[] = {
1133  { "b", "0" },
1134  { NULL }
1135 };
1136 
1137 AVCodec ff_aac_encoder = {
1138  .name = "aac",
1139  .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
1140  .type = AVMEDIA_TYPE_AUDIO,
1141  .id = AV_CODEC_ID_AAC,
1142  .priv_data_size = sizeof(AACEncContext),
1143  .init = aac_encode_init,
1144  .encode2 = aac_encode_frame,
1145  .close = aac_encode_end,
1146  .defaults = aac_encode_defaults,
1147  .supported_samplerates = mpeg4audio_sample_rates,
1148  .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1149  .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
1150  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
1151  AV_SAMPLE_FMT_NONE },
1152  .priv_class = &aacenc_class,
1153 };
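
What follows is a minimal usage sketch rather than part of aacenc.c: one way to open this encoder through the public libavcodec API of the same release series. The helper name open_native_aac() and the chosen bitrate and coder are illustrative assumptions.

#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>

/* Open the native "aac" encoder for float-planar input at the given
 * sample rate and channel layout. Returns an opened context or NULL. */
static AVCodecContext *open_native_aac(int sample_rate, uint64_t layout)
{
    AVCodec *codec = avcodec_find_encoder_by_name("aac");
    AVCodecContext *ctx;

    if (!codec)
        return NULL;
    ctx = avcodec_alloc_context3(codec);
    if (!ctx)
        return NULL;

    ctx->sample_fmt     = AV_SAMPLE_FMT_FLTP; /* the only format listed in sample_fmts above */
    ctx->sample_rate    = sample_rate;
    ctx->channel_layout = layout;
    ctx->channels       = av_get_channel_layout_nb_channels(layout);
    ctx->bit_rate       = 128000;             /* illustrative; 0 lets the encoder guess */

    /* private options come from aacenc_options[] above, e.g. the coder choice */
    av_opt_set(ctx->priv_data, "aac_coder", "twoloop", 0);

    if (avcodec_open2(ctx, codec, NULL) < 0) {
        avcodec_free_context(&ctx);
        return NULL;
    }
    return ctx;
}

Frames of 1024 samples per channel (avctx->frame_size; the final frame may be shorter) are then fed through the usual avcodec_send_frame()/avcodec_receive_packet() loop.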
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: internal.h:48
#define FF_COMPLIANCE_EXPERIMENTAL
Allow nonstandardized experimental things.
Definition: avcodec.h:1599
float, planar
Definition: samplefmt.h:69
void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts, int64_t *duration)
Remove frame(s) from the queue.
void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
Definition: aacenc.c:124
#define NULL
Definition: coverity.c:32
static void align_put_bits(PutBitContext *s)
Pad the bitstream with zeros up to the next byte boundary.
Definition: put_bits.h:393
const AACCoefficientsEncoder * coder
Definition: aacenc.h:397
Band types following are encoded differently from others.
Definition: aac.h:86
static const uint8_t aac_chan_configs[AAC_MAX_CHANNELS][6]
default channel configurations
Definition: aacenctab.h:58
uint8_t use_kb_window[2]
If set, use Kaiser-Bessel window, otherwise use a sine window.
Definition: aac.h:177
int coder
Definition: aacenc.h:44
This structure describes decoded (raw) audio or video data.
Definition: frame.h:314
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:77
AVOption.
Definition: opt.h:248
void ff_aac_tableinit(void)
Definition: aactab.c:3330
enum RawDataBlockType cur_type
channel group type cur_channel belongs to
Definition: aacenc.h:404
uint8_t ** bands
scalefactor band sizes for possible frame sizes
Definition: psymodel.h:98
Definition: aac.h:224
AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128]
memoization area for quantize_band_cost
Definition: aacenc.h:411
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacenc_utils.h:40
static const AVClass aacenc_class
Definition: aacenc.c:1125
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:218
av_cold void ff_kbd_window_init(float *window, float alpha, int n)
Generate a Kaiser-Bessel Derived Window.
Definition: kbdwin.c:26
int64_t bit_rate
the average bitrate
Definition: avcodec.h:581
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
Definition: aac.h:152
uint8_t window_clipping[8]
set if a certain window is near clipping
Definition: aac.h:191
Definition: aac.h:63
const char * g
Definition: vf_curves.c:115
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
Definition: aac.h:57
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)
Cleanup audio preprocessing module.
Definition: psymodel.c:152
#define WARN_IF(cond,...)
Definition: aacenc_utils.h:274
int size
Definition: packet.h:364
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
const int ff_aac_swb_size_1024_len
Definition: aacenctab.c:108
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
Encode ics_info element.
Definition: aacenc.c:219
void(* search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:75
int common_window
Set if channels share a common &#39;IndividualChannelStream&#39; in bitstream.
Definition: aac.h:278
int alloc
number of bits allocated by the psy, or -1 if no allocation was done
Definition: psymodel.h:105
const uint8_t * ff_aac_swb_size_1024[]
Definition: aacenctab.c:99
#define FF_PROFILE_AAC_MAIN
Definition: avcodec.h:1875
int lambda_count
count(lambda), for Qvg reporting
Definition: aacenc.h:403
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, const uint8_t **bands, const int *num_bands, int num_groups, const uint8_t *group_map)
Initialize psychoacoustic model.
Definition: psymodel.c:31
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aac.h:281
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
float lambda
Definition: aacenc.h:400
#define NOISE_PRE
preamble for NOISE_BT, put in bitstream with the first noise band
Definition: aac.h:156
int profile
profile
Definition: avcodec.h:1871
AVCodec.
Definition: codec.h:190
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
Encode spectral coefficients processed by psychoacoustic model.
Definition: aacenc.c:437
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:87
int * num_bands
number of scalefactor bands for possible frame sizes
Definition: psymodel.h:99
static int put_audio_specific_config(AVCodecContext *avctx)
Make AAC audio config object.
Definition: aacenc.c:92
struct AACEncContext::@6 buffer
void(* apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:69
const uint8_t ff_aac_num_swb_128[]
Definition: aactab.c:63
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
Definition: aacenc.h:57
INTFLOAT pcoeffs[1024]
coefficients for IMDCT, pristine
Definition: aac.h:261
const uint16_t * swb_offset
table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular wind...
Definition: aac.h:181
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:72
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: codec.h:75
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:33
AACEncOptions options
encoding options
Definition: aacenc.h:378
#define FF_CODEC_CAP_INIT_THREADSAFE
The codec does not modify any global variables in the init function, allowing to call the init functi...
Definition: internal.h:40
AAC encoder context.
Definition: aacenc.h:376
int num_ele[4]
front, side, back, lfe
Definition: aacenc.h:95
uint8_t
#define av_cold
Definition: attributes.h:88
void(* search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
Definition: aacenc.h:73
AVOptions.
int intensity_stereo
Definition: aacenc.h:51
#define WINDOW_FUNC(type)
Definition: aacenc.c:133
void(* update_ltp)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:70
LPCContext lpc
used by TNS
Definition: aacenc.h:388
void ff_aac_coder_init_mips(AACEncContext *c)
SingleChannelElement ch[2]
Definition: aac.h:284
int samplerate_index
MPEG-4 samplerate index.
Definition: aacenc.h:389
#define f(width, name)
Definition: cbs_vp9.c:255
Definition: aac.h:59
av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
Initialize AudioFrameQueue.
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:92
int64_t duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: packet.h:381
const uint8_t * chan_map
channel configuration map
Definition: aacenc.h:392
TemporalNoiseShaping tns
Definition: aac.h:250
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
const uint8_t ff_aac_scalefactor_bits[121]
Definition: aactab.c:94
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:632
AudioFrameQueue afq
Definition: aacenc.h:406
const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB]
Definition: aaccoder.c:897
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS]
Table to remap channels from libavcodec&#39;s default order to AAC order.
Definition: aacenctab.h:72
#define FF_PROFILE_AAC_LTP
Definition: avcodec.h:1878
uint8_t * data
Definition: packet.h:363
static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
Definition: aacenc.c:51
const uint8_t * ff_aac_swb_size_128[]
Definition: aacenctab.c:91
uint32_t tag
Definition: movenc.c:1597
Scalefactor data are intensity stereo positions (in phase).
Definition: aac.h:89
#define max(a, b)
Definition: cuda_runtime.h:33
int profile
copied from avctx
Definition: aacenc.h:386
channels
Definition: aptx.h:33
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
uint8_t reorder_map[16]
maps channels from lavc to aac order
Definition: aacenc.h:99
static void adjust_frame_information(ChannelElement *cpe, int chans)
Produce integer coefficients from scalefactors provided by the model.
Definition: aacenc.c:254
#define av_log(a,...)
struct FFPsyContext::@119 bitres
static const AVOption aacenc_options[]
Definition: aacenc.c:1109
int64_t layout
Definition: aacenc.h:94
const uint8_t * reorder_map
lavc to aac reorder map
Definition: aacenc.h:391
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
#define R
Definition: huffyuvdsp.h:34
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
void(* encode_ltp_info)(struct AACEncContext *s, SingleChannelElement *sce, int common_window)
Definition: aacenc.h:64
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
static const int sizes[][2]
Definition: img2dec.c:53
const uint8_t ff_aac_num_swb_1024[]
Definition: aactab.c:47
#define FF_PROFILE_MPEG2_AAC_LOW
Definition: avcodec.h:1883
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:153
#define FF_AAC_PROFILE_OPTS
Definition: profiles.h:28
float is_ener[128]
Intensity stereo pos (used by encoder)
Definition: aac.h:259
int initial_padding
Audio only.
Definition: avcodec.h:2072
static const AACPCEInfo aac_pce_configs[]
List of PCE (Program Configuration Element) for the channel layouts listed in channel_layout.h.
Definition: aacenc.h:137
float ff_aac_kbd_long_1024[1024]
Definition: aactab.c:40
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:611
void(* mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:109
static const int mpeg4audio_sample_rates[16]
Definition: aacenctab.h:85
int amp[4]
Definition: aac.h:228
const char * name
Name of the codec implementation.
Definition: codec.h:197
int num_windows
number of windows in a frame
Definition: psymodel.h:80
static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
Definition: aacenc.c:532
uint8_t max_sfb
number of scalefactor bands per group
Definition: aac.h:175
static const AVCodecDefault defaults[]
Definition: amfenc_h264.c:361
void(* adjust_common_ltp)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:67
#define ff_mdct_init
Definition: fft.h:169
Definition: aac.h:62
int num_swb
number of scalefactor window bands
Definition: aac.h:183
int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
Add a frame to the queue.
#define FFMAX(a, b)
Definition: common.h:94
void(* mark_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
Definition: aacenc.h:74
int index[4][8]
front, side, back, lfe
Definition: aacenc.h:97
uint64_t channel_layout
Audio channel layout.
Definition: avcodec.h:1242
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:83
#define AACENC_FLAGS
Definition: aacenc.c:1108
INTFLOAT ret_buf[2048]
PCM output buffer.
Definition: aac.h:264
void(* set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:72
MIPS optimizations info
Definition: mips.txt:2
enum WindowSequence window_sequence[2]
Definition: aac.h:176
INTFLOAT ltp_state[3072]
time signal for LTP
Definition: aac.h:265
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:333
av_cold void ff_lpc_end(LPCContext *s)
Uninitialize LPCContext.
Definition: lpc.c:322
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:275
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: codec.h:80
int cur_channel
current channel for coder context
Definition: aacenc.h:398
int last_frame_pb_count
number of bits for the previous frame
Definition: aacenc.h:401
#define FFMIN(a, b)
Definition: common.h:96
static void apply_intensity_stereo(ChannelElement *cpe)
Definition: aacenc.c:304
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: aacenc.c:554
void(* quant_bands)(int *out, const float *in, const float *scaled, int size, int is_signed, int maxval, const float Q34, const float rounding)
Definition: aacenc.h:414
uint8_t w
Definition: llviddspenc.c:38
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define FF_PROFILE_AAC_LOW
Definition: avcodec.h:1876
static const AVCodecDefault aac_encode_defaults[]
Definition: aacenc.c:1132
#define FF_PROFILE_UNKNOWN
Definition: avcodec.h:1872
int pos[4]
Definition: aac.h:227
int channels
channel count
Definition: aacenc.h:390
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
#define s(width, name)
Definition: cbs_vp9.c:257
AAC definitions and structures.
const uint8_t ff_tns_max_bands_1024[]
Definition: aactab.c:1396
static void quantize_bands(int *out, const float *in, const float *scaled, int size, int is_signed, int maxval, const float Q34, const float rounding)
Definition: aacenc_utils.h:65
FFTContext mdct128
short (128 samples) frame transform context
Definition: aacenc.h:381
PutBitContext pb
Definition: aacenc.h:379
static void(*const apply_window[4])(AVFloatDSPContext *fdsp, SingleChannelElement *sce, const float *audio)
Definition: aacenc.c:189
#define L(x)
Definition: vp56_arith.h:36
AVFloatDSPContext * fdsp
Definition: aacenc.h:382
int mid_side
Definition: aacenc.h:50
#define FF_ARRAY_ELEMS(a)
if(ret)
void av_get_channel_layout_string(char *buf, int buf_size, int nb_channels, uint64_t channel_layout)
Return a description of a channel layout.
static av_cold int aac_encode_end(AVCodecContext *avctx)
Definition: aacenc.c:897
void(* search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
Definition: aacenc.h:78
void ff_aac_dsp_init_x86(AACEncContext *s)
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1211
void(* search_for_ltp)(struct AACEncContext *s, SingleChannelElement *sce, int common_window)
Definition: aacenc.h:76
#define AV_LOG_INFO
Standard information.
Definition: log.h:205
#define CLIP_AVOIDANCE_FACTOR
Definition: aac.h:53
Libavcodec external API header.
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
Temporal Noise Shaping.
Definition: aac.h:198
int sample_rate
samples per second
Definition: avcodec.h:1191
float ff_aac_kbd_short_128[128]
Definition: aactab.c:41
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
Encode MS data.
Definition: aacenc.c:240
void(* ltp_insert_new_frame)(struct AACEncContext *s)
Definition: aacenc.h:71
void(* search_for_pred)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:79
main external API structure.
Definition: avcodec.h:531
int pairing[3][8]
front, side, back
Definition: aacenc.h:96
int bits
number of bits used in the bitresevoir
Definition: psymodel.h:104
#define NOISE_PRE_BITS
length of preamble
Definition: aac.h:157
Levinson-Durbin recursion.
Definition: lpc.h:47
void(* apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:68
IndividualChannelStream ics
Definition: aac.h:249
int extradata_size
Definition: avcodec.h:633
uint8_t group_len[8]
Definition: aac.h:179
Replacements for frequently missing libm functions.
float lambda_sum
sum(lambda), for Qvg reporting
Definition: aacenc.h:402
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Describe the class of an AVClass context structure.
Definition: log.h:67
static void put_bitstream_info(AACEncContext *s, const char *name)
Write some auxiliary information about the created AAC file.
Definition: aacenc.c:511
const int ff_aac_swb_size_128_len
Definition: aacenctab.c:107
void(* encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:65
void(* adjust_common_pred)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:66
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
static void encode_pulses(AACEncContext *s, Pulse *pulse)
Encode pulse data.
Definition: aacenc.c:418
uint16_t quantize_band_cost_cache_generation
Definition: aacenc.h:410
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:182
#define TNS_MAX_ORDER
Definition: aac.h:50
FFPsyContext psy
Definition: aacenc.h:395
const uint32_t ff_aac_scalefactor_code[121]
Definition: aactab.c:75
LongTermPrediction ltp
Definition: aac.h:180
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:938
const struct FFPsyModel * model
encoder-specific model functions
Definition: psymodel.h:91
av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order, enum FFLPCType lpc_type)
Initialize LPCContext.
Definition: lpc.c:300
#define AAC_MAX_CHANNELS
Definition: aacenctab.h:39
int needs_pce
flag for non-standard layout
Definition: aacenc.h:387
FFPsyWindowInfo(* window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Suggest window sequence for channel.
Definition: psymodel.h:129
int ms_mode
Signals mid/side stereo flags coding mode (used by encoder)
Definition: aac.h:279
AAC encoder data.
const uint8_t ff_tns_max_bands_128[]
Definition: aactab.c:1408
struct FFPsyPreprocessContext * psypp
Definition: aacenc.h:396
#define NOISE_OFFSET
subtracted from global gain, used as offset for the preamble
Definition: aac.h:158
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:597
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:257
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:256
AVCodec ff_aac_encoder
Definition: aacenc.c:1137
uint8_t is_mode
Set if any bands have been encoded using intensity stereo (used by encoder)
Definition: aac.h:280
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:262
const int avpriv_mpeg4audio_sample_rates[16]
Definition: mpeg4audio.c:62
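A minimal sketch of how a 4-bit sample-rate index could be derived from the 16-entry MPEG-4 table declared above; the exact table contents and the encoder's actual lookup are not shown in this index, so the scan-for-match helper below is illustrative only.
/* Sketch only: map a sample rate to its 4-bit MPEG-4 index by scanning
 * the 16-entry table avpriv_mpeg4audio_sample_rates declared above. */
static int find_samplerate_index(int sample_rate)
{
    int i;
    for (i = 0; i < 16; i++)
        if (avpriv_mpeg4audio_sample_rates[i] == sample_rate)
            return i;   /* index written into configuration headers */
    return -1;          /* rate not representable */
}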
void(* quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
Definition: aacenc.h:61
Scalefactor data are intensity stereo positions (out of phase).
Definition: aac.h:88
Spectral Band Replication.
Definition: mpeg4audio.h:94
void ff_put_string(PutBitContext *pb, const char *string, int terminate_string)
Put the given string in the bitstream.
Definition: bitstream.c:59
const OptionDef options[]
Definition: ffmpeg_opt.c:3400
float * samples
Definition: aacenc.h:419
uint8_t prediction_used[41]
Definition: aac.h:190
static av_cold int aac_encode_init(AVCodecContext *avctx)
Definition: aacenc.c:951
Common internal API header.
AACPCEInfo pce
PCE data, if needed.
Definition: aacenc.h:383
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:117
AAC encoder utilities.
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:248
windowing related information
Definition: psymodel.h:77
#define ff_mdct_end
Definition: fft.h:170
av_cold struct FFPsyPreprocessContext * ff_psy_preprocess_init(AVCodecContext *avctx)
psychoacoustic model audio preprocessing initialization
Definition: psymodel.c:103
const uint16_t *const ff_swb_offset_1024[]
Definition: aactab.c:1338
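A hedged sketch of how a scalefactor-band offset table such as ff_swb_offset_1024[samplerate_index] is conventionally walked; the band count and coefficient buffer below are placeholder names, not fields taken from this file.
/* Sketch only: iterate the scalefactor bands of a long (1024-sample)
 * window using an offset table; "num_swb" and "coefs" are placeholders. */
static float band_energy_sum(const uint16_t *swb_offset, int num_swb,
                             const float *coefs)
{
    float total = 0.0f;
    for (int g = 0; g < num_swb; g++) {
        for (int i = swb_offset[g]; i < swb_offset[g + 1]; i++)
            total += coefs[i] * coefs[i];   /* energy of band g */
    }
    return total;
}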
uint8_t config_map[16]
configures the encoder's channel-specific settings
Definition: aacenc.h:98
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels)
Preprocess several channels in an audio frame in order to compress it better.
Definition: psymodel.c:139
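A small sketch of the preprocessing call sequence, based only on the two psymodel.h prototypes listed above; the per-channel buffer array "chan_ptrs" is an illustrative placeholder, not a name from this file.
/* Sketch: run the optional psychoacoustic preprocessing over the input.
 * ff_psy_preprocess_init() may return NULL, in which case it is skipped. */
static void preprocess_input(AVCodecContext *avctx,
                             struct FFPsyPreprocessContext *psypp,
                             float **chan_ptrs)
{
    if (psypp)
        ff_psy_preprocess(psypp, chan_ptrs, avctx->channels);
}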
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce)
Encode scalefactors.
Definition: aacenc.c:382
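A hedged sketch of differential scalefactor coding using the 121-entry Huffman code table ff_aac_scalefactor_code listed above; the companion bit-length table ff_aac_scalefactor_bits and the +60 offset (SCALE_DIFF_ZERO) come from the wider codebase and are assumptions here, not entries in this index.
/* Sketch only: each coded band writes the Huffman code for the delta to
 * the previous scalefactor, biased by +60 into the 0..120 table range. */
static void write_scalefactor_delta(PutBitContext *pb, int prev_sf, int sf)
{
    int idx = sf - prev_sf + 60;   /* assumed SCALE_DIFF_ZERO bias */
    put_bits(pb, ff_aac_scalefactor_bits[idx], ff_aac_scalefactor_code[idx]);
}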
float * planar_samples[16]
saved preprocessed input
Definition: aacenc.h:384
ChannelElement * cpe
channel elements
Definition: aacenc.h:394
Individual Channel Stream.
Definition: aac.h:174
float clip_avoidance_factor
set to the attenuation factor needed to avoid clipping when any window is near clipping
Definition: aac.h:192
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the given PutBitContext.
Definition: put_bits.h:64
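A self-contained sketch exercising the bitstream-writer entries listed above (init_put_bits, ff_put_string, flush_put_bits); put_bits() itself is the core writer from put_bits.h and is assumed here, and the field widths and buffer size are arbitrary illustrations.
/* Sketch: open a buffer, write a couple of fields, append a string,
 * then byte-align and pad the stream with zeros. */
static void write_small_header(uint8_t *buf, int buf_size, const char *tag)
{
    PutBitContext pb;
    init_put_bits(&pb, buf, buf_size);
    put_bits(&pb, 5, 2);        /* e.g. a 5-bit object-type field */
    put_bits(&pb, 4, 3);        /* e.g. a 4-bit index field */
    ff_put_string(&pb, tag, 0); /* string without terminating zero */
    flush_put_bits(&pb);        /* pad the end of the stream with zeros */
}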
#define ERROR_IF(cond,...)
Definition: aacenc_utils.h:268
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:275
void * priv_data
Definition: avcodec.h:558
int start
Definition: aac.h:226
FFTContext mdct1024
long (1024 samples) frame transform context
Definition: aacenc.h:380
int random_state
Definition: aacenc.h:399
static av_always_inline int diff(const uint32_t a, const uint32_t b)
int channels
number of audio channels
Definition: avcodec.h:1192
int num_pulse
Definition: aac.h:225
AAC_FLOAT lcoeffs[1024]
MDCT of LTP coefficients (used by encoder)
Definition: aac.h:266
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
Encode scalefactor band coding type.
Definition: aacenc.c:368
void(* analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.
Definition: psymodel.h:139
static void apply_mid_side_stereo(ChannelElement *cpe)
Definition: aacenc.c:333
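A minimal sketch of the standard mid/side transform that such a routine applies to selected bands; the buffer layout and loop below are illustrative and are not the exact code at aacenc.c:333.
/* Sketch: replace each left/right coefficient pair with
 * mid = (L + R) / 2 and side = (L - R) / 2. */
static void ms_transform(float *left, float *right, int n)
{
    for (int i = 0; i < n; i++) {
        float mid  = (left[i] + right[i]) * 0.5f;
        float side = (left[i] - right[i]) * 0.5f;
        left[i]  = mid;
        right[i] = side;
    }
}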
static const int64_t aac_normal_chan_layouts[7]
Definition: aacenctab.h:47
void ff_af_queue_close(AudioFrameQueue *afq)
Close AudioFrameQueue.
enum BandType band_type[128]
band types
Definition: aac.h:252
#define LIBAVCODEC_IDENT
Definition: version.h:42
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:940
int frame_number
Frame counter, set by libavcodec.
Definition: avcodec.h:1222
FILE * out
Definition: movenc.c:54
#define av_freep(p)
void(* encode_tns_info)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:63
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, int common_window)
Encode one channel of audio data.
Definition: aacenc.c:485
#define FF_ALLOCZ_TYPED_ARRAY(p, nelem)
Definition: internal.h:141
int8_t used[MAX_LTP_LONG_SFB]
Definition: aac.h:168
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce, float *audio)
Definition: aacenc.c:198
const uint16_t *const ff_swb_offset_128[]
Definition: aactab.c:1370
int8_t present
Definition: aac.h:164
uint8_t is_mask[128]
Set if intensity stereo is used (used by encoder)
Definition: aac.h:282
static const int aacenc_profiles[]
Definition: aacenctab.h:132
void(* abs_pow34)(float *out, const float *in, const int size)
Definition: aacenc.h:413
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:361
AAC data declarations.
av_cold void ff_psy_end(FFPsyContext *ctx)
Cleanup model context at the end.
Definition: psymodel.c:83
This structure stores compressed data.
Definition: packet.h:340
static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
Downscale spectral coefficients for near-clipping windows to avoid artifacts.
Definition: aacenc.c:465
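A hedged sketch of the downscaling idea, using the clip_avoidance_factor field listed earlier in this index; the buffer name and bounds are placeholders rather than the exact routine at aacenc.c:465.
/* Sketch: if a window was flagged as near clipping, scale its spectral
 * coefficients down by the stored attenuation factor. */
static void scale_near_clipping(float *coeffs, int n, float clip_avoidance_factor)
{
    if (clip_avoidance_factor < 1.0f)
        for (int i = 0; i < n; i++)
            coeffs[i] *= clip_avoidance_factor;
}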
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:380
int strict_std_compliance
strictly follow the standard (MPEG-4, ...).
Definition: avcodec.h:1594
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:916
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will be presented to the user.
Definition: packet.h:356
int pred
Definition: aacenc.h:49
int i
Definition: input.c:407
void AAC_RENAME() ff_init_ff_sine_windows(int index)
initialize the specified entry of ff_sine_windows
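A sketch of the conventional MDCT sine half-window that such a table holds, w[n] = sin((n + 0.5) * pi / (2 * len)); the actual ff_sine_windows entries are filled by ff_init_ff_sine_windows(index) with len = 1 << index, so the generator below is only an assumption-labelled illustration.
#include <math.h>

/* Sketch: fill a buffer with a sine half-window of length "len". */
static void fill_sine_window(float *w, int len)
{
    for (int n = 0; n < len; n++)
        w[n] = sinf((n + 0.5f) * (float)(M_PI / (2.0 * len)));
}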
float clipping[8]
maximum absolute normalized intensity in the given window for clip avoidance
Definition: psymodel.h:82
const char * name
Definition: opengl_enc.c:102
void(* encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
Definition: aacenc.h:59
bitstream writer API