FFmpeg
aacenc.c
Go to the documentation of this file.
1 /*
2  * AAC encoder
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder
25  */
26 
27 /***********************************
28  * TODOs:
29  * add sane pulse detection
30  ***********************************/
31 
32 #include "libavutil/libm.h"
33 #include "libavutil/thread.h"
34 #include "libavutil/float_dsp.h"
35 #include "libavutil/opt.h"
36 #include "avcodec.h"
37 #include "put_bits.h"
38 #include "internal.h"
39 #include "mpeg4audio.h"
40 #include "kbdwin.h"
41 #include "sinewin.h"
42 
43 #include "aac.h"
44 #include "aactab.h"
45 #include "aacenc.h"
46 #include "aacenctab.h"
47 #include "aacenc_utils.h"
48 
49 #include "psymodel.h"
50 
52 
53 static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
54 {
55  int i, j;
56  AACEncContext *s = avctx->priv_data;
57  AACPCEInfo *pce = &s->pce;
58  const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT;
59  const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT;
60 
61  put_bits(pb, 4, 0);
62 
63  put_bits(pb, 2, avctx->profile);
64  put_bits(pb, 4, s->samplerate_index);
65 
66  put_bits(pb, 4, pce->num_ele[0]); /* Front */
67  put_bits(pb, 4, pce->num_ele[1]); /* Side */
68  put_bits(pb, 4, pce->num_ele[2]); /* Back */
69  put_bits(pb, 2, pce->num_ele[3]); /* LFE */
70  put_bits(pb, 3, 0); /* Assoc data */
71  put_bits(pb, 4, 0); /* CCs */
72 
73  put_bits(pb, 1, 0); /* Stereo mixdown */
74  put_bits(pb, 1, 0); /* Mono mixdown */
75  put_bits(pb, 1, 0); /* Something else */
76 
77  for (i = 0; i < 4; i++) {
78  for (j = 0; j < pce->num_ele[i]; j++) {
79  if (i < 3)
80  put_bits(pb, 1, pce->pairing[i][j]);
81  put_bits(pb, 4, pce->index[i][j]);
82  }
83  }
84 
86  put_bits(pb, 8, strlen(aux_data));
87  avpriv_put_string(pb, aux_data, 0);
88 }
89 
90 /**
91  * Make AAC audio config object.
92  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
93  */
95 {
96  PutBitContext pb;
97  AACEncContext *s = avctx->priv_data;
98  int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
99  const int max_size = 32;
100 
101  avctx->extradata = av_mallocz(max_size);
102  if (!avctx->extradata)
103  return AVERROR(ENOMEM);
104 
105  init_put_bits(&pb, avctx->extradata, max_size);
106  put_bits(&pb, 5, s->profile+1); //profile
107  put_bits(&pb, 4, s->samplerate_index); //sample rate index
108  put_bits(&pb, 4, channels);
109  //GASpecificConfig
110  put_bits(&pb, 1, 0); //frame length - 1024 samples
111  put_bits(&pb, 1, 0); //does not depend on core coder
112  put_bits(&pb, 1, 0); //is not extension
113  if (s->needs_pce)
114  put_pce(&pb, avctx);
115 
116  //Explicitly Mark SBR absent
117  put_bits(&pb, 11, 0x2b7); //sync extension
118  put_bits(&pb, 5, AOT_SBR);
119  put_bits(&pb, 1, 0);
120  flush_put_bits(&pb);
121  avctx->extradata_size = put_bits_count(&pb) >> 3;
122 
123  return 0;
124 }
125 
127 {
130  memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache));
132  }
133 }
134 
/**
 * Expand to the signature of a windowing routine named apply_<type>_window().
 * The function body is written directly after the macro invocation.
 * Each routine receives the float DSP context, the channel element whose
 * ret_buf is written, and the input audio samples.
 */
#define WINDOW_FUNC(type)                                          \
static void apply_ ## type ## _window(AVFloatDSPContext *fdsp,     \
                                      SingleChannelElement *sce,   \
                                      const float *audio)
139 
140 WINDOW_FUNC(only_long)
141 {
142  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
143  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
144  float *out = sce->ret_buf;
145 
146  fdsp->vector_fmul (out, audio, lwindow, 1024);
147  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
148 }
149 
150 WINDOW_FUNC(long_start)
151 {
152  const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
153  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
154  float *out = sce->ret_buf;
155 
156  fdsp->vector_fmul(out, audio, lwindow, 1024);
157  memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
158  fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
159  memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
160 }
161 
162 WINDOW_FUNC(long_stop)
163 {
164  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
165  const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
166  float *out = sce->ret_buf;
167 
168  memset(out, 0, sizeof(out[0]) * 448);
169  fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
170  memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
171  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
172 }
173 
174 WINDOW_FUNC(eight_short)
175 {
176  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
177  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
178  const float *in = audio + 448;
179  float *out = sce->ret_buf;
180  int w;
181 
182  for (w = 0; w < 8; w++) {
183  fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
184  out += 128;
185  in += 128;
186  fdsp->vector_fmul_reverse(out, in, swindow, 128);
187  out += 128;
188  }
189 }
190 
191 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
193  const float *audio) = {
194  [ONLY_LONG_SEQUENCE] = apply_only_long_window,
195  [LONG_START_SEQUENCE] = apply_long_start_window,
196  [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
197  [LONG_STOP_SEQUENCE] = apply_long_stop_window
198 };
199 
201  float *audio)
202 {
203  int i;
204  const float *output = sce->ret_buf;
205 
206  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
207 
209  s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
210  else
211  for (i = 0; i < 1024; i += 128)
212  s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2);
213  memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
214  memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
215 }
216 
217 /**
218  * Encode ics_info element.
219  * @see Table 4.6 (syntax of ics_info)
220  */
222 {
223  int w;
224 
225  put_bits(&s->pb, 1, 0); // ics_reserved bit
226  put_bits(&s->pb, 2, info->window_sequence[0]);
227  put_bits(&s->pb, 1, info->use_kb_window[0]);
228  if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
229  put_bits(&s->pb, 6, info->max_sfb);
230  put_bits(&s->pb, 1, !!info->predictor_present);
231  } else {
232  put_bits(&s->pb, 4, info->max_sfb);
233  for (w = 1; w < 8; w++)
234  put_bits(&s->pb, 1, !info->group_len[w]);
235  }
236 }
237 
238 /**
239  * Encode MS data.
240  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
241  */
243 {
244  int i, w;
245 
246  put_bits(pb, 2, cpe->ms_mode);
247  if (cpe->ms_mode == 1)
248  for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
249  for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
250  put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
251 }
252 
253 /**
254  * Produce integer coefficients from scalefactors provided by the model.
255  */
256 static void adjust_frame_information(ChannelElement *cpe, int chans)
257 {
258  int i, w, w2, g, ch;
259  int maxsfb, cmaxsfb;
260 
261  for (ch = 0; ch < chans; ch++) {
262  IndividualChannelStream *ics = &cpe->ch[ch].ics;
263  maxsfb = 0;
264  cpe->ch[ch].pulse.num_pulse = 0;
265  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
266  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
267  for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
268  ;
269  maxsfb = FFMAX(maxsfb, cmaxsfb);
270  }
271  }
272  ics->max_sfb = maxsfb;
273 
274  //adjust zero bands for window groups
275  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
276  for (g = 0; g < ics->max_sfb; g++) {
277  i = 1;
278  for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
279  if (!cpe->ch[ch].zeroes[w2*16 + g]) {
280  i = 0;
281  break;
282  }
283  }
284  cpe->ch[ch].zeroes[w*16 + g] = i;
285  }
286  }
287  }
288 
289  if (chans > 1 && cpe->common_window) {
290  IndividualChannelStream *ics0 = &cpe->ch[0].ics;
291  IndividualChannelStream *ics1 = &cpe->ch[1].ics;
292  int msc = 0;
293  ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
294  ics1->max_sfb = ics0->max_sfb;
295  for (w = 0; w < ics0->num_windows*16; w += 16)
296  for (i = 0; i < ics0->max_sfb; i++)
297  if (cpe->ms_mask[w+i])
298  msc++;
299  if (msc == 0 || ics0->max_sfb == 0)
300  cpe->ms_mode = 0;
301  else
302  cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
303  }
304 }
305 
307 {
308  int w, w2, g, i;
309  IndividualChannelStream *ics = &cpe->ch[0].ics;
310  if (!cpe->common_window)
311  return;
312  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
313  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
314  int start = (w+w2) * 128;
315  for (g = 0; g < ics->num_swb; g++) {
316  int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
317  float scale = cpe->ch[0].is_ener[w*16+g];
318  if (!cpe->is_mask[w*16 + g]) {
319  start += ics->swb_sizes[g];
320  continue;
321  }
322  if (cpe->ms_mask[w*16 + g])
323  p *= -1;
324  for (i = 0; i < ics->swb_sizes[g]; i++) {
325  float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
326  cpe->ch[0].coeffs[start+i] = sum;
327  cpe->ch[1].coeffs[start+i] = 0.0f;
328  }
329  start += ics->swb_sizes[g];
330  }
331  }
332  }
333 }
334 
336 {
337  int w, w2, g, i;
338  IndividualChannelStream *ics = &cpe->ch[0].ics;
339  if (!cpe->common_window)
340  return;
341  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
342  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
343  int start = (w+w2) * 128;
344  for (g = 0; g < ics->num_swb; g++) {
345  /* ms_mask can be used for other purposes in PNS and I/S,
346  * so must not apply M/S if any band uses either, even if
347  * ms_mask is set.
348  */
349  if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
350  || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
351  || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
352  start += ics->swb_sizes[g];
353  continue;
354  }
355  for (i = 0; i < ics->swb_sizes[g]; i++) {
356  float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
357  float R = L - cpe->ch[1].coeffs[start+i];
358  cpe->ch[0].coeffs[start+i] = L;
359  cpe->ch[1].coeffs[start+i] = R;
360  }
361  start += ics->swb_sizes[g];
362  }
363  }
364  }
365 }
366 
367 /**
368  * Encode scalefactor band coding type.
369  */
371 {
372  int w;
373 
376 
377  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
378  s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
379 }
380 
381 /**
382  * Encode scalefactors.
383  */
386 {
387  int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
388  int off_is = 0, noise_flag = 1;
389  int i, w;
390 
391  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
392  for (i = 0; i < sce->ics.max_sfb; i++) {
393  if (!sce->zeroes[w*16 + i]) {
394  if (sce->band_type[w*16 + i] == NOISE_BT) {
395  diff = sce->sf_idx[w*16 + i] - off_pns;
396  off_pns = sce->sf_idx[w*16 + i];
397  if (noise_flag-- > 0) {
398  put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
399  continue;
400  }
401  } else if (sce->band_type[w*16 + i] == INTENSITY_BT ||
402  sce->band_type[w*16 + i] == INTENSITY_BT2) {
403  diff = sce->sf_idx[w*16 + i] - off_is;
404  off_is = sce->sf_idx[w*16 + i];
405  } else {
406  diff = sce->sf_idx[w*16 + i] - off_sf;
407  off_sf = sce->sf_idx[w*16 + i];
408  }
409  diff += SCALE_DIFF_ZERO;
410  av_assert0(diff >= 0 && diff <= 120);
412  }
413  }
414  }
415 }
416 
417 /**
418  * Encode pulse data.
419  */
420 static void encode_pulses(AACEncContext *s, Pulse *pulse)
421 {
422  int i;
423 
424  put_bits(&s->pb, 1, !!pulse->num_pulse);
425  if (!pulse->num_pulse)
426  return;
427 
428  put_bits(&s->pb, 2, pulse->num_pulse - 1);
429  put_bits(&s->pb, 6, pulse->start);
430  for (i = 0; i < pulse->num_pulse; i++) {
431  put_bits(&s->pb, 5, pulse->pos[i]);
432  put_bits(&s->pb, 4, pulse->amp[i]);
433  }
434 }
435 
436 /**
437  * Encode spectral coefficients processed by psychoacoustic model.
438  */
440 {
441  int start, i, w, w2;
442 
443  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
444  start = 0;
445  for (i = 0; i < sce->ics.max_sfb; i++) {
446  if (sce->zeroes[w*16 + i]) {
447  start += sce->ics.swb_sizes[i];
448  continue;
449  }
450  for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
451  s->coder->quantize_and_encode_band(s, &s->pb,
452  &sce->coeffs[start + w2*128],
453  NULL, sce->ics.swb_sizes[i],
454  sce->sf_idx[w*16 + i],
455  sce->band_type[w*16 + i],
456  s->lambda,
457  sce->ics.window_clipping[w]);
458  }
459  start += sce->ics.swb_sizes[i];
460  }
461  }
462 }
463 
464 /**
465  * Downscale spectral coefficients for near-clipping windows to avoid artifacts
466  */
468 {
469  int start, i, j, w;
470 
471  if (sce->ics.clip_avoidance_factor < 1.0f) {
472  for (w = 0; w < sce->ics.num_windows; w++) {
473  start = 0;
474  for (i = 0; i < sce->ics.max_sfb; i++) {
475  float *swb_coeffs = &sce->coeffs[start + w*128];
476  for (j = 0; j < sce->ics.swb_sizes[i]; j++)
477  swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
478  start += sce->ics.swb_sizes[i];
479  }
480  }
481  }
482 }
483 
484 /**
485  * Encode one channel of audio data.
486  */
489  int common_window)
490 {
491  put_bits(&s->pb, 8, sce->sf_idx[0]);
492  if (!common_window) {
493  put_ics_info(s, &sce->ics);
494  if (s->coder->encode_main_pred)
495  s->coder->encode_main_pred(s, sce);
496  if (s->coder->encode_ltp_info)
497  s->coder->encode_ltp_info(s, sce, 0);
498  }
499  encode_band_info(s, sce);
500  encode_scale_factors(avctx, s, sce);
501  encode_pulses(s, &sce->pulse);
502  put_bits(&s->pb, 1, !!sce->tns.present);
503  if (s->coder->encode_tns_info)
504  s->coder->encode_tns_info(s, sce);
505  put_bits(&s->pb, 1, 0); //ssr
506  encode_spectral_coeffs(s, sce);
507  return 0;
508 }
509 
510 /**
511  * Write some auxiliary information about the created AAC file.
512  */
513 static void put_bitstream_info(AACEncContext *s, const char *name)
514 {
515  int i, namelen, padbits;
516 
517  namelen = strlen(name) + 2;
518  put_bits(&s->pb, 3, TYPE_FIL);
519  put_bits(&s->pb, 4, FFMIN(namelen, 15));
520  if (namelen >= 15)
521  put_bits(&s->pb, 8, namelen - 14);
522  put_bits(&s->pb, 4, 0); //extension type - filler
523  padbits = -put_bits_count(&s->pb) & 7;
525  for (i = 0; i < namelen - 2; i++)
526  put_bits(&s->pb, 8, name[i]);
527  put_bits(&s->pb, 12 - padbits, 0);
528 }
529 
530 /*
531  * Copy input samples.
532  * Channels are reordered from libavcodec's default order to AAC order.
533  */
535 {
536  int ch;
537  int end = 2048 + (frame ? frame->nb_samples : 0);
538  const uint8_t *channel_map = s->reorder_map;
539 
540  /* copy and remap input samples */
541  for (ch = 0; ch < s->channels; ch++) {
542  /* copy last 1024 samples of previous frame to the start of the current frame */
543  memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
544 
545  /* copy new samples and zero any remaining samples */
546  if (frame) {
547  memcpy(&s->planar_samples[ch][2048],
548  frame->extended_data[channel_map[ch]],
549  frame->nb_samples * sizeof(s->planar_samples[0][0]));
550  }
551  memset(&s->planar_samples[ch][end], 0,
552  (3072 - end) * sizeof(s->planar_samples[0][0]));
553  }
554 }
555 
556 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
557  const AVFrame *frame, int *got_packet_ptr)
558 {
559  AACEncContext *s = avctx->priv_data;
560  float **samples = s->planar_samples, *samples2, *la, *overlap;
561  ChannelElement *cpe;
564  int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
565  int target_bits, rate_bits, too_many_bits, too_few_bits;
566  int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
567  int chan_el_counter[4];
569 
570  /* add current frame to queue */
571  if (frame) {
572  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
573  return ret;
574  } else {
575  if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
576  return 0;
577  }
578 
579  copy_input_samples(s, frame);
580  if (s->psypp)
582 
583  if (!avctx->frame_number)
584  return 0;
585 
586  start_ch = 0;
587  for (i = 0; i < s->chan_map[0]; i++) {
588  FFPsyWindowInfo* wi = windows + start_ch;
589  tag = s->chan_map[i+1];
590  chans = tag == TYPE_CPE ? 2 : 1;
591  cpe = &s->cpe[i];
592  for (ch = 0; ch < chans; ch++) {
593  int k;
594  float clip_avoidance_factor;
595  sce = &cpe->ch[ch];
596  ics = &sce->ics;
597  s->cur_channel = start_ch + ch;
598  overlap = &samples[s->cur_channel][0];
599  samples2 = overlap + 1024;
600  la = samples2 + (448+64);
601  if (!frame)
602  la = NULL;
603  if (tag == TYPE_LFE) {
604  wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
605  wi[ch].window_shape = 0;
606  wi[ch].num_windows = 1;
607  wi[ch].grouping[0] = 1;
608  wi[ch].clipping[0] = 0;
609 
610  /* Only the lowest 12 coefficients are used in a LFE channel.
611  * The expression below results in only the bottom 8 coefficients
612  * being used for 11.025kHz to 16kHz sample rates.
613  */
614  ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
615  } else {
616  wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
617  ics->window_sequence[0]);
618  }
619  ics->window_sequence[1] = ics->window_sequence[0];
620  ics->window_sequence[0] = wi[ch].window_type[0];
621  ics->use_kb_window[1] = ics->use_kb_window[0];
622  ics->use_kb_window[0] = wi[ch].window_shape;
623  ics->num_windows = wi[ch].num_windows;
624  ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
625  ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
626  ics->max_sfb = FFMIN(ics->max_sfb, ics->num_swb);
627  ics->swb_offset = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
633 
634  for (w = 0; w < ics->num_windows; w++)
635  ics->group_len[w] = wi[ch].grouping[w];
636 
637  /* Calculate input sample maximums and evaluate clipping risk */
638  clip_avoidance_factor = 0.0f;
639  for (w = 0; w < ics->num_windows; w++) {
640  const float *wbuf = overlap + w * 128;
641  const int wlen = 2048 / ics->num_windows;
642  float max = 0;
643  int j;
644  /* mdct input is 2 * output */
645  for (j = 0; j < wlen; j++)
646  max = FFMAX(max, fabsf(wbuf[j]));
647  wi[ch].clipping[w] = max;
648  }
649  for (w = 0; w < ics->num_windows; w++) {
650  if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
651  ics->window_clipping[w] = 1;
652  clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
653  } else {
654  ics->window_clipping[w] = 0;
655  }
656  }
657  if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
658  ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
659  } else {
660  ics->clip_avoidance_factor = 1.0f;
661  }
662 
663  apply_window_and_mdct(s, sce, overlap);
664 
665  if (s->options.ltp && s->coder->update_ltp) {
666  s->coder->update_ltp(s, sce);
667  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
668  s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf);
669  }
670 
671  for (k = 0; k < 1024; k++) {
672  if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation
673  av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n");
674  return AVERROR(EINVAL);
675  }
676  }
677  avoid_clipping(s, sce);
678  }
679  start_ch += chans;
680  }
681  if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
682  return ret;
683  frame_bits = its = 0;
684  do {
685  init_put_bits(&s->pb, avpkt->data, avpkt->size);
686 
687  if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
689  start_ch = 0;
690  target_bits = 0;
691  memset(chan_el_counter, 0, sizeof(chan_el_counter));
692  for (i = 0; i < s->chan_map[0]; i++) {
693  FFPsyWindowInfo* wi = windows + start_ch;
694  const float *coeffs[2];
695  tag = s->chan_map[i+1];
696  chans = tag == TYPE_CPE ? 2 : 1;
697  cpe = &s->cpe[i];
698  cpe->common_window = 0;
699  memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
700  memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
701  put_bits(&s->pb, 3, tag);
702  put_bits(&s->pb, 4, chan_el_counter[tag]++);
703  for (ch = 0; ch < chans; ch++) {
704  sce = &cpe->ch[ch];
705  coeffs[ch] = sce->coeffs;
706  sce->ics.predictor_present = 0;
707  sce->ics.ltp.present = 0;
708  memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
709  memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
710  memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
711  for (w = 0; w < 128; w++)
712  if (sce->band_type[w] > RESERVED_BT)
713  sce->band_type[w] = 0;
714  }
715  s->psy.bitres.alloc = -1;
717  s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
718  if (s->psy.bitres.alloc > 0) {
719  /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
720  target_bits += s->psy.bitres.alloc
721  * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
722  s->psy.bitres.alloc /= chans;
723  }
724  s->cur_type = tag;
725  for (ch = 0; ch < chans; ch++) {
726  s->cur_channel = start_ch + ch;
727  if (s->options.pns && s->coder->mark_pns)
728  s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
729  s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
730  }
731  if (chans > 1
732  && wi[0].window_type[0] == wi[1].window_type[0]
733  && wi[0].window_shape == wi[1].window_shape) {
734 
735  cpe->common_window = 1;
736  for (w = 0; w < wi[0].num_windows; w++) {
737  if (wi[0].grouping[w] != wi[1].grouping[w]) {
738  cpe->common_window = 0;
739  break;
740  }
741  }
742  }
743  for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
744  sce = &cpe->ch[ch];
745  s->cur_channel = start_ch + ch;
746  if (s->options.tns && s->coder->search_for_tns)
747  s->coder->search_for_tns(s, sce);
748  if (s->options.tns && s->coder->apply_tns_filt)
749  s->coder->apply_tns_filt(s, sce);
750  if (sce->tns.present)
751  tns_mode = 1;
752  if (s->options.pns && s->coder->search_for_pns)
753  s->coder->search_for_pns(s, avctx, sce);
754  }
755  s->cur_channel = start_ch;
756  if (s->options.intensity_stereo) { /* Intensity Stereo */
757  if (s->coder->search_for_is)
758  s->coder->search_for_is(s, avctx, cpe);
759  if (cpe->is_mode) is_mode = 1;
761  }
762  if (s->options.pred) { /* Prediction */
763  for (ch = 0; ch < chans; ch++) {
764  sce = &cpe->ch[ch];
765  s->cur_channel = start_ch + ch;
766  if (s->options.pred && s->coder->search_for_pred)
767  s->coder->search_for_pred(s, sce);
768  if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
769  }
770  if (s->coder->adjust_common_pred)
771  s->coder->adjust_common_pred(s, cpe);
772  for (ch = 0; ch < chans; ch++) {
773  sce = &cpe->ch[ch];
774  s->cur_channel = start_ch + ch;
775  if (s->options.pred && s->coder->apply_main_pred)
776  s->coder->apply_main_pred(s, sce);
777  }
778  s->cur_channel = start_ch;
779  }
780  if (s->options.mid_side) { /* Mid/Side stereo */
781  if (s->options.mid_side == -1 && s->coder->search_for_ms)
782  s->coder->search_for_ms(s, cpe);
783  else if (cpe->common_window)
784  memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
786  }
787  adjust_frame_information(cpe, chans);
788  if (s->options.ltp) { /* LTP */
789  for (ch = 0; ch < chans; ch++) {
790  sce = &cpe->ch[ch];
791  s->cur_channel = start_ch + ch;
792  if (s->coder->search_for_ltp)
793  s->coder->search_for_ltp(s, sce, cpe->common_window);
794  if (sce->ics.ltp.present) pred_mode = 1;
795  }
796  s->cur_channel = start_ch;
797  if (s->coder->adjust_common_ltp)
798  s->coder->adjust_common_ltp(s, cpe);
799  }
800  if (chans == 2) {
801  put_bits(&s->pb, 1, cpe->common_window);
802  if (cpe->common_window) {
803  put_ics_info(s, &cpe->ch[0].ics);
804  if (s->coder->encode_main_pred)
805  s->coder->encode_main_pred(s, &cpe->ch[0]);
806  if (s->coder->encode_ltp_info)
807  s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
808  encode_ms_info(&s->pb, cpe);
809  if (cpe->ms_mode) ms_mode = 1;
810  }
811  }
812  for (ch = 0; ch < chans; ch++) {
813  s->cur_channel = start_ch + ch;
814  encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
815  }
816  start_ch += chans;
817  }
818 
819  if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
820  /* When using a constant Q-scale, don't mess with lambda */
821  break;
822  }
823 
824  /* rate control stuff
825  * allow between the nominal bitrate, and what psy's bit reservoir says to target
826  * but drift towards the nominal bitrate always
827  */
828  frame_bits = put_bits_count(&s->pb);
829  rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
830  rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
831  too_many_bits = FFMAX(target_bits, rate_bits);
832  too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
833  too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
834 
835  /* When using ABR, be strict (but only for increasing) */
836  too_few_bits = too_few_bits - too_few_bits/8;
837  too_many_bits = too_many_bits + too_many_bits/2;
838 
839  if ( its == 0 /* for steady-state Q-scale tracking */
840  || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
841  || frame_bits >= 6144 * s->channels - 3 )
842  {
843  float ratio = ((float)rate_bits) / frame_bits;
844 
845  if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
846  /*
847  * This path is for steady-state Q-scale tracking
848  * When frame bits fall within the stable range, we still need to adjust
849  * lambda to maintain it like so in a stable fashion (large jumps in lambda
850  * create artifacts and should be avoided), but slowly
851  */
852  ratio = sqrtf(sqrtf(ratio));
853  ratio = av_clipf(ratio, 0.9f, 1.1f);
854  } else {
855  /* Not so fast though */
856  ratio = sqrtf(ratio);
857  }
858  s->lambda = FFMIN(s->lambda * ratio, 65536.f);
859 
860  /* Keep iterating if we must reduce and lambda is in the sky */
861  if (ratio > 0.9f && ratio < 1.1f) {
862  break;
863  } else {
864  if (is_mode || ms_mode || tns_mode || pred_mode) {
865  for (i = 0; i < s->chan_map[0]; i++) {
866  // Must restore coeffs
867  chans = tag == TYPE_CPE ? 2 : 1;
868  cpe = &s->cpe[i];
869  for (ch = 0; ch < chans; ch++)
870  memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
871  }
872  }
873  its++;
874  }
875  } else {
876  break;
877  }
878  } while (1);
879 
880  if (s->options.ltp && s->coder->ltp_insert_new_frame)
882 
883  put_bits(&s->pb, 3, TYPE_END);
884  flush_put_bits(&s->pb);
885 
887 
888  s->lambda_sum += s->lambda;
889  s->lambda_count++;
890 
891  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
892  &avpkt->duration);
893 
894  avpkt->size = put_bits_count(&s->pb) >> 3;
895  *got_packet_ptr = 1;
896  return 0;
897 }
898 
900 {
901  AACEncContext *s = avctx->priv_data;
902 
903  av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
904 
905  ff_mdct_end(&s->mdct1024);
906  ff_mdct_end(&s->mdct128);
907  ff_psy_end(&s->psy);
908  ff_lpc_end(&s->lpc);
909  if (s->psypp)
911  av_freep(&s->buffer.samples);
912  av_freep(&s->cpe);
913  av_freep(&s->fdsp);
914  ff_af_queue_close(&s->afq);
915  return 0;
916 }
917 
919 {
920  int ret = 0;
921 
923  if (!s->fdsp)
924  return AVERROR(ENOMEM);
925 
926  // window init
931 
932  if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
933  return ret;
934  if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0)
935  return ret;
936 
937  return 0;
938 }
939 
941 {
942  int ch;
943  FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
944  FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
945 
946  for(ch = 0; ch < s->channels; ch++)
947  s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
948 
949  return 0;
950 alloc_fail:
951  return AVERROR(ENOMEM);
952 }
953 
955 {
957 }
958 
960 {
961  AACEncContext *s = avctx->priv_data;
962  int i, ret = 0;
963  const uint8_t *sizes[2];
964  uint8_t grouping[AAC_MAX_CHANNELS];
965  int lengths[2];
966 
967  /* Constants */
968  s->last_frame_pb_count = 0;
969  avctx->frame_size = 1024;
970  avctx->initial_padding = 1024;
971  s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
972 
973  /* Channel map and unspecified bitrate guessing */
974  s->channels = avctx->channels;
975 
976  s->needs_pce = 1;
977  for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
978  if (avctx->channel_layout == aac_normal_chan_layouts[i]) {
979  s->needs_pce = s->options.pce;
980  break;
981  }
982  }
983 
984  if (s->needs_pce) {
985  char buf[64];
986  for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
987  if (avctx->channel_layout == aac_pce_configs[i].layout)
988  break;
989  av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
990  ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout \"%s\"\n", buf);
991  av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout \"%s\"\n", buf);
992  s->pce = aac_pce_configs[i];
993  s->reorder_map = s->pce.reorder_map;
994  s->chan_map = s->pce.config_map;
995  } else {
996  s->reorder_map = aac_chan_maps[s->channels - 1];
997  s->chan_map = aac_chan_configs[s->channels - 1];
998  }
999 
1000  if (!avctx->bit_rate) {
1001  for (i = 1; i <= s->chan_map[0]; i++) {
1002  avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
1003  s->chan_map[i] == TYPE_LFE ? 16000 : /* LFE */
1004  69000 ; /* SCE */
1005  }
1006  }
1007 
1008  /* Samplerate */
1009  for (i = 0; i < 16; i++)
1011  break;
1012  s->samplerate_index = i;
1013  ERROR_IF(s->samplerate_index == 16 ||
1016  "Unsupported sample rate %d\n", avctx->sample_rate);
1017 
1018  /* Bitrate limiting */
1019  WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
1020  "Too many bits %f > %d per frame requested, clamping to max\n",
1021  1024.0 * avctx->bit_rate / avctx->sample_rate,
1022  6144 * s->channels);
1023  avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
1024  avctx->bit_rate);
1025 
1026  /* Profile and option setting */
1027  avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
1028  avctx->profile;
1029  for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
1030  if (avctx->profile == aacenc_profiles[i])
1031  break;
1032  if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) {
1033  avctx->profile = FF_PROFILE_AAC_LOW;
1034  ERROR_IF(s->options.pred,
1035  "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1036  ERROR_IF(s->options.ltp,
1037  "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1038  WARN_IF(s->options.pns,
1039  "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
1040  s->options.pns = 0;
1041  } else if (avctx->profile == FF_PROFILE_AAC_LTP) {
1042  s->options.ltp = 1;
1043  ERROR_IF(s->options.pred,
1044  "Main prediction unavailable in the \"aac_ltp\" profile\n");
1045  } else if (avctx->profile == FF_PROFILE_AAC_MAIN) {
1046  s->options.pred = 1;
1047  ERROR_IF(s->options.ltp,
1048  "LTP prediction unavailable in the \"aac_main\" profile\n");
1049  } else if (s->options.ltp) {
1050  avctx->profile = FF_PROFILE_AAC_LTP;
1051  WARN_IF(1,
1052  "Chainging profile to \"aac_ltp\"\n");
1053  ERROR_IF(s->options.pred,
1054  "Main prediction unavailable in the \"aac_ltp\" profile\n");
1055  } else if (s->options.pred) {
1056  avctx->profile = FF_PROFILE_AAC_MAIN;
1057  WARN_IF(1,
1058  "Chainging profile to \"aac_main\"\n");
1059  ERROR_IF(s->options.ltp,
1060  "LTP prediction unavailable in the \"aac_main\" profile\n");
1061  }
1062  s->profile = avctx->profile;
1063 
1064  /* Coder limitations */
1065  s->coder = &ff_aac_coders[s->options.coder];
1066  if (s->options.coder == AAC_CODER_ANMR) {
1068  "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
1069  s->options.intensity_stereo = 0;
1070  s->options.pns = 0;
1071  }
1073  "The LPT profile requires experimental compliance, add -strict -2 to enable!\n");
1074 
1075  /* M/S introduces horrible artifacts with multichannel files, this is temporary */
1076  if (s->channels > 3)
1077  s->options.mid_side = 0;
1078 
1079  if ((ret = dsp_init(avctx, s)) < 0)
1080  goto fail;
1081 
1082  if ((ret = alloc_buffers(avctx, s)) < 0)
1083  goto fail;
1084 
1085  if ((ret = put_audio_specific_config(avctx)))
1086  goto fail;
1087 
1088  sizes[0] = ff_aac_swb_size_1024[s->samplerate_index];
1089  sizes[1] = ff_aac_swb_size_128[s->samplerate_index];
1090  lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
1091  lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
1092  for (i = 0; i < s->chan_map[0]; i++)
1093  grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
1094  if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
1095  s->chan_map[0], grouping)) < 0)
1096  goto fail;
1097  s->psypp = ff_psy_preprocess_init(avctx);
1099  s->random_state = 0x1f2e3d4c;
1100 
1101  s->abs_pow34 = abs_pow34_v;
1103 
1104  if (ARCH_X86)
1106 
1107  if (HAVE_MIPSDSP)
1109 
1111  return AVERROR_UNKNOWN;
1112 
1113  ff_af_queue_init(avctx, &s->afq);
1114 
1115  return 0;
1116 fail:
1117  aac_encode_end(avctx);
1118  return ret;
1119 }
1120 
/* Flag set attached to every entry of aacenc_options below: the bitwise OR of
 * AV_OPT_FLAG_ENCODING_PARAM and AV_OPT_FLAG_AUDIO_PARAM.
 * NOTE(review): the expansion is not parenthesized. That is harmless where it
 * is used here (as a complete initializer element), but it would bind
 * unexpectedly if the macro were ever used inside a larger expression. */
1121 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
/**
 * Private options of the AAC encoder.
 *
 * Every non-constant entry stores its value in the AACEncContext.options
 * member named by its offsetof() expression. The table ends with a {NULL}
 * entry and is referenced from aacenc_class.option.
 */
1122 static const AVOption aacenc_options[] = {
 /* Coder selection: an integer limited to 0..AAC_CODER_NB-1 whose default is
  * AAC_CODER_FAST. The three AV_OPT_TYPE_CONST entries that follow share the
  * "coder" unit string and carry the values AAC_CODER_ANMR,
  * AAC_CODER_TWOLOOP and AAC_CODER_FAST. */
1123  {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_FAST}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
1124  {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1125  {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1126  {"fast", "Default fast search", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
 /* Per-tool switches, all AV_OPT_TYPE_BOOL with an allowed range of -1..1.
  * aac_ms defaults to -1 (presumably "let the encoder decide" -- TODO confirm
  * against the code that reads options.mid_side); aac_is, aac_pns and aac_tns
  * default to 1; aac_ltp, aac_pred and aac_pce default to 0. */
1127  {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
1128  {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1129  {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1130  {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1131  {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1132  {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1133  {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
 /* Terminating entry. */
1134  {NULL}
1135 };
1136 
/**
 * AVClass for the AAC encoder's private context.
 *
 * Names the class "AAC encoder", uses av_default_item_name to name items,
 * exposes the aacenc_options table, and records LIBAVUTIL_VERSION_INT as its
 * version. The codec definition later in this file points its .priv_class
 * member at this object.
 */
1137 static const AVClass aacenc_class = {
1138  .class_name = "AAC encoder",
1139  .item_name = av_default_item_name,
1140  .option = aacenc_options,
1141  .version = LIBAVUTIL_VERSION_INT,
1142 };
1143 
1145  { "b", "0" },
1146  { NULL }
1147 };
1148 
1150  .name = "aac",
1151  .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
1152  .type = AVMEDIA_TYPE_AUDIO,
1153  .id = AV_CODEC_ID_AAC,
1154  .priv_data_size = sizeof(AACEncContext),
1155  .init = aac_encode_init,
1156  .encode2 = aac_encode_frame,
1157  .close = aac_encode_end,
1158  .defaults = aac_encode_defaults,
1159  .supported_samplerates = mpeg4audio_sample_rates,
1160  .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
1162  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
1164  .priv_class = &aacenc_class,
1165 };
#define FF_COMPLIANCE_EXPERIMENTAL
Allow nonstandardized experimental things.
Definition: avcodec.h:2636
float, planar
Definition: samplefmt.h:69
void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts, int64_t *duration)
Remove frame(s) from the queue.
void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
Definition: aacenc.c:126
#define NULL
Definition: coverity.c:32
const AACCoefficientsEncoder * coder
Definition: aacenc.h:397
Band types following are encoded differently from others.
Definition: aac.h:86
static const uint8_t aac_chan_configs[AAC_MAX_CHANNELS][6]
default channel configurations
Definition: aacenctab.h:58
uint8_t use_kb_window[2]
If set, use Kaiser-Bessel window, otherwise use a sine window.
Definition: aac.h:177
int coder
Definition: aacenc.h:44
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
#define FF_ALLOCZ_ARRAY_OR_GOTO(ctx, p, nelem, elsize, label)
Definition: internal.h:167
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:77
AVOption.
Definition: opt.h:246
enum RawDataBlockType cur_type
channel group type cur_channel belongs to
Definition: aacenc.h:404
uint8_t ** bands
scalefactor band sizes for possible frame sizes
Definition: psymodel.h:98
Definition: aac.h:224
AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128]
memoization area for quantize_band_cost
Definition: aacenc.h:411
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacenc_utils.h:40
static const AVClass aacenc_class
Definition: aacenc.c:1137
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:208
av_cold void ff_kbd_window_init(float *window, float alpha, int n)
Generate a Kaiser-Bessel Derived Window.
Definition: kbdwin.c:26
int64_t bit_rate
the average bitrate
Definition: avcodec.h:1618
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
Definition: aac.h:152
uint8_t window_clipping[8]
set if a certain window is near clipping
Definition: aac.h:191
Definition: aac.h:63
const char * g
Definition: vf_curves.c:115
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
Definition: aac.h:57
channels
Definition: aptx.c:30
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)
Cleanup audio preprocessing module.
Definition: psymodel.c:152
#define WARN_IF(cond,...)
Definition: aacenc_utils.h:274
int size
Definition: avcodec.h:1481
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:191
const int ff_aac_swb_size_1024_len
Definition: aacenctab.c:108
void avpriv_align_put_bits(PutBitContext *s)
Pad the bitstream with zeros up to the next byte boundary.
Definition: bitstream.c:48
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
Encode ics_info element.
Definition: aacenc.c:221
void(* search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:75
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:278
int alloc
number of bits allocated by the psy, or -1 if no allocation was done
Definition: psymodel.h:105
const uint8_t * ff_aac_swb_size_1024[]
Definition: aacenctab.c:99
#define FF_PROFILE_AAC_MAIN
Definition: avcodec.h:2905
int lambda_count
count(lambda), for Qvg reporting
Definition: aacenc.h:403
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, const uint8_t **bands, const int *num_bands, int num_groups, const uint8_t *group_map)
Initialize psychoacoustic model.
Definition: psymodel.c:31
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aac.h:281
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
float lambda
Definition: aacenc.h:400
#define NOISE_PRE
preamble for NOISE_BT, put in bitstream with the first noise band
Definition: aac.h:156
int profile
profile
Definition: avcodec.h:2901
AVCodec.
Definition: avcodec.h:3492
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
Encode spectral coefficients processed by psychoacoustic model.
Definition: aacenc.c:439
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:87
int * num_bands
number of scalefactor bands for possible frame sizes
Definition: psymodel.h:99
static AVOnce aac_table_init
Definition: aacenc.c:51
static int put_audio_specific_config(AVCodecContext *avctx)
Make AAC audio config object.
Definition: aacenc.c:94
void(* apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:69
const uint8_t ff_aac_num_swb_128[]
Definition: aactab.c:61
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
Definition: aacenc.h:57
INTFLOAT pcoeffs[1024]
coefficients for IMDCT, pristine
Definition: aac.h:261
const uint16_t * swb_offset
table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular wind...
Definition: aac.h:181
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:72
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: avcodec.h:1009
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:32
AACEncOptions options
encoding options
Definition: aacenc.h:378
#define FF_CODEC_CAP_INIT_THREADSAFE
The codec does not modify any global variables in the init function, allowing to call the init functi...
Definition: internal.h:40
AAC encoder context.
Definition: aacenc.h:376
int num_ele[4]
front, side, back, lfe
Definition: aacenc.h:95
uint8_t
#define av_cold
Definition: attributes.h:82
void(* search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
Definition: aacenc.h:73
AVOptions.
int intensity_stereo
Definition: aacenc.h:51
#define WINDOW_FUNC(type)
Definition: aacenc.c:135
void(* update_ltp)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:70
LPCContext lpc
used by TNS
Definition: aacenc.h:388
void ff_aac_coder_init_mips(AACEncContext *c)
SingleChannelElement ch[2]
Definition: aac.h:284
int samplerate_index
MPEG-4 samplerate index.
Definition: aacenc.h:389
#define f(width, name)
Definition: cbs_vp9.c:255
Definition: aac.h:59
av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
Initialize AudioFrameQueue.
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
int64_t duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: avcodec.h:1498
const uint8_t * chan_map
channel configuration map
Definition: aacenc.h:392
TemporalNoiseShaping tns
Definition: aac.h:250
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
const uint8_t ff_aac_scalefactor_bits[121]
Definition: aactab.c:92
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:1669
AudioFrameQueue afq
Definition: aacenc.h:406
const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB]
Definition: aaccoder.c:897
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS]
Table to remap channels from libavcodec's default order to AAC order.
Definition: aacenctab.h:72
#define FF_PROFILE_AAC_LTP
Definition: avcodec.h:2908
uint8_t * data
Definition: avcodec.h:1480
static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
Definition: aacenc.c:53
const uint8_t * ff_aac_swb_size_128[]
Definition: aacenctab.c:91
uint32_t tag
Definition: movenc.c:1531
Scalefactor data are intensity stereo positions (in phase).
Definition: aac.h:89
#define max(a, b)
Definition: cuda_runtime.h:33
int profile
copied from avctx
Definition: aacenc.h:386
#define AVOnce
Definition: thread.h:159
uint8_t reorder_map[16]
maps channels from lavc to aac order
Definition: aacenc.h:99
static void adjust_frame_information(ChannelElement *cpe, int chans)
Produce integer coefficients from scalefactors provided by the model.
Definition: aacenc.c:256
#define av_log(a,...)
static const AVOption aacenc_options[]
Definition: aacenc.c:1122
int64_t layout
Definition: aacenc.h:94
const uint8_t * reorder_map
lavc to aac reorder map
Definition: aacenc.h:391
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
#define R
Definition: huffyuvdsp.h:34
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
void(* encode_ltp_info)(struct AACEncContext *s, SingleChannelElement *sce, int common_window)
Definition: aacenc.h:64
static const int sizes[][2]
Definition: img2dec.c:53
const uint8_t ff_aac_num_swb_1024[]
Definition: aactab.c:45
#define FF_PROFILE_MPEG2_AAC_LOW
Definition: avcodec.h:2913
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
float is_ener[128]
Intensity stereo pos (used by encoder)
Definition: aac.h:259
int initial_padding
Audio only.
Definition: avcodec.h:3099
static const AACPCEInfo aac_pce_configs[]
List of PCE (Program Configuration Element) for the channel layouts listed in channel_layout.h.
Definition: aacenc.h:137
float ff_aac_kbd_long_1024[1024]
Definition: aactab.c:38
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:1648
void(* mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:109
static const int mpeg4audio_sample_rates[16]
Definition: aacenctab.h:85
int amp[4]
Definition: aac.h:228
const char * name
Name of the codec implementation.
Definition: avcodec.h:3499
int num_windows
number of windows in a frame
Definition: psymodel.h:80
static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
Definition: aacenc.c:534
uint8_t max_sfb
number of scalefactor bands per group
Definition: aac.h:175
static const AVCodecDefault defaults[]
Definition: amfenc_h264.c:361
void(* adjust_common_ltp)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:67
#define ff_mdct_init
Definition: fft.h:169
Definition: aac.h:62
int num_swb
number of scalefactor window bands
Definition: aac.h:183
int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
Add a frame to the queue.
#define FFMAX(a, b)
Definition: common.h:94
void(* mark_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
Definition: aacenc.h:74
#define fail()
Definition: checkasm.h:122
int index[4][8]
front, side, back, lfe
Definition: aacenc.h:97
uint64_t channel_layout
Audio channel layout.
Definition: avcodec.h:2279
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:67
#define AACENC_FLAGS
Definition: aacenc.c:1121
INTFLOAT ret_buf[2048]
PCM output buffer.
Definition: aac.h:264
void(* set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:72
MIPS optimizations info
Definition: mips.txt:2
enum WindowSequence window_sequence[2]
Definition: aac.h:176
INTFLOAT ltp_state[3072]
time signal for LTP
Definition: aac.h:265
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:911
av_cold void ff_lpc_end(LPCContext *s)
Uninitialize LPCContext.
Definition: lpc.c:322
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:853
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: avcodec.h:1014
int cur_channel
current channel for coder context
Definition: aacenc.h:398
int last_frame_pb_count
number of bits for the previous frame
Definition: aacenc.h:401
#define FFMIN(a, b)
Definition: common.h:96
static void apply_intensity_stereo(ChannelElement *cpe)
Definition: aacenc.c:306
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: aacenc.c:556
void(* quant_bands)(int *out, const float *in, const float *scaled, int size, int is_signed, int maxval, const float Q34, const float rounding)
Definition: aacenc.h:414
uint8_t w
Definition: llviddspenc.c:38
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
#define FF_PROFILE_AAC_LOW
Definition: avcodec.h:2906
static const AVCodecDefault aac_encode_defaults[]
Definition: aacenc.c:1144
#define FF_PROFILE_UNKNOWN
Definition: avcodec.h:2902
int pos[4]
Definition: aac.h:227
int channels
channel count
Definition: aacenc.h:390
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
#define s(width, name)
Definition: cbs_vp9.c:257
AAC definitions and structures.
const uint8_t ff_tns_max_bands_1024[]
Definition: aactab.c:1394
static void quantize_bands(int *out, const float *in, const float *scaled, int size, int is_signed, int maxval, const float Q34, const float rounding)
Definition: aacenc_utils.h:65
FFTContext mdct128
short (128 samples) frame transform context
Definition: aacenc.h:381
PutBitContext pb
Definition: aacenc.h:379
static void(*const apply_window[4])(AVFloatDSPContext *fdsp, SingleChannelElement *sce, const float *audio)
Definition: aacenc.c:191
#define L(x)
Definition: vp56_arith.h:36
AVFloatDSPContext * fdsp
Definition: aacenc.h:382
int mid_side
Definition: aacenc.h:50
#define FF_ARRAY_ELEMS(a)
if(ret)
void av_get_channel_layout_string(char *buf, int buf_size, int nb_channels, uint64_t channel_layout)
Return a description of a channel layout.
static av_cold int aac_encode_end(AVCodecContext *avctx)
Definition: aacenc.c:899
void(* search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
Definition: aacenc.h:78
void ff_aac_dsp_init_x86(AACEncContext *s)
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:2248
void(* search_for_ltp)(struct AACEncContext *s, SingleChannelElement *sce, int common_window)
Definition: aacenc.h:76
#define AV_LOG_INFO
Standard information.
Definition: log.h:187
#define AV_ONCE_INIT
Definition: thread.h:160
#define CLIP_AVOIDANCE_FACTOR
Definition: aac.h:53
Libavcodec external API header.
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
Temporal Noise Shaping.
Definition: aac.h:198
int sample_rate
samples per second
Definition: avcodec.h:2228
float ff_aac_kbd_short_128[128]
Definition: aactab.c:39
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
Encode MS data.
Definition: aacenc.c:242
void(* ltp_insert_new_frame)(struct AACEncContext *s)
Definition: aacenc.h:71
void(* search_for_pred)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:79
main external API structure.
Definition: avcodec.h:1568
int pairing[3][8]
front, side, back
Definition: aacenc.h:96
int bits
number of bits used in the bit reservoir
Definition: psymodel.h:104
#define NOISE_PRE_BITS
length of preamble
Definition: aac.h:157
Levinson-Durbin recursion.
Definition: lpc.h:47
void(* apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:68
IndividualChannelStream ics
Definition: aac.h:249
void * buf
Definition: avisynth_c.h:766
int extradata_size
Definition: avcodec.h:1670
uint8_t group_len[8]
Definition: aac.h:179
Replacements for frequently missing libm functions.
float lambda_sum
sum(lambda), for Qvg reporting
Definition: aacenc.h:402
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Describe the class of an AVClass context structure.
Definition: log.h:67
static void put_bitstream_info(AACEncContext *s, const char *name)
Write some auxiliary information about the created AAC file.
Definition: aacenc.c:513
const int ff_aac_swb_size_128_len
Definition: aacenctab.c:107
void(* encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:65
void(* adjust_common_pred)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:66
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
static void encode_pulses(AACEncContext *s, Pulse *pulse)
Encode pulse data.
Definition: aacenc.c:420
uint16_t quantize_band_cost_cache_generation
Definition: aacenc.h:410
static av_cold void aac_encode_init_tables(void)
Definition: aacenc.c:954
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:182
#define TNS_MAX_ORDER
Definition: aac.h:50
FFPsyContext psy
Definition: aacenc.h:395
const uint32_t ff_aac_scalefactor_code[121]
Definition: aactab.c:73
LongTermPrediction ltp
Definition: aac.h:180
struct FFPsyContext::@131 bitres
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:940
const struct FFPsyModel * model
encoder-specific model functions
Definition: psymodel.h:91
av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order, enum FFLPCType lpc_type)
Initialize LPCContext.
Definition: lpc.c:300
#define AAC_MAX_CHANNELS
Definition: aacenctab.h:39
int needs_pce
flag for non-standard layout
Definition: aacenc.h:387
struct AACEncContext::@28 buffer
uint8_t pi<< 24) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8,(uint64_t)((*(const uint8_t *) pi-0x80U))<< 56) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16,(*(const int16_t *) pi >>8)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,(uint64_t)(*(const int16_t *) pi)<< 48) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32,(*(const int32_t *) pi >>24)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,(uint64_t)(*(const int32_t *) pi)<< 32) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S64,(*(const int64_t *) pi >>56)+0x80) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S64,*(const int64_t *) pi *(1.0f/(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S64,*(const int64_t *) pi *(1.0/(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(const float *) pi *(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, 
av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const double *) pi *(UINT64_C(1)<< 63)))#define FMT_PAIR_FUNC(out, in) static conv_func_type *const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB *AV_SAMPLE_FMT_NB]={FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_DBL), 
FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64),};static void cpy1(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, len);}static void cpy2(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 2 *len);}static void cpy4(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 4 *len);}static void cpy8(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 8 *len);}AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, const int *ch_map, int flags){AudioConvert *ctx;conv_func_type *f=fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt)+AV_SAMPLE_FMT_NB *av_get_packed_sample_fmt(in_fmt)];if(!f) return NULL;ctx=av_mallocz(sizeof(*ctx));if(!ctx) return NULL;if(channels==1){in_fmt=av_get_planar_sample_fmt(in_fmt);out_fmt=av_get_planar_sample_fmt(out_fmt);}ctx->channels=channels;ctx->conv_f=f;ctx->ch_map=ch_map;if(in_fmt==AV_SAMPLE_FMT_U8||in_fmt==AV_SAMPLE_FMT_U8P) memset(ctx->silence, 0x80, sizeof(ctx->silence));if(out_fmt==in_fmt &&!ch_map){switch(av_get_bytes_per_sample(in_fmt)){case 1:ctx->simd_f=cpy1;break;case 2:ctx->simd_f=cpy2;break;case 4:ctx->simd_f=cpy4;break;case 8:ctx->simd_f=cpy8;break;}}if(HAVE_X86ASM &&1) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);return ctx;}void swri_audio_convert_free(AudioConvert **ctx){av_freep(ctx);}int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len){int ch;int off=0;const int os=(out->planar?1:out->ch_count)*out->bps;unsigned 
misaligned=0;av_assert0(ctx->channels==out->ch_count);if(ctx->in_simd_align_mask){int planes=in->planar?in->ch_count:1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) in->ch[ch];misaligned|=m &ctx->in_simd_align_mask;}if(ctx->out_simd_align_mask){int planes=out->planar?out->ch_count:1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) out->ch[ch];misaligned|=m &ctx->out_simd_align_mask;}if(ctx->simd_f &&!ctx->ch_map &&!misaligned){off=len &~15;av_assert1(off >=0);av_assert1(off<=len);av_assert2(ctx->channels==SWR_CH_MAX||!in->ch[ctx->channels]);if(off >0){if(out->planar==in->planar){int planes=out->planar?out->ch_count:1;for(ch=0;ch< planes;ch++){ctx->simd_f(out-> ch ch
Definition: audioconvert.c:56
FFPsyWindowInfo(* window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Suggest window sequence for channel.
Definition: psymodel.h:129
int ms_mode
Signals mid/side stereo flags coding mode (used by encoder)
Definition: aac.h:279
AAC encoder data.
const uint8_t ff_tns_max_bands_128[]
Definition: aactab.c:1406
struct FFPsyPreprocessContext * psypp
Definition: aacenc.h:396
#define NOISE_OFFSET
subtracted from global gain, used as offset for the preamble
Definition: aac.h:158
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1634
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:257
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:256
AVCodec ff_aac_encoder
Definition: aacenc.c:1149
uint8_t is_mode
Set if any bands have been encoded using intensity stereo (used by encoder)
Definition: aac.h:280
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:262
const int avpriv_mpeg4audio_sample_rates[16]
Definition: mpeg4audio.c:62
void(* quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
Definition: aacenc.h:61
Scalefactor data are intensity stereo positions (out of phase).
Definition: aac.h:88
Y Spectral Band Replication.
Definition: mpeg4audio.h:94
const OptionDef options[]
Definition: ffmpeg_opt.c:3374
float * samples
Definition: aacenc.h:419
uint8_t prediction_used[41]
Definition: aac.h:190
static av_cold int aac_encode_init(AVCodecContext *avctx)
Definition: aacenc.c:959
common internal api header.
AACPCEInfo pce
PCE data, if needed.
Definition: aacenc.h:383
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:101
AAC encoder utilities.
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:248
windowing related information
Definition: psymodel.h:77
#define ff_mdct_end
Definition: fft.h:170
av_cold struct FFPsyPreprocessContext * ff_psy_preprocess_init(AVCodecContext *avctx)
psychoacoustic model audio preprocessing initialization
Definition: psymodel.c:103
const uint16_t *const ff_swb_offset_1024[]
Definition: aactab.c:1336
uint8_t config_map[16]
configures the encoder's channel-specific settings
Definition: aacenc.h:98
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels)
Preprocess several channels in an audio frame in order to compress it better.
Definition: psymodel.c:139
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce)
Encode scalefactors.
Definition: aacenc.c:384
float * planar_samples[16]
saved preprocessed input
Definition: aacenc.h:384
ChannelElement * cpe
channel elements
Definition: aacenc.h:394
Individual Channel Stream.
Definition: aac.h:174
float clip_avoidance_factor
set if any window is near clipping to the necessary attenuation factor to avoid it ...
Definition: aac.h:192
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:48
#define AVERROR_UNKNOWN
Unknown error, typically from an external library.
Definition: error.h:71
#define ERROR_IF(cond,...)
Definition: aacenc_utils.h:268
static void ff_aac_tableinit(void)
Definition: aactab.h:45
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:275
void * priv_data
Definition: avcodec.h:1595
int start
Definition: aac.h:226
FFTContext mdct1024
long (1024 samples) frame transform context
Definition: aacenc.h:380
int random_state
Definition: aacenc.h:399
static av_always_inline int diff(const uint32_t a, const uint32_t b)
int channels
number of audio channels
Definition: avcodec.h:2229
int num_pulse
Definition: aac.h:225
AAC_FLOAT lcoeffs[1024]
MDCT of LTP coefficients (used by encoder)
Definition: aac.h:266
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:162
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
Encode scalefactor band coding type.
Definition: aacenc.c:370
void(* analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels...
Definition: psymodel.h:139
static void apply_mid_side_stereo(ChannelElement *cpe)
Definition: aacenc.c:335
static const int64_t aac_normal_chan_layouts[7]
Definition: aacenctab.h:47
void ff_af_queue_close(AudioFrameQueue *afq)
Close AudioFrameQueue.
enum BandType band_type[128]
band types
Definition: aac.h:252
#define LIBAVCODEC_IDENT
Definition: version.h:42
void avpriv_put_string(PutBitContext *pb, const char *string, int terminate_string)
Put the string string in the bitstream.
Definition: bitstream.c:53
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:701
int frame_number
Frame counter, set by libavcodec.
Definition: avcodec.h:2259
FILE * out
Definition: movenc.c:54
Filter the word “frame” indicates either a video frame or a group of audio samples
#define av_freep(p)
void(* encode_tns_info)(struct AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc.h:63
void INT64 start
Definition: avisynth_c.h:766
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, int common_window)
Encode one channel of audio data.
Definition: aacenc.c:487
int8_t used[MAX_LTP_LONG_SFB]
Definition: aac.h:168
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce, float *audio)
Definition: aacenc.c:200
const uint16_t *const ff_swb_offset_128[]
Definition: aactab.c:1368
int8_t present
Definition: aac.h:164
uint8_t is_mask[128]
Set if intensity stereo is used (used by encoder)
Definition: aac.h:282
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
static const int aacenc_profiles[]
Definition: aacenctab.h:132
void(* abs_pow34)(float *out, const float *in, const int size)
Definition: aacenc.h:413
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:342
AAC data declarations.
av_cold void ff_psy_end(FFPsyContext *ctx)
Cleanup model context at the end.
Definition: psymodel.c:83
This structure stores compressed data.
Definition: avcodec.h:1457
static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
Downscale spectral coefficients for near-clipping windows to avoid artifacts.
Definition: aacenc.c:467
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:361
int strict_std_compliance
strictly follow the standard (MPEG-4, ...).
Definition: avcodec.h:2631
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:918
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:1473
for(j=16;j >0;--j)
int pred
Definition: aacenc.h:49
void AAC_RENAME() ff_init_ff_sine_windows(int index)
initialize the specified entry of ff_sine_windows
float clipping[8]
maximum absolute normalized intensity in the given window for clip avoidance
Definition: psymodel.h:82
const char * name
Definition: opengl_enc.c:102
void(* encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
Definition: aacenc.h:59
bitstream writer API