FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
aacdec.c
Go to the documentation of this file.
1 /*
2  * AAC decoder
3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5  *
6  * AAC LATM decoder
7  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
8  * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 /**
28  * @file
29  * AAC decoder
30  * @author Oded Shimon ( ods15 ods15 dyndns org )
31  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
32  */
33 
34 /*
35  * supported tools
36  *
37  * Support? Name
38  * N (code in SoC repo) gain control
39  * Y block switching
40  * Y window shapes - standard
41  * N window shapes - Low Delay
42  * Y filterbank - standard
43  * N (code in SoC repo) filterbank - Scalable Sample Rate
44  * Y Temporal Noise Shaping
45  * Y Long Term Prediction
46  * Y intensity stereo
47  * Y channel coupling
48  * Y frequency domain prediction
49  * Y Perceptual Noise Substitution
50  * Y Mid/Side stereo
51  * N Scalable Inverse AAC Quantization
52  * N Frequency Selective Switch
53  * N upsampling filter
54  * Y quantization & coding - AAC
55  * N quantization & coding - TwinVQ
56  * N quantization & coding - BSAC
57  * N AAC Error Resilience tools
58  * N Error Resilience payload syntax
59  * N Error Protection tool
60  * N CELP
61  * N Silence Compression
62  * N HVXC
63  * N HVXC 4kbits/s VR
64  * N Structured Audio tools
65  * N Structured Audio Sample Bank Format
66  * N MIDI
67  * N Harmonic and Individual Lines plus Noise
68  * N Text-To-Speech Interface
69  * Y Spectral Band Replication
70  * Y (not in this code) Layer-1
71  * Y (not in this code) Layer-2
72  * Y (not in this code) Layer-3
73  * N SinuSoidal Coding (Transient, Sinusoid, Noise)
74  * Y Parametric Stereo
75  * N Direct Stream Transfer
76  *
77  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
78  * - HE AAC v2 comprises LC AAC with Spectral Band Replication and
79  Parametric Stereo.
80  */
81 
82 #include "libavutil/float_dsp.h"
83 #include "libavutil/opt.h"
84 #include "avcodec.h"
85 #include "internal.h"
86 #include "get_bits.h"
87 #include "dsputil.h"
88 #include "fft.h"
89 #include "fmtconvert.h"
90 #include "lpc.h"
91 #include "kbdwin.h"
92 #include "sinewin.h"
93 
94 #include "aac.h"
95 #include "aactab.h"
96 #include "aacdectab.h"
97 #include "cbrt_tablegen.h"
98 #include "sbr.h"
99 #include "aacsbr.h"
100 #include "mpeg4audio.h"
101 #include "aacadtsdec.h"
102 #include "libavutil/intfloat.h"
103 
104 #include <assert.h>
105 #include <errno.h>
106 #include <math.h>
107 #include <string.h>
108 
109 #if ARCH_ARM
110 # include "arm/aac.h"
111 #endif
112 
/* Spectral VLC tables; slots 0..10 are filled through AAC_INIT_VLC_STATIC(). */
114 static VLC vlc_spectral[11];
115 

/* Forward declaration: output_configure() is called before its definition. */
116 static int output_configure(AACContext *ac,
117  uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
118  enum OCStatus oc_type, int get_new_frame);
119 

/* Shared tail of the "ran out of input" log messages; callers prepend their
 * own function-name prefix by string literal concatenation. */
120 #define overread_err "Input buffer exhausted before END element found\n"
121 
122 static int count_channels(uint8_t (*layout)[3], int tags)
123 {
124  int i, sum = 0;
125  for (i = 0; i < tags; i++) {
126  int syn_ele = layout[i][0];
127  int pos = layout[i][2];
128  sum += (1 + (syn_ele == TYPE_CPE)) *
129  (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
130  }
131  return sum;
132 }
133 
134 /**
135  * Check for the channel element in the current channel position configuration.
136  * If it exists, make sure the appropriate element is allocated and map the
137  * channel order to match the internal FFmpeg channel layout.
138  *
139  * @param che_pos current channel position configuration
140  * @param type channel element type
141  * @param id channel element id
142  * @param channels count of the number of channels in the configuration
143  *
144  * @return Returns error status. 0 - OK, !0 - error
145  */
147  enum ChannelPosition che_pos,
148  int type, int id, int *channels)
149 {
 /* Nothing more can be mapped once the running count has reached the cap. */
150  if (*channels >= MAX_CHANNELS)
151  return AVERROR_INVALIDDATA;
152  if (che_pos) {
 /* Non-zero position: the element is in use. Allocate it (zeroed) on
  * first use and initialise its SBR context. */
153  if (!ac->che[type][id]) {
154  if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
155  return AVERROR(ENOMEM);
156  ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
157  }
 /* Only non-CCE elements get output slots. */
158  if (type != TYPE_CCE) {
 /* A CPE, or an SCE while m4ac.ps == 1, takes two output slots, so
  * one extra slot must still be free in that case. */
159  if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) {
160  av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
161  return AVERROR_INVALIDDATA;
162  }
163  ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
164  if (type == TYPE_CPE ||
165  (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
166  ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1];
167  }
168  }
169  } else {
 /* Zero position: the element is unused. Close its SBR context if it
  * exists, then free it; av_freep() is called either way. */
170  if (ac->che[type][id])
171  ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
172  av_freep(&ac->che[type][id]);
173  }
174  return 0;
175 }
176 
178 {
 /* Points every allocated channel at its internal buffer, requests an output
  * frame, then redirects the mapped output channels into that frame.
  * Returns 0 on success or the negative value from ff_get_buffer(). */
179  AACContext *ac = avctx->priv_data;
180  int type, id, ch, ret;
181 
182  /* set channel pointers to internal buffers by default */
183  for (type = 0; type < 4; type++) {
184  for (id = 0; id < MAX_ELEM_ID; id++) {
185  ChannelElement *che = ac->che[type][id];
186  if (che) {
187  che->ch[0].ret = che->ch[0].ret_buf;
188  che->ch[1].ret = che->ch[1].ret_buf;
189  }
190  }
191  }
192 
193  /* get output buffer */
 /* The request is always for 2048 samples per channel. */
194  ac->frame.nb_samples = 2048;
195  if ((ret = ff_get_buffer(avctx, &ac->frame)) < 0) {
196  av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
197  return ret;
198  }
199 
200  /* map output channel pointers to AVFrame data */
 /* Slots of output_element[] that are NULL are left alone. */
201  for (ch = 0; ch < avctx->channels; ch++) {
202  if (ac->output_element[ch])
203  ac->output_element[ch]->ret = (float *)ac->frame.extended_data[ch];
204  }
205 
206  return 0;
207 }
208 
210  uint64_t av_position;
214 };
215 
216 static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
217  uint8_t (*layout_map)[3], int offset, uint64_t left,
218  uint64_t right, int pos)
219 {
220  if (layout_map[offset][0] == TYPE_CPE) {
221  e2c_vec[offset] = (struct elem_to_channel) {
222  .av_position = left | right, .syn_ele = TYPE_CPE,
223  .elem_id = layout_map[offset ][1], .aac_position = pos };
224  return 1;
225  } else {
226  e2c_vec[offset] = (struct elem_to_channel) {
227  .av_position = left, .syn_ele = TYPE_SCE,
228  .elem_id = layout_map[offset ][1], .aac_position = pos };
229  e2c_vec[offset + 1] = (struct elem_to_channel) {
230  .av_position = right, .syn_ele = TYPE_SCE,
231  .elem_id = layout_map[offset + 1][1], .aac_position = pos };
232  return 2;
233  }
234 }
235 
236 static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, int *current) {
237  int num_pos_channels = 0;
238  int first_cpe = 0;
239  int sce_parity = 0;
240  int i;
241  for (i = *current; i < tags; i++) {
242  if (layout_map[i][2] != pos)
243  break;
244  if (layout_map[i][0] == TYPE_CPE) {
245  if (sce_parity) {
246  if (pos == AAC_CHANNEL_FRONT && !first_cpe) {
247  sce_parity = 0;
248  } else {
249  return -1;
250  }
251  }
252  num_pos_channels += 2;
253  first_cpe = 1;
254  } else {
255  num_pos_channels++;
256  sce_parity ^= 1;
257  }
258  }
259  if (sce_parity &&
260  ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
261  return -1;
262  *current = i;
263  return num_pos_channels;
264 }
265 
266 static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
267 {
 /* Builds an element-to-channel table for the front/side/back/LFE entries,
  * sorts it by av_position, writes the sorted entries back into layout_map
  * and returns the OR of all av_position values other than UINT64_MAX.
  * Returns 0 when tags exceeds the table size or a position run fails
  * the count_paired_channels() check. */
268  int i, n, total_non_cc_elements;
269  struct elem_to_channel e2c_vec[4*MAX_ELEM_ID] = {{ 0 }};
270  int num_front_channels, num_side_channels, num_back_channels;
271  uint64_t layout;
272 
273  if (FF_ARRAY_ELEMS(e2c_vec) < tags)
274  return 0;
275 
 /* First pass: count channels per position; i advances past each run. */
276  i = 0;
277  num_front_channels =
278  count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
279  if (num_front_channels < 0)
280  return 0;
281  num_side_channels =
282  count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
283  if (num_side_channels < 0)
284  return 0;
285  num_back_channels =
286  count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
287  if (num_back_channels < 0)
288  return 0;
289 
 /* Second pass: restart at entry 0 and assign positions. An odd front
  * count means the first entry is taken as the front centre SCE. */
290  i = 0;
291  if (num_front_channels & 1) {
292  e2c_vec[i] = (struct elem_to_channel) {
293  .av_position = AV_CH_FRONT_CENTER, .syn_ele = TYPE_SCE,
294  .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_FRONT };
295  i++;
296  num_front_channels--;
297  }
298  if (num_front_channels >= 4) {
299  i += assign_pair(e2c_vec, layout_map, i,
303  num_front_channels -= 2;
304  }
305  if (num_front_channels >= 2) {
306  i += assign_pair(e2c_vec, layout_map, i,
310  num_front_channels -= 2;
311  }
 /* Any further front pairs get UINT64_MAX as their av_position; such
  * entries are excluded from the returned mask below. */
312  while (num_front_channels >= 2) {
313  i += assign_pair(e2c_vec, layout_map, i,
314  UINT64_MAX,
315  UINT64_MAX,
317  num_front_channels -= 2;
318  }
319 
320  if (num_side_channels >= 2) {
321  i += assign_pair(e2c_vec, layout_map, i,
325  num_side_channels -= 2;
326  }
327  while (num_side_channels >= 2) {
328  i += assign_pair(e2c_vec, layout_map, i,
329  UINT64_MAX,
330  UINT64_MAX,
332  num_side_channels -= 2;
333  }
334 
 /* For the back group the UINT64_MAX pairs come first, while four or
  * more channels remain. */
335  while (num_back_channels >= 4) {
336  i += assign_pair(e2c_vec, layout_map, i,
337  UINT64_MAX,
338  UINT64_MAX,
340  num_back_channels -= 2;
341  }
342  if (num_back_channels >= 2) {
343  i += assign_pair(e2c_vec, layout_map, i,
347  num_back_channels -= 2;
348  }
 /* A remaining single back channel becomes the back centre SCE. */
349  if (num_back_channels) {
350  e2c_vec[i] = (struct elem_to_channel) {
351  .av_position = AV_CH_BACK_CENTER, .syn_ele = TYPE_SCE,
352  .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_BACK };
353  i++;
354  num_back_channels--;
355  }
356 
357  if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
358  e2c_vec[i] = (struct elem_to_channel) {
360  .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
361  i++;
362  }
 /* Additional LFE entries get UINT64_MAX as their av_position. */
363  while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
364  e2c_vec[i] = (struct elem_to_channel) {
365  .av_position = UINT64_MAX, .syn_ele = TYPE_LFE,
366  .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
367  i++;
368  }
369 
370  // Must choose a stable sort
 /* Bubble sort on av_position. Only strictly greater neighbours are
  * swapped, so equal keys keep their order. Each pass shrinks n to the
  * index of the last swap. */
371  total_non_cc_elements = n = i;
372  do {
373  int next_n = 0;
374  for (i = 1; i < n; i++) {
375  if (e2c_vec[i-1].av_position > e2c_vec[i].av_position) {
376  FFSWAP(struct elem_to_channel, e2c_vec[i-1], e2c_vec[i]);
377  next_n = i;
378  }
379  }
380  n = next_n;
381  } while (n > 0);
382 
 /* Write the sorted entries back over the caller's layout_map and
  * accumulate the channel mask. */
383  layout = 0;
384  for (i = 0; i < total_non_cc_elements; i++) {
385  layout_map[i][0] = e2c_vec[i].syn_ele;
386  layout_map[i][1] = e2c_vec[i].elem_id;
387  layout_map[i][2] = e2c_vec[i].aac_position;
388  if (e2c_vec[i].av_position != UINT64_MAX) {
389  layout |= e2c_vec[i].av_position;
390  }
391  }
392 
393  return layout;
394 }
395 
396 /**
397  * Save current output configuration if and only if it has been locked.
398  */
/* Slot oc[1] is the current configuration and oc[0] the saved copy. */
400  if (ac->oc[1].status == OC_LOCKED) {
401  ac->oc[0] = ac->oc[1];
402  }
/* The current configuration is marked OC_NONE whether or not it was saved. */
403  ac->oc[1].status = OC_NONE;
404 }
405 
406 /**
407  * Restore the previous output configuration if and only if the current
408  * configuration is unlocked.
409  */
/* Restore only if something was saved (oc[0].status != OC_NONE). */
411  if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) {
412  ac->oc[1] = ac->oc[0];
413  ac->avctx->channels = ac->oc[1].channels;
414  ac->avctx->channel_layout = ac->oc[1].channel_layout;
/* Re-apply the restored layout map; the final argument 0 means no new
 * output frame is requested. The return value is not checked here. */
415  output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
416  ac->oc[1].status, 0);
417  }
418 }
419 
420 /**
421  * Configure output channel order based on the current program configuration element.
422  *
423  * @return Returns error status. 0 - OK, !0 - error
424  */
426  uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
427  enum OCStatus oc_type, int get_new_frame)
428 {
429  AVCodecContext *avctx = ac->avctx;
430  int i, channels = 0, ret;
431  uint64_t layout = 0;
432 
 /* Copy the map into the current configuration unless the caller passed
  * that very array (as the restore path does). */
433  if (ac->oc[1].layout_map != layout_map) {
434  memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
435  ac->oc[1].layout_map_tags = tags;
436  }
437 
438  // Try to sniff a reasonable channel order, otherwise output the
439  // channels in the order the PCE declared them.
 /* sniff_channel_order() reorders layout_map in place. */
441  layout = sniff_channel_order(layout_map, tags);
442  for (i = 0; i < tags; i++) {
443  int type = layout_map[i][0];
444  int id = layout_map[i][1];
445  int position = layout_map[i][2];
446  // Allocate or free elements depending on if they are in the
447  // current program configuration.
448  ret = che_configure(ac, position, type, id, &channels);
449  if (ret < 0)
450  return ret;
451  }
 /* With m4ac.ps == 1 and two configured channels, any sniffed layout other
  * than front centre is discarded. */
452  if (ac->oc[1].m4ac.ps == 1 && channels == 2) {
453  if (layout == AV_CH_FRONT_CENTER) {
455  } else {
456  layout = 0;
457  }
458  }
459 
 /* Snapshot the element pointers into the tag-indexed map. */
460  memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
 /* A zero layout leaves avctx->channel_layout as it was. */
461  if (layout) avctx->channel_layout = layout;
462  ac->oc[1].channel_layout = layout;
463  avctx->channels = ac->oc[1].channels = channels;
464  ac->oc[1].status = oc_type;
465 
466  if (get_new_frame) {
467  if ((ret = frame_configure_elements(ac->avctx)) < 0)
468  return ret;
469  }
470 
471  return 0;
472 }
473 
474 static void flush(AVCodecContext *avctx)
475 {
476  AACContext *ac= avctx->priv_data;
477  int type, i, j;
478 
479  for (type = 3; type >= 0; type--) {
480  for (i = 0; i < MAX_ELEM_ID; i++) {
481  ChannelElement *che = ac->che[type][i];
482  if (che) {
483  for (j = 0; j <= 1; j++) {
484  memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved));
485  }
486  }
487  }
488  }
489 }
490 
491 /**
492  * Set up channel positions based on a default channel configuration
493  * as specified in table 1.17.
494  *
495  * @return Returns error status. 0 - OK, !0 - error
496  */
498  uint8_t (*layout_map)[3],
499  int *tags,
500  int channel_config)
501 {
 /* Only configurations 1..7 have a default layout here. */
502  if (channel_config < 1 || channel_config > 7) {
503  av_log(avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
504  channel_config);
505  return -1;
506  }
 /* tags_per_config[] is indexed by the configuration number itself, while
  * aac_channel_layout_map[] is indexed from zero (hence the -1). */
507  *tags = tags_per_config[channel_config];
508  memcpy(layout_map, aac_channel_layout_map[channel_config-1], *tags * sizeof(*layout_map));
509  return 0;
510 }
511 
512 static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
513 {
 /* Returns the channel element to use for (type, elem_id), or NULL when
  * the element does not fit the active configuration or a reconfiguration
  * attempted here fails. */
514  // For PCE based channel configurations map the channels solely based on tags.
515  if (!ac->oc[1].m4ac.chan_config) {
516  return ac->tag_che_map[type][elem_id];
517  }
518  // Allow single CPE stereo files to be signalled with mono configuration.
 /* Applies only to the first element of a frame (tags_mapped == 0). */
519  if (!ac->tags_mapped && type == TYPE_CPE && ac->oc[1].m4ac.chan_config == 1) {
520  uint8_t layout_map[MAX_ELEM_ID*4][3];
521  int layout_map_tags;
523 
524  av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");
525 
526  if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
527  2) < 0)
528  return NULL;
529  if (output_configure(ac, layout_map, layout_map_tags,
530  OC_TRIAL_FRAME, 1) < 0)
531  return NULL;
532 
533  ac->oc[1].m4ac.chan_config = 2;
534  ac->oc[1].m4ac.ps = 0;
535  }
536  // And vice-versa
537  if (!ac->tags_mapped && type == TYPE_SCE && ac->oc[1].m4ac.chan_config == 2) {
538  uint8_t layout_map[MAX_ELEM_ID*4][3];
539  int layout_map_tags;
541 
542  av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");
543 
544  if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
545  1) < 0)
546  return NULL;
547  if (output_configure(ac, layout_map, layout_map_tags,
548  OC_TRIAL_FRAME, 1) < 0)
549  return NULL;
550 
551  ac->oc[1].m4ac.chan_config = 1;
552  if (ac->oc[1].m4ac.sbr)
553  ac->oc[1].m4ac.ps = -1;
554  }
555  // For indexed channel configurations map the channels solely based on position.
 /* The cases fall through on purpose: each configuration tries its own
  * extra element first and then every check of the smaller ones. */
556  switch (ac->oc[1].m4ac.chan_config) {
557  case 7:
558  if (ac->tags_mapped == 3 && type == TYPE_CPE) {
559  ac->tags_mapped++;
560  return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
561  }
 /* fall through */
562  case 6:
563  /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
564  instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
565  encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
566  if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
567  ac->tags_mapped++;
568  return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
569  }
 /* fall through */
570  case 5:
571  if (ac->tags_mapped == 2 && type == TYPE_CPE) {
572  ac->tags_mapped++;
573  return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
574  }
 /* fall through */
575  case 4:
 /* Reached from cases 5..7 as well, so the configuration is re-checked. */
576  if (ac->tags_mapped == 2 && ac->oc[1].m4ac.chan_config == 4 && type == TYPE_SCE) {
577  ac->tags_mapped++;
578  return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
579  }
 /* fall through */
580  case 3:
581  case 2:
 /* The first CPE is expected at tags_mapped 0 for configuration 2 and at
  * tags_mapped 1 for every larger configuration. */
582  if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) && type == TYPE_CPE) {
583  ac->tags_mapped++;
584  return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
585  } else if (ac->oc[1].m4ac.chan_config == 2) {
586  return NULL;
587  }
 /* fall through */
588  case 1:
589  if (!ac->tags_mapped && type == TYPE_SCE) {
590  ac->tags_mapped++;
591  return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
592  }
 /* fall through */
593  default:
594  return NULL;
595  }
596 }
597 
598 /**
599  * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
600  *
601  * @param type speaker type/position for these channels
602  */
603 static void decode_channel_map(uint8_t layout_map[][3],
604  enum ChannelPosition type,
605  GetBitContext *gb, int n)
606 {
607  while (n--) {
608  enum RawDataBlockType syn_ele;
609  switch (type) {
610  case AAC_CHANNEL_FRONT:
611  case AAC_CHANNEL_BACK:
612  case AAC_CHANNEL_SIDE:
613  syn_ele = get_bits1(gb);
614  break;
615  case AAC_CHANNEL_CC:
616  skip_bits1(gb);
617  syn_ele = TYPE_CCE;
618  break;
619  case AAC_CHANNEL_LFE:
620  syn_ele = TYPE_LFE;
621  break;
622  default:
623  av_assert0(0);
624  }
625  layout_map[0][0] = syn_ele;
626  layout_map[0][1] = get_bits(gb, 4);
627  layout_map[0][2] = type;
628  layout_map++;
629  }
630 }
631 
632 /**
633  * Decode program configuration element; reference: table 4.2.
634  *
635  * @return Returns error status. 0 - OK, !0 - error
636  */
637 static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
638  uint8_t (*layout_map)[3],
639  GetBitContext *gb)
640 {
641  int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
642  int comment_len;
643  int tags;
644 
645  skip_bits(gb, 2); // object_type
646 
647  sampling_index = get_bits(gb, 4);
648  if (m4ac->sampling_index != sampling_index)
649  av_log(avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");
650 
651  num_front = get_bits(gb, 4);
652  num_side = get_bits(gb, 4);
653  num_back = get_bits(gb, 4);
654  num_lfe = get_bits(gb, 2);
655  num_assoc_data = get_bits(gb, 3);
656  num_cc = get_bits(gb, 4);
657 
658  if (get_bits1(gb))
659  skip_bits(gb, 4); // mono_mixdown_tag
660  if (get_bits1(gb))
661  skip_bits(gb, 4); // stereo_mixdown_tag
662 
663  if (get_bits1(gb))
664  skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
665 
666  if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) {
667  av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
668  return -1;
669  }
670  decode_channel_map(layout_map , AAC_CHANNEL_FRONT, gb, num_front);
671  tags = num_front;
672  decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE, gb, num_side);
673  tags += num_side;
674  decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK, gb, num_back);
675  tags += num_back;
676  decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE, gb, num_lfe);
677  tags += num_lfe;
678 
679  skip_bits_long(gb, 4 * num_assoc_data);
680 
681  decode_channel_map(layout_map + tags, AAC_CHANNEL_CC, gb, num_cc);
682  tags += num_cc;
683 
684  align_get_bits(gb);
685 
686  /* comment field, first byte is length */
687  comment_len = get_bits(gb, 8) * 8;
688  if (get_bits_left(gb) < comment_len) {
689  av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
690  return -1;
691  }
692  skip_bits_long(gb, comment_len);
693  return tags;
694 }
695 
696 /**
697  * Decode GA "General Audio" specific configuration; reference: table 4.1.
698  *
699  * @param ac pointer to AACContext, may be null
700  * @param avctx pointer to AVCodecContext, used for logging
701  *
702  * @return Returns error status. 0 - OK, !0 - error
703  */
705  GetBitContext *gb,
706  MPEG4AudioConfig *m4ac,
707  int channel_config)
708 {
709  int extension_flag, ret;
710  uint8_t layout_map[MAX_ELEM_ID*4][3];
711  int tags = 0;
712 
 /* A set frameLengthFlag is rejected as an unimplemented feature. */
713  if (get_bits1(gb)) { // frameLengthFlag
714  av_log_missing_feature(avctx, "960/120 MDCT window", 1);
715  return AVERROR_PATCHWELCOME;
716  }
717 
718  if (get_bits1(gb)) // dependsOnCoreCoder
719  skip_bits(gb, 14); // coreCoderDelay
720  extension_flag = get_bits1(gb);
721 
722  if (m4ac->object_type == AOT_AAC_SCALABLE ||
724  skip_bits(gb, 3); // layerNr
725 
 /* Configuration 0 means the layout is carried in an in-band program
  * config element; any other value selects a default layout. */
726  if (channel_config == 0) {
727  skip_bits(gb, 4); // element_instance_tag
728  tags = decode_pce(avctx, m4ac, layout_map, gb);
729  if (tags < 0)
730  return tags;
731  } else {
732  if ((ret = set_default_channel_config(avctx, layout_map, &tags, channel_config)))
733  return ret;
734  }
735 
 /* More than one channel forces ps to 0; otherwise a ps of -1 is turned
  * into 1 when sbr is 1. */
736  if (count_channels(layout_map, tags) > 1) {
737  m4ac->ps = 0;
738  } else if (m4ac->sbr == 1 && m4ac->ps == -1)
739  m4ac->ps = 1;
740 
 /* ac may be NULL, in which case the stream is only parsed. */
741  if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
742  return ret;
743 
744  if (extension_flag) {
 /* Object types not listed below skip nothing extra before the final
  * extensionFlag3 bit. */
745  switch (m4ac->object_type) {
746  case AOT_ER_BSAC:
747  skip_bits(gb, 5); // numOfSubFrame
748  skip_bits(gb, 11); // layer_length
749  break;
750  case AOT_ER_AAC_LC:
751  case AOT_ER_AAC_LTP:
752  case AOT_ER_AAC_SCALABLE:
753  case AOT_ER_AAC_LD:
754  skip_bits(gb, 3); /* aacSectionDataResilienceFlag
755  * aacScalefactorDataResilienceFlag
756  * aacSpectralDataResilienceFlag
757  */
758  break;
759  }
760  skip_bits1(gb); // extensionFlag3 (TBD in version 3)
761  }
762  return 0;
763 }
764 
765 /**
766  * Decode audio specific configuration; reference: table 1.13.
767  *
768  * @param ac pointer to AACContext, may be null
769  * @param avctx pointer to AVCodecContext, used for logging
770  * @param m4ac pointer to MPEG4AudioConfig, used for parsing
771  * @param data pointer to buffer holding an audio specific config
772  * @param bit_size size of audio specific config or data in bits
773  * @param sync_extension look for an appended sync extension
774  *
775  * @return Returns error status or number of consumed bits. <0 - error
776  */
778  AVCodecContext *avctx,
779  MPEG4AudioConfig *m4ac,
780  const uint8_t *data, int bit_size,
781  int sync_extension)
782 {
783  GetBitContext gb;
784  int i;
785 
 /* Debug dump of the raw configuration bytes. */
786  av_dlog(avctx, "audio specific config size %d\n", bit_size >> 3);
787  for (i = 0; i < bit_size >> 3; i++)
788  av_dlog(avctx, "%02x ", data[i]);
789  av_dlog(avctx, "\n");
790 
791  init_get_bits(&gb, data, bit_size);
792 
 /* The generic parser fills m4ac; its non-negative result is used below as
  * the number of bits to skip before the GA specific part. */
793  if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size, sync_extension)) < 0)
794  return -1;
795  if (m4ac->sampling_index > 12) {
796  av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index);
797  return -1;
798  }
799 
800  skip_bits_long(&gb, i);
801 
 /* Only these three object types are accepted; any non-zero result from
  * the GA config parser is collapsed to -1. */
802  switch (m4ac->object_type) {
803  case AOT_AAC_MAIN:
804  case AOT_AAC_LC:
805  case AOT_AAC_LTP:
806  if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
807  return -1;
808  break;
809  default:
810  av_log(avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
811  m4ac->sbr == 1? "SBR+" : "", m4ac->object_type);
812  return -1;
813  }
814 
815  av_dlog(avctx, "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
816  m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
817  m4ac->sample_rate, m4ac->sbr, m4ac->ps);
818 
 /* Bits consumed so far, counted from the start of data. */
819  return get_bits_count(&gb);
820 }
821 
822 /**
823  * linear congruential pseudorandom number generator
824  *
825  * @param previous_val pointer to the current state of the generator
826  *
827  * @return Returns a 32-bit pseudorandom integer
828  */
829 static av_always_inline int lcg_random(unsigned previous_val)
830 {
831  union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
832  return v.s;
833 }
834 
836 {
 /* Reset state: r and cor terms go to zero, both variance terms to one. */
837  ps->r0 = 0.0f;
838  ps->r1 = 0.0f;
839  ps->cor0 = 0.0f;
840  ps->cor1 = 0.0f;
841  ps->var0 = 1.0f;
842  ps->var1 = 1.0f;
843 }
844 
846 {
 /* Resets each of the MAX_PREDICTORS entries of ps[] in turn. */
847  int i;
848  for (i = 0; i < MAX_PREDICTORS; i++)
849  reset_predict_state(&ps[i]);
850 }
851 
/**
 * Map a sample rate in Hz to a sampling index in the range 0..11.
 *
 * The result is the position of the first threshold in the descending
 * table below that the rate reaches. Rates below the last threshold,
 * including zero and negative values, give 11.
 */
static int sample_rate_idx (int rate)
{
    static const int lower_bound[] = {
        92017, 75132, 55426, 46009, 37566, 27713,
        23004, 18783, 13856, 11502,  9391
    };
    const int count = (int)(sizeof(lower_bound) / sizeof(lower_bound[0]));
    int idx;

    for (idx = 0; idx < count; idx++) {
        if (rate >= lower_bound[idx])
            return idx;
    }
    return count;
}
867 
868 static void reset_predictor_group(PredictorState *ps, int group_num)
869 {
870  int i;
871  for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
872  reset_predict_state(&ps[i]);
873 }
874 
/* Initialise vlc_spectral[num] from the ff_aac_spectral_{sizes,bits,codes}
 * tables for that slot; size is passed through as the last argument of
 * INIT_VLC_STATIC. The expansion already ends in a semicolon. */
875 #define AAC_INIT_VLC_STATIC(num, size) \
876  INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
877  ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
878  ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
879  size);
880 
882 {
883  AACContext *ac = avctx->priv_data;
884 
885  ac->avctx = avctx;
886  ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
887 
889 
 /* With extradata present it is parsed as an audio specific config (size
  * given in bits, sync extension search enabled). Without it the
  * configuration is derived from the codec context fields. */
890  if (avctx->extradata_size > 0) {
891  if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
892  avctx->extradata,
893  avctx->extradata_size*8, 1) < 0)
894  return -1;
895  } else {
896  int sr, i;
897  uint8_t layout_map[MAX_ELEM_ID*4][3];
898  int layout_map_tags;
899 
900  sr = sample_rate_idx(avctx->sample_rate);
901  ac->oc[1].m4ac.sampling_index = sr;
902  ac->oc[1].m4ac.channels = avctx->channels;
903  ac->oc[1].m4ac.sbr = -1;
904  ac->oc[1].m4ac.ps = -1;
905 
 /* Search for the table index whose channel count equals
  * avctx->channels; that index becomes the channel configuration. */
906  for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
907  if (ff_mpeg4audio_channels[i] == avctx->channels)
908  break;
910  i = 0;
911  }
912  ac->oc[1].m4ac.chan_config = i;
913 
914  if (ac->oc[1].m4ac.chan_config) {
915  int ret = set_default_channel_config(avctx, layout_map,
916  &layout_map_tags, ac->oc[1].m4ac.chan_config);
 /* A failed default layout is fatal only when AV_EF_EXPLODE is set;
  * the result of output_configure() is not checked. */
917  if (!ret)
918  output_configure(ac, layout_map, layout_map_tags,
919  OC_GLOBAL_HDR, 0);
920  else if (avctx->err_recognition & AV_EF_EXPLODE)
921  return AVERROR_INVALIDDATA;
922  }
923  }
924 
925  if (avctx->channels > MAX_CHANNELS) {
926  av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
927  return AVERROR_INVALIDDATA;
928  }
929 
 /* One call per spectral VLC slot; the second argument is forwarded as the
  * size argument of INIT_VLC_STATIC. */
930  AAC_INIT_VLC_STATIC( 0, 304);
931  AAC_INIT_VLC_STATIC( 1, 270);
932  AAC_INIT_VLC_STATIC( 2, 550);
933  AAC_INIT_VLC_STATIC( 3, 300);
934  AAC_INIT_VLC_STATIC( 4, 328);
935  AAC_INIT_VLC_STATIC( 5, 294);
936  AAC_INIT_VLC_STATIC( 6, 306);
937  AAC_INIT_VLC_STATIC( 7, 268);
938  AAC_INIT_VLC_STATIC( 8, 510);
939  AAC_INIT_VLC_STATIC( 9, 366);
940  AAC_INIT_VLC_STATIC(10, 462);
941 
942  ff_aac_sbr_init();
943 
944  ff_dsputil_init(&ac->dsp, avctx);
945  ff_fmt_convert_init(&ac->fmt_conv, avctx);
947 
 /* Fixed starting value for the decoder's random state. */
948  ac->random_state = 0x1f2e3d4c;
949 
951 
955  352);
956 
 /* Three transforms are set up: mdct and mdct_ltp with 11 as the size
  * argument and mdct_small with 8; mdct_ltp is the only one initialised
  * with 0 as the third argument.
  * NOTE(review): argument meanings are defined by ff_mdct_init - confirm
  * against fft.h. */
957  ff_mdct_init(&ac->mdct, 11, 1, 1.0 / (32768.0 * 1024.0));
958  ff_mdct_init(&ac->mdct_small, 8, 1, 1.0 / (32768.0 * 128.0));
959  ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0 * 32768.0);
960  // window initialization
965 
966  cbrt_tableinit();
967 
969  avctx->coded_frame = &ac->frame;
970 
971  return 0;
972 }
973 
974 /**
975  * Skip data_stream_element; reference: table 4.10.
976  */
978 {
 /* Layout read here: 1 bit alignment flag, 8 bit count, and a further
  * 8 bits added to the count when the first value is 255. */
979  int byte_align = get_bits1(gb);
980  int count = get_bits(gb, 8);
981  if (count == 255)
982  count += get_bits(gb, 8);
983  if (byte_align)
984  align_get_bits(gb);
985 
 /* count is in bytes; refuse to skip past the end of the input. */
986  if (get_bits_left(gb) < 8 * count) {
987  av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
988  return -1;
989  }
990  skip_bits_long(gb, 8 * count);
991  return 0;
992 }
993 
995  GetBitContext *gb)
996 {
997  int sfb;
 /* An optional 5 bit reset group follows a one bit flag; only the values
  * 1..30 are accepted. */
998  if (get_bits1(gb)) {
999  ics->predictor_reset_group = get_bits(gb, 5);
1000  if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
1001  av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
1002  return -1;
1003  }
1004  }
 /* One prediction_used flag per scalefactor band, capped at the smaller of
  * max_sfb and the per-sampling-index limit from ff_aac_pred_sfb_max[]. */
1005  for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) {
1006  ics->prediction_used[sfb] = get_bits1(gb);
1007  }
1008  return 0;
1009 }
1010 
1011 /**
1012  * Decode Long Term Prediction data; reference: table 4.xx.
1013  */
1015  GetBitContext *gb, uint8_t max_sfb)
1016 {
1017  int sfb;
1018 
 /* 11 bit lag, then a 3 bit index into the ltp_coef[] table. */
1019  ltp->lag = get_bits(gb, 11);
1020  ltp->coef = ltp_coef[get_bits(gb, 3)];
 /* One used flag per scalefactor band, for at most MAX_LTP_LONG_SFB bands. */
1021  for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
1022  ltp->used[sfb] = get_bits1(gb);
1023 }
1024 
1025 /**
1026  * Decode Individual Channel Stream info; reference: table 4.6.
1027  */
1029  GetBitContext *gb)
1030 {
 /* The first bit is reserved and must be zero. */
1031  if (get_bits1(gb)) {
1032  av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
1033  return AVERROR_INVALIDDATA;
1034  }
 /* Slot [1] keeps the previous frame's value, slot [0] the new one. */
1035  ics->window_sequence[1] = ics->window_sequence[0];
1036  ics->window_sequence[0] = get_bits(gb, 2);
1037  ics->use_kb_window[1] = ics->use_kb_window[0];
1038  ics->use_kb_window[0] = get_bits1(gb);
1039  ics->num_window_groups = 1;
1040  ics->group_len[0] = 1;
1041  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1042  int i;
1043  ics->max_sfb = get_bits(gb, 4);
 /* Seven grouping bits: a set bit extends the current window group by
  * one window, a clear bit starts a new group of length one. */
1044  for (i = 0; i < 7; i++) {
1045  if (get_bits1(gb)) {
1046  ics->group_len[ics->num_window_groups - 1]++;
1047  } else {
1048  ics->num_window_groups++;
1049  ics->group_len[ics->num_window_groups - 1] = 1;
1050  }
1051  }
1052  ics->num_windows = 8;
1056  ics->predictor_present = 0;
1057  } else {
1058  ics->max_sfb = get_bits(gb, 6);
1059  ics->num_windows = 1;
1063  ics->predictor_present = get_bits1(gb);
1064  ics->predictor_reset_group = 0;
 /* What follows the predictor flag depends on the object type: MAIN
  * reads frequency domain prediction data, LC treats the flag as an
  * error, anything else reads an LTP present bit and optional data. */
1065  if (ics->predictor_present) {
1066  if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
1067  if (decode_prediction(ac, ics, gb)) {
1068  goto fail;
1069  }
1070  } else if (ac->oc[1].m4ac.object_type == AOT_AAC_LC) {
1071  av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
1072  goto fail;
1073  } else {
1074  if ((ics->ltp.present = get_bits(gb, 1)))
1075  decode_ltp(&ics->ltp, gb, ics->max_sfb);
1076  }
1077  }
1078  }
1079 
1080  if (ics->max_sfb > ics->num_swb) {
1081  av_log(ac->avctx, AV_LOG_ERROR,
1082  "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
1083  ics->max_sfb, ics->num_swb);
1084  goto fail;
1085  }
1086 
1087  return 0;
1088 fail:
 /* Every failure after the reserved-bit check zeroes max_sfb before
  * returning. */
1089  ics->max_sfb = 0;
1090  return AVERROR_INVALIDDATA;
1091 }
1092 
1093 /**
1094  * Decode band types (section_data payload); reference: table 4.46.
1095  *
1096  * @param band_type array of the used band type
1097  * @param band_type_run_end array of the last scalefactor band of a band type run
1098  *
1099  * @return Returns error status. 0 - OK, !0 - error
1100  */
1101 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
1102  int band_type_run_end[120], GetBitContext *gb,
1104 {
1105  int g, idx = 0;
1106  const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
1107  for (g = 0; g < ics->num_window_groups; g++) {
1108  int k = 0;
1109  while (k < ics->max_sfb) {
1110  uint8_t sect_end = k;
1111  int sect_len_incr;
1112  int sect_band_type = get_bits(gb, 4);
1113  if (sect_band_type == 12) {
1114  av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
1115  return -1;
1116  }
1117  do {
1118  sect_len_incr = get_bits(gb, bits);
1119  sect_end += sect_len_incr;
1120  if (get_bits_left(gb) < 0) {
1121  av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err);
1122  return -1;
1123  }
1124  if (sect_end > ics->max_sfb) {
1125  av_log(ac->avctx, AV_LOG_ERROR,
1126  "Number of bands (%d) exceeds limit (%d).\n",
1127  sect_end, ics->max_sfb);
1128  return -1;
1129  }
1130  } while (sect_len_incr == (1 << bits) - 1);
1131  for (; k < sect_end; k++) {
1132  band_type [idx] = sect_band_type;
1133  band_type_run_end[idx++] = sect_end;
1134  }
1135  }
1136  }
1137  return 0;
1138 }
1139 
1140 /**
1141  * Decode scalefactors; reference: table 4.47.
1142  *
1143  * @param global_gain first scalefactor value as scalefactors are differentially coded
1144  * @param band_type array of the used band type
1145  * @param band_type_run_end array of the last scalefactor band of a band type run
1146  * @param sf array of scalefactors or intensity stereo positions
1147  *
1148  * @return Returns error status. 0 - OK, !0 - error
1149  */
1150 static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
1151  unsigned int global_gain,
1153  enum BandType band_type[120],
1154  int band_type_run_end[120])
1155 {
1156  int g, i, idx = 0;
1157  int offset[3] = { global_gain, global_gain - 90, 0 };
1158  int clipped_offset;
1159  int noise_flag = 1;
1160  for (g = 0; g < ics->num_window_groups; g++) {
1161  for (i = 0; i < ics->max_sfb;) {
1162  int run_end = band_type_run_end[idx];
1163  if (band_type[idx] == ZERO_BT) {
1164  for (; i < run_end; i++, idx++)
1165  sf[idx] = 0.;
1166  } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
1167  for (; i < run_end; i++, idx++) {
1168  offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1169  clipped_offset = av_clip(offset[2], -155, 100);
1170  if (offset[2] != clipped_offset) {
1171  av_log_ask_for_sample(ac->avctx, "Intensity stereo "
1172  "position clipped (%d -> %d).\nIf you heard an "
1173  "audible artifact, there may be a bug in the "
1174  "decoder. ", offset[2], clipped_offset);
1175  }
1176  sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
1177  }
1178  } else if (band_type[idx] == NOISE_BT) {
1179  for (; i < run_end; i++, idx++) {
1180  if (noise_flag-- > 0)
1181  offset[1] += get_bits(gb, 9) - 256;
1182  else
1183  offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1184  clipped_offset = av_clip(offset[1], -100, 155);
1185  if (offset[1] != clipped_offset) {
1186  av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
1187  "(%d -> %d).\nIf you heard an audible "
1188  "artifact, there may be a bug in the decoder. ",
1189  offset[1], clipped_offset);
1190  }
1191  sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
1192  }
1193  } else {
1194  for (; i < run_end; i++, idx++) {
1195  offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1196  if (offset[0] > 255U) {
1197  av_log(ac->avctx, AV_LOG_ERROR,
1198  "Scalefactor (%d) out of range.\n", offset[0]);
1199  return -1;
1200  }
1201  sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
1202  }
1203  }
1204  }
1205  }
1206  return 0;
1207 }
1208 
1209 /**
1210  * Decode pulse data; reference: table 4.7.
1211  */
1212 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
1213  const uint16_t *swb_offset, int num_swb)
1214 {
1215  int i, pulse_swb;
1216  pulse->num_pulse = get_bits(gb, 2) + 1;
1217  pulse_swb = get_bits(gb, 6);
1218  if (pulse_swb >= num_swb)
1219  return -1;
1220  pulse->pos[0] = swb_offset[pulse_swb];
1221  pulse->pos[0] += get_bits(gb, 5);
1222  if (pulse->pos[0] > 1023)
1223  return -1;
1224  pulse->amp[0] = get_bits(gb, 4);
1225  for (i = 1; i < pulse->num_pulse; i++) {
1226  pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
1227  if (pulse->pos[i] > 1023)
1228  return -1;
1229  pulse->amp[i] = get_bits(gb, 4);
1230  }
1231  return 0;
1232 }
1233 
1234 /**
1235  * Decode Temporal Noise Shaping data; reference: table 4.48.
1236  *
1237  * @return Returns error status. 0 - OK, !0 - error
1238  */
1240  GetBitContext *gb, const IndividualChannelStream *ics)
1241 {
1242  int w, filt, i, coef_len, coef_res, coef_compress;
1243  const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
1244  const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
1245  for (w = 0; w < ics->num_windows; w++) {
1246  if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
1247  coef_res = get_bits1(gb);
1248 
1249  for (filt = 0; filt < tns->n_filt[w]; filt++) {
1250  int tmp2_idx;
1251  tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
1252 
1253  if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
1254  av_log(ac->avctx, AV_LOG_ERROR, "TNS filter order %d is greater than maximum %d.\n",
1255  tns->order[w][filt], tns_max_order);
1256  tns->order[w][filt] = 0;
1257  return -1;
1258  }
1259  if (tns->order[w][filt]) {
1260  tns->direction[w][filt] = get_bits1(gb);
1261  coef_compress = get_bits1(gb);
1262  coef_len = coef_res + 3 - coef_compress;
1263  tmp2_idx = 2 * coef_compress + coef_res;
1264 
1265  for (i = 0; i < tns->order[w][filt]; i++)
1266  tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
1267  }
1268  }
1269  }
1270  }
1271  return 0;
1272 }
1273 
1274 /**
1275  * Decode Mid/Side data; reference: table 4.54.
1276  *
1277  * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1278  * [1] mask is decoded from bitstream; [2] mask is all 1s;
1279  * [3] reserved for scalable AAC
1280  */
1282  int ms_present)
1283 {
1284  int idx;
1285  if (ms_present == 1) {
1286  for (idx = 0; idx < cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; idx++)
1287  cpe->ms_mask[idx] = get_bits1(gb);
1288  } else if (ms_present == 2) {
1289  memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask[0]) * cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb);
1290  }
1291 }
1292 
#ifndef VMUL2
/**
 * Write two scaled table entries to dst.
 *
 * The low nibble of idx selects the first entry of v, the next nibble the
 * second; both are multiplied by *scale.
 *
 * @return  pointer just past the two written values
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 15] * gain;
    dst[1] = v[(idx >> 4) & 15] * gain;
    return dst + 2;
}
#endif
1303 
#ifndef VMUL4
/**
 * Write four scaled table entries to dst.
 *
 * idx holds four 2-bit selectors, lowest bits first; each picks an entry
 * of v, which is multiplied by *scale.
 *
 * @return  pointer just past the four written values
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 3] * gain;
    dst[1] = v[(idx >> 2) & 3] * gain;
    dst[2] = v[(idx >> 4) & 3] * gain;
    dst[3] = v[(idx >> 6) & 3] * gain;
    return dst + 4;
}
#endif
1316 
1317 #ifndef VMUL2S
1318 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
1319  unsigned sign, const float *scale)
1320 {
1321  union av_intfloat32 s0, s1;
1322 
1323  s0.f = s1.f = *scale;
1324  s0.i ^= sign >> 1 << 31;
1325  s1.i ^= sign << 31;
1326 
1327  *dst++ = v[idx & 15] * s0.f;
1328  *dst++ = v[idx>>4 & 15] * s1.f;
1329 
1330  return dst;
1331 }
1332 #endif
1333 
1334 #ifndef VMUL4S
1335 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
1336  unsigned sign, const float *scale)
1337 {
1338  unsigned nz = idx >> 12;
1339  union av_intfloat32 s = { .f = *scale };
1340  union av_intfloat32 t;
1341 
1342  t.i = s.i ^ (sign & 1U<<31);
1343  *dst++ = v[idx & 3] * t.f;
1344 
1345  sign <<= nz & 1; nz >>= 1;
1346  t.i = s.i ^ (sign & 1U<<31);
1347  *dst++ = v[idx>>2 & 3] * t.f;
1348 
1349  sign <<= nz & 1; nz >>= 1;
1350  t.i = s.i ^ (sign & 1U<<31);
1351  *dst++ = v[idx>>4 & 3] * t.f;
1352 
1353  sign <<= nz & 1;
1354  t.i = s.i ^ (sign & 1U<<31);
1355  *dst++ = v[idx>>6 & 3] * t.f;
1356 
1357  return dst;
1358 }
1359 #endif
1360 
1361 /**
1362  * Decode spectral data; reference: table 4.50.
1363  * Dequantize and scale spectral data; reference: 4.6.3.3.
1364  *
1365  * @param coef array of dequantized, scaled spectral data
1366  * @param sf array of scalefactors or intensity stereo positions
1367  * @param pulse_present set if pulses are present
1368  * @param pulse pointer to pulse data struct
1369  * @param band_type array of the used band type
1370  *
1371  * @return Returns error status. 0 - OK, !0 - error
1372  */
1373 static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
1374  GetBitContext *gb, const float sf[120],
1375  int pulse_present, const Pulse *pulse,
1376  const IndividualChannelStream *ics,
1377  enum BandType band_type[120])
1378 {
1379  int i, k, g, idx = 0;
1380  const int c = 1024 / ics->num_windows;
1381  const uint16_t *offsets = ics->swb_offset;
1382  float *coef_base = coef;
1383 
1384  for (g = 0; g < ics->num_windows; g++)
1385  memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float) * (c - offsets[ics->max_sfb]));
1386 
1387  for (g = 0; g < ics->num_window_groups; g++) {
1388  unsigned g_len = ics->group_len[g];
1389 
1390  for (i = 0; i < ics->max_sfb; i++, idx++) {
1391  const unsigned cbt_m1 = band_type[idx] - 1;
1392  float *cfo = coef + offsets[i];
1393  int off_len = offsets[i + 1] - offsets[i];
1394  int group;
1395 
1396  if (cbt_m1 >= INTENSITY_BT2 - 1) {
1397  for (group = 0; group < g_len; group++, cfo+=128) {
1398  memset(cfo, 0, off_len * sizeof(float));
1399  }
1400  } else if (cbt_m1 == NOISE_BT - 1) {
1401  for (group = 0; group < g_len; group++, cfo+=128) {
1402  float scale;
1403  float band_energy;
1404 
1405  for (k = 0; k < off_len; k++) {
1407  cfo[k] = ac->random_state;
1408  }
1409 
1410  band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
1411  scale = sf[idx] / sqrtf(band_energy);
1412  ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
1413  }
1414  } else {
1415  const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1416  const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
1417  VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
1418  OPEN_READER(re, gb);
1419 
1420  switch (cbt_m1 >> 1) {
1421  case 0:
1422  for (group = 0; group < g_len; group++, cfo+=128) {
1423  float *cf = cfo;
1424  int len = off_len;
1425 
1426  do {
1427  int code;
1428  unsigned cb_idx;
1429 
1430  UPDATE_CACHE(re, gb);
1431  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1432  cb_idx = cb_vector_idx[code];
1433  cf = VMUL4(cf, vq, cb_idx, sf + idx);
1434  } while (len -= 4);
1435  }
1436  break;
1437 
1438  case 1:
1439  for (group = 0; group < g_len; group++, cfo+=128) {
1440  float *cf = cfo;
1441  int len = off_len;
1442 
1443  do {
1444  int code;
1445  unsigned nnz;
1446  unsigned cb_idx;
1447  uint32_t bits;
1448 
1449  UPDATE_CACHE(re, gb);
1450  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1451  cb_idx = cb_vector_idx[code];
1452  nnz = cb_idx >> 8 & 15;
1453  bits = nnz ? GET_CACHE(re, gb) : 0;
1454  LAST_SKIP_BITS(re, gb, nnz);
1455  cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1456  } while (len -= 4);
1457  }
1458  break;
1459 
1460  case 2:
1461  for (group = 0; group < g_len; group++, cfo+=128) {
1462  float *cf = cfo;
1463  int len = off_len;
1464 
1465  do {
1466  int code;
1467  unsigned cb_idx;
1468 
1469  UPDATE_CACHE(re, gb);
1470  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1471  cb_idx = cb_vector_idx[code];
1472  cf = VMUL2(cf, vq, cb_idx, sf + idx);
1473  } while (len -= 2);
1474  }
1475  break;
1476 
1477  case 3:
1478  case 4:
1479  for (group = 0; group < g_len; group++, cfo+=128) {
1480  float *cf = cfo;
1481  int len = off_len;
1482 
1483  do {
1484  int code;
1485  unsigned nnz;
1486  unsigned cb_idx;
1487  unsigned sign;
1488 
1489  UPDATE_CACHE(re, gb);
1490  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1491  cb_idx = cb_vector_idx[code];
1492  nnz = cb_idx >> 8 & 15;
1493  sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
1494  LAST_SKIP_BITS(re, gb, nnz);
1495  cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1496  } while (len -= 2);
1497  }
1498  break;
1499 
1500  default:
1501  for (group = 0; group < g_len; group++, cfo+=128) {
1502  float *cf = cfo;
1503  uint32_t *icf = (uint32_t *) cf;
1504  int len = off_len;
1505 
1506  do {
1507  int code;
1508  unsigned nzt, nnz;
1509  unsigned cb_idx;
1510  uint32_t bits;
1511  int j;
1512 
1513  UPDATE_CACHE(re, gb);
1514  GET_VLC(code, re, gb, vlc_tab, 8, 2);
1515 
1516  if (!code) {
1517  *icf++ = 0;
1518  *icf++ = 0;
1519  continue;
1520  }
1521 
1522  cb_idx = cb_vector_idx[code];
1523  nnz = cb_idx >> 12;
1524  nzt = cb_idx >> 8;
1525  bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1526  LAST_SKIP_BITS(re, gb, nnz);
1527 
1528  for (j = 0; j < 2; j++) {
1529  if (nzt & 1<<j) {
1530  uint32_t b;
1531  int n;
1532  /* The total length of escape_sequence must be < 22 bits according
1533  to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1534  UPDATE_CACHE(re, gb);
1535  b = GET_CACHE(re, gb);
1536  b = 31 - av_log2(~b);
1537 
1538  if (b > 8) {
1539  av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1540  return -1;
1541  }
1542 
1543  SKIP_BITS(re, gb, b + 1);
1544  b += 4;
1545  n = (1 << b) + SHOW_UBITS(re, gb, b);
1546  LAST_SKIP_BITS(re, gb, b);
1547  *icf++ = cbrt_tab[n] | (bits & 1U<<31);
1548  bits <<= 1;
1549  } else {
1550  unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1551  *icf++ = (bits & 1U<<31) | v;
1552  bits <<= !!v;
1553  }
1554  cb_idx >>= 4;
1555  }
1556  } while (len -= 2);
1557 
1558  ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1559  }
1560  }
1561 
1562  CLOSE_READER(re, gb);
1563  }
1564  }
1565  coef += g_len << 7;
1566  }
1567 
1568  if (pulse_present) {
1569  idx = 0;
1570  for (i = 0; i < pulse->num_pulse; i++) {
1571  float co = coef_base[ pulse->pos[i] ];
1572  while (offsets[idx + 1] <= pulse->pos[i])
1573  idx++;
1574  if (band_type[idx] != NOISE_BT && sf[idx]) {
1575  float ico = -pulse->amp[i];
1576  if (co) {
1577  co /= sf[idx];
1578  ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
1579  }
1580  coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
1581  }
1582  }
1583  }
1584  return 0;
1585 }
1586 
1587 static av_always_inline float flt16_round(float pf)
1588 {
1589  union av_intfloat32 tmp;
1590  tmp.f = pf;
1591  tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
1592  return tmp.f;
1593 }
1594 
1595 static av_always_inline float flt16_even(float pf)
1596 {
1597  union av_intfloat32 tmp;
1598  tmp.f = pf;
1599  tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
1600  return tmp.f;
1601 }
1602 
1603 static av_always_inline float flt16_trunc(float pf)
1604 {
1605  union av_intfloat32 pun;
1606  pun.f = pf;
1607  pun.i &= 0xFFFF0000U;
1608  return pun.f;
1609 }
1610 
1611 static av_always_inline void predict(PredictorState *ps, float *coef,
1612  int output_enable)
1613 {
1614  const float a = 0.953125; // 61.0 / 64
1615  const float alpha = 0.90625; // 29.0 / 32
1616  float e0, e1;
1617  float pv;
1618  float k1, k2;
1619  float r0 = ps->r0, r1 = ps->r1;
1620  float cor0 = ps->cor0, cor1 = ps->cor1;
1621  float var0 = ps->var0, var1 = ps->var1;
1622 
1623  k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
1624  k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
1625 
1626  pv = flt16_round(k1 * r0 + k2 * r1);
1627  if (output_enable)
1628  *coef += pv;
1629 
1630  e0 = *coef;
1631  e1 = e0 - k1 * r0;
1632 
1633  ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
1634  ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
1635  ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
1636  ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
1637 
1638  ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
1639  ps->r0 = flt16_trunc(a * e0);
1640 }
1641 
1642 /**
1643  * Apply AAC-Main style frequency domain prediction.
1644  */
1646 {
1647  int sfb, k;
1648 
1649  if (!sce->ics.predictor_initialized) {
1651  sce->ics.predictor_initialized = 1;
1652  }
1653 
1654  if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
1655  for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]; sfb++) {
1656  for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
1657  predict(&sce->predictor_state[k], &sce->coeffs[k],
1658  sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
1659  }
1660  }
1661  if (sce->ics.predictor_reset_group)
1663  } else
1665 }
1666 
1667 /**
1668  * Decode an individual_channel_stream payload; reference: table 4.44.
1669  *
1670  * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1671  * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1672  *
1673  * @return Returns error status. 0 - OK, !0 - error
1674  */
1676  GetBitContext *gb, int common_window, int scale_flag)
1677 {
1678  Pulse pulse;
1679  TemporalNoiseShaping *tns = &sce->tns;
1680  IndividualChannelStream *ics = &sce->ics;
1681  float *out = sce->coeffs;
1682  int global_gain, pulse_present = 0;
1683 
1684  /* This assignment is to silence a GCC warning about the variable being used
1685  * uninitialized when in fact it always is.
1686  */
1687  pulse.num_pulse = 0;
1688 
1689  global_gain = get_bits(gb, 8);
1690 
1691  if (!common_window && !scale_flag) {
1692  if (decode_ics_info(ac, ics, gb) < 0)
1693  return AVERROR_INVALIDDATA;
1694  }
1695 
1696  if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
1697  return -1;
1698  if (decode_scalefactors(ac, sce->sf, gb, global_gain, ics, sce->band_type, sce->band_type_run_end) < 0)
1699  return -1;
1700 
1701  pulse_present = 0;
1702  if (!scale_flag) {
1703  if ((pulse_present = get_bits1(gb))) {
1704  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1705  av_log(ac->avctx, AV_LOG_ERROR, "Pulse tool not allowed in eight short sequence.\n");
1706  return -1;
1707  }
1708  if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
1709  av_log(ac->avctx, AV_LOG_ERROR, "Pulse data corrupt or invalid.\n");
1710  return -1;
1711  }
1712  }
1713  if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
1714  return -1;
1715  if (get_bits1(gb)) {
1716  av_log_missing_feature(ac->avctx, "SSR", 1);
1717  return AVERROR_PATCHWELCOME;
1718  }
1719  }
1720 
1721  if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
1722  return -1;
1723 
1724  if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
1725  apply_prediction(ac, sce);
1726 
1727  return 0;
1728 }
1729 
1730 /**
1731  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1732  */
1734 {
1735  const IndividualChannelStream *ics = &cpe->ch[0].ics;
1736  float *ch0 = cpe->ch[0].coeffs;
1737  float *ch1 = cpe->ch[1].coeffs;
1738  int g, i, group, idx = 0;
1739  const uint16_t *offsets = ics->swb_offset;
1740  for (g = 0; g < ics->num_window_groups; g++) {
1741  for (i = 0; i < ics->max_sfb; i++, idx++) {
1742  if (cpe->ms_mask[idx] &&
1743  cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
1744  for (group = 0; group < ics->group_len[g]; group++) {
1745  ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
1746  ch1 + group * 128 + offsets[i],
1747  offsets[i+1] - offsets[i]);
1748  }
1749  }
1750  }
1751  ch0 += ics->group_len[g] * 128;
1752  ch1 += ics->group_len[g] * 128;
1753  }
1754 }
1755 
1756 /**
1757  * intensity stereo decoding; reference: 4.6.8.2.3
1758  *
1759  * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1760  * [1] mask is decoded from bitstream; [2] mask is all 1s;
1761  * [3] reserved for scalable AAC
1762  */
1763 static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_present)
1764 {
1765  const IndividualChannelStream *ics = &cpe->ch[1].ics;
1766  SingleChannelElement *sce1 = &cpe->ch[1];
1767  float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
1768  const uint16_t *offsets = ics->swb_offset;
1769  int g, group, i, idx = 0;
1770  int c;
1771  float scale;
1772  for (g = 0; g < ics->num_window_groups; g++) {
1773  for (i = 0; i < ics->max_sfb;) {
1774  if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
1775  const int bt_run_end = sce1->band_type_run_end[idx];
1776  for (; i < bt_run_end; i++, idx++) {
1777  c = -1 + 2 * (sce1->band_type[idx] - 14);
1778  if (ms_present)
1779  c *= 1 - 2 * cpe->ms_mask[idx];
1780  scale = c * sce1->sf[idx];
1781  for (group = 0; group < ics->group_len[g]; group++)
1782  ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
1783  coef0 + group * 128 + offsets[i],
1784  scale,
1785  offsets[i + 1] - offsets[i]);
1786  }
1787  } else {
1788  int bt_run_end = sce1->band_type_run_end[idx];
1789  idx += bt_run_end - i;
1790  i = bt_run_end;
1791  }
1792  }
1793  coef0 += ics->group_len[g] * 128;
1794  coef1 += ics->group_len[g] * 128;
1795  }
1796 }
1797 
1798 /**
1799  * Decode a channel_pair_element; reference: table 4.4.
1800  *
1801  * @return Returns error status. 0 - OK, !0 - error
1802  */
1804 {
1805  int i, ret, common_window, ms_present = 0;
1806 
1807  common_window = get_bits1(gb);
1808  if (common_window) {
1809  if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
1810  return AVERROR_INVALIDDATA;
1811  i = cpe->ch[1].ics.use_kb_window[0];
1812  cpe->ch[1].ics = cpe->ch[0].ics;
1813  cpe->ch[1].ics.use_kb_window[1] = i;
1814  if (cpe->ch[1].ics.predictor_present && (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
1815  if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
1816  decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
1817  ms_present = get_bits(gb, 2);
1818  if (ms_present == 3) {
1819  av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
1820  return -1;
1821  } else if (ms_present)
1822  decode_mid_side_stereo(cpe, gb, ms_present);
1823  }
1824  if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
1825  return ret;
1826  if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
1827  return ret;
1828 
1829  if (common_window) {
1830  if (ms_present)
1831  apply_mid_side_stereo(ac, cpe);
1832  if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
1833  apply_prediction(ac, &cpe->ch[0]);
1834  apply_prediction(ac, &cpe->ch[1]);
1835  }
1836  }
1837 
1838  apply_intensity_stereo(ac, cpe, ms_present);
1839  return 0;
1840 }
1841 
1842 static const float cce_scale[] = {
1843  1.09050773266525765921, //2^(1/8)
1844  1.18920711500272106672, //2^(1/4)
1845  M_SQRT2,
1846  2,
1847 };
1848 
1849 /**
1850  * Decode coupling_channel_element; reference: table 4.8.
1851  *
1852  * @return Returns error status. 0 - OK, !0 - error
1853  */
1855 {
1856  int num_gain = 0;
1857  int c, g, sfb, ret;
1858  int sign;
1859  float scale;
1860  SingleChannelElement *sce = &che->ch[0];
1861  ChannelCoupling *coup = &che->coup;
1862 
1863  coup->coupling_point = 2 * get_bits1(gb);
1864  coup->num_coupled = get_bits(gb, 3);
1865  for (c = 0; c <= coup->num_coupled; c++) {
1866  num_gain++;
1867  coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
1868  coup->id_select[c] = get_bits(gb, 4);
1869  if (coup->type[c] == TYPE_CPE) {
1870  coup->ch_select[c] = get_bits(gb, 2);
1871  if (coup->ch_select[c] == 3)
1872  num_gain++;
1873  } else
1874  coup->ch_select[c] = 2;
1875  }
1876  coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
1877 
1878  sign = get_bits(gb, 1);
1879  scale = cce_scale[get_bits(gb, 2)];
1880 
1881  if ((ret = decode_ics(ac, sce, gb, 0, 0)))
1882  return ret;
1883 
1884  for (c = 0; c < num_gain; c++) {
1885  int idx = 0;
1886  int cge = 1;
1887  int gain = 0;
1888  float gain_cache = 1.;
1889  if (c) {
1890  cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
1891  gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
1892  gain_cache = powf(scale, -gain);
1893  }
1894  if (coup->coupling_point == AFTER_IMDCT) {
1895  coup->gain[c][0] = gain_cache;
1896  } else {
1897  for (g = 0; g < sce->ics.num_window_groups; g++) {
1898  for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
1899  if (sce->band_type[idx] != ZERO_BT) {
1900  if (!cge) {
1901  int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
1902  if (t) {
1903  int s = 1;
1904  t = gain += t;
1905  if (sign) {
1906  s -= 2 * (t & 0x1);
1907  t >>= 1;
1908  }
1909  gain_cache = powf(scale, -t) * s;
1910  }
1911  }
1912  coup->gain[c][idx] = gain_cache;
1913  }
1914  }
1915  }
1916  }
1917  }
1918  return 0;
1919 }
1920 
1921 /**
1922  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
1923  *
1924  * @return Returns number of bytes consumed.
1925  */
1927  GetBitContext *gb)
1928 {
1929  int i;
1930  int num_excl_chan = 0;
1931 
1932  do {
1933  for (i = 0; i < 7; i++)
1934  che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
1935  } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
1936 
1937  return num_excl_chan / 7;
1938 }
1939 
1940 /**
1941  * Decode dynamic range information; reference: table 4.52.
1942  *
1943  * @return Returns number of bytes consumed.
1944  */
1946  GetBitContext *gb)
1947 {
1948  int n = 1;
1949  int drc_num_bands = 1;
1950  int i;
1951 
1952  /* pce_tag_present? */
1953  if (get_bits1(gb)) {
1954  che_drc->pce_instance_tag = get_bits(gb, 4);
1955  skip_bits(gb, 4); // tag_reserved_bits
1956  n++;
1957  }
1958 
1959  /* excluded_chns_present? */
1960  if (get_bits1(gb)) {
1961  n += decode_drc_channel_exclusions(che_drc, gb);
1962  }
1963 
1964  /* drc_bands_present? */
1965  if (get_bits1(gb)) {
1966  che_drc->band_incr = get_bits(gb, 4);
1967  che_drc->interpolation_scheme = get_bits(gb, 4);
1968  n++;
1969  drc_num_bands += che_drc->band_incr;
1970  for (i = 0; i < drc_num_bands; i++) {
1971  che_drc->band_top[i] = get_bits(gb, 8);
1972  n++;
1973  }
1974  }
1975 
1976  /* prog_ref_level_present? */
1977  if (get_bits1(gb)) {
1978  che_drc->prog_ref_level = get_bits(gb, 7);
1979  skip_bits1(gb); // prog_ref_level_reserved_bits
1980  n++;
1981  }
1982 
1983  for (i = 0; i < drc_num_bands; i++) {
1984  che_drc->dyn_rng_sgn[i] = get_bits1(gb);
1985  che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
1986  n++;
1987  }
1988 
1989  return n;
1990 }
1991 
1992 static int decode_fill(AACContext *ac, GetBitContext *gb, int len) {
1993  uint8_t buf[256];
1994  int i, major, minor;
1995 
1996  if (len < 13+7*8)
1997  goto unknown;
1998 
1999  get_bits(gb, 13); len -= 13;
2000 
2001  for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8)
2002  buf[i] = get_bits(gb, 8);
2003 
2004  buf[i] = 0;
2005  if (ac->avctx->debug & FF_DEBUG_PICT_INFO)
2006  av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf);
2007 
2008  if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){
2009  ac->avctx->internal->skip_samples = 1024;
2010  }
2011 
2012 unknown:
2013  skip_bits_long(gb, len);
2014 
2015  return 0;
2016 }
2017 
2018 /**
2019  * Decode extension data (incomplete); reference: table 4.51.
2020  *
2021  * @param cnt length of TYPE_FIL syntactic element in bytes
2022  *
2023  * @return Returns number of bytes consumed
2024  */
2026  ChannelElement *che, enum RawDataBlockType elem_type)
2027 {
2028  int crc_flag = 0;
2029  int res = cnt;
2030  switch (get_bits(gb, 4)) { // extension type
2031  case EXT_SBR_DATA_CRC:
2032  crc_flag++;
2033  case EXT_SBR_DATA:
2034  if (!che) {
2035  av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
2036  return res;
2037  } else if (!ac->oc[1].m4ac.sbr) {
2038  av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
2039  skip_bits_long(gb, 8 * cnt - 4);
2040  return res;
2041  } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
2042  av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
2043  skip_bits_long(gb, 8 * cnt - 4);
2044  return res;
2045  } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && ac->avctx->channels == 1) {
2046  ac->oc[1].m4ac.sbr = 1;
2047  ac->oc[1].m4ac.ps = 1;
2048  output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
2049  ac->oc[1].status, 1);
2050  } else {
2051  ac->oc[1].m4ac.sbr = 1;
2052  }
2053  res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
2054  break;
2055  case EXT_DYNAMIC_RANGE:
2056  res = decode_dynamic_range(&ac->che_drc, gb);
2057  break;
2058  case EXT_FILL:
2059  decode_fill(ac, gb, 8 * cnt - 4);
2060  break;
2061  case EXT_FILL_DATA:
2062  case EXT_DATA_ELEMENT:
2063  default:
2064  skip_bits_long(gb, 8 * cnt - 4);
2065  break;
2066  };
2067  return res;
2068 }
2069 
2070 /**
2071  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
2072  *
2073  * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
2074  * @param coef spectral coefficients
2075  */
2076 static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
2077  IndividualChannelStream *ics, int decode)
2078 {
2079  const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
2080  int w, filt, m, i;
2081  int bottom, top, order, start, end, size, inc;
2082  float lpc[TNS_MAX_ORDER];
2083  float tmp[TNS_MAX_ORDER+1];
2084 
2085  for (w = 0; w < ics->num_windows; w++) {
2086  bottom = ics->num_swb;
2087  for (filt = 0; filt < tns->n_filt[w]; filt++) {
2088  top = bottom;
2089  bottom = FFMAX(0, top - tns->length[w][filt]);
2090  order = tns->order[w][filt];
2091  if (order == 0)
2092  continue;
2093 
2094  // tns_decode_coef
2095  compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
2096 
2097  start = ics->swb_offset[FFMIN(bottom, mmm)];
2098  end = ics->swb_offset[FFMIN( top, mmm)];
2099  if ((size = end - start) <= 0)
2100  continue;
2101  if (tns->direction[w][filt]) {
2102  inc = -1;
2103  start = end - 1;
2104  } else {
2105  inc = 1;
2106  }
2107  start += w * 128;
2108 
2109  if (decode) {
2110  // ar filter
2111  for (m = 0; m < size; m++, start += inc)
2112  for (i = 1; i <= FFMIN(m, order); i++)
2113  coef[start] -= coef[start - i * inc] * lpc[i - 1];
2114  } else {
2115  // ma filter
2116  for (m = 0; m < size; m++, start += inc) {
2117  tmp[0] = coef[start];
2118  for (i = 1; i <= FFMIN(m, order); i++)
2119  coef[start] += tmp[i] * lpc[i - 1];
2120  for (i = order; i > 0; i--)
2121  tmp[i] = tmp[i - 1];
2122  }
2123  }
2124  }
2125  }
2126 }
2127 
2128 /**
2129  * Apply windowing and MDCT to obtain the spectral
2130  * coefficient from the predicted sample by LTP.
      *
      * The input is windowed in place (so @p in is overwritten) and is then
      * handed to ac->mdct_ltp.mdct_calc().
      *
      * @param ac  decoder context providing the DSP routines and the LTP MDCT
      * @param out destination buffer passed to mdct_calc()
      * @param in  predicted time-domain samples; indices up to 2047 are accessed
      * @param ics supplies use_kb_window[] and window_sequence[0]
2131  */
2132 static void windowing_and_mdct_ltp(AACContext *ac, float *out,
2133  float *in, IndividualChannelStream *ics)
2134 {
2135  const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2136  const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2137  const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2138  const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2139  // use_kb_window[0] picks KBD vs. sine for the second-half windows, use_kb_window[1] for the *_prev (first-half) windows
2140  if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
2141  ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024); // first half: multiply in[0..1023] by the long window
2142  } else {
2143  memset(in, 0, 448 * sizeof(float)); // LONG_STOP: in[0..447] is zeroed
2144  ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128); // in[448..575] gets the short window; in[576..1023] is left unscaled
2145  }
2146  if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
2147  ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); // second half: long window applied via vector_fmul_reverse
2148  } else {
2149  ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); // LONG_START: in[1472..1599] gets the short window; in[1024..1471] is left unscaled
2150  memset(in + 1024 + 576, 0, 448 * sizeof(float)); // in[1600..2047] is zeroed
2151  }
2152  ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in); // forward transform of the windowed buffer into out
2153 }
2154 
2155 /**
2156  * Apply the long term prediction
      *
      * For frames that are not EIGHT_SHORT_SEQUENCE, a prediction is built from
      * sce->ltp_state (delayed by ltp->lag and scaled by ltp->coef), transformed
      * to the frequency domain, optionally TNS-filtered, and added to
      * sce->coeffs in every scalefactor band flagged in ltp->used[].
      * sce->ret and ac->buf_mdct are used as scratch buffers.
      *
      * NOTE(review): listing line 2158 (the function signature) is missing from
      * this excerpt; the body uses the names ac and sce.
2157  */
2159 {
2160  const LongTermPrediction *ltp = &sce->ics.ltp;
2161  const uint16_t *offsets = sce->ics.swb_offset;
2162  int i, sfb;
2163 
2164  if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { // nothing is done for eight-short frames
2165  float *predTime = sce->ret; // time-domain prediction, built in the channel's output buffer
2166  float *predFreq = ac->buf_mdct; // frequency-domain prediction
2167  int16_t num_samples = 2048;
2168 
2169  if (ltp->lag < 1024)
2170  num_samples = ltp->lag + 1024; // keeps the read index (i + 2048 - lag) below 3072
2171  for (i = 0; i < num_samples; i++)
2172  predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
2173  memset(&predTime[i], 0, (2048 - i) * sizeof(float)); // zero-fill whatever part of the 2048 samples was not predicted
2174 
2175  windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
2176 
2177  if (sce->tns.present)
2178  apply_tns(predFreq, &sce->tns, &sce->ics, 0); // last argument 0: non-decode direction of apply_tns
2179 
2180  for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
2181  if (ltp->used[sfb]) // only bands flagged for LTP receive the prediction
2182  for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
2183  sce->coeffs[i] += predFreq[i];
2184  }
2185 }
2186 
2187 /**
2188  * Update the LTP buffer for next frame
      *
      * Builds 1024 windowed samples in sce->coeffs (reused here as scratch under
      * the name saved_ltp) from sce->saved and ac->buf_mdct, then shifts
      * sce->ltp_state: [1024..2047] moves to [0..1023], sce->ret is copied to
      * [1024..2047] and saved_ltp to [2048..3071].
      *
      * NOTE(review): listing line 2190 (the function signature) is missing from
      * this excerpt; the body uses the names ac and sce.
2189  */
2191 {
2192  IndividualChannelStream *ics = &sce->ics;
2193  float *saved = sce->saved;
2194  float *saved_ltp = sce->coeffs; // coefficient buffer reused as scratch
2195  const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2196  const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2197  int i;
2198 
2199  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2200  memcpy(saved_ltp, saved, 512 * sizeof(float)); // [0..511] from saved; [448..511] is overwritten two lines below
2201  memset(saved_ltp + 576, 0, 448 * sizeof(float)); // [576..1023] zeroed
2202  ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); // [448..511]
2203  for (i = 0; i < 64; i++)
2204  saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i]; // [512..575]: buf_mdct read backwards from index 1023
2205  } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2206  memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(float)); // [0..447] copied without windowing
2207  memset(saved_ltp + 576, 0, 448 * sizeof(float)); // [576..1023] zeroed
2208  ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); // [448..511]
2209  for (i = 0; i < 64; i++)
2210  saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i]; // [512..575]
2211  } else { // LONG_STOP or ONLY_LONG
2212  ac->dsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); // [0..511]
2213  for (i = 0; i < 512; i++)
2214  saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i]; // [512..1023]
2215  }
2216 
2217  memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state)); // shift history; source and destination ranges do not overlap
2218  memcpy(sce->ltp_state+1024, sce->ret, 1024 * sizeof(*sce->ltp_state)); // 1024 samples from the channel's output buffer
2219  memcpy(sce->ltp_state+2048, saved_ltp, 1024 * sizeof(*sce->ltp_state)); // windowed data built above
2220 }
2221 
2222 /**
2223  * Conduct IMDCT and windowing.
      *
      * Transforms sce->coeffs into ac->buf_mdct, overlap-adds with sce->saved
      * to produce sce->ret, and then refills sce->saved for the next call.
      *
      * NOTE(review): listing line 2225 (the function signature) is missing from
      * this excerpt; the body uses the names ac and sce.
2224  */
2226 {
2227  IndividualChannelStream *ics = &sce->ics;
2228  float *in = sce->coeffs;
2229  float *out = sce->ret;
2230  float *saved = sce->saved;
2231  const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
2232  const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
2233  const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2234  float *buf = ac->buf_mdct;
2235  float *temp = ac->temp;
2236  int i;
2237 
2238  // imdct
2239  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2240  for (i = 0; i < 1024; i += 128) // eight transforms, one per 128-coefficient block
2241  ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
2242  } else
2243  ac->mdct.imdct_half(&ac->mdct, buf, in); // one long transform
2244 
2245  /* window overlapping
2246  * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2247  * and long to short transitions are considered to be short to short
2248  * transitions. This leaves just two cases (long to long and short to short)
2249  * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2250  */
2251  if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && // NOTE(review): listing line 2252 (rest of this condition) is missing from the excerpt
2253  ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
2254  } else {
2255  memcpy( out, saved, 448 * sizeof(float)); // out[0..447] is taken from saved unchanged
2256 
2257  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2258  ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64); // first overlap uses the *_prev window
2259  ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
2260  ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
2261  ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
2262  ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64); // goes through temp: only its first 64 floats are copied to out, temp + 64 is reused in the buffer update
2263  memcpy( out + 448 + 4*128, temp, 64 * sizeof(float)); // fills out[960..1023]
2264  } else {
2265  ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
2266  memcpy( out + 576, buf + 64, 448 * sizeof(float)); // out[576..1023] copied straight from buf
2267  }
2268  }
2269 
2270  // buffer update
2271  if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2272  memcpy( saved, temp + 64, 64 * sizeof(float)); // 64 floats from temp + 64 (temp was filled in the overlap step above)
2273  ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
2274  ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
2275  ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
2276  memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); // saved[448..511] from buf[960..1023]
2277  } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2278  memcpy( saved, buf + 512, 448 * sizeof(float));
2279  memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); // saved[448..511] from buf[960..1023]
2280  } else { // LONG_STOP or ONLY_LONG
2281  memcpy( saved, buf + 512, 512 * sizeof(float));
2282  }
2283 }
2284 
2285 /**
2286  * Apply dependent channel coupling (applied before IMDCT).
2287  *
      * Adds gain-scaled coefficients of the coupling element's first channel to
      * target->coeffs, one scalefactor band at a time, skipping bands whose
      * band type is ZERO_BT. Logs an error and does nothing when the object
      * type is AOT_AAC_LTP.
      *
      * NOTE(review): listing line 2290 (start of the signature) is missing from
      * this excerpt; the function name is presumably apply_dependent_coupling.
      *
2288  * @param index index into coupling gain array
2289  */
2291  SingleChannelElement *target,
2292  ChannelElement *cce, int index)
2293 {
2294  IndividualChannelStream *ics = &cce->ch[0].ics; // band layout comes from the coupling element, not from the target
2295  const uint16_t *offsets = ics->swb_offset;
2296  float *dest = target->coeffs;
2297  const float *src = cce->ch[0].coeffs;
2298  int g, i, group, k, idx = 0; // idx counts (group, band) pairs across all window groups
2299  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2300  av_log(ac->avctx, AV_LOG_ERROR,
2301  "Dependent coupling is not supported together with LTP\n");
2302  return;
2303  }
2304  for (g = 0; g < ics->num_window_groups; g++) {
2305  for (i = 0; i < ics->max_sfb; i++, idx++) {
2306  if (cce->ch[0].band_type[idx] != ZERO_BT) {
2307  const float gain = cce->coup.gain[index][idx];
2308  for (group = 0; group < ics->group_len[g]; group++) { // each window of the group sits 128 coefficients apart
2309  for (k = offsets[i]; k < offsets[i + 1]; k++) {
2310  // XXX dsputil-ize
2311  dest[group * 128 + k] += gain * src[group * 128 + k];
2312  }
2313  }
2314  }
2315  }
2316  dest += ics->group_len[g] * 128; // advance both pointers past the windows of this group
2317  src += ics->group_len[g] * 128;
2318  }
2319 }
2320 
2321 /**
2322  * Apply independent channel coupling (applied after IMDCT).
2323  *
      * Adds cce->ch[0].ret, scaled by the single gain cce->coup.gain[index][0],
      * to target->ret.
      *
      * NOTE(review): listing line 2326 (start of the signature) is missing from
      * this excerpt; the function name is presumably apply_independent_coupling.
      *
2324  * @param index index into coupling gain array
2325  */
2327  SingleChannelElement *target,
2328  ChannelElement *cce, int index)
2329 {
2330  int i;
2331  const float gain = cce->coup.gain[index][0]; // only element 0 of the selected gain row is used
2332  const float *src = cce->ch[0].ret;
2333  float *dest = target->ret;
2334  const int len = 1024 << (ac->oc[1].m4ac.sbr == 1); // 2048 samples when m4ac.sbr == 1, otherwise 1024
2335 
2336  for (i = 0; i < len; i++)
2337  dest[i] += gain * src[i];
2338 }
2339 
2340 /**
2341  * channel coupling transformation interface
2342  *
      * Walks every allocated coupling channel element whose coupling_point
      * matches and calls apply_coupling_method for each coupled target that
      * matches (type, elem_id), keeping a running index into the gain lists.
      *
      * NOTE(review): listing line 2345 (start of the signature) is missing from
      * this excerpt; the body uses the names ac and cc (the element being
      * coupled into), and the name is presumably apply_channel_coupling.
      *
2343  * @param apply_coupling_method pointer to (in)dependent coupling function
2344  */
2346  enum RawDataBlockType type, int elem_id,
2347  enum CouplingPoint coupling_point,
2348  void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
2349 {
2350  int i, c;
2351 
2352  for (i = 0; i < MAX_ELEM_ID; i++) {
2353  ChannelElement *cce = ac->che[TYPE_CCE][i];
2354  int index = 0; // position in cce's gain lists; restarts for every coupling element
2355 
2356  if (cce && cce->coup.coupling_point == coupling_point) {
2357  ChannelCoupling *coup = &cce->coup;
2358 
2359  for (c = 0; c <= coup->num_coupled; c++) { // inclusive bound: num_coupled + 1 entries are visited
2360  if (coup->type[c] == type && coup->id_select[c] == elem_id) {
2361  if (coup->ch_select[c] != 1) { // ch_select 0, 2 or 3: couple into ch[0]
2362  apply_coupling_method(ac, &cc->ch[0], cce, index);
2363  if (coup->ch_select[c] != 0) // with ch_select 0 both channels share the same gain index
2364  index++;
2365  }
2366  if (coup->ch_select[c] != 2) // ch_select 0, 1 or 3: couple into ch[1]
2367  apply_coupling_method(ac, &cc->ch[1], cce, index++);
2368  } else
2369  index += 1 + (coup->ch_select[c] == 3); // skip the gain entries of a non-matching target (two when ch_select == 3)
2370  }
2371  }
2372  }
2373 }
2374 
2375 /**
2376  * Convert spectral data to float samples, applying all supported tools as appropriate.
      *
      * Visible steps for each allocated element, in order: LTP (only for
      * AOT_AAC_LTP), TNS, IMDCT and windowing with LTP state update, and SBR
      * when m4ac.sbr > 0.
      *
      * NOTE(review): listing line 2378 (the function signature) and lines 2386,
      * 2400 and 2415 (the statements guarded by the three "type <=" tests) are
      * missing from this excerpt.
2377  */
2379 {
2380  int i, type;
2381  for (type = 3; type >= 0; type--) { // element types are visited from 3 down to 0
2382  for (i = 0; i < MAX_ELEM_ID; i++) {
2383  ChannelElement *che = ac->che[type][i];
2384  if (che) {
2385  if (type <= TYPE_CPE) // NOTE(review): the guarded statement (listing line 2386) is missing here
2387  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
2388  if (che->ch[0].ics.predictor_present) { // only ch[0]'s predictor_present flag is tested, for both channels
2389  if (che->ch[0].ics.ltp.present)
2390  apply_ltp(ac, &che->ch[0]);
2391  if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
2392  apply_ltp(ac, &che->ch[1]);
2393  }
2394  }
2395  if (che->ch[0].tns.present)
2396  apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
2397  if (che->ch[1].tns.present)
2398  apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
2399  if (type <= TYPE_CPE) // NOTE(review): the guarded statement (listing line 2400) is missing here
2401  if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) { // a CCE is only transformed when it couples after the IMDCT
2402  imdct_and_windowing(ac, &che->ch[0]);
2403  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2404  update_ltp(ac, &che->ch[0]);
2405  if (type == TYPE_CPE) {
2406  imdct_and_windowing(ac, &che->ch[1]);
2407  if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
2408  update_ltp(ac, &che->ch[1]);
2409  }
2410  if (ac->oc[1].m4ac.sbr > 0) {
2411  ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
2412  }
2413  }
2414  if (type <= TYPE_CCE) // NOTE(review): the guarded statement (listing line 2415) is missing here
2416  }
2417  }
2418  }
2419 }
2420 
2422 {
      /* Parses an ADTS header with avpriv_aac_parse_header() and, when that
       * returns a positive size, updates ac->oc[1] (channel configuration,
       * sample rate, sampling index, object type) from it. Returns the parser's
       * result, or -7 when setting up the output configuration fails.
       * NOTE(review): listing line 2421 (the function signature) and line 2436
       * are missing from this excerpt; the body uses the names ac and gb. */
2423  int size;
2424  AACADTSHeaderInfo hdr_info;
2425  uint8_t layout_map[MAX_ELEM_ID*4][3];
2426  int layout_map_tags;
2427 
2428  size = avpriv_aac_parse_header(gb, &hdr_info);
2429  if (size > 0) {
2430  if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) { // warn only once per context
2431  // This is 2 for "VLB " audio in NSV files.
2432  // See samples/nsv/vlb_audio.
2433  av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame", 0);
2434  ac->warned_num_aac_frames = 1;
2435  }
2437  if (hdr_info.chan_config) {
2438  ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
2439  if (set_default_channel_config(ac->avctx, layout_map,
2440  &layout_map_tags, hdr_info.chan_config))
2441  return -7;
2442  if (output_configure(ac, layout_map, layout_map_tags,
2443  FFMAX(ac->oc[1].status, OC_TRIAL_FRAME), 0)) // the status passed on is never lower than OC_TRIAL_FRAME
2444  return -7;
2445  } else {
2446  ac->oc[1].m4ac.chan_config = 0;
2447  /**
2448  * dual mono frames in Japanese DTV can have chan_config 0
2449  * WITHOUT specifying PCE.
2450  * thus, set dual mono as default.
2451  */
2452  if (ac->dmono_mode && ac->oc[0].status == OC_NONE) {
2453  layout_map_tags = 2; // two tags: SCE with id 0 and SCE with id 1, both AAC_CHANNEL_FRONT
2454  layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
2455  layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
2456  layout_map[0][1] = 0;
2457  layout_map[1][1] = 1;
2458  if (output_configure(ac, layout_map, layout_map_tags,
2459  OC_TRIAL_FRAME, 0))
2460  return -7;
2461  }
2462  }
2463  ac->oc[1].m4ac.sample_rate = hdr_info.sample_rate;
2464  ac->oc[1].m4ac.sampling_index = hdr_info.sampling_index;
2465  ac->oc[1].m4ac.object_type = hdr_info.object_type;
2466  if (ac->oc[0].status != OC_LOCKED ||
2467  ac->oc[0].m4ac.chan_config != hdr_info.chan_config ||
2468  ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) { // oc[0] not locked, or it disagrees with this header: reset sbr/ps to -1
2469  ac->oc[1].m4ac.sbr = -1;
2470  ac->oc[1].m4ac.ps = -1;
2471  }
2472  if (!hdr_info.crc_absent)
2473  skip_bits(gb, 16); // skip the 16 bits that follow when crc_absent is not set
2474  }
2475  return size;
2476 }
2477 
/**
 * Decode one AAC frame from an already initialised bit reader.
 *
 * When the next 12 bits are 0xfff an ADTS header is parsed first. Syntax
 * elements are then read until TYPE_END, spectral_to_sample() produces the
 * output, and ac->frame is copied into @p data when samples were decoded.
 *
 * NOTE(review): listing lines 2550, 2559, 2606 and 2635 are missing from this
 * excerpt.
 *
 * @param avctx         codec context; its priv_data is used as the AACContext
 * @param data          output frame, accessed as an AVFrame
 * @param got_frame_ptr set to 1 when samples is non-zero, else to 0
 * @param gb            bit reader positioned at the start of the frame
 * @param avpkt         packet whose AV_PKT_DATA_SKIP_SAMPLES side data is
 *                      doubled when multiplier is set
 * @return 0 on success, otherwise the non-zero error value in err
 */
2478 static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
2479  int *got_frame_ptr, GetBitContext *gb, AVPacket *avpkt)
2480 {
2481  AACContext *ac = avctx->priv_data;
2482  ChannelElement *che = NULL, *che_prev = NULL;
2483  enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
2484  int err, elem_id;
2485  int samples = 0, multiplier, audio_found = 0, pce_found = 0;
2486  int is_dmono, sce_count = 0;
2487 
2488  if (show_bits(gb, 12) == 0xfff) { // 12 set bits: treat what follows as an ADTS header
2489  if (parse_adts_frame_header(ac, gb) < 0) {
2490  av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
2491  err = -1;
2492  goto fail;
2493  }
2494  if (ac->oc[1].m4ac.sampling_index > 12) {
2495  av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
2496  err = -1;
2497  goto fail;
2498  }
2499  }
2500 
2501  if (frame_configure_elements(avctx) < 0) {
2502  err = -1;
2503  goto fail;
2504  }
2505 
2506  ac->tags_mapped = 0;
2507  // parse
2508  while ((elem_type = get_bits(gb, 3)) != TYPE_END) { // 3-bit element type, then a 4-bit element id
2509  elem_id = get_bits(gb, 4);
2510 
2511  if (elem_type < TYPE_DSE) { // element types below TYPE_DSE need an allocated channel element
2512  if (!(che=get_che(ac, elem_type, elem_id))) {
2513  av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
2514  elem_type, elem_id);
2515  err = -1;
2516  goto fail;
2517  }
2518  samples = 1024;
2519  }
2520 
2521  switch (elem_type) {
2522 
2523  case TYPE_SCE:
2524  err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2525  audio_found = 1;
2526  sce_count++; // counted for the dual-mono test after the loop
2527  break;
2528 
2529  case TYPE_CPE:
2530  err = decode_cpe(ac, gb, che);
2531  audio_found = 1;
2532  break;
2533 
2534  case TYPE_CCE:
2535  err = decode_cce(ac, gb, che); // note: audio_found is not set for a CCE
2536  break;
2537 
2538  case TYPE_LFE:
2539  err = decode_ics(ac, &che->ch[0], gb, 0, 0);
2540  audio_found = 1;
2541  break;
2542 
2543  case TYPE_DSE:
2544  err = skip_data_stream_element(ac, gb);
2545  break;
2546 
2547  case TYPE_PCE: {
2548  uint8_t layout_map[MAX_ELEM_ID*4][3];
2549  int tags;
2551  tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb);
2552  if (tags < 0) {
2553  err = tags;
2554  break;
2555  }
2556  if (pce_found) { // only the first PCE of a frame is applied
2557  av_log(avctx, AV_LOG_ERROR,
2558  "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2560  } else {
2561  err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1);
2562  if (!err)
2563  ac->oc[1].m4ac.chan_config = 0;
2564  pce_found = 1;
2565  }
2566  break;
2567  }
2568 
2569  case TYPE_FIL:
2570  if (elem_id == 15) // for a fill element elem_id is a byte count; 15 means an 8-bit extension follows
2571  elem_id += get_bits(gb, 8) - 1;
2572  if (get_bits_left(gb) < 8 * elem_id) {
2573  av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
2574  err = -1;
2575  goto fail;
2576  }
2577  while (elem_id > 0) // the payload decoder's return value is subtracted until the count is used up
2578  elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
2579  err = 0; /* FIXME */
2580  break;
2581 
2582  default:
2583  err = -1; /* should not happen, but keeps compiler happy */
2584  break;
2585  }
2586 
2587  che_prev = che; // remembered for a following fill element's extension payload
2588  elem_type_prev = elem_type;
2589 
2590  if (err)
2591  goto fail;
2592 
2593  if (get_bits_left(gb) < 3) { // not enough bits left to read the next element type
2594  av_log(avctx, AV_LOG_ERROR, overread_err);
2595  err = -1;
2596  goto fail;
2597  }
2598  }
2599 
2600  spectral_to_sample(ac);
2601 
2602  multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0; // 1 only when sbr == 1 and ext_sample_rate exceeds sample_rate
2603  samples <<= multiplier;
2604  /* for dual-mono audio (SCE + SCE) */
2605  is_dmono = ac->dmono_mode && sce_count == 2 && // NOTE(review): listing line 2606 (rest of this expression) is missing from the excerpt
2607 
2608  if (samples) {
2609  ac->frame.nb_samples = samples;
2610  *(AVFrame *)data = ac->frame; // struct copy of the decoder's frame into the caller's AVFrame
2611  }
2612  *got_frame_ptr = !!samples;
2613 
2614  if (is_dmono) {
2615  if (ac->dmono_mode == 1)
2616  ((AVFrame *)data)->data[1] =((AVFrame *)data)->data[0]; // mode 1: both plane pointers refer to data[0]
2617  else if (ac->dmono_mode == 2)
2618  ((AVFrame *)data)->data[0] =((AVFrame *)data)->data[1]; // mode 2: both plane pointers refer to data[1]
2619  }
2620 
2621  if (ac->oc[1].status && audio_found) {
2622  avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier;
2623  avctx->frame_size = samples;
2624  ac->oc[1].status = OC_LOCKED;
2625  }
2626 
2627  if (multiplier) {
2628  int side_size;
2629  uint32_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
2630  if (side && side_size>=4)
2631  AV_WL32(side, 2*AV_RL32(side)); // double the first 32-bit little-endian value of the side data
2632  }
2633  return 0;
2634 fail: // NOTE(review): listing line 2635 is missing between this label and the return
2636  return err;
2637 }
2638 
/**
 * Decode one packet of AAC data.
 *
 * Selects the dual-mono mode from packet side data or the force_dmono_mode
 * option, wraps the packet in a bit reader and calls aac_decode_frame_int().
 *
 * NOTE(review): listing lines 2651, 2655, 2661, 2666 and 2670 are missing from
 * this excerpt (among them the side-data type arguments).
 *
 * @return a negative error from aac_decode_frame_int(), AVERROR(ENOMEM) or
 *         AVERROR_INVALIDDATA from the extradata branch, otherwise a byte
 *         count: the bytes consumed when a non-zero byte follows them, or the
 *         whole packet size when only zero bytes (or nothing) remain
 */
2639 static int aac_decode_frame(AVCodecContext *avctx, void *data,
2640  int *got_frame_ptr, AVPacket *avpkt)
2641 {
2642  AACContext *ac = avctx->priv_data;
2643  const uint8_t *buf = avpkt->data;
2644  int buf_size = avpkt->size;
2645  GetBitContext gb;
2646  int buf_consumed;
2647  int buf_offset;
2648  int err;
2649  int new_extradata_size;
2650  const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
2652  &new_extradata_size);
2653  int jp_dualmono_size;
2654  const uint8_t *jp_dualmono = av_packet_get_side_data(avpkt,
2656  &jp_dualmono_size);
2657 
2658  if (new_extradata && 0) { // the "&& 0" makes this whole branch unreachable
2659  av_free(avctx->extradata);
2660  avctx->extradata = av_mallocz(new_extradata_size +
2662  if (!avctx->extradata)
2663  return AVERROR(ENOMEM);
2664  avctx->extradata_size = new_extradata_size;
2665  memcpy(avctx->extradata, new_extradata, new_extradata_size);
2667  if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
2668  avctx->extradata,
2669  avctx->extradata_size*8, 1) < 0) {
2671  return AVERROR_INVALIDDATA;
2672  }
2673  }
2674 
2675  ac->dmono_mode = 0;
2676  if (jp_dualmono && jp_dualmono_size > 0)
2677  ac->dmono_mode = 1 + *jp_dualmono; // first side-data byte plus one
2678  if (ac->force_dmono_mode >= 0)
2679  ac->dmono_mode = ac->force_dmono_mode; // a non-negative option value overrides the side data
2680 
2681  init_get_bits(&gb, buf, buf_size * 8);
2682 
2683  if ((err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb, avpkt)) < 0)
2684  return err;
2685 
2686  buf_consumed = (get_bits_count(&gb) + 7) >> 3; // bits read, rounded up to whole bytes
2687  for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++) // look for the first non-zero byte after the consumed data
2688  if (buf[buf_offset])
2689  break;
2690 
2691  return buf_size > buf_offset ? buf_consumed : buf_size; // trailing zero bytes are reported as consumed too
2692 }
2693 
2695 {
      /* Releases per-element resources and the three MDCT contexts; always
       * returns 0.
       * NOTE(review): listing line 2694 (the function signature) is missing from
       * this excerpt; the body uses the name avctx. */
2696  AACContext *ac = avctx->priv_data;
2697  int i, type;
2698 
2699  for (i = 0; i < MAX_ELEM_ID; i++) {
2700  for (type = 0; type < 4; type++) {
2701  if (ac->che[type][i])
2702  ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr); // close the SBR context only for allocated elements
2703  av_freep(&ac->che[type][i]); // called unconditionally, also for unallocated slots
2704  }
2705  }
2706 
2707  ff_mdct_end(&ac->mdct);
2708  ff_mdct_end(&ac->mdct_small);
2709  ff_mdct_end(&ac->mdct_ltp);
2710  return 0;
2711 }
2712 
2713 
2714 #define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
2715 
2716 struct LATMContext {
2717  AACContext aac_ctx; ///< containing AACContext
2718  int initialized; ///< initialized after a valid extradata was seen
2719 
2720  // parser data
2721  int audio_mux_version_A; ///< LATM syntax version
2722  int frame_length_type; ///< 3-bit value from the stream mux config: 0 = length read per payload, 1 = fixed frame_length, other values yield a payload length of 0
2723  int frame_length; ///< frame length for fixed frame length (read as 9 bits)
2724 };
2725 
/**
 * Read a variable-width LATM value.
 *
 * A 2-bit field gives the number of bytes minus one; that many bytes
 * (8 to 32 bits) are then read and returned.
 */
2726 static inline uint32_t latm_get_value(GetBitContext *b)
2727 {
2728  int length = get_bits(b, 2); // 0..3
2729 
2730  return get_bits_long(b, (length+1)*8); // reads 8, 16, 24 or 32 bits
2731 }
2732 
2734  GetBitContext *gb, int asclen)
2735 {
      /* Parses the audio specific config at the current (byte-aligned) bit
       * position into a local MPEG4AudioConfig and, when the context is not yet
       * initialised or the sample rate / channel configuration changed, copies
       * the config bytes into avctx->extradata and clears latmctx->initialized.
       * Returns the number of bits consumed, or a negative AVERROR code.
       * A non-zero asclen bounds the config length in bits and enables
       * sync_extension; asclen == 0 means "use all remaining bits".
       * NOTE(review): listing lines 2733 (start of the signature), 2750 and 2778
       * are missing from this excerpt. */
2736  AACContext *ac = &latmctx->aac_ctx;
2737  AVCodecContext *avctx = ac->avctx;
2738  MPEG4AudioConfig m4ac = { 0 };
2739  int config_start_bit = get_bits_count(gb);
2740  int sync_extension = 0;
2741  int bits_consumed, esize;
2742 
2743  if (asclen) {
2744  sync_extension = 1;
2745  asclen = FFMIN(asclen, get_bits_left(gb)); // never trust a length beyond what the reader still holds
2746  } else
2747  asclen = get_bits_left(gb);
2748 
2749  if (config_start_bit % 8) { // the config is parsed through a byte pointer below, so it must start on a byte boundary
2751  "Non-byte-aligned audio-specific config", 1);
2752  return AVERROR_PATCHWELCOME;
2753  }
2754  if (asclen <= 0)
2755  return AVERROR_INVALIDDATA;
2756  bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
2757  gb->buffer + (config_start_bit / 8),
2758  asclen, sync_extension); // NULL context, local m4ac: ac->oc[1].m4ac is not written by this call
2759 
2760  if (bits_consumed < 0)
2761  return AVERROR_INVALIDDATA;
2762 
2763  if (!latmctx->initialized ||
2764  ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
2765  ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
2766 
2767  if(latmctx->initialized) {
2768  av_log(avctx, AV_LOG_INFO, "audio config changed\n");
2769  } else {
2770  av_log(avctx, AV_LOG_INFO, "initializing latmctx\n");
2771  }
2772  latmctx->initialized = 0; // initialized is set back to 1 in latm_decode_frame after the stored extradata decodes
2773 
2774  esize = (bits_consumed+7) / 8; // config size rounded up to whole bytes
2775 
2776  if (avctx->extradata_size < esize) { // replace the buffer only when the current one is too small
2777  av_free(avctx->extradata);
2779  if (!avctx->extradata)
2780  return AVERROR(ENOMEM);
2781  }
2782 
2783  avctx->extradata_size = esize;
2784  memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
2785  memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE); // zero the padding that follows the config bytes
2786  }
2787  skip_bits_long(gb, bits_consumed); // advance the reader past the config that was parsed via the byte pointer
2788 
2789  return bits_consumed;
2790 }
2791 
/**
 * Parse a LATM stream mux configuration.
 *
 * Only a single program with a single layer is accepted. When
 * audio_mux_version_A is non-zero nothing further is parsed and 0 is returned.
 *
 * NOTE(review): listing lines 2810 and 2819 are missing from this excerpt.
 *
 * @return 0 on success, AVERROR_PATCHWELCOME for multiple programs or layers,
 *         or the negative error from latm_decode_audio_specific_config()
 */
2792 static int read_stream_mux_config(struct LATMContext *latmctx,
2793  GetBitContext *gb)
2794 {
2795  int ret, audio_mux_version = get_bits(gb, 1);
2796 
2797  latmctx->audio_mux_version_A = 0;
2798  if (audio_mux_version)
2799  latmctx->audio_mux_version_A = get_bits(gb, 1); // the version A bit is only read when audio_mux_version is set
2800 
2801  if (!latmctx->audio_mux_version_A) {
2802 
2803  if (audio_mux_version)
2804  latm_get_value(gb); // taraFullness
2805 
2806  skip_bits(gb, 1); // allStreamSameTimeFraming
2807  skip_bits(gb, 6); // numSubFrames
2808  // any non-zero program count is rejected
2809  if (get_bits(gb, 4)) { // numPrograms
2811  "Multiple programs", 1);
2812  return AVERROR_PATCHWELCOME;
2813  }
2814 
2815  // for each program (which there is only one in DVB)
2816 
2817  // for each layer (which there is only one in DVB)
2818  if (get_bits(gb, 3)) { // numLayer
2820  "Multiple layers", 1);
2821  return AVERROR_PATCHWELCOME;
2822  }
2823 
2824  // for all but first stream: use_same_config = get_bits(gb, 1);
2825  if (!audio_mux_version) {
2826  if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0) // length 0: the config may use all remaining bits
2827  return ret;
2828  } else {
2829  int ascLen = latm_get_value(gb);
2830  if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
2831  return ret;
2832  ascLen -= ret; // ret is the number of bits the config consumed
2833  skip_bits_long(gb, ascLen); // skip whatever is left of the announced length
2834  }
2835 
2836  latmctx->frame_length_type = get_bits(gb, 3);
2837  switch (latmctx->frame_length_type) { // value 2 has no case and reads nothing
2838  case 0:
2839  skip_bits(gb, 8); // latmBufferFullness
2840  break;
2841  case 1:
2842  latmctx->frame_length = get_bits(gb, 9);
2843  break;
2844  case 3:
2845  case 4:
2846  case 5:
2847  skip_bits(gb, 6); // CELP frame length table index
2848  break;
2849  case 6:
2850  case 7:
2851  skip_bits(gb, 1); // HVXC frame length table index
2852  break;
2853  }
2854 
2855  if (get_bits(gb, 1)) { // other data
2856  if (audio_mux_version) {
2857  latm_get_value(gb); // other_data_bits
2858  } else {
2859  int esc;
2860  do { // 1-bit escape flag followed by 8 skipped bits, repeated while the flag is set
2861  esc = get_bits(gb, 1);
2862  skip_bits(gb, 8);
2863  } while (esc);
2864  }
2865  }
2866 
2867  if (get_bits(gb, 1)) // crc present
2868  skip_bits(gb, 8); // config_crc
2869  }
2870 
2871  return 0;
2872 }
2873 
2875 {
      /* Returns the payload length for the current frame_length_type: the sum
       * of length bytes for type 0, ctx->frame_length for type 1, and 0 for
       * every other type (types 3, 5 and 7 first skip 2 bits).
       * NOTE(review): listing line 2874 (the function signature) is missing from
       * this excerpt; the body uses the names ctx and gb. */
2876  uint8_t tmp;
2877 
2878  if (ctx->frame_length_type == 0) {
2879  int mux_slot_length = 0;
2880  do { // bytes are summed; a byte of 255 means another length byte follows
2881  tmp = get_bits(gb, 8);
2882  mux_slot_length += tmp;
2883  } while (tmp == 255);
2884  return mux_slot_length;
2885  } else if (ctx->frame_length_type == 1) {
2886  return ctx->frame_length;
2887  } else if (ctx->frame_length_type == 3 ||
2888  ctx->frame_length_type == 5 ||
2889  ctx->frame_length_type == 7) {
2890  skip_bits(gb, 2); // mux_slot_length_coded
2891  }
2892  return 0;
2893 }
2894 
/**
 * Parse the start of an audio mux element.
 *
 * Reads the use_same_mux bit, parses a new stream mux config when it is
 * clear, and for audio_mux_version_A == 0 checks the announced payload length
 * against the bits left in the reader.
 *
 * @return 0 on success, AVERROR(EAGAIN) when the previous config is to be
 *         reused but no extradata exists, AVERROR_INVALIDDATA on a length
 *         mismatch, or the negative error from read_stream_mux_config()
 */
2895 static int read_audio_mux_element(struct LATMContext *latmctx,
2896  GetBitContext *gb)
2897 {
2898  int err;
2899  uint8_t use_same_mux = get_bits(gb, 1);
2900  if (!use_same_mux) {
2901  if ((err = read_stream_mux_config(latmctx, gb)) < 0)
2902  return err;
2903  } else if (!latmctx->aac_ctx.avctx->extradata) { // reuse requested, but no config has been stored yet
2904  av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
2905  "no decoder config found\n");
2906  return AVERROR(EAGAIN);
2907  }
2908  if (latmctx->audio_mux_version_A == 0) {
2909  int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
2910  if (mux_slot_length_bytes * 8 > get_bits_left(gb)) { // payload claims more bits than the reader holds
2911  av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
2912  return AVERROR_INVALIDDATA;
2913  } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) { // more than 256 bits would remain after the payload
2914  av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2915  "frame length mismatch %d << %d\n",
2916  mux_slot_length_bytes * 8, get_bits_left(gb));
2917  return AVERROR_INVALIDDATA;
2918  }
2919  }
2920  return 0;
2921 }
2922 
2923 
/**
 * Decode one LOAS/LATM packet.
 *
 * Checks the 11-bit sync word, reads the 13-bit length, parses the audio mux
 * element, initialises the AAC decoder from avctx->extradata when needed, and
 * then decodes the payload with aac_decode_frame_int().
 *
 * NOTE(review): listing line 2950 is missing from this excerpt.
 *
 * @return muxlength (the 13-bit length field plus 3) on success, avpkt->size
 *         when no extradata is available yet, or a negative error code
 */
2924 static int latm_decode_frame(AVCodecContext *avctx, void *out,
2925  int *got_frame_ptr, AVPacket *avpkt)
2926 {
2927  struct LATMContext *latmctx = avctx->priv_data;
2928  int muxlength, err;
2929  GetBitContext gb;
2930 
2931  init_get_bits(&gb, avpkt->data, avpkt->size * 8);
2932 
2933  // check for LOAS sync word
2934  if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
2935  return AVERROR_INVALIDDATA;
2936 
2937  muxlength = get_bits(&gb, 13) + 3; // length field plus 3 (the sync word and length field read above total 24 bits)
2938  // not enough data, the parser should have sorted this out
2939  if (muxlength > avpkt->size)
2940  return AVERROR_INVALIDDATA;
2941 
2942  if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
2943  return err;
2944 
2945  if (!latmctx->initialized) {
2946  if (!avctx->extradata) { // no config seen yet: report the packet as consumed without output
2947  *got_frame_ptr = 0;
2948  return avpkt->size;
2949  } else {
2951  if ((err = decode_audio_specific_config(
2952  &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
2953  avctx->extradata, avctx->extradata_size*8, 1)) < 0) {
2954  pop_output_configuration(&latmctx->aac_ctx);
2955  return err;
2956  }
2957  latmctx->initialized = 1;
2958  }
2959  }
2960 
2961  if (show_bits(&gb, 12) == 0xfff) { // an ADTS-style 0xfff prefix is treated as an error in a LATM payload
2962  av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
2963  "ADTS header detected, probably as result of configuration "
2964  "misparsing\n");
2965  return AVERROR_INVALIDDATA;
2966  }
2967 
2968  if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt)) < 0)
2969  return err;
2970 
2971  return muxlength;
2972 }
2973 
2975 {
      /* Runs aac_decode_init() and returns its result. When extradata is
       * present, latmctx->initialized becomes 1 if that call returned 0 and
       * 0 otherwise.
       * NOTE(review): listing line 2974 (the function signature) is missing from
       * this excerpt; the body uses the name avctx. */
2976  struct LATMContext *latmctx = avctx->priv_data;
2977  int ret = aac_decode_init(avctx);
2978 
2979  if (avctx->extradata_size > 0)
2980  latmctx->initialized = !ret;
2981 
2982  return ret;
2983 }
2984 
2985 /**
2986  * AVOptions for Japanese DTV specific extensions (ADTS only)
      *
      * A single integer option, "dual_mono_mode", is stored in
      * AACContext.force_dmono_mode (default -1, range -1..2); the four
      * AV_OPT_TYPE_CONST entries are named values in the same unit.
2987  */
2988 #define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
2989 static const AVOption options[] = {
2990  {"dual_mono_mode", "Select the channel to decode for dual mono",
2991  offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
2992  AACDEC_FLAGS, "dual_mono_mode"},
2993 
2994  {"auto", "autoselection", 0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"}, // -1: aac_decode_frame leaves the mode taken from side data untouched
2995  {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
2996  {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
2997  {"both", "Select both channels", 0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
2998 
2999  {NULL}, // terminating entry
3000 };
3001 
/** AVClass that attaches the options[] table to the AAC decoder. */
3002 static const AVClass aac_decoder_class = {
3003  .class_name = "AAC decoder",
3004  .item_name = av_default_item_name,
3005  .option = options,
3006  .version = LIBAVUTIL_VERSION_INT,
3007 };
3008 
3010  .name = "aac", // NOTE(review): listing lines 3009 (the declaration, presumably of ff_aac_decoder), 3015, 3016 and 3019 are missing from this excerpt
3011  .type = AVMEDIA_TYPE_AUDIO,
3012  .id = AV_CODEC_ID_AAC,
3013  .priv_data_size = sizeof(AACContext),
3014  .init = aac_decode_init,
3017  .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
3018  .sample_fmts = (const enum AVSampleFormat[]) {
3020  },
3021  .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3022  .channel_layouts = aac_channel_layout,
3023  .flush = flush,
3024  .priv_class = &aac_decoder_class, // attaches the dual_mono_mode option table to this decoder
3025 };
3026 
3027 /*
3028  Note: This decoder filter is intended to decode LATM streams transferred
3029  in MPEG transport streams which only contain one program.
3030  To do a more complex LATM demuxing a separate LATM demuxer should be used.
3031 */
3033  .name = "aac_latm", // NOTE(review): listing lines 3032 (the declaration, presumably of ff_aac_latm_decoder) and 3042 are missing from this excerpt
3034  .type = AVMEDIA_TYPE_AUDIO,
3035  .id = AV_CODEC_ID_AAC_LATM,
3036  .priv_data_size = sizeof(struct LATMContext), // LATMContext begins with the AACContext member aac_ctx
3037  .init = latm_decode_init,
3038  .close = aac_decode_close,
3039  .decode = latm_decode_frame,
3040  .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
3041  .sample_fmts = (const enum AVSampleFormat[]) {
3043  },
3044  .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
3045  .channel_layouts = aac_channel_layout,
3046  .flush = flush, // no .priv_class is set in the visible lines of this descriptor
3047 };