FFmpeg
aacpsy.c
Go to the documentation of this file.
1 /*
2  * AAC encoder psychoacoustic model
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder psychoacoustic model
25  */
26 
27 #include "libavutil/attributes.h"
28 #include "libavutil/ffmath.h"
29 #include "libavutil/mem.h"
30 
31 #include "avcodec.h"
32 #include "aac.h"
33 #include "psymodel.h"
34 
35 /***********************************
36  * TODOs:
37  * try other bitrate controlling mechanism (maybe use ratecontrol.c?)
38  * control quality for quality-based output
39  **********************************/
40 
41 /**
42  * constants for 3GPP AAC psychoacoustic model
43  * @{
44  */
45 #define PSY_3GPP_THR_SPREAD_HI 1.5f // spreading factor for low-to-hi threshold spreading (15 dB/Bark)
46 #define PSY_3GPP_THR_SPREAD_LOW 3.0f // spreading factor for hi-to-low threshold spreading (30 dB/Bark)
47 /* spreading factor for low-to-hi energy spreading, long block, > 22kbps/channel (20dB/Bark) */
48 #define PSY_3GPP_EN_SPREAD_HI_L1 2.0f
49 /* spreading factor for low-to-hi energy spreading, long block, <= 22kbps/channel (15dB/Bark) */
50 #define PSY_3GPP_EN_SPREAD_HI_L2 1.5f
51 /* spreading factor for low-to-hi energy spreading, short block (15 dB/Bark) */
52 #define PSY_3GPP_EN_SPREAD_HI_S 1.5f
53 /* spreading factor for hi-to-low energy spreading, long block (30dB/Bark) */
54 #define PSY_3GPP_EN_SPREAD_LOW_L 3.0f
55 /* spreading factor for hi-to-low energy spreading, short block (20dB/Bark) */
56 #define PSY_3GPP_EN_SPREAD_LOW_S 2.0f
57 
58 #define PSY_3GPP_RPEMIN 0.01f /* pre-echo control: smallest fraction of the current threshold that is kept */
59 #define PSY_3GPP_RPELEV 2.0f /* pre-echo control: multiplier applied to the previous frame's thr_quiet */
60 
61 #define PSY_3GPP_C1 3.0f /* log2(8); PE values below this are linearized */
62 #define PSY_3GPP_C2 1.3219281f /* log2(2.5); offset of the linearized PE */
63 #define PSY_3GPP_C3 0.55935729f /* 1 - C2 / C1; slope of the linearized PE */
64 
65 #define PSY_SNR_1DB 7.9432821e-1f /* -1dB; upper clipping bound for min_snr */
66 #define PSY_SNR_25DB 3.1622776e-3f /* -25dB; lower clipping bound for min_snr */
67 
68 #define PSY_3GPP_SAVE_SLOPE_L -0.46666667f /* bit-save slope, long windows */
69 #define PSY_3GPP_SAVE_SLOPE_S -0.36363637f /* bit-save slope, short windows */
70 #define PSY_3GPP_SAVE_ADD_L -0.84285712f /* bit-save offset added to the fill level, long windows */
71 #define PSY_3GPP_SAVE_ADD_S -0.75f /* bit-save offset added to the fill level, short windows */
72 #define PSY_3GPP_SPEND_SLOPE_L 0.66666669f /* bit-spend slope, long windows */
73 #define PSY_3GPP_SPEND_SLOPE_S 0.81818181f /* bit-spend slope, short windows */
74 #define PSY_3GPP_SPEND_ADD_L -0.35f /* bit-spend offset added to the fill level, long windows */
75 #define PSY_3GPP_SPEND_ADD_S -0.26111111f /* bit-spend offset added to the fill level, short windows */
76 #define PSY_3GPP_CLIP_LO_L 0.2f /* lower clip of the relative reservoir fill level, long windows */
77 #define PSY_3GPP_CLIP_LO_S 0.2f /* lower clip of the relative reservoir fill level, short windows */
78 #define PSY_3GPP_CLIP_HI_L 0.95f /* upper clip of the relative reservoir fill level, long windows */
79 #define PSY_3GPP_CLIP_HI_S 0.75f /* upper clip of the relative reservoir fill level, short windows */
80 
81 #define PSY_3GPP_AH_THR_LONG 0.5f /* hole-avoidance threshold on spread energy, long windows */
82 #define PSY_3GPP_AH_THR_SHORT 0.63f /* hole-avoidance threshold on spread energy, short windows */
83 
84 #define PSY_PE_FORGET_SLOPE 511 /* weight of the old pe.min in the running minimum-PE average */
85 
86 enum {
90 };
91 
92 #define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f) /* convert a bit count to perceptual entropy */
93 #define PSY_3GPP_PE_TO_BITS(bits) ((bits) / 1.18f) /* inverse conversion; despite its name the argument is a PE value */
94 
95 /* LAME psy model constants */
96 #define PSY_LAME_FIR_LEN 21 ///< LAME psy model FIR order
97 #define AAC_BLOCK_SIZE_LONG 1024 ///< long block size
98 #define AAC_BLOCK_SIZE_SHORT 128 ///< short block size
99 #define AAC_NUM_BLOCKS_SHORT 8 ///< number of blocks in a short sequence
100 #define PSY_LAME_NUM_SUBBLOCKS 2 ///< Number of sub-blocks in each short block
101 
102 /**
103  * @}
104  */
105 
106 /**
107  * information for single band used by 3GPP TS26.403-inspired psychoacoustic model
108  */
109 typedef struct AacPsyBand{
110  float energy; ///< band energy (sum of the squared spectral coefficients of the band)
111  float thr; ///< energy threshold; starts as a fixed fraction of the energy and is then spread, floored and reduced
112  float thr_quiet; ///< threshold after applying the threshold in quiet, before pre-echo control
113  float nz_lines; ///< estimated number of non-zero spectral lines (derived from the form factor)
114  float active_lines; ///< number of active spectral lines; zero when the energy does not exceed thr
115  float pe; ///< perceptual entropy
116  float pe_const; ///< constant part of the PE calculation
117  float norm_fac; ///< normalization factor for linearization (active_lines / thr)
118  int avoid_holes; ///< hole avoidance flag (one of the PSY_3GPP_AH_* values)
119 }AacPsyBand;
120 
121 /**
122  * single/pair channel context for psychoacoustic model
123  */
124 typedef struct AacPsyChannel{
125  AacPsyBand band[128]; ///< bands information
126  AacPsyBand prev_band[128]; ///< bands information from the previous frame
127 
128  float win_energy; ///< sliding average of channel energy
129  float iir_state[2]; ///< hi-pass IIR filter state
130  uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
131  enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
132  /* LAME psy model specific members */
133  float attack_threshold; ///< attack threshold for this channel
135  int prev_attack; ///< attack value for the last short block in the previous sequence
136  int next_attack0_zero; ///< whether attack[0] of the next frame is zero
138 
139 /**
140  * psychoacoustic model frame type-dependent coefficients
141  */
142 typedef struct AacPsyCoeffs{
143  float ath; ///< absolute threshold of hearing for the band: minimum ATH over its lines minus the reference minimum
144  float barks; ///< Bark value of the band: mean of the Bark values at its last line and at the previous band's last line
145  float spread_low[2]; ///< spreading factor for high-to-low spreading: [0] threshold, [1] energy
146  float spread_hi [2]; ///< spreading factor for low-to-high spreading: [0] threshold, [1] energy
147  float min_snr; ///< minimal SNR, clipped to the range [PSY_SNR_25DB, PSY_SNR_1DB] at init
148 }AacPsyCoeffs;
149 
150 /**
151  * 3GPP TS26.403-inspired psychoacoustic model specific data
152  */
153 typedef struct AacPsyContext{
154  int chan_bitrate; ///< bitrate per channel
155  int frame_bits; ///< average bits per frame
156  int fill_level; ///< bit reservoir fill level
157  struct {
158  float min; ///< minimum allowed PE for bit factor calculation
159  float max; ///< maximum allowed PE for bit factor calculation
160  float previous; ///< allowed PE of the previous frame
161  float correction; ///< PE correction factor
162  } pe;
165  float global_quality; ///< normalized global quality taken from avctx
167 
168 /**
169  * LAME psy model preset struct
170  */
171 typedef struct PsyLamePreset {
172  int quality; ///< Quality to map the rest of the values to.
173  /* This is overloaded to be both kbps per channel in ABR mode, and
174  * requested quality in constant quality mode.
175  */
176  float st_lrm; ///< short threshold for L, R, and M channels; returned as the attack threshold by the ABR lookup
177 } PsyLamePreset;
178 
179 /**
180  * LAME psy model preset table for ABR
181  */
182 static const PsyLamePreset psy_abr_map[] = {
183 /* TODO: Tuning. These were taken from LAME. */
184 /* kbps/ch st_lrm */
185  { 8, 7.60},
186  { 16, 7.60},
187  { 24, 7.60},
188  { 32, 7.60},
189  { 40, 7.60},
190  { 48, 7.60},
191  { 56, 7.60},
192  { 64, 7.40},
193  { 80, 7.00},
194  { 96, 6.60},
195  {112, 6.20},
196  {128, 6.20},
197  {160, 6.20} /* index 12: the entry the ABR lookup starts from as its default */
198 };
199 
200 /**
201 * LAME psy model preset table for constant quality
202 */
203 static const PsyLamePreset psy_vbr_map[] = {
204 /* vbr_q st_lrm (every quality level currently shares the same threshold) */
205  { 0, 4.20},
206  { 1, 4.20},
207  { 2, 4.20},
208  { 3, 4.20},
209  { 4, 4.20},
210  { 5, 4.20},
211  { 6, 4.20},
212  { 7, 4.20},
213  { 8, 4.20},
214  { 9, 4.20},
215  {10, 4.20}
216 };
217 
218 /**
219  * LAME psy model FIR coefficient table
220  */
221 static const float psy_fir_coeffs[] = { /* 10 taps, each pre-scaled by 2; read in even/odd pairs by psy_hp_filter() */
222  -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
223  -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
224  -5.52212e-17 * 2, -0.313819 * 2
225 };
226 
227 /**
228  * Calculate the ABR attack threshold from the above LAME psymodel table.
229  */
231 {
232  /* Assume max bitrate to start with */
233  int lower_range = 12, upper_range = 12;
234  int lower_range_kbps = psy_abr_map[12].quality;
235  int upper_range_kbps = psy_abr_map[12].quality;
236  int i;
237 
238  /* Determine which bitrates the value specified falls between.
239  * If the loop ends without breaking our above assumption of 320kbps was correct.
240  */
241  for (i = 1; i < 13; i++) {
243  upper_range = i;
244  upper_range_kbps = psy_abr_map[i ].quality;
245  lower_range = i - 1;
246  lower_range_kbps = psy_abr_map[i - 1].quality;
247  break; /* Upper range found */
248  }
249  }
250 
251  /* Determine which range the value specified is closer to */
252  if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
253  return psy_abr_map[lower_range].st_lrm;
254  return psy_abr_map[upper_range].st_lrm;
255 }
256 
257 /**
258  * LAME psy model specific initialization
259  */
261 {
262  int i, j;
263 
264  for (i = 0; i < avctx->ch_layout.nb_channels; i++) {
265  AacPsyChannel *pch = &ctx->ch[i];
266 
267  if (avctx->flags & AV_CODEC_FLAG_QSCALE)
269  else
271 
272  for (j = 0; j < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; j++)
273  pch->prev_energy_subshort[j] = 10.0f;
274  }
275 }
276 
277 /**
278  * Calculate Bark value for given line.
279  */
280 static av_cold float calc_bark(float f)
281 {
282  return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
283 }
284 
285 #define ATH_ADD 4
286 /**
287  * Calculate ATH value for given frequency.
288  * Borrowed from Lame.
289  */
290 static av_cold float ath(float f, float add)
291 {
292  f /= 1000.0f;
293  return 3.64 * pow(f, -0.8)
294  - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))
295  + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))
296  + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
297 }
298 
300  AacPsyContext *pctx;
301  float bark;
302  int i, j, g, start;
303  float prev, minscale, minath, minsnr, pe_min;
304  int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->ch_layout.nb_channels);
305 
306  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
307  const float num_bark = calc_bark((float)bandwidth);
308 
309  if (bandwidth <= 0)
310  return AVERROR(EINVAL);
311 
312  ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
313  if (!ctx->model_priv_data)
314  return AVERROR(ENOMEM);
315  pctx = ctx->model_priv_data;
316  pctx->global_quality = (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) * 0.01f;
317 
318  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
319  /* Use the target average bitrate to compute spread parameters */
320  chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120));
321  }
322 
323  pctx->chan_bitrate = chan_bitrate;
324  pctx->frame_bits = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate);
325  pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
326  pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
327  ctx->bitres.size = 6144 - pctx->frame_bits;
328  ctx->bitres.size -= ctx->bitres.size % 8;
329  pctx->fill_level = ctx->bitres.size;
330  minath = ath(3410 - 0.733 * ATH_ADD, ATH_ADD);
331  for (j = 0; j < 2; j++) {
332  AacPsyCoeffs *coeffs = pctx->psy_coef[j];
333  const uint8_t *band_sizes = ctx->bands[j];
334  float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
335  float avg_chan_bits = chan_bitrate * (j ? 128.0f : 1024.0f) / ctx->avctx->sample_rate;
336  /* reference encoder uses 2.4% here instead of 60% like the spec says */
337  float bark_pe = 0.024f * PSY_3GPP_BITS_TO_PE(avg_chan_bits) / num_bark;
338  float en_spread_low = j ? PSY_3GPP_EN_SPREAD_LOW_S : PSY_3GPP_EN_SPREAD_LOW_L;
339  /* High energy spreading for long blocks <= 22kbps/channel and short blocks are the same. */
340  float en_spread_hi = (j || (chan_bitrate <= 22.0f)) ? PSY_3GPP_EN_SPREAD_HI_S : PSY_3GPP_EN_SPREAD_HI_L1;
341 
342  i = 0;
343  prev = 0.0;
344  for (g = 0; g < ctx->num_bands[j]; g++) {
345  i += band_sizes[g];
346  bark = calc_bark((i-1) * line_to_frequency);
347  coeffs[g].barks = (bark + prev) / 2.0;
348  prev = bark;
349  }
350  for (g = 0; g < ctx->num_bands[j] - 1; g++) {
351  AacPsyCoeffs *coeff = &coeffs[g];
352  float bark_width = coeffs[g+1].barks - coeffs->barks;
353  coeff->spread_low[0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_LOW);
354  coeff->spread_hi [0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_HI);
355  coeff->spread_low[1] = ff_exp10(-bark_width * en_spread_low);
356  coeff->spread_hi [1] = ff_exp10(-bark_width * en_spread_hi);
357  pe_min = bark_pe * bark_width;
358  minsnr = exp2(pe_min / band_sizes[g]) - 1.5f;
359  coeff->min_snr = av_clipf(1.0f / minsnr, PSY_SNR_25DB, PSY_SNR_1DB);
360  }
361  start = 0;
362  for (g = 0; g < ctx->num_bands[j]; g++) {
363  minscale = ath(start * line_to_frequency, ATH_ADD);
364  for (i = 1; i < band_sizes[g]; i++)
365  minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));
366  coeffs[g].ath = minscale - minath;
367  start += band_sizes[g];
368  }
369  }
370 
371  pctx->ch = av_calloc(ctx->avctx->ch_layout.nb_channels, sizeof(*pctx->ch));
372  if (!pctx->ch) {
373  av_freep(&ctx->model_priv_data);
374  return AVERROR(ENOMEM);
375  }
376 
377  lame_window_init(pctx, ctx->avctx);
378 
379  return 0;
380 }
381 
/**
 * IIR filter used in block switching decision
 *
 * One step of a first-order filter:
 *   out = 0.7548 * (in - previous input) + 0.5095 * previous output
 *
 * @param in    current input sample
 * @param state state[0] holds the previous input and state[1] the previous
 *              output; both are overwritten with the current values
 * @return filtered sample
 */
static float iir_filter(int in, float state[2])
{
    const float prev_in  = state[0];
    const float prev_out = state[1];
    const float out      = 0.7548f * (in - prev_in) + 0.5095f * prev_out;

    state[0] = in;
    state[1] = out;
    return out;
}
394 
395 /**
396  * window grouping information stored as bits (0 - new group, 1 - group continues)
397  */
398 static const uint8_t window_grouping[9] = { /* indexed by attack_n: 0 = no attack found, 1..8 = window of the attack + 1 */
399  0xB6, 0x6C, 0xD8, 0xB2, 0x66, 0xC6, 0x96, 0x36, 0x36
400 };
401 
402 /**
403  * Tell encoder which window types to use.
404  * @see 3GPP TS26.403 5.4.1 "Blockswitching"
405  */
407  const int16_t *audio,
408  const int16_t *la,
409  int channel, int prev_type)
410 {
411  int i, j;
412  int br = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate;
413  int attack_ratio = br <= 16000 ? 18 : 10;
414  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
415  AacPsyChannel *pch = &pctx->ch[channel];
416  uint8_t grouping = 0;
417  int next_type = pch->next_window_seq;
418  FFPsyWindowInfo wi = { { 0 } };
419 
420  if (la) {
421  float s[8], v;
422  int switch_to_eight = 0;
423  float sum = 0.0, sum2 = 0.0;
424  int attack_n = 0;
425  int stay_short = 0;
426  for (i = 0; i < 8; i++) {
427  for (j = 0; j < 128; j++) {
428  v = iir_filter(la[i*128+j], pch->iir_state);
429  sum += v*v;
430  }
431  s[i] = sum;
432  sum2 += sum;
433  }
434  for (i = 0; i < 8; i++) {
435  if (s[i] > pch->win_energy * attack_ratio) {
436  attack_n = i + 1;
437  switch_to_eight = 1;
438  break;
439  }
440  }
441  pch->win_energy = pch->win_energy*7/8 + sum2/64;
442 
443  wi.window_type[1] = prev_type;
444  switch (prev_type) {
445  case ONLY_LONG_SEQUENCE:
446  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
447  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
448  break;
449  case LONG_START_SEQUENCE:
450  wi.window_type[0] = EIGHT_SHORT_SEQUENCE;
451  grouping = pch->next_grouping;
452  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
453  break;
454  case LONG_STOP_SEQUENCE:
455  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
456  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
457  break;
459  stay_short = next_type == EIGHT_SHORT_SEQUENCE || switch_to_eight;
460  wi.window_type[0] = stay_short ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
461  grouping = next_type == EIGHT_SHORT_SEQUENCE ? pch->next_grouping : 0;
462  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
463  break;
464  }
465 
466  pch->next_grouping = window_grouping[attack_n];
467  pch->next_window_seq = next_type;
468  } else {
469  for (i = 0; i < 3; i++)
470  wi.window_type[i] = prev_type;
471  grouping = (prev_type == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
472  }
473 
474  wi.window_shape = 1;
475  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
476  wi.num_windows = 1;
477  wi.grouping[0] = 1;
478  } else {
479  int lastgrp = 0;
480  wi.num_windows = 8;
481  for (i = 0; i < 8; i++) {
482  if (!((grouping >> i) & 1))
483  lastgrp = i;
484  wi.grouping[lastgrp]++;
485  }
486  }
487 
488  return wi;
489 }
490 
491 /* 5.6.1.2 "Calculation of Bit Demand" */
492 static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size,
493  int short_window)
494 {
495  const float bitsave_slope = short_window ? PSY_3GPP_SAVE_SLOPE_S : PSY_3GPP_SAVE_SLOPE_L;
496  const float bitsave_add = short_window ? PSY_3GPP_SAVE_ADD_S : PSY_3GPP_SAVE_ADD_L;
497  const float bitspend_slope = short_window ? PSY_3GPP_SPEND_SLOPE_S : PSY_3GPP_SPEND_SLOPE_L;
498  const float bitspend_add = short_window ? PSY_3GPP_SPEND_ADD_S : PSY_3GPP_SPEND_ADD_L;
499  const float clip_low = short_window ? PSY_3GPP_CLIP_LO_S : PSY_3GPP_CLIP_LO_L;
500  const float clip_high = short_window ? PSY_3GPP_CLIP_HI_S : PSY_3GPP_CLIP_HI_L;
501  float clipped_pe, bit_save, bit_spend, bit_factor, fill_level, forgetful_min_pe;
502 
503  ctx->fill_level += ctx->frame_bits - bits;
504  ctx->fill_level = av_clip(ctx->fill_level, 0, size);
505  fill_level = av_clipf((float)ctx->fill_level / size, clip_low, clip_high);
506  clipped_pe = av_clipf(pe, ctx->pe.min, ctx->pe.max);
507  bit_save = (fill_level + bitsave_add) * bitsave_slope;
508  assert(bit_save <= 0.3f && bit_save >= -0.05000001f);
509  bit_spend = (fill_level + bitspend_add) * bitspend_slope;
510  assert(bit_spend <= 0.5f && bit_spend >= -0.1f);
511  /* The bit factor graph in the spec is obviously incorrect.
512  * bit_spend + ((bit_spend - bit_spend))...
513  * The reference encoder subtracts everything from 1, but also seems incorrect.
514  * 1 - bit_save + ((bit_spend + bit_save))...
515  * Hopefully below is correct.
516  */
517  bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (ctx->pe.max - ctx->pe.min)) * (clipped_pe - ctx->pe.min);
518  /* NOTE: The reference encoder attempts to center pe max/min around the current pe.
519  * Here we do that by slowly forgetting pe.min when pe stays in a range that makes
520  * it unlikely (ie: above the mean)
521  */
522  ctx->pe.max = FFMAX(pe, ctx->pe.max);
523  forgetful_min_pe = ((ctx->pe.min * PSY_PE_FORGET_SLOPE)
524  + FFMAX(ctx->pe.min, pe * (pe / ctx->pe.max))) / (PSY_PE_FORGET_SLOPE + 1);
525  ctx->pe.min = FFMIN(pe, forgetful_min_pe);
526 
527  /* NOTE: allocate a minimum of 1/8th average frame bits, to avoid
528  * reservoir starvation from producing zero-bit frames
529  */
530  return FFMIN(
531  ctx->frame_bits * bit_factor,
532  FFMAX(ctx->frame_bits + size - bits, ctx->frame_bits / 8));
533 }
534 
535 static float calc_pe_3gpp(AacPsyBand *band)
536 {
537  float pe, a;
538 
539  band->pe = 0.0f;
540  band->pe_const = 0.0f;
541  band->active_lines = 0.0f;
542  if (band->energy > band->thr) {
543  a = log2f(band->energy);
544  pe = a - log2f(band->thr);
545  band->active_lines = band->nz_lines;
546  if (pe < PSY_3GPP_C1) {
547  pe = pe * PSY_3GPP_C3 + PSY_3GPP_C2;
548  a = a * PSY_3GPP_C3 + PSY_3GPP_C2;
549  band->active_lines *= PSY_3GPP_C3;
550  }
551  band->pe = pe * band->nz_lines;
552  band->pe_const = a * band->nz_lines;
553  }
554 
555  return band->pe;
556 }
557 
558 static float calc_reduction_3gpp(float a, float desired_pe, float pe,
559  float active_lines)
560 {
561  float thr_avg, reduction;
562 
563  if(active_lines == 0.0)
564  return 0;
565 
566  thr_avg = exp2f((a - pe) / (4.0f * active_lines));
567  reduction = exp2f((a - desired_pe) / (4.0f * active_lines)) - thr_avg;
568 
569  return FFMAX(reduction, 0.0f);
570 }
571 
572 static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr,
573  float reduction)
574 {
575  float thr = band->thr;
576 
577  if (band->energy > thr) {
578  thr = sqrtf(thr);
579  thr = sqrtf(thr) + reduction;
580  thr *= thr;
581  thr *= thr;
582 
583  /* This deviates from the 3GPP spec to match the reference encoder.
584  * It performs min(thr_reduced, max(thr, energy/min_snr)) only for bands
585  * that have hole avoidance on (active or inactive). It always reduces the
586  * threshold of bands with hole avoidance off.
587  */
588  if (thr > band->energy * min_snr && band->avoid_holes != PSY_3GPP_AH_NONE) {
589  thr = FFMAX(band->thr, band->energy * min_snr);
591  }
592  }
593 
594  return thr;
595 }
596 
597 static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch,
598  const uint8_t *band_sizes, const float *coefs, const int cutoff)
599 {
600  int i, w, g;
601  int start = 0, wstart = 0;
602  for (w = 0; w < wi->num_windows*16; w += 16) {
603  wstart = 0;
604  for (g = 0; g < num_bands; g++) {
605  AacPsyBand *band = &pch->band[w+g];
606 
607  float form_factor = 0.0f;
608  float Temp;
609  band->energy = 0.0f;
610  if (wstart < cutoff) {
611  for (i = 0; i < band_sizes[g]; i++) {
612  band->energy += coefs[start+i] * coefs[start+i];
613  form_factor += sqrtf(fabs(coefs[start+i]));
614  }
615  }
616  Temp = band->energy > 0 ? sqrtf((float)band_sizes[g] / band->energy) : 0;
617  band->thr = band->energy * 0.001258925f;
618  band->nz_lines = form_factor * sqrtf(Temp);
619 
620  start += band_sizes[g];
621  wstart += band_sizes[g];
622  }
623  }
624 }
625 
/**
 * Run the LAME high-pass FIR over one long block.
 *
 * Each output sample is the centre tap plus coefficient-weighted sums of
 * sample pairs around it, with even and odd coefficients accumulated
 * separately, scaled by 32768.
 *
 * @param firbuf         input samples; indices 0 .. AAC_BLOCK_SIZE_LONG - 1 +
 *                       PSY_LAME_FIR_LEN are read
 * @param hpfsmpl        output buffer of AAC_BLOCK_SIZE_LONG samples
 * @param psy_fir_coeffs filter coefficients; the first PSY_LAME_FIR_LEN / 2
 *                       entries are used
 */
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
{
    enum { HALF_LEN = (PSY_LAME_FIR_LEN - 1) / 2 };
    int n, k;

    for (n = 0; n < AAC_BLOCK_SIZE_LONG; n++) {
        const float *win = firbuf + n;
        float even_acc = win[HALF_LEN];
        float odd_acc  = 0.0f;

        for (k = 0; k < HALF_LEN - 1; k += 2) {
            even_acc += psy_fir_coeffs[k]     * (win[k]     + win[PSY_LAME_FIR_LEN - k]);
            odd_acc  += psy_fir_coeffs[k + 1] * (win[k + 1] + win[PSY_LAME_FIR_LEN - k - 1]);
        }

        /* NOTE: The LAME psymodel expects its input in the range -32768 to 32768.
         *       Tuning this for normalized floats would be difficult. */
        hpfsmpl[n] = (even_acc + odd_acc) * 32768.0f;
    }
}
642 
643 /**
644  * Calculate band thresholds as suggested in 3GPP TS26.403
645  */
647  const float *coefs, const FFPsyWindowInfo *wi)
648 {
649  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
650  AacPsyChannel *pch = &pctx->ch[channel];
651  int i, w, g;
652  float desired_bits, desired_pe, delta_pe, reduction= NAN, spread_en[128] = {0};
653  float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f;
654  float pe = pctx->chan_bitrate > 32000 ? 0.0f : FFMAX(50.0f, 100.0f - pctx->chan_bitrate * 100.0f / 32000.0f);
655  const int num_bands = ctx->num_bands[wi->num_windows == 8];
656  const uint8_t *band_sizes = ctx->bands[wi->num_windows == 8];
657  AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8];
658  const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG;
659  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
660  const int cutoff = bandwidth * 2048 / wi->num_windows / ctx->avctx->sample_rate;
661 
662  //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
663  calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs, cutoff);
664 
665  //modify thresholds and energies - spread, threshold in quiet, pre-echo control
666  for (w = 0; w < wi->num_windows*16; w += 16) {
667  AacPsyBand *bands = &pch->band[w];
668 
669  /* 5.4.2.3 "Spreading" & 5.4.3 "Spread Energy Calculation" */
670  spread_en[0] = bands[0].energy;
671  for (g = 1; g < num_bands; g++) {
672  bands[g].thr = FFMAX(bands[g].thr, bands[g-1].thr * coeffs[g].spread_hi[0]);
673  spread_en[w+g] = FFMAX(bands[g].energy, spread_en[w+g-1] * coeffs[g].spread_hi[1]);
674  }
675  for (g = num_bands - 2; g >= 0; g--) {
676  bands[g].thr = FFMAX(bands[g].thr, bands[g+1].thr * coeffs[g].spread_low[0]);
677  spread_en[w+g] = FFMAX(spread_en[w+g], spread_en[w+g+1] * coeffs[g].spread_low[1]);
678  }
679  //5.4.2.4 "Threshold in quiet"
680  for (g = 0; g < num_bands; g++) {
681  AacPsyBand *band = &bands[g];
682 
683  band->thr_quiet = band->thr = FFMAX(band->thr, coeffs[g].ath);
684  //5.4.2.5 "Pre-echo control"
685  if (!(wi->window_type[0] == LONG_STOP_SEQUENCE || (!w && wi->window_type[1] == LONG_START_SEQUENCE)))
686  band->thr = FFMAX(PSY_3GPP_RPEMIN*band->thr, FFMIN(band->thr,
687  PSY_3GPP_RPELEV*pch->prev_band[w+g].thr_quiet));
688 
689  /* 5.6.1.3.1 "Preparatory steps of the perceptual entropy calculation" */
690  pe += calc_pe_3gpp(band);
691  a += band->pe_const;
692  active_lines += band->active_lines;
693 
694  /* 5.6.1.3.3 "Selection of the bands for avoidance of holes" */
695  if (spread_en[w+g] * avoid_hole_thr > band->energy || coeffs[g].min_snr > 1.0f)
697  else
699  }
700  }
701 
702  /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
703  ctx->ch[channel].entropy = pe;
704  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
705  /* (2.5 * 120) achieves almost transparent rate, and we want to give
706  * ample room downwards, so we make that equivalent to QSCALE=2.4
707  */
708  desired_pe = pe * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f);
709  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
710  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
711 
712  /* PE slope smoothing */
713  if (ctx->bitres.bits > 0) {
714  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
715  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
716  }
717 
718  pctx->pe.max = FFMAX(pe, pctx->pe.max);
719  pctx->pe.min = FFMIN(pe, pctx->pe.min);
720  } else {
721  desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
722  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
723 
724  /* NOTE: PE correction is kept simple. During initial testing it had very
725  * little effect on the final bitrate. Probably a good idea to come
726  * back and do more testing later.
727  */
728  if (ctx->bitres.bits > 0)
729  desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
730  0.85f, 1.15f);
731  }
732  pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits);
733  ctx->bitres.alloc = desired_bits;
734 
735  if (desired_pe < pe) {
736  /* 5.6.1.3.4 "First Estimation of the reduction value" */
737  for (w = 0; w < wi->num_windows*16; w += 16) {
738  reduction = calc_reduction_3gpp(a, desired_pe, pe, active_lines);
739  pe = 0.0f;
740  a = 0.0f;
741  active_lines = 0.0f;
742  for (g = 0; g < num_bands; g++) {
743  AacPsyBand *band = &pch->band[w+g];
744 
745  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
746  /* recalculate PE */
747  pe += calc_pe_3gpp(band);
748  a += band->pe_const;
749  active_lines += band->active_lines;
750  }
751  }
752 
753  /* 5.6.1.3.5 "Second Estimation of the reduction value" */
754  for (i = 0; i < 2; i++) {
755  float pe_no_ah = 0.0f, desired_pe_no_ah;
756  active_lines = a = 0.0f;
757  for (w = 0; w < wi->num_windows*16; w += 16) {
758  for (g = 0; g < num_bands; g++) {
759  AacPsyBand *band = &pch->band[w+g];
760 
761  if (band->avoid_holes != PSY_3GPP_AH_ACTIVE) {
762  pe_no_ah += band->pe;
763  a += band->pe_const;
764  active_lines += band->active_lines;
765  }
766  }
767  }
768  desired_pe_no_ah = FFMAX(desired_pe - (pe - pe_no_ah), 0.0f);
769  if (active_lines > 0.0f)
770  reduction = calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines);
771 
772  pe = 0.0f;
773  for (w = 0; w < wi->num_windows*16; w += 16) {
774  for (g = 0; g < num_bands; g++) {
775  AacPsyBand *band = &pch->band[w+g];
776 
777  if (active_lines > 0.0f)
778  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
779  pe += calc_pe_3gpp(band);
780  if (band->thr > 0.0f)
781  band->norm_fac = band->active_lines / band->thr;
782  else
783  band->norm_fac = 0.0f;
784  norm_fac += band->norm_fac;
785  }
786  }
787  delta_pe = desired_pe - pe;
788  if (fabs(delta_pe) > 0.05f * desired_pe)
789  break;
790  }
791 
792  if (pe < 1.15f * desired_pe) {
793  /* 5.6.1.3.6 "Final threshold modification by linearization" */
794  norm_fac = norm_fac ? 1.0f / norm_fac : 0;
795  for (w = 0; w < wi->num_windows*16; w += 16) {
796  for (g = 0; g < num_bands; g++) {
797  AacPsyBand *band = &pch->band[w+g];
798 
799  if (band->active_lines > 0.5f) {
800  float delta_sfb_pe = band->norm_fac * norm_fac * delta_pe;
801  float thr = band->thr;
802 
803  thr *= exp2f(delta_sfb_pe / band->active_lines);
804  if (thr > coeffs[g].min_snr * band->energy && band->avoid_holes == PSY_3GPP_AH_INACTIVE)
805  thr = FFMAX(band->thr, coeffs[g].min_snr * band->energy);
806  band->thr = thr;
807  }
808  }
809  }
810  } else {
811  /* 5.6.1.3.7 "Further perceptual entropy reduction" */
812  g = num_bands;
813  while (pe > desired_pe && g--) {
814  for (w = 0; w < wi->num_windows*16; w+= 16) {
815  AacPsyBand *band = &pch->band[w+g];
816  if (band->avoid_holes != PSY_3GPP_AH_NONE && coeffs[g].min_snr < PSY_SNR_1DB) {
817  coeffs[g].min_snr = PSY_SNR_1DB;
818  band->thr = band->energy * PSY_SNR_1DB;
819  pe += band->active_lines * 1.5f - band->pe;
820  }
821  }
822  }
823  /* TODO: allow more holes (unused without mid/side) */
824  }
825  }
826 
827  for (w = 0; w < wi->num_windows*16; w += 16) {
828  for (g = 0; g < num_bands; g++) {
829  AacPsyBand *band = &pch->band[w+g];
830  FFPsyBand *psy_band = &ctx->ch[channel].psy_bands[w+g];
831 
832  psy_band->threshold = band->thr;
833  psy_band->energy = band->energy;
834  psy_band->spread = band->active_lines * 2.0f / band_sizes[g];
835  psy_band->bits = PSY_3GPP_PE_TO_BITS(band->pe);
836  }
837  }
838 
839  memcpy(pch->prev_band, pch->band, sizeof(pch->band));
840 }
841 
843  const float **coeffs, const FFPsyWindowInfo *wi)
844 {
845  int ch;
847 
848  for (ch = 0; ch < group->num_ch; ch++)
849  psy_3gpp_analyze_channel(ctx, channel + ch, coeffs[ch], &wi[ch]);
850 }
851 
853 {
855  if (pctx)
856  av_freep(&pctx->ch);
857  av_freep(&apc->model_priv_data);
858 }
859 
860 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
861 {
862  int blocktype = ONLY_LONG_SEQUENCE;
863  if (uselongblock) {
864  if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE)
865  blocktype = LONG_STOP_SEQUENCE;
866  } else {
867  blocktype = EIGHT_SHORT_SEQUENCE;
868  if (ctx->next_window_seq == ONLY_LONG_SEQUENCE)
869  ctx->next_window_seq = LONG_START_SEQUENCE;
870  if (ctx->next_window_seq == LONG_STOP_SEQUENCE)
871  ctx->next_window_seq = EIGHT_SHORT_SEQUENCE;
872  }
873 
874  wi->window_type[0] = ctx->next_window_seq;
875  ctx->next_window_seq = blocktype;
876 }
877 
878 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
879  const float *la, int channel, int prev_type)
880 {
881  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
882  AacPsyChannel *pch = &pctx->ch[channel];
883  int grouping = 0;
884  int uselongblock = 1;
885  int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
886  int i;
887  FFPsyWindowInfo wi = { { 0 } };
888 
889  if (la) {
890  float hpfsmpl[AAC_BLOCK_SIZE_LONG];
891  const float *pf = hpfsmpl;
892  float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
893  float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
894  float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
895  const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN);
896  int att_sum = 0;
897 
898  /* LAME comment: apply high pass filter of fs/4 */
899  psy_hp_filter(firbuf, hpfsmpl, psy_fir_coeffs);
900 
901  /* Calculate the energies of each sub-shortblock */
902  for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
903  energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
904  assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS - 2)] > 0);
905  attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS - 2)];
906  energy_short[0] += energy_subshort[i];
907  }
908 
909  for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
910  const float *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS);
911  float p = 1.0f;
912  for (; pf < pfe; pf++)
913  p = FFMAX(p, fabsf(*pf));
914  pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
915  energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
916 
917  /* NOTE: The indexes below are [i + 3 - 2] in the LAME source. Compare each sub-block to sub-block - 2 */
918  if (p > energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2])
919  p = p / energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2];
920  else if (energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] > p * 10.0f)
921  p = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] / (p * 10.0f);
922  else
923  p = 0.0;
924 
925  attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
926  }
927 
928  /* compare energy between sub-short blocks */
929  for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
930  if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
931  if (attack_intensity[i] > pch->attack_threshold)
932  attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1;
933 
934  /* should have energy change between short blocks, in order to avoid periodic signals */
935  /* Good samples to show the effect are Trumpet test songs */
936  /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
937  /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
938  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
939  const float u = energy_short[i - 1];
940  const float v = energy_short[i];
941  const float m = FFMAX(u, v);
942  if (m < 40000) { /* (2) */
943  if (u < 2.3f * v && v < 2.3f * u) { /* (1) */
944  if (i == 1 && attacks[0] < attacks[i])
945  attacks[0] = 0;
946  attacks[i] = 0;
947  }
948  }
949  att_sum += attacks[i];
950  }
951 
952  if (pch->next_attack0_zero)
953  attacks[0] = 0;
954  pch->next_attack0_zero = !attacks[AAC_NUM_BLOCKS_SHORT];
955 
956  if (attacks[0] <= pch->prev_attack)
957  attacks[0] = 0;
958 
959  att_sum += attacks[0];
960 
961  /* If the previous attack happened in the last sub-block of the previous sequence,
962  * or if there's a new attack, use short window */
963  if (pch->prev_attack == PSY_LAME_NUM_SUBBLOCKS || att_sum) {
964  uselongblock = 0;
965 
966  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
967  if (attacks[i] && attacks[i-1])
968  attacks[i] = 0;
969  }
970  } else {
971  /* We have no lookahead info, so just use same type as the previous sequence. */
972  uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
973  }
974 
975  lame_apply_block_type(pch, &wi, uselongblock);
976 
977  wi.window_type[1] = prev_type;
978  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
979 
980  wi.num_windows = 1;
981  wi.grouping[0] = 1;
982  if (wi.window_type[0] == LONG_START_SEQUENCE)
983  wi.window_shape = 0;
984  else
985  wi.window_shape = 1;
986 
987  } else {
988  int lastgrp = 0;
989 
990  wi.num_windows = 8;
991  wi.window_shape = 0;
992  for (i = 0; i < 8; i++) {
993  if (!((pch->next_grouping >> i) & 1))
994  lastgrp = i;
995  wi.grouping[lastgrp]++;
996  }
997  }
998 
999  /* Determine grouping, based on the location of the first attack, and save for
1000  * the next frame.
1001  * FIXME: Move this to analysis.
1002  * TODO: Tune groupings depending on attack location
1003  * TODO: Handle more than one attack in a group
1004  */
1005  for (i = 0; i < 9; i++) {
1006  if (attacks[i]) {
1007  grouping = i;
1008  break;
1009  }
1010  }
1011  pch->next_grouping = window_grouping[grouping];
1012 
1013  pch->prev_attack = attacks[AAC_NUM_BLOCKS_SHORT - 1];
1014 
1015  return wi;
1016 }
1017 
1019 {
1020  .name = "3GPP TS 26.403-inspired model",
1021  .init = psy_3gpp_init,
1022  .window = psy_lame_window,
1023  .analyze = psy_3gpp_analyze,
1024  .end = psy_3gpp_end,
1025 };
AacPsyCoeffs::spread_low
float spread_low[2]
spreading factor for high-to-low threshold spreading in long frame
Definition: aacpsy.c:145
ff_exp10
static av_always_inline double ff_exp10(double x)
Compute 10^x for floating point values.
Definition: ffmath.h:42
av_clip
#define av_clip
Definition: common.h:100
psy_3gpp_init
static av_cold int psy_3gpp_init(FFPsyContext *ctx)
Definition: aacpsy.c:299
AVERROR
Filter design: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout and sample format. The lists are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining ones, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references, ownership and permissions.
psy_3gpp_window
static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type)
Tell encoder which window types to use.
Definition: aacpsy.c:406
lame_calc_attack_threshold
static float lame_calc_attack_threshold(int bitrate)
Calculate the ABR attack threshold from the above LAME psymodel table.
Definition: aacpsy.c:230
FFPsyModel::name
const char * name
Definition: psymodel.h:115
PSY_PE_FORGET_SLOPE
#define PSY_PE_FORGET_SLOPE
Definition: aacpsy.c:84
psy_lame_window
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Definition: aacpsy.c:878
log2f
#define log2f(x)
Definition: libm.h:411
AacPsyBand::thr
float thr
energy threshold
Definition: aacpsy.c:111
calc_thr_3gpp
static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, const uint8_t *band_sizes, const float *coefs, const int cutoff)
Definition: aacpsy.c:597
PSY_3GPP_PE_TO_BITS
#define PSY_3GPP_PE_TO_BITS(bits)
Definition: aacpsy.c:93
AV_CODEC_FLAG_QSCALE
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:213
calc_bark
static av_cold float calc_bark(float f)
Calculate Bark value for given line.
Definition: aacpsy.c:280
AacPsyBand::nz_lines
float nz_lines
number of non-zero spectral lines
Definition: aacpsy.c:113
PSY_3GPP_AH_INACTIVE
@ PSY_3GPP_AH_INACTIVE
Definition: aacpsy.c:88
av_unused
#define av_unused
Definition: attributes.h:151
PSY_3GPP_CLIP_LO_S
#define PSY_3GPP_CLIP_LO_S
Definition: aacpsy.c:77
w
uint8_t w
Definition: llviddspenc.c:38
u
#define u(width, name, range_min, range_max)
Definition: cbs_apv.c:68
PSY_3GPP_AH_THR_LONG
#define PSY_3GPP_AH_THR_LONG
Definition: aacpsy.c:81
FFPsyWindowInfo::window_shape
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
PSY_SNR_1DB
#define PSY_SNR_1DB
Definition: aacpsy.c:65
calc_pe_3gpp
static float calc_pe_3gpp(AacPsyBand *band)
Definition: aacpsy.c:535
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AacPsyContext::min
float min
minimum allowed PE for bit factor calculation
Definition: aacpsy.c:158
PSY_3GPP_SPEND_SLOPE_L
#define PSY_3GPP_SPEND_SLOPE_L
Definition: aacpsy.c:72
PSY_3GPP_THR_SPREAD_HI
#define PSY_3GPP_THR_SPREAD_HI
constants for 3GPP AAC psychoacoustic model
Definition: aacpsy.c:45
AacPsyContext::fill_level
int fill_level
bit reservoir fill level
Definition: aacpsy.c:156
AVChannelLayout::nb_channels
int nb_channels
Number of channels in this layout.
Definition: channel_layout.h:329
AacPsyCoeffs::spread_hi
float spread_hi[2]
spreading factor for low-to-high threshold spreading in long frame
Definition: aacpsy.c:146
quality
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
Definition: rate_distortion.txt:12
lame_apply_block_type
static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
Definition: aacpsy.c:860
AacPsyCoeffs
psychoacoustic model frame type-dependent coefficients
Definition: aacpsy.c:142
AVCodecContext::ch_layout
AVChannelLayout ch_layout
Audio channel layout.
Definition: avcodec.h:1039
lame_window_init
static av_cold void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
LAME psy model specific initialization.
Definition: aacpsy.c:260
PsyLamePreset::st_lrm
float st_lrm
short threshold for L, R, and M channels
Definition: aacpsy.c:176
PSY_3GPP_EN_SPREAD_HI_S
#define PSY_3GPP_EN_SPREAD_HI_S
Definition: aacpsy.c:52
PSY_3GPP_SPEND_ADD_L
#define PSY_3GPP_SPEND_ADD_L
Definition: aacpsy.c:74
AVCodecContext::flags
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:488
AacPsyCoeffs::barks
float barks
Bark value for each spectral band in long frame.
Definition: aacpsy.c:144
AacPsyChannel::prev_energy_subshort
float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT *PSY_LAME_NUM_SUBBLOCKS]
Definition: aacpsy.c:134
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
FFPsyWindowInfo
windowing related information
Definition: psymodel.h:77
ATH_ADD
#define ATH_ADD
Definition: aacpsy.c:285
AVFormatContext::bit_rate
int64_t bit_rate
Total stream bitrate in bit/s, 0 if not available.
Definition: avformat.h:1406
AacPsyContext::previous
float previous
allowed PE of the previous frame
Definition: aacpsy.c:160
ff_aac_psy_model
const FFPsyModel ff_aac_psy_model
Definition: aacpsy.c:1018
AacPsyContext::ch
AacPsyChannel * ch
Definition: aacpsy.c:164
av_cold
#define av_cold
Definition: attributes.h:106
FFPsyChannelGroup::num_ch
uint8_t num_ch
number of channels in this group
Definition: psymodel.h:70
PsyLamePreset
LAME psy model preset struct.
Definition: aacpsy.c:171
PSY_3GPP_CLIP_HI_S
#define PSY_3GPP_CLIP_HI_S
Definition: aacpsy.c:79
s
#define s(width, name)
Definition: cbs_vp9.c:198
AacPsyBand
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
Definition: aacpsy.c:109
AVCodecContext::global_quality
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1217
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1415
bitrate
int64_t bitrate
Definition: av1_levels.c:47
g
const char * g
Definition: vf_curves.c:128
EIGHT_SHORT_SEQUENCE
@ EIGHT_SHORT_SEQUENCE
Definition: aac.h:62
PsyLamePreset::quality
int quality
Quality to map the rest of the values to.
Definition: aacpsy.c:172
AacPsyBand::pe_const
float pe_const
constant part of the PE calculation
Definition: aacpsy.c:116
bits
uint8_t bits
Definition: vp3data.h:128
AacPsyContext
3GPP TS26.403-inspired psychoacoustic model specific data
Definition: aacpsy.c:153
PSY_3GPP_AH_NONE
@ PSY_3GPP_AH_NONE
Definition: aacpsy.c:87
AacPsyChannel::next_attack0_zero
int next_attack0_zero
whether attack[0] of the next frame is zero
Definition: aacpsy.c:136
AacPsyCoeffs::min_snr
float min_snr
minimal SNR
Definition: aacpsy.c:147
ctx
AVFormatContext * ctx
Definition: movenc.c:49
exp2f
#define exp2f(x)
Definition: libm.h:295
calc_reduction_3gpp
static float calc_reduction_3gpp(float a, float desired_pe, float pe, float active_lines)
Definition: aacpsy.c:558
window_grouping
static const uint8_t window_grouping[9]
window grouping information stored as bits (0 - new group, 1 - group continues)
Definition: aacpsy.c:398
AAC_BLOCK_SIZE_SHORT
#define AAC_BLOCK_SIZE_SHORT
short block size
Definition: aacpsy.c:98
bands
static const float bands[]
Definition: af_superequalizer.c:56
PSY_3GPP_AH_ACTIVE
@ PSY_3GPP_AH_ACTIVE
Definition: aacpsy.c:89
ath
static av_cold float ath(float f, float add)
Calculate ATH value for given frequency.
Definition: aacpsy.c:290
calc_bit_demand
static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, int short_window)
Definition: aacpsy.c:492
NAN
#define NAN
Definition: mathematics.h:115
PSY_3GPP_AH_THR_SHORT
#define PSY_3GPP_AH_THR_SHORT
Definition: aacpsy.c:82
psy_hp_filter
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
Definition: aacpsy.c:626
if
if(ret)
Definition: filter_design.txt:179
iir_filter
static float iir_filter(int in, float state[2])
IIR filter used in block switching decision.
Definition: aacpsy.c:385
psy_vbr_map
static const PsyLamePreset psy_vbr_map[]
LAME psy model preset table for constant quality.
Definition: aacpsy.c:203
AAC_CUTOFF
#define AAC_CUTOFF(s)
Definition: psymodel.h:41
FFPsyWindowInfo::window_type
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
FFPsyBand::bits
int bits
Definition: psymodel.h:51
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
AacPsyContext::pe
struct AacPsyContext::@34 pe
PSY_3GPP_RPEMIN
#define PSY_3GPP_RPEMIN
Definition: aacpsy.c:58
psy_abr_map
static const PsyLamePreset psy_abr_map[]
LAME psy model preset table for ABR.
Definition: aacpsy.c:182
state
static struct @541 state
PSY_3GPP_C1
#define PSY_3GPP_C1
Definition: aacpsy.c:61
AVCodecContext::bit_rate
int64_t bit_rate
the average bitrate
Definition: avcodec.h:481
psy_3gpp_end
static av_cold void psy_3gpp_end(FFPsyContext *apc)
Definition: aacpsy.c:852
PSY_3GPP_BITS_TO_PE
#define PSY_3GPP_BITS_TO_PE(bits)
Definition: aacpsy.c:92
FFPsyBand
single band psychoacoustic information
Definition: psymodel.h:50
aac.h
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
FFPsyWindowInfo::grouping
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
av_clipf
av_clipf
Definition: af_crystalizer.c:122
AacPsyContext::max
float max
maximum allowed PE for bit factor calculation
Definition: aacpsy.c:159
exp
int8_t exp
Definition: eval.c:73
AacPsyChannel::iir_state
float iir_state[2]
hi-pass IIR filter state
Definition: aacpsy.c:129
AacPsyContext::psy_coef
AacPsyCoeffs psy_coef[2][64]
Definition: aacpsy.c:163
AacPsyBand::thr_quiet
float thr_quiet
threshold in quiet
Definition: aacpsy.c:112
AAC_BLOCK_SIZE_LONG
#define AAC_BLOCK_SIZE_LONG
long block size
Definition: aacpsy.c:97
f
f
Definition: af_crystalizer.c:122
ONLY_LONG_SEQUENCE
@ ONLY_LONG_SEQUENCE
Definition: aac.h:60
AacPsyChannel::band
AacPsyBand band[128]
bands information
Definition: aacpsy.c:125
size
int size
Definition: twinvq_data.h:10344
calc_reduced_thr_3gpp
static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float reduction)
Definition: aacpsy.c:572
AacPsyCoeffs::ath
float ath
absolute threshold of hearing per bands
Definition: aacpsy.c:143
AacPsyBand::active_lines
float active_lines
number of active spectral lines
Definition: aacpsy.c:114
AAC_NUM_BLOCKS_SHORT
#define AAC_NUM_BLOCKS_SHORT
number of blocks in a short sequence
Definition: aacpsy.c:99
PSY_LAME_FIR_LEN
#define PSY_LAME_FIR_LEN
LAME psy model FIR order.
Definition: aacpsy.c:96
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
PSY_3GPP_CLIP_LO_L
#define PSY_3GPP_CLIP_LO_L
Definition: aacpsy.c:76
attributes.h
AacPsyBand::avoid_holes
int avoid_holes
hole avoidance flag
Definition: aacpsy.c:118
PSY_3GPP_THR_SPREAD_LOW
#define PSY_3GPP_THR_SPREAD_LOW
Definition: aacpsy.c:46
PSY_3GPP_SAVE_ADD_S
#define PSY_3GPP_SAVE_ADD_S
Definition: aacpsy.c:71
PSY_3GPP_SPEND_ADD_S
#define PSY_3GPP_SPEND_ADD_S
Definition: aacpsy.c:75
psy_fir_coeffs
static const float psy_fir_coeffs[]
LAME psy model FIR coefficient table.
Definition: aacpsy.c:221
AacPsyChannel::attack_threshold
float attack_threshold
attack threshold for this channel
Definition: aacpsy.c:133
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
AacPsyBand::norm_fac
float norm_fac
normalization factor for linearization
Definition: aacpsy.c:117
FFPsyBand::threshold
float threshold
Definition: psymodel.h:53
PSY_3GPP_CLIP_HI_L
#define PSY_3GPP_CLIP_HI_L
Definition: aacpsy.c:78
LONG_STOP_SEQUENCE
@ LONG_STOP_SEQUENCE
Definition: aac.h:63
atanf
#define atanf(x)
Definition: libm.h:42
exp2
#define exp2(x)
Definition: libm.h:290
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
PSY_3GPP_RPELEV
#define PSY_3GPP_RPELEV
Definition: aacpsy.c:59
AacPsyBand::pe
float pe
perceptual entropy
Definition: aacpsy.c:115
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:256
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
AacPsyBand::energy
float energy
band energy
Definition: aacpsy.c:110
avcodec.h
FFPsyChannelGroup
psychoacoustic information for an arbitrary group of channels
Definition: psymodel.h:68
AacPsyChannel::next_window_seq
enum WindowSequence next_window_seq
window sequence to be used in the next frame
Definition: aacpsy.c:131
AacPsyChannel::win_energy
float win_energy
sliding average of channel energy
Definition: aacpsy.c:128
ret
ret
Definition: filter_design.txt:187
AacPsyChannel
single/pair channel context for psychoacoustic model
Definition: aacpsy.c:124
AacPsyContext::correction
float correction
PE correction factor.
Definition: aacpsy.c:161
FFPsyContext::model_priv_data
void * model_priv_data
psychoacoustic model implementation private data
Definition: psymodel.h:108
LONG_START_SEQUENCE
@ LONG_START_SEQUENCE
Definition: aac.h:61
PSY_3GPP_SAVE_SLOPE_S
#define PSY_3GPP_SAVE_SLOPE_S
Definition: aacpsy.c:69
PSY_3GPP_EN_SPREAD_HI_L1
#define PSY_3GPP_EN_SPREAD_HI_L1
Definition: aacpsy.c:48
AacPsyChannel::next_grouping
uint8_t next_grouping
stored grouping scheme for the next frame (in case of 8 short window sequence)
Definition: aacpsy.c:130
FFPsyBand::energy
float energy
Definition: psymodel.h:52
AVCodecContext
main external API structure.
Definition: avcodec.h:431
PSY_LAME_NUM_SUBBLOCKS
#define PSY_LAME_NUM_SUBBLOCKS
Number of sub-blocks in each short block.
Definition: aacpsy.c:100
PSY_SNR_25DB
#define PSY_SNR_25DB
Definition: aacpsy.c:66
AacPsyContext::global_quality
float global_quality
normalized global quality taken from avctx
Definition: aacpsy.c:165
psy_3gpp_analyze_channel
static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float *coefs, const FFPsyWindowInfo *wi)
Calculate band thresholds as suggested in 3GPP TS26.403.
Definition: aacpsy.c:646
FFPsyModel
codec-specific psychoacoustic model implementation
Definition: psymodel.h:114
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
AacPsyContext::frame_bits
int frame_bits
average bits per frame
Definition: aacpsy.c:155
ffmath.h
ff_psy_find_group
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
Definition: psymodel.c:67
psy_3gpp_analyze
static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Definition: aacpsy.c:842
PSY_3GPP_C3
#define PSY_3GPP_C3
Definition: aacpsy.c:63
mem.h
PSY_3GPP_EN_SPREAD_LOW_L
#define PSY_3GPP_EN_SPREAD_LOW_L
Definition: aacpsy.c:54
AacPsyContext::chan_bitrate
int chan_bitrate
bitrate per channel
Definition: aacpsy.c:154
PSY_3GPP_SAVE_SLOPE_L
#define PSY_3GPP_SAVE_SLOPE_L
Definition: aacpsy.c:68
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
PSY_3GPP_C2
#define PSY_3GPP_C2
Definition: aacpsy.c:62
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:80
PSY_3GPP_SPEND_SLOPE_S
#define PSY_3GPP_SPEND_SLOPE_S
Definition: aacpsy.c:73
WindowSequence
WindowSequence
Definition: aac.h:59
FFPsyBand::spread
float spread
Definition: psymodel.h:54
FF_QP2LAMBDA
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
Definition: avutil.h:226
PSY_3GPP_EN_SPREAD_LOW_S
#define PSY_3GPP_EN_SPREAD_LOW_S
Definition: aacpsy.c:56
AacPsyChannel::prev_attack
int prev_attack
attack value for the last short block in the previous sequence
Definition: aacpsy.c:135
FFPsyContext
context used by psychoacoustic model
Definition: psymodel.h:89
AacPsyChannel::prev_band
AacPsyBand prev_band[128]
bands information from the previous frame
Definition: aacpsy.c:126
psymodel.h
channel
channel
Definition: ebur128.h:39
FFPsyWindowInfo::num_windows
int num_windows
number of windows in a frame
Definition: psymodel.h:80
PSY_3GPP_SAVE_ADD_L
#define PSY_3GPP_SAVE_ADD_L
Definition: aacpsy.c:70