FFmpeg
aacpsy.c
Go to the documentation of this file.
1 /*
2  * AAC encoder psychoacoustic model
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder psychoacoustic model
25  */
26 
27 #include "libavutil/attributes.h"
28 #include "libavutil/ffmath.h"
29 
30 #include "avcodec.h"
31 #include "aactab.h"
32 #include "psymodel.h"
33 
34 /***********************************
35  * TODOs:
36  * try other bitrate controlling mechanism (maybe use ratecontrol.c?)
37  * control quality for quality-based output
38  **********************************/
39 
40 /**
41  * constants for 3GPP AAC psychoacoustic model
42  * @{
43  */
/* Threshold and energy spreading slopes. The initialisation code converts each
 * slope into a per-band factor with ff_exp10(-bark_width * slope). */
44 #define PSY_3GPP_THR_SPREAD_HI 1.5f // spreading factor for low-to-hi threshold spreading (15 dB/Bark)
45 #define PSY_3GPP_THR_SPREAD_LOW 3.0f // spreading factor for hi-to-low threshold spreading (30 dB/Bark)
46 /* spreading factor for low-to-hi energy spreading, long block, > 22kbps/channel (20dB/Bark) */
47 #define PSY_3GPP_EN_SPREAD_HI_L1 2.0f
48 /* spreading factor for low-to-hi energy spreading, long block, <= 22kbps/channel (15dB/Bark) */
49 #define PSY_3GPP_EN_SPREAD_HI_L2 1.5f
50 /* spreading factor for low-to-hi energy spreading, short block (15 dB/Bark) */
51 #define PSY_3GPP_EN_SPREAD_HI_S 1.5f
52 /* spreading factor for hi-to-low energy spreading, long block (30dB/Bark) */
53 #define PSY_3GPP_EN_SPREAD_LOW_L 3.0f
54 /* spreading factor for hi-to-low energy spreading, short block (20dB/Bark) */
55 #define PSY_3GPP_EN_SPREAD_LOW_S 2.0f
56
/* Pre-echo control bounds: the band threshold is kept at or above RPEMIN times
 * itself and at or below RPELEV times the previous frame's threshold in quiet. */
57 #define PSY_3GPP_RPEMIN 0.01f
58 #define PSY_3GPP_RPELEV 2.0f
59
/* Perceptual-entropy constants used by calc_pe_3gpp(): when the log2
 * energy/threshold ratio is below C1, it is remapped as ratio * C3 + C2. */
60 #define PSY_3GPP_C1 3.0f /* log2(8) */
61 #define PSY_3GPP_C2 1.3219281f /* log2(2.5) */
62 #define PSY_3GPP_C3 0.55935729f /* 1 - C2 / C1 */
63
/* Lower and upper clip values for the per-band min_snr coefficient. */
64 #define PSY_SNR_1DB 7.9432821e-1f /* -1dB */
65 #define PSY_SNR_25DB 3.1622776e-3f /* -25dB */
66
/* Bit-demand curve parameters read by calc_bit_demand(); the _L set is used
 * for long windows and the _S set for short windows. */
67 #define PSY_3GPP_SAVE_SLOPE_L -0.46666667f
68 #define PSY_3GPP_SAVE_SLOPE_S -0.36363637f
69 #define PSY_3GPP_SAVE_ADD_L -0.84285712f
70 #define PSY_3GPP_SAVE_ADD_S -0.75f
71 #define PSY_3GPP_SPEND_SLOPE_L 0.66666669f
72 #define PSY_3GPP_SPEND_SLOPE_S 0.81818181f
73 #define PSY_3GPP_SPEND_ADD_L -0.35f
74 #define PSY_3GPP_SPEND_ADD_S -0.26111111f
/* Clip range applied to the normalised bit reservoir fill level. */
75 #define PSY_3GPP_CLIP_LO_L 0.2f
76 #define PSY_3GPP_CLIP_LO_S 0.2f
77 #define PSY_3GPP_CLIP_HI_L 0.95f
78 #define PSY_3GPP_CLIP_HI_S 0.75f
79
/* Factor applied to the spread energy when selecting bands for hole avoidance
 * (long / short windows). */
80 #define PSY_3GPP_AH_THR_LONG 0.5f
81 #define PSY_3GPP_AH_THR_SHORT 0.63f
82
/* Weight of the old pe.min in the running update done by calc_bit_demand():
 * new_min = (old_min * SLOPE + candidate) / (SLOPE + 1). */
83 #define PSY_PE_FORGET_SLOPE 511
84 
/**
 * Hole-avoidance state stored in AacPsyBand.avoid_holes.
 *
 * The threshold-reduction code only compares against these values, so what
 * matters is that they are distinct and that PSY_3GPP_AH_NONE is 0: band
 * state is allocated zero-filled, which therefore means "no hole avoidance".
 */
enum {
    PSY_3GPP_AH_NONE,     ///< hole avoidance is not applied to the band
    PSY_3GPP_AH_INACTIVE, ///< band selected for hole avoidance, limit not hit yet
    PSY_3GPP_AH_ACTIVE    ///< threshold was limited to avoid a spectral hole
};
90 
/* Conversions between a bit count and perceptual entropy; the two macros are
 * inverses of each other (factor 1.18). Note that the argument of
 * PSY_3GPP_PE_TO_BITS is a PE value even though the parameter is named `bits`. */
91 #define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f)
92 #define PSY_3GPP_PE_TO_BITS(bits) ((bits) / 1.18f)
93
94 /* LAME psy model constants */
95 #define PSY_LAME_FIR_LEN 21 ///< LAME psy model FIR order
96 #define AAC_BLOCK_SIZE_LONG 1024 ///< long block size
97 #define AAC_BLOCK_SIZE_SHORT 128 ///< short block size
98 #define AAC_NUM_BLOCKS_SHORT 8 ///< number of blocks in a short sequence
99 #define PSY_LAME_NUM_SUBBLOCKS 3 ///< Number of sub-blocks in each short block
100 
101 /**
102  * @}
103  */
104 
/**
 * Per-band working state of the 3GPP TS26.403-inspired psychoacoustic model.
 *
 * One instance exists per scalefactor band and window; the whole array is
 * copied to the "previous frame" array at the end of each channel analysis.
 */
typedef struct AacPsyBand {
    /** band energy */
    float energy;
    /** energy threshold */
    float thr;
    /** threshold in quiet */
    float thr_quiet;
    /** number of non-zero spectral lines */
    float nz_lines;
    /** number of active spectral lines */
    float active_lines;
    /** perceptual entropy */
    float pe;
    /** constant part of the PE calculation */
    float pe_const;
    /** normalization factor for linearization */
    float norm_fac;
    /** hole avoidance flag */
    int avoid_holes;
} AacPsyBand;
119 
120 /**
121  * single/pair channel context for psychoacoustic model
122  */
123 typedef struct AacPsyChannel{
124  AacPsyBand band[128]; ///< bands information
125  AacPsyBand prev_band[128]; ///< bands information from the previous frame
126 
127  float win_energy; ///< sliding average of channel energy
128  float iir_state[2]; ///< hi-pass IIR filter state
129  uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
130  enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
131  /* LAME psy model specific members */
132  float attack_threshold; ///< attack threshold for this channel
133  float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS];
134  int prev_attack; ///< attack value for the last short block in the previous sequence
136 
/**
 * Per-band coefficients of the psychoacoustic model; one table exists for
 * long frames and one for short frames.
 *
 * For both spreading arrays, index 0 holds the factor used for threshold
 * spreading and index 1 the factor used for energy spreading.
 */
typedef struct AacPsyCoeffs {
    /** absolute threshold of hearing for the band */
    float ath;
    /** Bark value of the band */
    float barks;
    /** spreading factors applied from the next higher band (hi-to-low spreading) */
    float spread_low[2];
    /** spreading factors applied from the next lower band (low-to-hi spreading) */
    float spread_hi [2];
    /** minimal SNR */
    float min_snr;
} AacPsyCoeffs;
147 
148 /**
149  * 3GPP TS26.403-inspired psychoacoustic model specific data
150  */
151 typedef struct AacPsyContext{
152  int chan_bitrate; ///< bitrate per channel
153  int frame_bits; ///< average bits per frame
154  int fill_level; ///< bit reservoir fill level
155  struct {
156  float min; ///< minimum allowed PE for bit factor calculation
157  float max; ///< maximum allowed PE for bit factor calculation
158  float previous; ///< allowed PE of the previous frame
159  float correction; ///< PE correction factor
160  } pe;
161  AacPsyCoeffs psy_coef[2][64];
163  float global_quality; ///< normalized global quality taken from avctx
165 
/**
 * One row of a LAME psy model preset table.
 */
typedef struct PsyLamePreset {
    /**
     * Key the remaining values are mapped to.
     * It is overloaded: kbps per channel in ABR mode, requested quality
     * in constant quality mode.
     */
    int quality;

    /** short threshold for L, R, and M channels */
    float st_lrm;
} PsyLamePreset;
176 
177 /**
178  * LAME psy model preset table for ABR
179  */
180 static const PsyLamePreset psy_abr_map[] = {
181 /* TODO: Tuning. These were taken from LAME. */
182 /* kbps/ch st_lrm */
183  { 8, 6.60},
184  { 16, 6.60},
185  { 24, 6.60},
186  { 32, 6.60},
187  { 40, 6.60},
188  { 48, 6.60},
189  { 56, 6.60},
190  { 64, 6.40},
191  { 80, 6.00},
192  { 96, 5.60},
193  {112, 5.20},
194  {128, 5.20},
195  {160, 5.20}
196 };
197 
198 /**
199 * LAME psy model preset table for constant quality
200 */
201 static const PsyLamePreset psy_vbr_map[] = {
202 /* vbr_q st_lrm */
203  { 0, 4.20},
204  { 1, 4.20},
205  { 2, 4.20},
206  { 3, 4.20},
207  { 4, 4.20},
208  { 5, 4.20},
209  { 6, 4.20},
210  { 7, 4.20},
211  { 8, 4.20},
212  { 9, 4.20},
213  {10, 4.20}
214 };
215 
/**
 * LAME psy model FIR coefficient table.
 *
 * Ten coefficients, each stored pre-multiplied by 2; psy_hp_filter() applies
 * every entry to a pair of input samples.
 */
static const float psy_fir_coeffs[] = {
    -8.65163e-18 * 2,
    -0.00851586  * 2,
    -6.74764e-18 * 2,
     0.0209036   * 2,
    -3.36639e-17 * 2,
    -0.0438162   * 2,
    -1.54175e-17 * 2,
     0.0931738   * 2,
    -5.52212e-17 * 2,
    -0.313819    * 2,
};
224 
225 #if ARCH_MIPS
226 # include "mips/aacpsy_mips.h"
227 #endif /* ARCH_MIPS */
228 
229 /**
230  * Calculate the ABR attack threshold from the above LAME psymodel table.
231  */
233 {
234  /* Assume max bitrate to start with */
235  int lower_range = 12, upper_range = 12;
236  int lower_range_kbps = psy_abr_map[12].quality;
237  int upper_range_kbps = psy_abr_map[12].quality;
238  int i;
239 
240  /* Determine which bitrates the value specified falls between.
241  * If the loop ends without breaking our above assumption of 320kbps was correct.
242  */
243  for (i = 1; i < 13; i++) {
244  if (FFMAX(bitrate, psy_abr_map[i].quality) != bitrate) {
245  upper_range = i;
246  upper_range_kbps = psy_abr_map[i ].quality;
247  lower_range = i - 1;
248  lower_range_kbps = psy_abr_map[i - 1].quality;
249  break; /* Upper range found */
250  }
251  }
252 
253  /* Determine which range the value specified is closer to */
254  if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
255  return psy_abr_map[lower_range].st_lrm;
256  return psy_abr_map[upper_range].st_lrm;
257 }
258 
259 /**
260  * LAME psy model specific initialization
261  */
263 {
264  int i, j;
265 
266  for (i = 0; i < avctx->channels; i++) {
267  AacPsyChannel *pch = &ctx->ch[i];
268 
269  if (avctx->flags & AV_CODEC_FLAG_QSCALE)
270  pch->attack_threshold = psy_vbr_map[avctx->global_quality / FF_QP2LAMBDA].st_lrm;
271  else
272  pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate / avctx->channels / 1000);
273 
274  for (j = 0; j < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; j++)
275  pch->prev_energy_subshort[j] = 10.0f;
276  }
277 }
278 
279 /**
280  * Calculate Bark value for given line.
281  */
282 static av_cold float calc_bark(float f)
283 {
284  return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
285 }
286 
287 #define ATH_ADD 4
288 /**
289  * Calculate ATH value for given frequency.
290  * Borrowed from Lame.
291  */
292 static av_cold float ath(float f, float add)
293 {
294  f /= 1000.0f;
295  return 3.64 * pow(f, -0.8)
296  - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))
297  + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))
298  + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
299 }
300 
/* NOTE(review): the header line of this definition (embedded line 301, which
 * also carries the opening brace) is absent from this listing. The body uses a
 * parameter `ctx` with the fields avctx, cutoff, bands, num_bands, bitres and
 * model_priv_data.
 *
 * What the visible body does:
 *  - allocates the zero-filled AacPsyContext and stores it in ctx->model_priv_data;
 *  - derives the per-channel bitrate, average frame bits, PE limits and the
 *    bit reservoir size;
 *  - fills psy_coef[j] for j = 0 (long, 1024 lines) and j = 1 (short, 128
 *    lines) with Bark values, spreading factors, minimal SNR and ATH per band;
 *  - allocates the per-channel state and runs the LAME window initialisation.
 * Returns 0 on success and AVERROR(ENOMEM) when an allocation fails. */
302  AacPsyContext *pctx;
303  float bark;
304  int i, j, g, start;
305  float prev, minscale, minath, minsnr, pe_min;
/* In QSCALE mode the bitrate is divided by a fixed 2.0f rather than by the
 * channel count. */
306  int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->channels);
307
308  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
309  const float num_bark = calc_bark((float)bandwidth);
310
311  ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
312  if (!ctx->model_priv_data)
313  return AVERROR(ENOMEM);
314  pctx = ctx->model_priv_data;
/* A global_quality of 0 is treated as 120. */
315  pctx->global_quality = (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) * 0.01f;
316
317  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
318  /* Use the target average bitrate to compute spread parameters */
319  chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120));
320  }
321
322  pctx->chan_bitrate = chan_bitrate;
323  pctx->frame_bits = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate);
324  pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
325  pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
/* Reservoir size: 6144 bits minus the average frame, rounded down to a
 * multiple of 8; the reservoir starts full. */
326  ctx->bitres.size = 6144 - pctx->frame_bits;
327  ctx->bitres.size -= ctx->bitres.size % 8;
328  pctx->fill_level = ctx->bitres.size;
/* Reference ATH value that is subtracted from every band's ATH below. */
329  minath = ath(3410 - 0.733 * ATH_ADD, ATH_ADD);
330  for (j = 0; j < 2; j++) {
331  AacPsyCoeffs *coeffs = pctx->psy_coef[j];
332  const uint8_t *band_sizes = ctx->bands[j];
333  float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
334  float avg_chan_bits = chan_bitrate * (j ? 128.0f : 1024.0f) / ctx->avctx->sample_rate;
335  /* reference encoder uses 2.4% here instead of 60% like the spec says */
336  float bark_pe = 0.024f * PSY_3GPP_BITS_TO_PE(avg_chan_bits) / num_bark;
337  float en_spread_low = j ? PSY_3GPP_EN_SPREAD_LOW_S : PSY_3GPP_EN_SPREAD_LOW_L;
338  /* High energy spreading for long blocks <= 22kbps/channel and short blocks are the same. */
/* NOTE(review): chan_bitrate is compared against 22.0f here while other code
 * in this file compares it against values such as 16000 and 32000 - confirm
 * the intended unit. */
339  float en_spread_hi = (j || (chan_bitrate <= 22.0f)) ? PSY_3GPP_EN_SPREAD_HI_S : PSY_3GPP_EN_SPREAD_HI_L1;
340
/* Bark value per band: mean of the Bark value of the band's last line and of
 * the previous band's last line (0 for the first band). */
341  i = 0;
342  prev = 0.0;
343  for (g = 0; g < ctx->num_bands[j]; g++) {
344  i += band_sizes[g];
345  bark = calc_bark((i-1) * line_to_frequency);
346  coeffs[g].barks = (bark + prev) / 2.0;
347  prev = bark;
348  }
/* Spreading factors and minimal SNR. The loop stops one band early, so the
 * last band keeps the zero values from av_mallocz(). */
349  for (g = 0; g < ctx->num_bands[j] - 1; g++) {
350  AacPsyCoeffs *coeff = &coeffs[g];
/* NOTE(review): `coeffs->barks` is coeffs[0].barks, so bark_width is the
 * distance from band 0 to band g+1 rather than between bands g and g+1
 * (`coeff->barks`). Confirm whether this is intended before changing it;
 * it affects every spreading factor and min_snr. */
351  float bark_width = coeffs[g+1].barks - coeffs->barks;
352  coeff->spread_low[0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_LOW);
353  coeff->spread_hi [0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_HI);
354  coeff->spread_low[1] = ff_exp10(-bark_width * en_spread_low);
355  coeff->spread_hi [1] = ff_exp10(-bark_width * en_spread_hi);
356  pe_min = bark_pe * bark_width;
357  minsnr = exp2(pe_min / band_sizes[g]) - 1.5f;
358  coeff->min_snr = av_clipf(1.0f / minsnr, PSY_SNR_25DB, PSY_SNR_1DB);
359  }
/* ATH per band: minimum of the ATH curve over all lines of the band,
 * relative to minath. */
360  start = 0;
361  for (g = 0; g < ctx->num_bands[j]; g++) {
362  minscale = ath(start * line_to_frequency, ATH_ADD);
363  for (i = 1; i < band_sizes[g]; i++)
364  minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));
365  coeffs[g].ath = minscale - minath;
366  start += band_sizes[g];
367  }
368  }
369
370  pctx->ch = av_mallocz_array(ctx->avctx->channels, sizeof(AacPsyChannel));
371  if (!pctx->ch) {
/* Release the context again so that no half-initialised state is left behind. */
372  av_freep(&ctx->model_priv_data);
373  return AVERROR(ENOMEM);
374  }
375
376  lame_window_init(pctx, ctx->avctx);
377
378  return 0;
379 }
380 
/**
 * First-order IIR filter used in the block switching decision.
 *
 * @param in    current input sample
 * @param state filter memory: state[0] is the previous input, state[1] the
 *              previous output; both are updated by the call
 * @return filtered sample
 */
static float iir_filter(int in, float state[2])
{
    const float prev_in  = state[0];
    const float prev_out = state[1];
    const float out      = 0.7548f * (in - prev_in) + 0.5095f * prev_out;

    state[0] = in;
    state[1] = out;

    return out;
}
393 
/**
 * Window grouping patterns, stored as bit masks
 * (bit clear - a new group starts, bit set - the group continues).
 *
 * The table is indexed by the attack position determined during block
 * switching (0 when no attack was found, otherwise 1..8).
 */
static const uint8_t window_grouping[9] = {
    0xB6,
    0x6C,
    0xD8,
    0xB2,
    0x66,
    0xC6,
    0x96,
    0x36,
    0x36,
};
400 
401 /**
402  * Tell encoder which window types to use.
403  * @see 3GPP TS26.403 5.4.1 "Blockswitching"
 *
 * With lookahead samples (`la`) available, the samples are high-pass filtered,
 * the energy of eight 128-sample sub-blocks is compared against the sliding
 * channel energy, and the window sequence of this and the next frame is
 * chosen from the previous type and the attack decision. Without lookahead the
 * previous window type is simply repeated.
 *
 * @return window information (types, shape, number of windows, grouping)
404  */
/* NOTE(review): the first line of the definition (embedded line 405: return
 * type, name and the leading parameter, used below as `ctx`) is absent from
 * this listing. */
406  const int16_t *audio,
407  const int16_t *la,
408  int channel, int prev_type)
409 {
410  int i, j;
411  int br = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate;
/* Low bitrates need a stronger energy jump before switching to short blocks. */
412  int attack_ratio = br <= 16000 ? 18 : 10;
/* NOTE(review): the declaration of `pctx` (embedded line 413) is absent from
 * this listing; presumably it is ctx->model_priv_data cast to AacPsyContext*. */
414  AacPsyChannel *pch = &pctx->ch[channel];
415  uint8_t grouping = 0;
416  int next_type = pch->next_window_seq;
417  FFPsyWindowInfo wi = { { 0 } };
418
419  if (la) {
420  float s[8], v;
421  int switch_to_eight = 0;
422  float sum = 0.0, sum2 = 0.0;
423  int attack_n = 0;
424  int stay_short = 0;
/* NOTE(review): `sum` is not reset per sub-block, so s[i] is the cumulative
 * energy of sub-blocks 0..i and sum2 is the sum of those running totals -
 * confirm that this is intended. */
425  for (i = 0; i < 8; i++) {
426  for (j = 0; j < 128; j++) {
427  v = iir_filter(la[i*128+j], pch->iir_state);
428  sum += v*v;
429  }
430  s[i] = sum;
431  sum2 += sum;
432  }
/* The first sub-block exceeding attack_ratio times the sliding energy marks
 * the attack; attack_n is its 1-based position (0 means no attack). */
433  for (i = 0; i < 8; i++) {
434  if (s[i] > pch->win_energy * attack_ratio) {
435  attack_n = i + 1;
436  switch_to_eight = 1;
437  break;
438  }
439  }
440  pch->win_energy = pch->win_energy*7/8 + sum2/64;
441
442  wi.window_type[1] = prev_type;
443  switch (prev_type) {
444  case ONLY_LONG_SEQUENCE:
445  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
446  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
447  break;
448  case LONG_START_SEQUENCE:
449  wi.window_type[0] = EIGHT_SHORT_SEQUENCE;
450  grouping = pch->next_grouping;
451  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
452  break;
453  case LONG_STOP_SEQUENCE:
454  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
455  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
456  break;
/* NOTE(review): a `case` label (embedded line 457) is absent from this
 * listing; as shown, the statements below follow a `break` without a label.
 * Presumably they handle prev_type == EIGHT_SHORT_SEQUENCE. */
458  stay_short = next_type == EIGHT_SHORT_SEQUENCE || switch_to_eight;
459  wi.window_type[0] = stay_short ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
460  grouping = next_type == EIGHT_SHORT_SEQUENCE ? pch->next_grouping : 0;
461  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
462  break;
463  }
464
/* Remember the grouping pattern and window sequence for the next frame. */
465  pch->next_grouping = window_grouping[attack_n];
466  pch->next_window_seq = next_type;
467  } else {
/* No lookahead: repeat the previous window type. */
468  for (i = 0; i < 3; i++)
469  wi.window_type[i] = prev_type;
470  grouping = (prev_type == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
471  }
472
473  wi.window_shape = 1;
474  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
475  wi.num_windows = 1;
476  wi.grouping[0] = 1;
477  } else {
/* Turn the grouping bit mask into group lengths: a clear bit starts a new
 * group at window i, and every window increments the length of the group it
 * belongs to. */
478  int lastgrp = 0;
479  wi.num_windows = 8;
480  for (i = 0; i < 8; i++) {
481  if (!((grouping >> i) & 1))
482  lastgrp = i;
483  wi.grouping[lastgrp]++;
484  }
485  }
486
487  return wi;
488 }
489 
490 /* 5.6.1.2 "Calculation of Bit Demand" */
491 static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size,
492  int short_window)
493 {
494  const float bitsave_slope = short_window ? PSY_3GPP_SAVE_SLOPE_S : PSY_3GPP_SAVE_SLOPE_L;
495  const float bitsave_add = short_window ? PSY_3GPP_SAVE_ADD_S : PSY_3GPP_SAVE_ADD_L;
496  const float bitspend_slope = short_window ? PSY_3GPP_SPEND_SLOPE_S : PSY_3GPP_SPEND_SLOPE_L;
497  const float bitspend_add = short_window ? PSY_3GPP_SPEND_ADD_S : PSY_3GPP_SPEND_ADD_L;
498  const float clip_low = short_window ? PSY_3GPP_CLIP_LO_S : PSY_3GPP_CLIP_LO_L;
499  const float clip_high = short_window ? PSY_3GPP_CLIP_HI_S : PSY_3GPP_CLIP_HI_L;
500  float clipped_pe, bit_save, bit_spend, bit_factor, fill_level, forgetful_min_pe;
501 
502  ctx->fill_level += ctx->frame_bits - bits;
503  ctx->fill_level = av_clip(ctx->fill_level, 0, size);
504  fill_level = av_clipf((float)ctx->fill_level / size, clip_low, clip_high);
505  clipped_pe = av_clipf(pe, ctx->pe.min, ctx->pe.max);
506  bit_save = (fill_level + bitsave_add) * bitsave_slope;
507  assert(bit_save <= 0.3f && bit_save >= -0.05000001f);
508  bit_spend = (fill_level + bitspend_add) * bitspend_slope;
509  assert(bit_spend <= 0.5f && bit_spend >= -0.1f);
510  /* The bit factor graph in the spec is obviously incorrect.
511  * bit_spend + ((bit_spend - bit_spend))...
512  * The reference encoder subtracts everything from 1, but also seems incorrect.
513  * 1 - bit_save + ((bit_spend + bit_save))...
514  * Hopefully below is correct.
515  */
516  bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (ctx->pe.max - ctx->pe.min)) * (clipped_pe - ctx->pe.min);
517  /* NOTE: The reference encoder attempts to center pe max/min around the current pe.
518  * Here we do that by slowly forgetting pe.min when pe stays in a range that makes
519  * it unlikely (ie: above the mean)
520  */
521  ctx->pe.max = FFMAX(pe, ctx->pe.max);
522  forgetful_min_pe = ((ctx->pe.min * PSY_PE_FORGET_SLOPE)
523  + FFMAX(ctx->pe.min, pe * (pe / ctx->pe.max))) / (PSY_PE_FORGET_SLOPE + 1);
524  ctx->pe.min = FFMIN(pe, forgetful_min_pe);
525 
526  /* NOTE: allocate a minimum of 1/8th average frame bits, to avoid
527  * reservoir starvation from producing zero-bit frames
528  */
529  return FFMIN(
530  ctx->frame_bits * bit_factor,
531  FFMAX(ctx->frame_bits + size - bits, ctx->frame_bits / 8));
532 }
533 
534 static float calc_pe_3gpp(AacPsyBand *band)
535 {
536  float pe, a;
537 
538  band->pe = 0.0f;
539  band->pe_const = 0.0f;
540  band->active_lines = 0.0f;
541  if (band->energy > band->thr) {
542  a = log2f(band->energy);
543  pe = a - log2f(band->thr);
544  band->active_lines = band->nz_lines;
545  if (pe < PSY_3GPP_C1) {
546  pe = pe * PSY_3GPP_C3 + PSY_3GPP_C2;
547  a = a * PSY_3GPP_C3 + PSY_3GPP_C2;
548  band->active_lines *= PSY_3GPP_C3;
549  }
550  band->pe = pe * band->nz_lines;
551  band->pe_const = a * band->nz_lines;
552  }
553 
554  return band->pe;
555 }
556 
557 static float calc_reduction_3gpp(float a, float desired_pe, float pe,
558  float active_lines)
559 {
560  float thr_avg, reduction;
561 
562  if(active_lines == 0.0)
563  return 0;
564 
565  thr_avg = exp2f((a - pe) / (4.0f * active_lines));
566  reduction = exp2f((a - desired_pe) / (4.0f * active_lines)) - thr_avg;
567 
568  return FFMAX(reduction, 0.0f);
569 }
570 
571 static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr,
572  float reduction)
573 {
574  float thr = band->thr;
575 
576  if (band->energy > thr) {
577  thr = sqrtf(thr);
578  thr = sqrtf(thr) + reduction;
579  thr *= thr;
580  thr *= thr;
581 
582  /* This deviates from the 3GPP spec to match the reference encoder.
583  * It performs min(thr_reduced, max(thr, energy/min_snr)) only for bands
584  * that have hole avoidance on (active or inactive). It always reduces the
585  * threshold of bands with hole avoidance off.
586  */
587  if (thr > band->energy * min_snr && band->avoid_holes != PSY_3GPP_AH_NONE) {
588  thr = FFMAX(band->thr, band->energy * min_snr);
590  }
591  }
592 
593  return thr;
594 }
595 
596 #ifndef calc_thr_3gpp
597 static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch,
598  const uint8_t *band_sizes, const float *coefs, const int cutoff)
599 {
600  int i, w, g;
601  int start = 0, wstart = 0;
602  for (w = 0; w < wi->num_windows*16; w += 16) {
603  wstart = 0;
604  for (g = 0; g < num_bands; g++) {
605  AacPsyBand *band = &pch->band[w+g];
606 
607  float form_factor = 0.0f;
608  float Temp;
609  band->energy = 0.0f;
610  if (wstart < cutoff) {
611  for (i = 0; i < band_sizes[g]; i++) {
612  band->energy += coefs[start+i] * coefs[start+i];
613  form_factor += sqrtf(fabs(coefs[start+i]));
614  }
615  }
616  Temp = band->energy > 0 ? sqrtf((float)band_sizes[g] / band->energy) : 0;
617  band->thr = band->energy * 0.001258925f;
618  band->nz_lines = form_factor * sqrtf(Temp);
619 
620  start += band_sizes[g];
621  wstart += band_sizes[g];
622  }
623  }
624 }
625 #endif /* calc_thr_3gpp */
626 
627 #ifndef psy_hp_filter
628 static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
629 {
630  int i, j;
631  for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
632  float sum1, sum2;
633  sum1 = firbuf[i + (PSY_LAME_FIR_LEN - 1) / 2];
634  sum2 = 0.0;
635  for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
636  sum1 += psy_fir_coeffs[j] * (firbuf[i + j] + firbuf[i + PSY_LAME_FIR_LEN - j]);
637  sum2 += psy_fir_coeffs[j + 1] * (firbuf[i + j + 1] + firbuf[i + PSY_LAME_FIR_LEN - j - 1]);
638  }
639  /* NOTE: The LAME psymodel expects it's input in the range -32768 to 32768.
640  * Tuning this for normalized floats would be difficult. */
641  hpfsmpl[i] = (sum1 + sum2) * 32768.0f;
642  }
643 }
644 #endif /* psy_hp_filter */
645 
646 /**
647  * Calculate band thresholds as suggested in 3GPP TS26.403
 *
 * Steps visible below: band energies and initial thresholds, spreading,
 * threshold in quiet and pre-echo control, perceptual entropy (PE), the
 * desired PE from the quality setting or the bit reservoir, threshold
 * reduction towards the desired PE, and finally export of the per-band
 * results to ctx->ch[channel].psy_bands and to prev_band for the next frame.
648  */
/* NOTE(review): the first line of the definition (embedded line 649: return
 * type, name and the leading parameters, used below as `ctx` and `channel`)
 * is absent from this listing. */
650  const float *coefs, const FFPsyWindowInfo *wi)
651 {
/* NOTE(review): the declaration of `pctx` (embedded line 652) is absent from
 * this listing; presumably it is ctx->model_priv_data cast to AacPsyContext*. */
653  AacPsyChannel *pch = &pctx->ch[channel];
654  int i, w, g;
655  float desired_bits, desired_pe, delta_pe, reduction= NAN, spread_en[128] = {0};
656  float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f;
/* Starting PE offset: 0 above 32000 bits/s per channel, otherwise at least 50. */
657  float pe = pctx->chan_bitrate > 32000 ? 0.0f : FFMAX(50.0f, 100.0f - pctx->chan_bitrate * 100.0f / 32000.0f);
/* Index 1 selects the short-window tables (eight windows), index 0 the long ones. */
658  const int num_bands = ctx->num_bands[wi->num_windows == 8];
659  const uint8_t *band_sizes = ctx->bands[wi->num_windows == 8];
660  AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8];
661  const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG;
662  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
663  const int cutoff = bandwidth * 2048 / wi->num_windows / ctx->avctx->sample_rate;
664
665  //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
666  calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs, cutoff);
667
668  //modify thresholds and energies - spread, threshold in quiet, pre-echo control
669  for (w = 0; w < wi->num_windows*16; w += 16) {
670  AacPsyBand *bands = &pch->band[w];
671
672  /* 5.4.2.3 "Spreading" & 5.4.3 "Spread Energy Calculation" */
/* NOTE(review): this always writes spread_en[0], while the loops below index
 * spread_en[w+g]; for w > 0 the first band of the window therefore starts
 * from the zero initialiser instead of its own energy. Confirm whether
 * spread_en[w] was intended. */
673  spread_en[0] = bands[0].energy;
674  for (g = 1; g < num_bands; g++) {
675  bands[g].thr = FFMAX(bands[g].thr, bands[g-1].thr * coeffs[g].spread_hi[0]);
676  spread_en[w+g] = FFMAX(bands[g].energy, spread_en[w+g-1] * coeffs[g].spread_hi[1]);
677  }
678  for (g = num_bands - 2; g >= 0; g--) {
679  bands[g].thr = FFMAX(bands[g].thr, bands[g+1].thr * coeffs[g].spread_low[0]);
680  spread_en[w+g] = FFMAX(spread_en[w+g], spread_en[w+g+1] * coeffs[g].spread_low[1]);
681  }
682  //5.4.2.4 "Threshold in quiet"
683  for (g = 0; g < num_bands; g++) {
684  AacPsyBand *band = &bands[g];
685
686  band->thr_quiet = band->thr = FFMAX(band->thr, coeffs[g].ath);
687  //5.4.2.5 "Pre-echo control"
/* Skipped for LONG_STOP frames and for the first window after LONG_START. */
688  if (!(wi->window_type[0] == LONG_STOP_SEQUENCE || (!w && wi->window_type[1] == LONG_START_SEQUENCE)))
689  band->thr = FFMAX(PSY_3GPP_RPEMIN*band->thr, FFMIN(band->thr,
690  PSY_3GPP_RPELEV*pch->prev_band[w+g].thr_quiet));
691
692  /* 5.6.1.3.1 "Preparatory steps of the perceptual entropy calculation" */
693  pe += calc_pe_3gpp(band);
694  a += band->pe_const;
695  active_lines += band->active_lines;
696
697  /* 5.6.1.3.3 "Selection of the bands for avoidance of holes" */
/* NOTE(review): the statements of both branches (embedded lines 699 and 701)
 * are absent from this listing; presumably they assign band->avoid_holes. */
698  if (spread_en[w+g] * avoid_hole_thr > band->energy || coeffs[g].min_snr > 1.0f)
700  else
702  }
703  }
704
705  /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
706  ctx->ch[channel].entropy = pe;
707  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
708  /* (2.5 * 120) achieves almost transparent rate, and we want to give
709  * ample room downwards, so we make that equivalent to QSCALE=2.4
710  */
711  desired_pe = pe * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f);
712  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
713  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
714
715  /* PE slope smoothing */
/* NOTE(review): as listed, this branch only repeats the two assignments
 * above and changes nothing. */
716  if (ctx->bitres.bits > 0) {
717  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
718  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
719  }
720
721  pctx->pe.max = FFMAX(pe, pctx->pe.max);
722  pctx->pe.min = FFMIN(pe, pctx->pe.min);
723  } else {
724  desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
725  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
726
727  /* NOTE: PE correction is kept simple. During initial testing it had very
728  * little effect on the final bitrate. Probably a good idea to come
729  * back and do more testing later.
730  */
731  if (ctx->bitres.bits > 0)
732  desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
733  0.85f, 1.15f);
734  }
735  pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits);
736  ctx->bitres.alloc = desired_bits;
737
/* Thresholds are only raised when the frame needs more PE than allowed. */
738  if (desired_pe < pe) {
739  /* 5.6.1.3.4 "First Estimation of the reduction value" */
/* The reduction is recomputed per window from the sums accumulated so far;
 * pe, a and active_lines are then rebuilt from that window's bands. */
740  for (w = 0; w < wi->num_windows*16; w += 16) {
741  reduction = calc_reduction_3gpp(a, desired_pe, pe, active_lines);
742  pe = 0.0f;
743  a = 0.0f;
744  active_lines = 0.0f;
745  for (g = 0; g < num_bands; g++) {
746  AacPsyBand *band = &pch->band[w+g];
747
748  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
749  /* recalculate PE */
750  pe += calc_pe_3gpp(band);
751  a += band->pe_const;
752  active_lines += band->active_lines;
753  }
754  }
755
756  /* 5.6.1.3.5 "Second Estimation of the reduction value" */
757  for (i = 0; i < 2; i++) {
758  float pe_no_ah = 0.0f, desired_pe_no_ah;
759  active_lines = a = 0.0f;
/* Sum PE over the bands whose hole avoidance is not active. */
760  for (w = 0; w < wi->num_windows*16; w += 16) {
761  for (g = 0; g < num_bands; g++) {
762  AacPsyBand *band = &pch->band[w+g];
763
764  if (band->avoid_holes != PSY_3GPP_AH_ACTIVE) {
765  pe_no_ah += band->pe;
766  a += band->pe_const;
767  active_lines += band->active_lines;
768  }
769  }
770  }
771  desired_pe_no_ah = FFMAX(desired_pe - (pe - pe_no_ah), 0.0f);
772  if (active_lines > 0.0f)
773  reduction = calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines);
774
775  pe = 0.0f;
776  for (w = 0; w < wi->num_windows*16; w += 16) {
777  for (g = 0; g < num_bands; g++) {
778  AacPsyBand *band = &pch->band[w+g];
779
780  if (active_lines > 0.0f)
781  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
782  pe += calc_pe_3gpp(band);
783  if (band->thr > 0.0f)
784  band->norm_fac = band->active_lines / band->thr;
785  else
786  band->norm_fac = 0.0f;
/* NOTE(review): norm_fac is not reset between the two iterations of the
 * enclosing loop, so a second pass adds on top of the first - confirm. */
787  norm_fac += band->norm_fac;
788  }
789  }
790  delta_pe = desired_pe - pe;
/* NOTE(review): the loop stops when the PE is still more than 5% away from
 * the target and repeats when it is within 5%; confirm that this direction
 * is intended. */
791  if (fabs(delta_pe) > 0.05f * desired_pe)
792  break;
793  }
794
795  if (pe < 1.15f * desired_pe) {
796  /* 5.6.1.3.6 "Final threshold modification by linearization" */
797  norm_fac = 1.0f / norm_fac;
798  for (w = 0; w < wi->num_windows*16; w += 16) {
799  for (g = 0; g < num_bands; g++) {
800  AacPsyBand *band = &pch->band[w+g];
801
802  if (band->active_lines > 0.5f) {
803  float delta_sfb_pe = band->norm_fac * norm_fac * delta_pe;
804  float thr = band->thr;
805
806  thr *= exp2f(delta_sfb_pe / band->active_lines);
807  if (thr > coeffs[g].min_snr * band->energy && band->avoid_holes == PSY_3GPP_AH_INACTIVE)
808  thr = FFMAX(band->thr, coeffs[g].min_snr * band->energy);
809  band->thr = thr;
810  }
811  }
812  }
813  } else {
814  /* 5.6.1.3.7 "Further perceptual entropy reduction" */
/* Walk the bands from the highest downwards until the PE target is met.
 * Note that this writes to the shared coefficient table (coeffs[g].min_snr). */
815  g = num_bands;
816  while (pe > desired_pe && g--) {
817  for (w = 0; w < wi->num_windows*16; w+= 16) {
818  AacPsyBand *band = &pch->band[w+g];
819  if (band->avoid_holes != PSY_3GPP_AH_NONE && coeffs[g].min_snr < PSY_SNR_1DB) {
820  coeffs[g].min_snr = PSY_SNR_1DB;
821  band->thr = band->energy * PSY_SNR_1DB;
822  pe += band->active_lines * 1.5f - band->pe;
823  }
824  }
825  }
826  /* TODO: allow more holes (unused without mid/side) */
827  }
828  }
829
/* Export the results for the encoder. */
830  for (w = 0; w < wi->num_windows*16; w += 16) {
831  for (g = 0; g < num_bands; g++) {
832  AacPsyBand *band = &pch->band[w+g];
833  FFPsyBand *psy_band = &ctx->ch[channel].psy_bands[w+g];
834
835  psy_band->threshold = band->thr;
836  psy_band->energy = band->energy;
837  psy_band->spread = band->active_lines * 2.0f / band_sizes[g];
838  psy_band->bits = PSY_3GPP_PE_TO_BITS(band->pe);
839  }
840  }
841
/* Keep this frame's band state for the next frame's pre-echo control. */
842  memcpy(pch->prev_band, pch->band, sizeof(pch->band));
843 }
844 
/* NOTE(review): the first line of this definition (embedded line 845: return
 * type, name and the leading parameters, used below as `ctx` and `channel`)
 * is absent from this listing.
 *
 * Runs the per-channel analysis for every channel of the group that `channel`
 * belongs to; coeffs[ch] and wi[ch] are the coefficients and window
 * information of the ch-th channel of that group. */
846  const float **coeffs, const FFPsyWindowInfo *wi)
847 {
848  int ch;
849  FFPsyChannelGroup *group = ff_psy_find_group(ctx, channel);
850
851  for (ch = 0; ch < group->num_ch; ch++)
852  psy_3gpp_analyze_channel(ctx, channel + ch, coeffs[ch], &wi[ch]);
853 }
854 
/* NOTE(review): the header of this definition (embedded line 855, taking a
 * parameter used below as `apc`) and the declaration of `pctx` (embedded line
 * 857) are absent from this listing; presumably pctx is apc->model_priv_data.
 *
 * Frees the per-channel state first and then the model context itself;
 * av_freep() also resets the freed pointers to NULL. */
856 {
858  av_freep(&pctx->ch);
859  av_freep(&apc->model_priv_data);
860 }
861 
862 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
863 {
864  int blocktype = ONLY_LONG_SEQUENCE;
865  if (uselongblock) {
867  blocktype = LONG_STOP_SEQUENCE;
868  } else {
869  blocktype = EIGHT_SHORT_SEQUENCE;
874  }
875 
876  wi->window_type[0] = ctx->next_window_seq;
877  ctx->next_window_seq = blocktype;
878 }
879 
880 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
881  const float *la, int channel, int prev_type)
882 {
884  AacPsyChannel *pch = &pctx->ch[channel];
885  int grouping = 0;
886  int uselongblock = 1;
887  int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
888  int i;
889  FFPsyWindowInfo wi = { { 0 } };
890 
891  if (la) {
892  float hpfsmpl[AAC_BLOCK_SIZE_LONG];
893  const float *pf = hpfsmpl;
894  float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
895  float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
896  float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
897  const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN);
898  int att_sum = 0;
899 
900  /* LAME comment: apply high pass filter of fs/4 */
901  psy_hp_filter(firbuf, hpfsmpl, psy_fir_coeffs);
902 
903  /* Calculate the energies of each sub-shortblock */
904  for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
905  energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
906  assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
907  attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
908  energy_short[0] += energy_subshort[i];
909  }
910 
911  for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
912  const float *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS);
913  float p = 1.0f;
914  for (; pf < pfe; pf++)
915  p = FFMAX(p, fabsf(*pf));
916  pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
917  energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
918  /* NOTE: The indexes below are [i + 3 - 2] in the LAME source.
919  * Obviously the 3 and 2 have some significance, or this would be just [i + 1]
920  * (which is what we use here). What the 3 stands for is ambiguous, as it is both
921  * number of short blocks, and the number of sub-short blocks.
922  * It seems that LAME is comparing each sub-block to sub-block + 1 in the
923  * previous block.
924  */
925  if (p > energy_subshort[i + 1])
926  p = p / energy_subshort[i + 1];
927  else if (energy_subshort[i + 1] > p * 10.0f)
928  p = energy_subshort[i + 1] / (p * 10.0f);
929  else
930  p = 0.0;
931  attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
932  }
933 
934  /* compare energy between sub-short blocks */
935  for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
936  if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
937  if (attack_intensity[i] > pch->attack_threshold)
938  attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1;
939 
940  /* should have energy change between short blocks, in order to avoid periodic signals */
941  /* Good samples to show the effect are Trumpet test songs */
942  /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
943  /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
944  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
945  const float u = energy_short[i - 1];
946  const float v = energy_short[i];
947  const float m = FFMAX(u, v);
948  if (m < 40000) { /* (2) */
949  if (u < 1.7f * v && v < 1.7f * u) { /* (1) */
950  if (i == 1 && attacks[0] < attacks[i])
951  attacks[0] = 0;
952  attacks[i] = 0;
953  }
954  }
955  att_sum += attacks[i];
956  }
957 
958  if (attacks[0] <= pch->prev_attack)
959  attacks[0] = 0;
960 
961  att_sum += attacks[0];
962  /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */
963  if (pch->prev_attack == 3 || att_sum) {
964  uselongblock = 0;
965 
966  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
967  if (attacks[i] && attacks[i-1])
968  attacks[i] = 0;
969  }
970  } else {
971  /* We have no lookahead info, so just use same type as the previous sequence. */
972  uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
973  }
974 
975  lame_apply_block_type(pch, &wi, uselongblock);
976 
977  wi.window_type[1] = prev_type;
978  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
979 
980  wi.num_windows = 1;
981  wi.grouping[0] = 1;
982  if (wi.window_type[0] == LONG_START_SEQUENCE)
983  wi.window_shape = 0;
984  else
985  wi.window_shape = 1;
986 
987  } else {
988  int lastgrp = 0;
989 
990  wi.num_windows = 8;
991  wi.window_shape = 0;
992  for (i = 0; i < 8; i++) {
993  if (!((pch->next_grouping >> i) & 1))
994  lastgrp = i;
995  wi.grouping[lastgrp]++;
996  }
997  }
998 
999  /* Determine grouping, based on the location of the first attack, and save for
1000  * the next frame.
1001  * FIXME: Move this to analysis.
1002  * TODO: Tune groupings depending on attack location
1003  * TODO: Handle more than one attack in a group
1004  */
1005  for (i = 0; i < 9; i++) {
1006  if (attacks[i]) {
1007  grouping = i;
1008  break;
1009  }
1010  }
1011  pch->next_grouping = window_grouping[grouping];
1012 
1013  pch->prev_attack = attacks[8];
1014 
1015  return wi;
1016 }
1017 
1019 {
1020  .name = "3GPP TS 26.403-inspired model",
1021  .init = psy_3gpp_init,
1022  .window = psy_lame_window,
1023  .analyze = psy_3gpp_analyze,
1024  .end = psy_3gpp_end,
1025 };
int quality
Quality to map the rest of the values to.
Definition: aacpsy.c:170
float global_quality
normalized global quality taken from avctx
Definition: aacpsy.c:163
static const uint8_t window_grouping[9]
window grouping information stored as bits (0 - new group, 1 - group continues)
Definition: aacpsy.c:397
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
#define AAC_BLOCK_SIZE_SHORT
short block size
Definition: aacpsy.c:97
static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, int short_window)
Definition: aacpsy.c:491
uint8_t ** bands
scalefactor band sizes for possible frame sizes
Definition: psymodel.h:98
#define PSY_3GPP_AH_THR_SHORT
Definition: aacpsy.c:81
int64_t bit_rate
the average bitrate
Definition: avcodec.h:1618
const char * g
Definition: vf_curves.c:115
static const PsyLamePreset psy_vbr_map[]
LAME psy model preset table for constant quality.
Definition: aacpsy.c:201
psychoacoustic information for an arbitrary group of channels
Definition: psymodel.h:68
static float calc_reduction_3gpp(float a, float desired_pe, float pe, float active_lines)
Definition: aacpsy.c:557
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
float ath
absolute threshold of hearing per bands
Definition: aacpsy.c:141
#define PSY_3GPP_EN_SPREAD_HI_L1
Definition: aacpsy.c:47
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
static av_cold float ath(float f, float add)
Calculate ATH value for given frequency.
Definition: aacpsy.c:292
float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT *PSY_LAME_NUM_SUBBLOCKS]
Definition: aacpsy.c:133
enum WindowSequence next_window_seq
window sequence to be used in the next frame
Definition: aacpsy.c:130
#define PSY_SNR_25DB
Definition: aacpsy.c:65
#define AAC_BLOCK_SIZE_LONG
long block size
Definition: aacpsy.c:96
int * num_bands
number of scalefactor bands for possible frame sizes
Definition: psymodel.h:99
Macro definitions for various function/variable attributes.
LAME psy model preset struct.
Definition: aacpsy.c:169
float thr
energy threshold
Definition: aacpsy.c:110
float correction
PE correction factor.
Definition: aacpsy.c:159
static av_cold void psy_3gpp_end(FFPsyContext *apc)
Definition: aacpsy.c:855
float attack_threshold
attack threshold for this channel
Definition: aacpsy.c:132
#define PSY_3GPP_EN_SPREAD_LOW_L
Definition: aacpsy.c:53
float nz_lines
number of non-zero spectral lines
Definition: aacpsy.c:112
uint8_t
psychoacoustic model frame type-dependent coefficients
Definition: aacpsy.c:140
#define av_cold
Definition: attributes.h:82
int size
size of the bit reservoir in bits
Definition: psymodel.h:103
#define f(width, name)
Definition: cbs_vp9.c:255
static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float reduction)
Definition: aacpsy.c:571
#define PSY_3GPP_C2
Definition: aacpsy.c:61
#define PSY_LAME_FIR_LEN
LAME psy model FIR order.
Definition: aacpsy.c:95
#define PSY_3GPP_CLIP_LO_L
Definition: aacpsy.c:75
#define PSY_3GPP_SPEND_SLOPE_S
Definition: aacpsy.c:72
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:252
#define PSY_3GPP_THR_SPREAD_LOW
Definition: aacpsy.c:45
context used by psychoacoustic model
Definition: psymodel.h:89
#define atanf(x)
Definition: libm.h:40
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1489
#define AAC_CUTOFF(s)
Definition: psymodel.h:41
single band psychoacoustic information
Definition: psymodel.h:50
ptrdiff_t size
Definition: opengl_enc.c:100
static float lame_calc_attack_threshold(int bitrate)
Calculate the ABR attack threshold from the above LAME psymodel table.
Definition: aacpsy.c:232
uint8_t next_grouping
stored grouping scheme for the next frame (in case of 8 short window sequence)
Definition: aacpsy.c:129
#define PSY_3GPP_SAVE_ADD_L
Definition: aacpsy.c:69
static av_cold float calc_bark(float f)
Calculate Bark value for given line.
Definition: aacpsy.c:282
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
static av_always_inline double ff_exp10(double x)
Compute 10^x for floating point values.
Definition: ffmath.h:42
#define PSY_3GPP_SPEND_ADD_S
Definition: aacpsy.c:74
#define PSY_SNR_1DB
Definition: aacpsy.c:64
3GPP TS26.403-inspired psychoacoustic model specific data
Definition: aacpsy.c:151
single/pair channel context for psychoacoustic model
Definition: aacpsy.c:123
static const float psy_fir_coeffs[]
LAME psy model FIR coefficient table.
Definition: aacpsy.c:219
int bits
Definition: psymodel.h:51
float barks
Bark value for each spectral band in long frame.
Definition: aacpsy.c:142
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:1648
float pe_const
constant part of the PE calculation
Definition: aacpsy.c:115
int num_windows
number of windows in a frame
Definition: psymodel.h:80
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Definition: aacpsy.c:880
#define PSY_3GPP_SPEND_SLOPE_L
Definition: aacpsy.c:71
uint8_t bits
Definition: vp3data.h:202
#define PSY_3GPP_THR_SPREAD_HI
constants for 3GPP AAC psychoacoustic model
Definition: aacpsy.c:44
float energy
Definition: psymodel.h:52
WindowSequence
Definition: aac.h:75
#define FFMAX(a, b)
Definition: common.h:94
codec-specific psychoacoustic model implementation
Definition: psymodel.h:114
#define PSY_3GPP_RPELEV
Definition: aacpsy.c:58
int8_t exp
Definition: eval.c:72
struct AacPsyContext::@31 pe
float thr_quiet
threshold in quiet
Definition: aacpsy.c:111
static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Definition: aacpsy.c:845
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:853
#define NAN
Definition: mathematics.h:64
#define FFMIN(a, b)
Definition: common.h:96
int prev_attack
attack value for the last short block in the previous sequence
Definition: aacpsy.c:134
#define PSY_3GPP_SAVE_SLOPE_S
Definition: aacpsy.c:68
#define PSY_3GPP_C3
Definition: aacpsy.c:62
uint8_t w
Definition: llviddspenc.c:38
uint8_t num_ch
number of channels in this group
Definition: psymodel.h:70
int frame_bits
average bits per frame
Definition: aacpsy.c:153
int fill_level
bit reservoir fill level
Definition: aacpsy.c:154
AVFormatContext * ctx
Definition: movenc.c:48
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
Definition: aacpsy.c:862
#define PSY_3GPP_SAVE_SLOPE_L
Definition: aacpsy.c:67
#define s(width, name)
Definition: cbs_vp9.c:257
Reference: libavcodec/aacpsy.c.
#define PSY_LAME_NUM_SUBBLOCKS
Number of sub-blocks in each short block.
Definition: aacpsy.c:99
#define ATH_ADD
Definition: aacpsy.c:287
float energy
band energy
Definition: aacpsy.c:109
const FFPsyModel ff_aac_psy_model
Definition: aacpsy.c:1018
static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float *coefs, const FFPsyWindowInfo *wi)
Calculate band thresholds as suggested in 3GPP TS26.403.
Definition: aacpsy.c:649
float st_lrm
short threshold for L, R, and M channels
Definition: aacpsy.c:174
#define PSY_3GPP_EN_SPREAD_LOW_S
Definition: aacpsy.c:55
#define exp2f(x)
Definition: libm.h:293
Libavcodec external API header.
static struct @320 state
int sample_rate
samples per second
Definition: avcodec.h:2228
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
Definition: psymodel.c:73
main external API structure.
Definition: avcodec.h:1568
float win_energy
sliding average of channel energy
Definition: aacpsy.c:127
void * model_priv_data
psychoacoustic model implementation private data
Definition: psymodel.h:108
float active_lines
number of active spectral lines
Definition: aacpsy.c:113
static const float bands[]
static float iir_filter(int in, float state[2])
IIR filter used in block switching decision.
Definition: aacpsy.c:384
int avoid_holes
hole avoidance flag
Definition: aacpsy.c:117
AacPsyBand band[128]
bands information
Definition: aacpsy.c:124
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
#define PSY_3GPP_CLIP_HI_S
Definition: aacpsy.c:78
#define PSY_3GPP_RPEMIN
Definition: aacpsy.c:57
static const PsyLamePreset psy_abr_map[]
LAME psy model preset table for ABR.
Definition: aacpsy.c:180
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
#define PSY_PE_FORGET_SLOPE
Definition: aacpsy.c:83
#define PSY_3GPP_PE_TO_BITS(bits)
Definition: aacpsy.c:92
int cutoff
lowpass frequency cutoff for analysis
Definition: psymodel.h:96
float min_snr
minimal SNR
Definition: aacpsy.c:145
float max
maximum allowed PE for bit factor calculation
Definition: aacpsy.c:157
static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, const uint8_t *band_sizes, const float *coefs, const int cutoff)
Definition: aacpsy.c:597
float previous
allowed PE of the previous frame
Definition: aacpsy.c:158
struct FFPsyContext::@131 bitres
AacPsyCoeffs psy_coef[2][64]
Definition: aacpsy.c:161
uint8_t pi<< 24) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8,(uint64_t)((*(const uint8_t *) pi-0x80U))<< 56) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16,(*(const int16_t *) pi >>8)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,(uint64_t)(*(const int16_t *) pi)<< 48) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32,(*(const int32_t *) pi >>24)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,(uint64_t)(*(const int32_t *) pi)<< 32) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S64,(*(const int64_t *) pi >>56)+0x80) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S64,*(const int64_t *) pi *(1.0f/(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S64,*(const int64_t *) pi *(1.0/(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(const float *) pi *(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, 
av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const double *) pi *(UINT64_C(1)<< 63)))#define FMT_PAIR_FUNC(out, in) static conv_func_type *const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB *AV_SAMPLE_FMT_NB]={FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_DBL), 
FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64),};static void cpy1(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, len);}static void cpy2(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 2 *len);}static void cpy4(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 4 *len);}static void cpy8(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 8 *len);}AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, const int *ch_map, int flags){AudioConvert *ctx;conv_func_type *f=fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt)+AV_SAMPLE_FMT_NB *av_get_packed_sample_fmt(in_fmt)];if(!f) return NULL;ctx=av_mallocz(sizeof(*ctx));if(!ctx) return NULL;if(channels==1){in_fmt=av_get_planar_sample_fmt(in_fmt);out_fmt=av_get_planar_sample_fmt(out_fmt);}ctx->channels=channels;ctx->conv_f=f;ctx->ch_map=ch_map;if(in_fmt==AV_SAMPLE_FMT_U8||in_fmt==AV_SAMPLE_FMT_U8P) memset(ctx->silence, 0x80, sizeof(ctx->silence));if(out_fmt==in_fmt &&!ch_map){switch(av_get_bytes_per_sample(in_fmt)){case 1:ctx->simd_f=cpy1;break;case 2:ctx->simd_f=cpy2;break;case 4:ctx->simd_f=cpy4;break;case 8:ctx->simd_f=cpy8;break;}}if(HAVE_X86ASM &&1) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);return ctx;}void swri_audio_convert_free(AudioConvert **ctx){av_freep(ctx);}int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len){int ch;int off=0;const int os=(out->planar?1:out->ch_count)*out->bps;unsigned 
misaligned=0;av_assert0(ctx->channels==out->ch_count);if(ctx->in_simd_align_mask){int planes=in->planar?in->ch_count:1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) in->ch[ch];misaligned|=m &ctx->in_simd_align_mask;}if(ctx->out_simd_align_mask){int planes=out->planar?out->ch_count:1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) out->ch[ch];misaligned|=m &ctx->out_simd_align_mask;}if(ctx->simd_f &&!ctx->ch_map &&!misaligned){off=len &~15;av_assert1(off >=0);av_assert1(off<=len);av_assert2(ctx->channels==SWR_CH_MAX||!in->ch[ctx->channels]);if(off >0){if(out->planar==in->planar){int planes=out->planar?out->ch_count:1;for(ch=0;ch< planes;ch++){ctx->simd_f(out-> ch ch
Definition: audioconvert.c:56
float min
minimum allowed PE for bit factor calculation
Definition: aacpsy.c:156
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1634
static av_cold int psy_3gpp_init(FFPsyContext *ctx)
Definition: aacpsy.c:301
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
Definition: aacpsy.c:628
float spread_hi[2]
spreading factor for high-to-low threshold spreading in long frame
Definition: aacpsy.c:144
const char * name
Definition: psymodel.h:115
int64_t bitrate
Definition: h264_levels.c:131
internal math functions header
static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type)
Tell encoder which window types to use.
Definition: aacpsy.c:405
int
static float calc_pe_3gpp(AacPsyBand *band)
Definition: aacpsy.c:534
#define exp2(x)
Definition: libm.h:288
windowing related information
Definition: psymodel.h:77
#define log2f(x)
Definition: libm.h:409
channel
Use these values when setting the channel map with ebur128_set_channel().
Definition: ebur128.h:39
#define PSY_3GPP_BITS_TO_PE(bits)
Definition: aacpsy.c:91
#define PSY_3GPP_C1
Definition: aacpsy.c:60
float norm_fac
normalization factor for linearization
Definition: aacpsy.c:116
int chan_bitrate
bitrate per channel
Definition: aacpsy.c:152
#define PSY_3GPP_CLIP_LO_S
Definition: aacpsy.c:76
#define PSY_3GPP_AH_THR_LONG
Definition: aacpsy.c:80
int channels
number of audio channels
Definition: avcodec.h:2229
float pe
perceptual entropy
Definition: aacpsy.c:114
#define PSY_3GPP_EN_SPREAD_HI_S
Definition: aacpsy.c:51
static const double coeff[2][5]
Definition: vf_owdenoise.c:72
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
Definition: avutil.h:227
AacPsyChannel * ch
Definition: aacpsy.c:162
#define PSY_3GPP_SAVE_ADD_S
Definition: aacpsy.c:70
#define av_freep(p)
void INT64 start
Definition: avisynth_c.h:766
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
Definition: aacpsy.c:108
AVCodecContext * avctx
encoder context
Definition: psymodel.h:90
float threshold
Definition: psymodel.h:53
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
AAC data declarations.
float spread_low[2]
spreading factor for low-to-high threshold spreading in long frame
Definition: aacpsy.c:143
#define PSY_3GPP_CLIP_HI_L
Definition: aacpsy.c:77
float spread
Definition: psymodel.h:54
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
#define AAC_NUM_BLOCKS_SHORT
number of blocks in a short sequence
Definition: aacpsy.c:98
#define av_unused
Definition: attributes.h:125
#define PSY_3GPP_SPEND_ADD_L
Definition: aacpsy.c:73
void * av_mallocz_array(size_t nmemb, size_t size)
Definition: mem.c:191
static av_cold void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
LAME psy model specific initialization.
Definition: aacpsy.c:262