FFmpeg
aacenc_pred.c
Go to the documentation of this file.
1 /*
2  * AAC encoder main-type prediction
3  * Copyright (C) 2015 Rostislav Pehlivanov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder main-type prediction
25  * @author Rostislav Pehlivanov ( atomnuker gmail com )
26  */
27 
28 #include "aactab.h"
29 #include "aacenc_pred.h"
30 #include "aacenc_utils.h"
31 #include "aacenc_is.h" /* <- Needed for common window distortions */
32 #include "aacenc_quantization.h"
33 
/**
 * Undo the prediction decision for one scalefactor band.
 *
 * If prediction is flagged for band `sfb` of `sce`, the flag is cleared and
 * the band type saved in band_alt[] is put back into band_type[]. Bands
 * without the flag are left untouched.
 *
 * The expansion is a single statement (do { } while (0)), so the macro can
 * be used as the body of an unbraced if/else or loop. `sfb` is evaluated
 * exactly once; `sce` is evaluated several times and must therefore be free
 * of side effects.
 */
#define RESTORE_PRED(sce, sfb) \
    do {\
        const int restore_pred_sfb_ = (sfb);\
        if ((sce)->ics.prediction_used[restore_pred_sfb_]) {\
            (sce)->ics.prediction_used[restore_pred_sfb_] = 0;\
            (sce)->band_type[restore_pred_sfb_] = (sce)->band_alt[restore_pred_sfb_];\
        }\
    } while (0)
39 
40 static inline float flt16_round(float pf)
41 {
42  union av_intfloat32 tmp;
43  tmp.f = pf;
44  tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
45  return tmp.f;
46 }
47 
48 static inline float flt16_even(float pf)
49 {
50  union av_intfloat32 tmp;
51  tmp.f = pf;
52  tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
53  return tmp.f;
54 }
55 
56 static inline float flt16_trunc(float pf)
57 {
58  union av_intfloat32 pun;
59  pun.f = pf;
60  pun.i &= 0xFFFF0000U;
61  return pun.f;
62 }
63 
64 static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set)
65 {
66  float k2;
67  const float a = 0.953125; // 61.0 / 64
68  const float alpha = 0.90625; // 29.0 / 32
69  const float k1 = ps->k1;
70  const float r0 = ps->r0, r1 = ps->r1;
71  const float cor0 = ps->cor0, cor1 = ps->cor1;
72  const float var0 = ps->var0, var1 = ps->var1;
73  const float e0 = *coef - ps->x_est;
74  const float e1 = e0 - k1 * r0;
75 
76  if (set)
77  *coef = e0;
78 
79  ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
80  ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
81  ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
82  ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
83  ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
84  ps->r0 = flt16_trunc(a * e0);
85 
86  /* Prediction for next frame */
87  ps->k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
88  k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
89  *rcoef = ps->x_est = flt16_round(ps->k1*ps->r0 + k2*ps->r1);
90 }
91 
92 static inline void reset_predict_state(PredictorState *ps)
93 {
94  ps->r0 = 0.0f;
95  ps->r1 = 0.0f;
96  ps->k1 = 0.0f;
97  ps->cor0 = 0.0f;
98  ps->cor1 = 0.0f;
99  ps->var0 = 1.0f;
100  ps->var1 = 1.0f;
101  ps->x_est = 0.0f;
102 }
103 
104 static inline void reset_all_predictors(PredictorState *ps)
105 {
106  int i;
107  for (i = 0; i < MAX_PREDICTORS; i++)
108  reset_predict_state(&ps[i]);
109 }
110 
111 static inline void reset_predictor_group(SingleChannelElement *sce, int group_num)
112 {
113  int i;
114  PredictorState *ps = sce->predictor_state;
115  for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
116  reset_predict_state(&ps[i]);
117 }
118 
120 {
121  int sfb, k;
122  const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
123 
124  if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
125  for (sfb = 0; sfb < pmax; sfb++) {
126  for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
127  predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k],
128  sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
129  }
130  }
131  if (sce->ics.predictor_reset_group) {
133  }
134  } else {
136  }
137 }
138 
139 /* If inc = 0 you can check if this returns 0 to see if you can reset freely */
140 static inline int update_counters(IndividualChannelStream *ics, int inc)
141 {
142  int i;
143  for (i = 1; i < 31; i++) {
144  ics->predictor_reset_count[i] += inc;
146  return i; /* Reset this immediately */
147  }
148  return 0;
149 }
150 
152 {
153  int start, w, w2, g, i, count = 0;
154  SingleChannelElement *sce0 = &cpe->ch[0];
155  SingleChannelElement *sce1 = &cpe->ch[1];
156  const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
157  const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
158  const int pmax = FFMIN(pmax0, pmax1);
159 
160  if (!cpe->common_window ||
163  return;
164 
165  for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
166  start = 0;
167  for (g = 0; g < sce0->ics.num_swb; g++) {
168  int sfb = w*16+g;
169  int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb];
170  float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
171  struct AACISError ph_err1, ph_err2, *erf;
172  if (sfb < PRED_SFB_START || sfb > pmax || sum != 2) {
173  RESTORE_PRED(sce0, sfb);
174  RESTORE_PRED(sce1, sfb);
175  start += sce0->ics.swb_sizes[g];
176  continue;
177  }
178  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
179  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
180  float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
181  float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
182  ener0 += coef0*coef0;
183  ener1 += coef1*coef1;
184  ener01 += (coef0 + coef1)*(coef0 + coef1);
185  }
186  }
187  ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
188  ener0, ener1, ener01, 1, -1);
189  ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
190  ener0, ener1, ener01, 1, +1);
191  erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
192  if (erf->pass) {
193  sce0->ics.prediction_used[sfb] = 1;
194  sce1->ics.prediction_used[sfb] = 1;
195  count++;
196  } else {
197  RESTORE_PRED(sce0, sfb);
198  RESTORE_PRED(sce1, sfb);
199  }
200  start += sce0->ics.swb_sizes[g];
201  }
202  }
203 
204  sce1->ics.predictor_present = sce0->ics.predictor_present = !!count;
205 }
206 
208 {
209  int i, max_group_id_c, max_frame = 0;
210  float avg_frame = 0.0f;
211  IndividualChannelStream *ics = &sce->ics;
212 
213  /* Update the counters and immediately update any frame behind schedule */
214  if ((ics->predictor_reset_group = update_counters(&sce->ics, 1)))
215  return;
216 
217  for (i = 1; i < 31; i++) {
218  /* Count-based */
219  if (ics->predictor_reset_count[i] > max_frame) {
220  max_group_id_c = i;
221  max_frame = ics->predictor_reset_count[i];
222  }
223  avg_frame = (ics->predictor_reset_count[i] + avg_frame)/2;
224  }
225 
226  if (max_frame > PRED_RESET_MIN) {
227  ics->predictor_reset_group = max_group_id_c;
228  } else {
229  ics->predictor_reset_group = 0;
230  }
231 }
232 
234 {
235  int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0;
236  const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
237  float *O34 = &s->scoefs[128*0], *P34 = &s->scoefs[128*1];
238  float *SENT = &s->scoefs[128*2], *S34 = &s->scoefs[128*3];
239  float *QERR = &s->scoefs[128*4];
240 
241  if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
242  sce->ics.predictor_present = 0;
243  return;
244  }
245 
246  if (!sce->ics.predictor_initialized) {
248  sce->ics.predictor_initialized = 1;
249  memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float));
250  for (i = 1; i < 31; i++)
251  sce->ics.predictor_reset_count[i] = i;
252  }
253 
254  update_pred_resets(sce);
255  memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
256 
257  for (sfb = PRED_SFB_START; sfb < pmax; sfb++) {
258  int cost1, cost2, cb_p;
259  float dist1, dist2, dist_spec_err = 0.0f;
260  const int cb_n = sce->zeroes[sfb] ? 0 : sce->band_type[sfb];
261  const int cb_min = sce->zeroes[sfb] ? 0 : 1;
262  const int cb_max = sce->zeroes[sfb] ? 0 : RESERVED_BT;
263  const int start_coef = sce->ics.swb_offset[sfb];
264  const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef;
265  const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
266 
267  if (start_coef + num_coeffs > MAX_PREDICTORS ||
268  (s->cur_channel && sce->band_type[sfb] >= INTENSITY_BT2) ||
269  sce->band_type[sfb] == NOISE_BT)
270  continue;
271 
272  /* Normal coefficients */
273  s->abs_pow34(O34, &sce->coeffs[start_coef], num_coeffs);
274  dist1 = ff_quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
275  O34, num_coeffs, sce->sf_idx[sfb],
276  cb_n, s->lambda / band->threshold, INFINITY, &cost1, NULL);
277  cost_coeffs += cost1;
278 
279  /* Encoded coefficients - needed for #bits, band type and quant. error */
280  for (i = 0; i < num_coeffs; i++)
281  SENT[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i];
282  s->abs_pow34(S34, SENT, num_coeffs);
283  if (cb_n < RESERVED_BT)
284  cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, S34), sce->sf_idx[sfb]), cb_min, cb_max);
285  else
286  cb_p = cb_n;
287  ff_quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs,
288  sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
289  &cost2, NULL);
290 
291  /* Reconstructed coefficients - needed for distortion measurements */
292  for (i = 0; i < num_coeffs; i++)
293  sce->prcoeffs[start_coef + i] += QERR[i] != 0.0f ? (sce->prcoeffs[start_coef + i] - QERR[i]) : 0.0f;
294  s->abs_pow34(P34, &sce->prcoeffs[start_coef], num_coeffs);
295  if (cb_n < RESERVED_BT)
296  cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, P34), sce->sf_idx[sfb]), cb_min, cb_max);
297  else
298  cb_p = cb_n;
299  dist2 = ff_quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
300  P34, num_coeffs, sce->sf_idx[sfb],
301  cb_p, s->lambda / band->threshold, INFINITY, NULL, NULL);
302  for (i = 0; i < num_coeffs; i++)
303  dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]);
304  dist_spec_err *= s->lambda / band->threshold;
305  dist2 += dist_spec_err;
306 
307  if (dist2 <= dist1 && cb_p <= cb_n) {
308  cost_pred += cost2;
309  sce->ics.prediction_used[sfb] = 1;
310  sce->band_alt[sfb] = cb_n;
311  sce->band_type[sfb] = cb_p;
312  count++;
313  } else {
314  cost_pred += cost1;
315  sce->band_alt[sfb] = cb_p;
316  }
317  }
318 
319  if (count && cost_coeffs < cost_pred) {
320  count = 0;
321  for (sfb = PRED_SFB_START; sfb < pmax; sfb++)
322  RESTORE_PRED(sce, sfb);
323  memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
324  }
325 
326  sce->ics.predictor_present = !!count;
327 }
328 
329 /**
330  * Encoder predictors data.
331  */
333 {
334  int sfb;
335  IndividualChannelStream *ics = &sce->ics;
336  const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
337 
338  if (s->profile != AV_PROFILE_AAC_MAIN ||
339  !ics->predictor_present)
340  return;
341 
342  put_bits(&s->pb, 1, !!ics->predictor_reset_group);
343  if (ics->predictor_reset_group)
344  put_bits(&s->pb, 5, ics->predictor_reset_group);
345  for (sfb = 0; sfb < pmax; sfb++)
346  put_bits(&s->pb, 1, ics->prediction_used[sfb]);
347 }
update_pred_resets
static void update_pred_resets(SingleChannelElement *sce)
Definition: aacenc_pred.c:207
SingleChannelElement::band_alt
enum BandType band_alt[128]
alternative band type (used by encoder)
Definition: aac.h:251
av_clip
#define av_clip
Definition: common.h:96
INFINITY
#define INFINITY
Definition: mathematics.h:118
AACISError::dist2
float dist2
Definition: aacenc_is.h:41
SingleChannelElement::zeroes
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:255
reset_all_predictors
static void reset_all_predictors(PredictorState *ps)
Definition: aacenc_pred.c:104
PredictorState::var1
AAC_FLOAT var1
Definition: aac.h:137
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:222
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
PredictorState::var0
AAC_FLOAT var0
Definition: aac.h:136
w
uint8_t w
Definition: llviddspenc.c:38
av_intfloat32::i
uint32_t i
Definition: intfloat.h:28
MAX_PREDICTORS
#define MAX_PREDICTORS
Definition: aac.h:144
SingleChannelElement::pcoeffs
INTFLOAT pcoeffs[1024]
coefficients for IMDCT, pristine
Definition: aac.h:259
IndividualChannelStream::num_swb
int num_swb
number of scalefactor window bands
Definition: aac.h:181
AACISError::dist1
float dist1
Definition: aacenc_is.h:40
predict
static void predict(PredictorState *ps, float *coef, float *rcoef, int set)
Definition: aacenc_pred.c:64
PredictorState::r0
AAC_FLOAT r0
Definition: aac.h:138
IndividualChannelStream::prediction_used
uint8_t prediction_used[41]
Definition: aac.h:188
SingleChannelElement::ics
IndividualChannelStream ics
Definition: aac.h:247
IndividualChannelStream::predictor_reset_count
int predictor_reset_count[31]
used by encoder to count prediction resets
Definition: aac.h:187
PRED_SFB_START
#define PRED_SFB_START
Definition: aacenc_pred.h:40
PRED_RESET_MIN
#define PRED_RESET_MIN
Definition: aacenc_pred.h:37
set
static void set(uint8_t *a[], int ch, int index, int ch_count, enum AVSampleFormat f, double v)
Definition: swresample.c:59
flt16_trunc
static float flt16_trunc(float pf)
Definition: aacenc_pred.c:56
NOISE_BT
@ NOISE_BT
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:86
AACISError::error
float error
Definition: aacenc_is.h:39
s
#define s(width, name)
Definition: cbs_vp9.c:198
SingleChannelElement::coeffs
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:260
ff_aac_apply_main_pred
void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc_pred.c:119
ff_quantize_and_encode_band_cost
float ff_quantize_and_encode_band_cost(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
Definition: aaccoder.c:269
IndividualChannelStream::swb_sizes
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:180
g
const char * g
Definition: vf_curves.c:127
EIGHT_SHORT_SEQUENCE
@ EIGHT_SHORT_SEQUENCE
Definition: aac.h:77
INTENSITY_BT2
@ INTENSITY_BT2
Scalefactor data are intensity stereo positions (out of phase).
Definition: aac.h:87
IndividualChannelStream::group_len
uint8_t group_len[8]
Definition: aac.h:177
PredictorState
Predictor State.
Definition: aac.h:133
IndividualChannelStream
Individual Channel Stream.
Definition: aac.h:172
IndividualChannelStream::swb_offset
const uint16_t * swb_offset
table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular wind...
Definition: aac.h:179
AACISError::ener01
float ener01
Definition: aacenc_is.h:42
reset_predict_state
static void reset_predict_state(PredictorState *ps)
Definition: aacenc_pred.c:92
NULL
#define NULL
Definition: coverity.c:32
aacenc_quantization.h
FFPsyBand
single band psychoacoustic information
Definition: psymodel.h:50
aactab.h
IndividualChannelStream::predictor_present
int predictor_present
Definition: aac.h:184
av_intfloat32
Definition: intfloat.h:27
SingleChannelElement::predictor_state
PredictorState predictor_state[MAX_PREDICTORS]
Definition: aac.h:266
PredictorState::k1
AAC_FLOAT k1
Definition: aac.h:140
PredictorState::r1
AAC_FLOAT r1
Definition: aac.h:139
SingleChannelElement::sf_idx
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:254
ff_aac_pred_sfb_max
const uint8_t ff_aac_pred_sfb_max[]
Definition: aactab.c:88
f
f
Definition: af_crystalizer.c:121
ChannelElement::ch
SingleChannelElement ch[2]
Definition: aac.h:282
ff_aac_adjust_common_pred
void ff_aac_adjust_common_pred(AACEncContext *s, ChannelElement *cpe)
Definition: aacenc_pred.c:151
update_counters
static int update_counters(IndividualChannelStream *ics, int inc)
Definition: aacenc_pred.c:140
ChannelElement::common_window
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:276
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
SingleChannelElement::prcoeffs
AAC_FLOAT prcoeffs[1024]
Main prediction coefs (used by encoder)
Definition: aac.h:265
flt16_round
static float flt16_round(float pf)
Definition: aacenc_pred.c:40
aacenc_is.h
SingleChannelElement
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:246
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
IndividualChannelStream::num_windows
int num_windows
Definition: aac.h:182
find_min_book
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:92
FFPsyBand::threshold
float threshold
Definition: psymodel.h:53
ChannelElement
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:273
PredictorState::x_est
AAC_FLOAT x_est
Definition: aac.h:141
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
erf
static double erf(double z)
erf function Algorithm taken from the Boost project, source: http://www.boost.org/doc/libs/1_46_1/boo...
Definition: libm.h:121
RESERVED_BT
@ RESERVED_BT
Band types following are encoded differently from others.
Definition: aac.h:85
U
#define U(x)
Definition: vpx_arith.h:37
AACEncContext
AAC encoder context.
Definition: aacenc.h:108
ff_aac_is_encoding_err
struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe, int start, int w, int g, float ener0, float ener1, float ener01, int use_pcoeffs, int phase)
Definition: aacenc_is.c:33
aacenc_pred.h
IndividualChannelStream::window_sequence
enum WindowSequence window_sequence[2]
Definition: aac.h:174
AV_PROFILE_AAC_MAIN
#define AV_PROFILE_AAC_MAIN
Definition: defs.h:68
ff_aac_encode_main_pred
void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce)
Encoder predictors data.
Definition: aacenc_pred.c:332
av_intfloat32::f
float f
Definition: intfloat.h:29
flt16_even
static float flt16_even(float pf)
Definition: aacenc_pred.c:48
find_max_val
static float find_max_val(int group_len, int swb_size, const float *scaled)
Definition: aacenc_utils.h:80
PredictorState::cor1
AAC_FLOAT cor1
Definition: aac.h:135
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
ff_aac_search_for_pred
void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
Definition: aacenc_pred.c:233
reset_predictor_group
static void reset_predictor_group(SingleChannelElement *sce, int group_num)
Definition: aacenc_pred.c:111
IndividualChannelStream::max_sfb
uint8_t max_sfb
number of scalefactor bands per group
Definition: aac.h:173
aacenc_utils.h
SingleChannelElement::band_type
enum BandType band_type[128]
band types
Definition: aac.h:250
IndividualChannelStream::predictor_reset_group
int predictor_reset_group
Definition: aac.h:186
AACISError
Definition: aacenc_is.h:36
RESTORE_PRED
#define RESTORE_PRED(sce, sfb)
Definition: aacenc_pred.c:34
PRED_RESET_FRAME_MIN
#define PRED_RESET_FRAME_MIN
Definition: aacenc_pred.h:34
IndividualChannelStream::predictor_initialized
int predictor_initialized
Definition: aac.h:185
PredictorState::cor0
AAC_FLOAT cor0
Definition: aac.h:134