FFmpeg
aacenc_quantization.h
Go to the documentation of this file.
1 /*
2  * AAC encoder quantizer
3  * Copyright (C) 2015 Rostislav Pehlivanov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder quantizer
25  * @author Rostislav Pehlivanov ( atomnuker gmail com )
26  */
27 
28 #ifndef AVCODEC_AACENC_QUANTIZATION_H
29 #define AVCODEC_AACENC_QUANTIZATION_H
30 
31 #include "aactab.h"
32 #include "aacenc.h"
33 #include "aacenctab.h"
34 #include "aacenc_utils.h"
35 
36 /**
37  * Calculate rate distortion cost for quantizing with given codebook
38  *
39  * @return rate-distortion cost (distortion * lambda + bits), or uplim once the running cost reaches uplim
40  */
42  struct AACEncContext *s,
43  PutBitContext *pb, const float *in, float *out,
44  const float *scaled, int size, int scale_idx,
45  int cb, const float lambda, const float uplim,
46  int *bits, float *energy, int BT_ZERO, int BT_UNSIGNED,
47  int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO,
48  const float ROUNDING)
49 {
 /*
  * Shared body behind the quantize_and_encode_band_cost_<NAME> wrappers,
  * which pass the BT_* flags and ROUNDING as constants.
  * For each group of `dim` coefficients it looks up the codeword, adds up
  * bits and squared error, and optionally writes the bitstream (pb != NULL)
  * and the reconstructed values (out != NULL).
  * Returns the accumulated cost, or uplim as soon as the running cost
  * reaches uplim; on that early return *bits and *energy are not written.
  */
 /* Table index of the quantizer scale selected by scale_idx. */
50  const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
51  const float Q = ff_aac_pow2sf_tab [q_idx];
52  const float Q34 = ff_aac_pow34sf_tab[q_idx];
 /* Index mirrored around POW_SF2_ZERO; presumably the reciprocal of Q
  * (the dequantizer scale) - confirm against the table definition. */
53  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
 /* Magnitude that escape-coded inputs are clamped to (see the
  * t >= CLIPPED_ESCAPE test below). */
54  const float CLIPPED_ESCAPE = 165140.0f*IQ;
55  int i, j;
56  float cost = 0;
57  float qenergy = 0;
 /* Coefficients per codeword: 2 for pair codebooks, 4 otherwise. */
58  const int dim = BT_PAIR ? 2 : 4;
59  int resbits = 0;
60  int off;
61 
 /* No spectral codewords for these band types: the whole band counts as
  * distortion (sum of in[i]^2, scaled by lambda), with zero bits, zero
  * energy and a cleared out[]. */
62  if (BT_ZERO || BT_NOISE || BT_STEREO) {
63  for (i = 0; i < size; i++)
64  cost += in[i]*in[i];
65  if (bits)
66  *bits = 0;
67  if (energy)
68  *energy = qenergy;
69  if (out) {
70  for (i = 0; i < size; i += dim)
71  for (j = 0; j < dim; j++)
72  out[i+j] = 0.0f;
73  }
74  return cost * lambda;
75  }
 /* Caller supplied no precomputed values: let abs_pow34 fill s->scoefs
  * from in[] and use that buffer instead. */
76  if (!scaled) {
77  s->abs_pow34(s->scoefs, in, size);
78  scaled = s->scoefs;
79  }
 /* Quantize the whole band into s->qcoefs; signed codebooks are flagged
  * through the !BT_UNSIGNED argument. */
80  s->quant_bands(s->qcoefs, in, scaled, size, !BT_UNSIGNED, aac_cb_maxval[cb], Q34, ROUNDING);
 /* Signed codebooks: bias each quantized value by the codebook's maxval
  * before it is folded into the codeword index. */
81  if (BT_UNSIGNED) {
82  off = 0;
83  } else {
84  off = aac_cb_maxval[cb];
85  }
86  for (i = 0; i < size; i += dim) {
87  const float *vec;
88  int *quants = s->qcoefs + i;
89  int curidx = 0;
90  int curbits;
91  float quantized, rd = 0.0f;
 /* Build the codeword index as a base-aac_cb_range[cb] number from the
  * dim quantized values of this group. */
92  for (j = 0; j < dim; j++) {
93  curidx *= aac_cb_range[cb];
94  curidx += quants[j] + off;
95  }
 /* Codeword length and its vector of dim values; tables are indexed
  * by cb-1. */
96  curbits = ff_aac_spectral_bits[cb-1][curidx];
97  vec = &ff_aac_codebook_vectors[cb-1][curidx*dim];
98  if (BT_UNSIGNED) {
 /* Unsigned codebooks work on magnitudes; the sign is handled
  * separately and costs one bit per non-zero vector entry. */
99  for (j = 0; j < dim; j++) {
100  float t = fabsf(in[i+j]);
101  float di;
 /* A vector entry of 64.0f is treated as the escape marker. */
102  if (BT_ESC && vec[j] == 64.0f) { //FIXME: slow
103  if (t >= CLIPPED_ESCAPE) {
 /* Clamped escape value: fixed cost of 21 bits. */
104  quantized = CLIPPED_ESCAPE;
105  curbits += 21;
106  } else {
 /* Escape cost equals the prefix (len - 4 + 1 bits) plus len
  * bits written in the pb branch below, with len = av_log2(c). */
107  int c = av_clip_uintp2(quant(t, Q, ROUNDING), 13);
108  quantized = c*cbrtf(c)*IQ;
109  curbits += av_log2(c)*2 - 4 + 1;
110  }
111  } else {
112  quantized = vec[j]*IQ;
113  }
114  di = t - quantized;
 /* Put the input's sign back on the reconstructed magnitude. */
115  if (out)
116  out[i+j] = in[i+j] >= 0 ? quantized : -quantized;
117  if (vec[j] != 0.0f)
118  curbits++;
119  qenergy += quantized*quantized;
120  rd += di*di;
121  }
122  } else {
 /* Signed codebooks: the vector entries are used as-is, so the error
  * is taken against the signed input directly. */
123  for (j = 0; j < dim; j++) {
124  quantized = vec[j]*IQ;
125  qenergy += quantized*quantized;
126  if (out)
127  out[i+j] = quantized;
128  rd += (in[i+j] - quantized)*(in[i+j] - quantized);
129  }
130  }
 /* Cost of this group: lambda-weighted squared error plus bits. */
131  cost += rd * lambda + curbits;
132  resbits += curbits;
 /* Stop once the running cost reaches the caller's limit. *bits and
  * *energy are left untouched on this path. */
133  if (cost >= uplim)
134  return uplim;
 /* Bitstream output: codeword, then one sign bit per non-zero entry of
  * an unsigned codebook, then any escape sequences. */
135  if (pb) {
136  put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
137  if (BT_UNSIGNED)
138  for (j = 0; j < dim; j++)
139  if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
140  put_bits(pb, 1, in[i+j] < 0.0f);
141  if (BT_ESC) {
142  for (j = 0; j < 2; j++) {
143  if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
144  int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q, ROUNDING), 13);
145  int len = av_log2(coef);
146 
 /* Prefix of (len - 4 + 1) bits: all ones except a final zero,
  * followed by coef written with len bits. */
147  put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
148  put_sbits(pb, len, coef);
149  }
150  }
151  }
152  }
153  }
154 
 /* Full band processed: report the totals through the optional outputs. */
155  if (bits)
156  *bits = resbits;
157  if (energy)
158  *energy = qenergy;
159  return cost;
160 }
161 
163  const float *in, float *quant, const float *scaled,
164  int size, int scale_idx, int cb,
165  const float lambda, const float uplim,
166  int *bits, float *energy) {
 /* Placeholder used for a codebook slot that has no encoder: it asserts
  * unconditionally and ignores every argument. The return statement only
  * satisfies the function signature. */
167  av_assert0(0);
168  return 0.0f;
169 }
170 
/*
 * Define quantize_and_encode_band_cost_<NAME>, a wrapper with the common
 * dispatch signature that forwards to
 * quantize_and_encode_band_cost_template() with the given BT_* flags and
 * ROUNDING value appended. When BT_ESC is set the wrapper passes ESC_BT as
 * the codebook instead of the caller's cb.
 */
171 #define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING) \
172 static float quantize_and_encode_band_cost_ ## NAME( \
173  struct AACEncContext *s, \
174  PutBitContext *pb, const float *in, float *quant, \
175  const float *scaled, int size, int scale_idx, \
176  int cb, const float lambda, const float uplim, \
177  int *bits, float *energy) { \
178  return quantize_and_encode_band_cost_template( \
179  s, pb, in, quant, scaled, size, scale_idx, \
180  BT_ESC ? ESC_BT : cb, lambda, uplim, bits, energy, \
181  BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, \
182  ROUNDING); \
183 }
184 
/*
 * One wrapper per band type. Argument order after NAME:
 * BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING.
 * NOTE(review): the _ESC and _STEREO wrappers referenced by the dispatch
 * tables are not instantiated in the visible listing (listing lines 190 and
 * 193 are absent) - confirm against the original header.
 */
185 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0, 0, 0, ROUND_STANDARD)
186 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0, ROUND_STANDARD)
187 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0, ROUND_STANDARD)
188 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0, ROUND_STANDARD)
189 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0, ROUND_STANDARD)
191 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC_RTZ, 0, 1, 1, 1, 0, 0, ROUND_TO_ZERO)
192 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0, ROUND_STANDARD)
194 
/*
 * Dispatch table indexed by codebook number cb; selected by the
 * quantize_and_encode_band_cost() macro when its rtz argument is zero.
 * Slots: 0 ZERO, 1-2 SQUAD, 3-4 UQUAD, 5-6 SPAIR, 7-10 UPAIR, 11 ESC,
 * 12 NONE (asserting placeholder), 13 NOISE, 14-15 STEREO.
 */
195 static float (*const quantize_and_encode_band_cost_arr[])(
196  struct AACEncContext *s,
197  PutBitContext *pb, const float *in, float *quant,
198  const float *scaled, int size, int scale_idx,
199  int cb, const float lambda, const float uplim,
200  int *bits, float *energy) = {
201  quantize_and_encode_band_cost_ZERO,
202  quantize_and_encode_band_cost_SQUAD,
203  quantize_and_encode_band_cost_SQUAD,
204  quantize_and_encode_band_cost_UQUAD,
205  quantize_and_encode_band_cost_UQUAD,
206  quantize_and_encode_band_cost_SPAIR,
207  quantize_and_encode_band_cost_SPAIR,
208  quantize_and_encode_band_cost_UPAIR,
209  quantize_and_encode_band_cost_UPAIR,
210  quantize_and_encode_band_cost_UPAIR,
211  quantize_and_encode_band_cost_UPAIR,
212  quantize_and_encode_band_cost_ESC,
213  quantize_and_encode_band_cost_NONE, /* CB 12 doesn't exist */
214  quantize_and_encode_band_cost_NOISE,
215  quantize_and_encode_band_cost_STEREO,
216  quantize_and_encode_band_cost_STEREO,
217 };
218 
220  struct AACEncContext *s,
221  PutBitContext *pb, const float *in, float *quant,
222  const float *scaled, int size, int scale_idx,
223  int cb, const float lambda, const float uplim,
224  int *bits, float *energy) = {
 /* Same slot layout as quantize_and_encode_band_cost_arr; the only
  * difference is slot 11, which uses the ESC_RTZ (ROUND_TO_ZERO) wrapper.
  * Selected by quantize_and_encode_band_cost() when rtz is non-zero. */
225  quantize_and_encode_band_cost_ZERO,
226  quantize_and_encode_band_cost_SQUAD,
227  quantize_and_encode_band_cost_SQUAD,
228  quantize_and_encode_band_cost_UQUAD,
229  quantize_and_encode_band_cost_UQUAD,
230  quantize_and_encode_band_cost_SPAIR,
231  quantize_and_encode_band_cost_SPAIR,
232  quantize_and_encode_band_cost_UPAIR,
233  quantize_and_encode_band_cost_UPAIR,
234  quantize_and_encode_band_cost_UPAIR,
235  quantize_and_encode_band_cost_UPAIR,
236  quantize_and_encode_band_cost_ESC_RTZ,
237  quantize_and_encode_band_cost_NONE, /* CB 12 doesn't exist */
238  quantize_and_encode_band_cost_NOISE,
239  quantize_and_encode_band_cost_STEREO,
240  quantize_and_encode_band_cost_STEREO,
241 };
242 
/*
 * Dispatch on codebook: pick the round-to-zero table when rtz is non-zero,
 * the standard table otherwise, then call the entry at index cb with the
 * remaining arguments. cb is used unchecked as the table index and is
 * evaluated twice (once as index, once as argument).
 */
243 #define quantize_and_encode_band_cost( \
244  s, pb, in, quant, scaled, size, scale_idx, cb, \
245  lambda, uplim, bits, energy, rtz) \
246  ((rtz) ? quantize_and_encode_band_cost_rtz_arr : quantize_and_encode_band_cost_arr)[cb]( \
247  s, pb, in, quant, scaled, size, scale_idx, cb, \
248  lambda, uplim, bits, energy)
249 
/**
 * Cost-only query: runs the codebook dispatch with no bitstream writer
 * (pb == NULL) and no reconstruction buffer (out == NULL).
 *
 * @param bits   optional; forwarded to the dispatched function
 * @param energy optional; forwarded to the dispatched function
 * @param rtz    non-zero selects the round-to-zero dispatch table
 * @return the value returned by the dispatched cost function
 */
250 static inline float quantize_band_cost(struct AACEncContext *s, const float *in,
251  const float *scaled, int size, int scale_idx,
252  int cb, const float lambda, const float uplim,
253  int *bits, float *energy, int rtz)
254 {
255  return quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
256  cb, lambda, uplim, bits, energy, rtz);
257 }
258 
/**
 * Bit-count query: runs the codebook dispatch with no bitstream writer, no
 * reconstruction buffer and a distortion weight of 0.0f, and returns the
 * bit count the dispatched function stored.
 *
 * The lambda argument is accepted for signature compatibility but is not
 * forwarded; 0.0f is always used.
 *
 * @param bits   optional; receives the same value that is returned
 * @param energy optional; forwarded to the dispatched function
 * @param rtz    non-zero selects the round-to-zero dispatch table
 * @return the stored bit count, or 0 when the dispatched function returned
 *         without storing one (it does so when its running cost reaches
 *         uplim)
 */
259 static inline int quantize_band_cost_bits(struct AACEncContext *s, const float *in,
260  const float *scaled, int size, int scale_idx,
261  int cb, const float lambda, const float uplim,
262  int *bits, float *energy, int rtz)
263 {
 /* Initialized so that the early-return-at-uplim path of the callee, which
  * does not write *bits, cannot lead to reading an indeterminate value. */
264  int auxbits = 0;
265  quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
266  cb, 0.0f, uplim, &auxbits, energy, rtz);
267  if (bits) {
268  *bits = auxbits;
269  }
270  return auxbits;
271 }
272 
274  const float *in, float *out, int size, int scale_idx,
275  int cb, const float lambda, int rtz)
276 {
 /* Encode-only entry point: writes to pb and out, passes no precomputed
  * scaled input (NULL), sets no cost limit (INFINITY), requests neither
  * bits nor energy, and discards the returned cost. */
277  quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
278  INFINITY, NULL, NULL, rtz);
279 }
280 
282 
283 #endif /* AVCODEC_AACENC_QUANTIZATION_H */
#define NULL
Definition: coverity.c:32
static void put_sbits(PutBitContext *pb, int n, int32_t value)
Definition: put_bits.h:240
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:208
int av_log2(unsigned v)
Definition: intmath.c:26
float lambda
Definition: aacenc.h:400
#define ROUND_TO_ZERO
Definition: aacenc_utils.h:37
#define quantize_and_encode_band_cost(s, pb, in, quant, scaled, size, scale_idx, cb, lambda, uplim, bits, energy, rtz)
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:112
AAC encoder context.
Definition: aacenc.h:376
#define f(width, name)
Definition: cbs_vp9.c:255
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
int qcoefs[96]
quantized coefficients
Definition: aacenc.h:407
static float(*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
static float(*const quantize_and_encode_band_cost_rtz_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
ptrdiff_t size
Definition: opengl_enc.c:100
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
uint8_t bits
Definition: vp3data.h:202
const float *const ff_aac_codebook_vectors[]
Definition: aactab.c:918
static av_always_inline float quantize_and_encode_band_cost_template(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int BT_ZERO, int BT_UNSIGNED, int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO, const float ROUNDING)
Calculate rate distortion cost for quantizing with given codebook.
float ff_aac_pow2sf_tab[428]
Definition: aactab.c:35
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
Definition: aac.h:148
float ff_aac_pow34sf_tab[428]
Definition: aactab.c:36
const uint8_t *const ff_aac_spectral_bits[11]
Definition: aactab.c:422
void(* quant_bands)(int *out, const float *in, const float *scaled, int size, int is_signed, int maxval, const float Q34, const float rounding)
Definition: aacenc.h:414
#define s(width, name)
Definition: cbs_vp9.c:257
static int quantize_band_cost_bits(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
PutBitContext pb
Definition: aacenc.h:379
#define ROUND_STANDARD
Definition: aacenc_utils.h:36
static const uint8_t aac_cb_range[12]
Definition: aacenctab.h:125
static av_always_inline float cbrtf(float x)
Definition: libm.h:61
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
int dim
AAC encoder data.
const uint8_t * quant
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Definition: aac.h:149
#define STEREO
Definition: cook.c:61
AAC encoder utilities.
static const uint8_t aac_cb_maxval[12]
Definition: aacenctab.h:126
const uint16_t *const ff_aac_spectral_codes[11]
Definition: aactab.c:417
int len
#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING)
static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
AAC encoder quantization misc reusable function templates.
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
Definition: aac.h:154
FILE * out
Definition: movenc.c:54
#define av_always_inline
Definition: attributes.h:39
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
void(* abs_pow34)(float *out, const float *in, const int size)
Definition: aacenc.h:413
#define INFINITY
Definition: mathematics.h:67
AAC data declarations.
float scoefs[1024]
scaled coefficients
Definition: aacenc.h:408