FFmpeg
aacenc_utils.h
Go to the documentation of this file.
1 /*
2  * AAC encoder utilities
3  * Copyright (C) 2015 Rostislav Pehlivanov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder utilities
25  * @author Rostislav Pehlivanov ( atomnuker gmail com )
26  */
27 
28 #ifndef AVCODEC_AACENC_UTILS_H
29 #define AVCODEC_AACENC_UTILS_H
30 
31 #include "libavutil/ffmath.h"
32 #include "aac.h"
33 #include "aacenctab.h"
34 #include "aactab.h"
35 
/*
 * Rounding offsets added to a scaled coefficient before it is truncated to
 * an integer.
 * NOTE(review): ROUND_STANDARD and ROUND_TO_ZERO are presumably passed as the
 * `rounding` argument of quant()/quantize_bands() by callers outside this
 * header - confirm. C_QUANT is the offset find_min_book() adds itself.
 */
#define ROUND_STANDARD 0.4054f
#define ROUND_TO_ZERO  0.1054f
#define C_QUANT        0.4054f
39 
40 static inline void abs_pow34_v(float *out, const float *in, const int size)
41 {
42  int i;
43  for (i = 0; i < size; i++) {
44  float a = fabsf(in[i]);
45  out[i] = sqrtf(a * sqrtf(a));
46  }
47 }
48 
49 static inline float pos_pow34(float a)
50 {
51  return sqrtf(a * sqrtf(a));
52 }
53 
54 /**
55  * Quantize one coefficient.
56  * @return absolute value of the quantized coefficient
57  * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
58  */
59 static inline int quant(float coef, const float Q, const float rounding)
60 {
61  float a = coef * Q;
62  return sqrtf(a * sqrtf(a)) + rounding;
63 }
64 
/**
 * Quantize a run of pre-scaled coefficients.
 *
 * @param out       receives size quantized values
 * @param in        original coefficients, consulted only for their sign
 * @param scaled    values that get multiplied by Q34 (presumably |in|^(3/4),
 *                  as produced by abs_pow34_v() - confirm against callers)
 * @param size      number of coefficients
 * @param is_signed if nonzero, outputs are negated where in[] is negative
 * @param maxval    upper clamp applied to the quantized magnitude
 * @param Q34       quantizer scale
 * @param rounding  offset added before truncation to int
 */
static inline void quantize_bands(int *out, const float *in, const float *scaled,
                                  int size, int is_signed, int maxval, const float Q34,
                                  const float rounding)
{
    const float limit = (float)maxval;
    int k;

    for (k = 0; k < size; k++) {
        const float qc    = scaled[k] * Q34;
        const float level = qc + rounding;
        const int   mag   = (int)FFMIN(level, limit);

        out[k] = (is_signed && in[k] < 0.0f) ? -mag : mag;
    }
}
79 
/**
 * Find the largest value of a band across all windows of a group.
 *
 * Scans the first swb_size entries of group_len consecutive rows that are
 * spaced 128 floats apart. Values are compared as-is (no absolute value is
 * taken), and the running maximum starts at 0.
 *
 * @return the largest value found, or 0 if none is greater than 0
 */
static inline float find_max_val(int group_len, int swb_size, const float *scaled)
{
    float peak = 0.0f;
    int win, k;

    for (win = 0; win < group_len; win++) {
        const int base = win * 128;

        for (k = 0; k < swb_size; k++)
            peak = FFMAX(peak, scaled[base + k]);
    }
    return peak;
}
91 
92 static inline int find_min_book(float maxval, int sf)
93 {
95  int qmaxval, cb;
96  qmaxval = maxval * Q34 + C_QUANT;
97  if (qmaxval >= (FF_ARRAY_ELEMS(aac_maxval_cb)))
98  cb = 11;
99  else
100  cb = aac_maxval_cb[qmaxval];
101  return cb;
102 }
103 
/**
 * Compute an energy-weighted form factor for one band of a window group.
 *
 * For each of the group_len windows (rows spaced 128 floats apart) the first
 * swb_size entries of scaled[] are analysed. Windows whose summed squared
 * magnitude does not exceed thresh are skipped. For the others the term
 *     sqrt(mean / min(mean + 4 * deviation, peak)) / max(0.5, nz_lines)
 * is accumulated, weighted by the window's mean squared magnitude.
 *
 * @param group_len number of windows in the group
 * @param swb_size  number of coefficients in the band
 * @param thresh    energy threshold; also the per-line threshold used for the
 *                  soft non-zero line count
 * @param scaled    coefficients, 128 entries per window
 * @param nzslope   exponent of the soft roll-off for below-threshold lines
 * @return the weighted average of the per-window terms, or 1.0f when no
 *         window exceeded thresh
 */
static inline float find_form_factor(int group_len, int swb_size, float thresh,
                                     const float *scaled, float nzslope) {
    const float inv_len   = 1.0f / swb_size;
    const float inv_lenm1 = 1.0f / (swb_size - 1);
    const float ethresh   = thresh;
    float form = 0.0f, weight = 0.0f;
    int win, k;

    for (win = 0; win < group_len; win++) {
        const int base = win * 128;
        float sum_abs = 0.0f, sum_sq = 0.0f, dev = 0.0f, peak = 0.0f;
        float nz_lines = 0;
        float mean, mean_sq, shape;

        for (k = 0; k < swb_size; k++) {
            const float mag = fabsf(scaled[base + k]);
            const float pwr = mag * mag;

            peak     = FFMAX(peak, mag);
            sum_abs += mag;
            sum_sq  += pwr;
            /* We really don't want a hard non-zero-line count, since
             * even below-threshold lines do add up towards band spectral power.
             * So, fall steeply towards zero, but smoothly.
             * Note that it is the squared magnitude that is compared here.
             */
            if (pwr >= ethresh) {
                nz_lines += 1.0f;
            } else if (nzslope == 2.f) {
                const float rel = pwr / ethresh;
                nz_lines += rel * rel;
            } else {
                nz_lines += ff_fast_powf(pwr / ethresh, nzslope);
            }
        }

        /* Windows without enough energy do not contribute. */
        if (!(sum_sq > thresh))
            continue;

        mean = sum_abs * inv_len;

        /* deviation of the magnitudes around their mean */
        for (k = 0; k < swb_size; k++) {
            const float d = fabsf(scaled[base + k]) - mean;
            dev += d*d;
        }
        dev = sqrtf(dev * inv_lenm1);

        mean_sq = sum_sq * inv_len;
        shape   = mean / FFMIN(mean + 4*dev, peak);
        form   += mean_sq * sqrtf(shape) / FFMAX(0.5f, nz_lines);
        weight += mean_sq;
    }

    return weight > 0 ? form / weight : 1.0f;
}
155 
156 /** Return the minimum scalefactor where the quantized coef does not clip. */
157 static inline uint8_t coef2minsf(float coef)
158 {
159  return av_clip_uint8(log2f(coef)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
160 }
161 
162 /** Return the maximum scalefactor where the quantized coef is not zero. */
163 static inline uint8_t coef2maxsf(float coef)
164 {
165  return av_clip_uint8(log2f(coef)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512);
166 }
167 
/*
 * Returns the index of the array entry closest to val, measured by squared
 * difference. The first entry wins a tie; 0 is returned when num <= 0.
 */
static inline int quant_array_idx(const float val, const float *arr, const int num)
{
    float best_err = INFINITY;
    int best = 0;
    int k;

    for (k = 0; k < num; k++) {
        const float diff = val - arr[k];
        const float err  = diff * diff;

        /* Only a strictly smaller error replaces the current best. */
        if (!(err < best_err))
            continue;
        best_err = err;
        best     = k;
    }
    return best;
}
184 
185 /**
186  * approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
187  */
188 static av_always_inline float bval2bmax(float b)
189 {
190  return 0.001f + 0.0035f * (b*b*b) / (15.5f*15.5f*15.5f);
191 }
192 
193 /*
194  * Compute a nextband map to be used with SF delta constraint utilities.
195  * The nextband array should contain 128 elements, and positions that don't
196  * map to valid, nonzero bands of the form w*16+g (with w being the initial
197  * window of the window group, only) are left indetermined.
198  */
199 static inline void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
200 {
201  unsigned char prevband = 0;
202  int w, g;
203  /** Just a safe default */
204  for (g = 0; g < 128; g++)
205  nextband[g] = g;
206 
207  /** Now really navigate the nonzero band chain */
208  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
209  for (g = 0; g < sce->ics.num_swb; g++) {
210  if (!sce->zeroes[w*16+g] && sce->band_type[w*16+g] < RESERVED_BT)
211  prevband = nextband[prevband] = w*16+g;
212  }
213  }
214  nextband[prevband] = prevband; /* terminate */
215 }
216 
/*
 * Updates nextband to reflect a removed band (equivalent to
 * calling ff_init_nextband_map after marking a band as zero).
 * The predecessor prevband is made to point at whatever followed band.
 */
static inline void ff_nextband_remove(uint8_t *nextband, int prevband, int band)
{
    const uint8_t successor = nextband[band];

    nextband[prevband] = successor;
}
225 
226 /*
227  * Checks whether the specified band could be removed without inducing
228  * scalefactor delta that violates SF delta encoding constraints.
229  * prev_sf has to be the scalefactor of the previous nonzero, nonspecial
230  * band, in encoding order, or negative if there was no such band.
231  */
233  const uint8_t *nextband, int prev_sf, int band)
234 {
235  return prev_sf >= 0
236  && sce->sf_idx[nextband[band]] >= (prev_sf - SCALE_MAX_DIFF)
237  && sce->sf_idx[nextband[band]] <= (prev_sf + SCALE_MAX_DIFF);
238 }
239 
240 /*
241  * Checks whether the specified band's scalefactor could be replaced
242  * with another one without violating SF delta encoding constraints.
243  * prev_sf has to be the scalefactor of the previous nonzero, nonsepcial
244  * band, in encoding order, or negative if there was no such band.
245  */
246 static inline int ff_sfdelta_can_replace(const SingleChannelElement *sce,
247  const uint8_t *nextband, int prev_sf, int new_sf, int band)
248 {
249  return new_sf >= (prev_sf - SCALE_MAX_DIFF)
250  && new_sf <= (prev_sf + SCALE_MAX_DIFF)
251  && sce->sf_idx[nextband[band]] >= (new_sf - SCALE_MAX_DIFF)
252  && sce->sf_idx[nextband[band]] <= (new_sf + SCALE_MAX_DIFF);
253 }
254 
255 /**
256  * linear congruential pseudorandom number generator
257  *
258  * @param previous_val pointer to the current state of the generator
259  *
260  * @return Returns a 32-bit pseudorandom integer
261  */
262 static av_always_inline int lcg_random(unsigned previous_val)
263 {
264  union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
265  return v.s;
266 }
267 
/**
 * If cond is true, log the given printf-style message at AV_LOG_ERROR level
 * and return AVERROR(EINVAL) from the enclosing function.
 *
 * Requires a variable named avctx in the calling scope. The body is wrapped
 * in do { } while (0) so that the macro behaves as a single statement and
 * can be used safely in an unbraced if/else; invoke it with a trailing ';'.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)
273 
/**
 * If cond is true, log the given printf-style message at AV_LOG_WARNING
 * level; execution then continues normally.
 *
 * Requires a variable named avctx in the calling scope. The body is wrapped
 * in do { } while (0) so that the macro behaves as a single statement and
 * can be used safely in an unbraced if/else; invoke it with a trailing ';'.
 */
#define WARN_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
        } \
    } while (0)
278 
279 #endif /* AVCODEC_AACENC_UTILS_H */
INFINITY
#define INFINITY
Definition: mathematics.h:67
out
FILE * out
Definition: movenc.c:54
cb
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:112
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:252
aacenctab.h
abs_pow34_v
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacenc_utils.h:40
log2f
#define log2f(x)
Definition: libm.h:409
SingleChannelElement::zeroes
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:257
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
w
uint8_t w
Definition: llviddspenc.c:38
bval2bmax
static av_always_inline float bval2bmax(float b)
approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
Definition: aacenc_utils.h:188
b
#define b
Definition: input.c:41
ff_fast_powf
static av_always_inline float ff_fast_powf(float x, float y)
Compute x^y for floating point x, y.
Definition: ffmath.h:62
ff_sfdelta_can_remove_band
static int ff_sfdelta_can_remove_band(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int band)
Definition: aacenc_utils.h:232
coef2maxsf
static uint8_t coef2maxsf(float coef)
Return the maximum scalefactor where the quantized coef is not zero.
Definition: aacenc_utils.h:163
IndividualChannelStream::num_swb
int num_swb
number of scalefactor window bands
Definition: aac.h:183
SCALE_DIV_512
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
Definition: aac.h:148
ff_sfdelta_can_replace
static int ff_sfdelta_can_replace(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int new_sf, int band)
Definition: aacenc_utils.h:246
find_form_factor
static float find_form_factor(int group_len, int swb_size, float thresh, const float *scaled, float nzslope)
Definition: aacenc_utils.h:104
POW_SF2_ZERO
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
Definition: aac.h:154
quant
static int quant(float coef, const float Q, const float rounding)
Quantize one coefficient.
Definition: aacenc_utils.h:59
SingleChannelElement::ics
IndividualChannelStream ics
Definition: aac.h:249
s
#define s(width, name)
Definition: cbs_vp9.c:257
g
const char * g
Definition: vf_curves.c:115
IndividualChannelStream::group_len
uint8_t group_len[8]
Definition: aac.h:179
form
This is the more generic form
Definition: tablegen.txt:34
f
#define f(width, name)
Definition: cbs_vp9.c:255
aac.h
aactab.h
ff_init_nextband_map
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
Definition: aacenc_utils.h:199
index
int index
Definition: gxfenc.c:89
SingleChannelElement::sf_idx
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:256
weight
static int weight(int i, int blen, int offset)
Definition: diracdec.c:1564
error
static void error(const char *err)
Definition: target_dec_fuzzer.c:61
coef2minsf
static uint8_t coef2minsf(float coef)
Return the minimum scalefactor where the quantized coef does not clip.
Definition: aacenc_utils.h:157
quantize_bands
static void quantize_bands(int *out, const float *in, const float *scaled, int size, int is_signed, int maxval, const float Q34, const float rounding)
Definition: aacenc_utils.h:65
FFMAX
#define FFMAX(a, b)
Definition: common.h:94
C_QUANT
#define C_QUANT
Definition: aacenc_utils.h:38
size
int size
Definition: twinvq_data.h:11134
quant_array_idx
static int quant_array_idx(const float val, const float *arr, const int num)
Definition: aacenc_utils.h:171
val
const char const char void * val
Definition: avisynth_c.h:863
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
SCALE_MAX_DIFF
#define SCALE_MAX_DIFF
maximum scalefactor difference allowed by standard
Definition: aac.h:151
pos_pow34
static float pos_pow34(float a)
Definition: aacenc_utils.h:49
in
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Definition: audio_convert.c:326
SingleChannelElement
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:248
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
IndividualChannelStream::num_windows
int num_windows
Definition: aac.h:184
SCALE_ONE_POS
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Definition: aac.h:149
find_min_book
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:92
av_always_inline
#define av_always_inline
Definition: attributes.h:43
uint8_t
uint8_t
Definition: audio_convert.c:194
lcg_random
static av_always_inline int lcg_random(unsigned previous_val)
linear congruential pseudorandom number generator
Definition: aacenc_utils.h:262
RESERVED_BT
@ RESERVED_BT
Band types following are encoded differently from others.
Definition: aac.h:86
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen_template.c:38
aac_maxval_cb
static const unsigned char aac_maxval_cb[]
Definition: aacenctab.h:128
ff_aac_pow34sf_tab
float ff_aac_pow34sf_tab[428]
Definition: aactab.c:36
ffmath.h
find_max_val
static float find_max_val(int group_len, int swb_size, const float *scaled)
Definition: aacenc_utils.h:80
int
int
Definition: ffmpeg_filter.c:191
SingleChannelElement::band_type
enum BandType band_type[128]
band types
Definition: aac.h:252
ff_nextband_remove
static void ff_nextband_remove(uint8_t *nextband, int prevband, int band)
Definition: aacenc_utils.h:221