[FFmpeg-devel] [PATCH 03/11] aaccoder: add intensity stereo coding support for the trellis quantizer

Rostislav Pehlivanov atomnuker at gmail.com
Fri Jun 26 22:16:32 CEST 2015


This commit extends the trellis quantizer (used by the default twoloop coder) to accept and correctly encode codebooks needed for intensity stereo and perceptual noise substitution.
---
 libavcodec/aaccoder.c | 81 +++++++++++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 35 deletions(-)

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index a6e4cc4..f069a3b 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -43,8 +43,23 @@
 /** Frequency in Hz for lower limit of noise substitution **/
 #define NOISE_LOW_LIMIT 4000
 
+/* Energy spread threshold value below which no PNS is used. This typically
+ * corresponds to around 17 kHz; PNS usage then decays, ending at 19 kHz. */
+#define NOISE_SPREAD_THRESHOLD 152234544.0f
+
+/* Above ~1.26*threshold all normally-zeroed values are PNS'd. Lambda divides
+ * the value defined below to try to get a ~1.26 multiplier, so that there is a
+ * balance between noise and zero bands, leaving more bits for actual signal. */
+#define NOISE_LAMBDA_NUMERATOR 252.1f
+
+/** Frequency in Hz for lower limit of intensity stereo   **/
+#define INT_STEREO_LOW_LIMIT 6000
+
 /** Total number of usable codebooks **/
-#define CB_TOT 13
+#define CB_TOT 12
+
+/** Total number of codebooks, including special ones **/
+#define CB_TOT_ALL 15
 
 /** bits needed to code codebook run value for long windows */
 static const uint8_t run_value_bits_long[64] = {
@@ -64,9 +79,9 @@ static const uint8_t * const run_value_bits[2] = {
 };
 
 /** Map to convert values from BandCodingPath index to a codebook index **/
-static const uint8_t aac_cb_out_map[CB_TOT]  = {0,1,2,3,4,5,6,7,8,9,10,11,13};
+static const uint8_t aac_cb_out_map[CB_TOT_ALL]  = {0,1,2,3,4,5,6,7,8,9,10,11,13,14,15};
 /** Inverse map to convert from codebooks to BandCodingPath indices **/
-static const uint8_t aac_cb_in_map[CB_TOT+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12};
+static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12,13,14};
 
 /**
  * Quantize one coefficient.
@@ -118,7 +133,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
                                 const float *scaled, int size, int scale_idx,
                                 int cb, const float lambda, const float uplim,
                                 int *bits, int BT_ZERO, int BT_UNSIGNED,
-                                int BT_PAIR, int BT_ESC, int BT_NOISE)
+                                int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO)
 {
     const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
     const float Q   = ff_aac_pow2sf_tab [q_idx];
@@ -131,14 +146,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
     int resbits = 0;
     int off;
 
-    if (BT_ZERO) {
-        for (i = 0; i < size; i++)
-            cost += in[i]*in[i];
-        if (bits)
-            *bits = 0;
-        return cost * lambda;
-    }
-    if (BT_NOISE) {
+    if (BT_ZERO || BT_NOISE || BT_STEREO) {
         for (i = 0; i < size; i++)
             cost += in[i]*in[i];
         if (bits)
@@ -231,26 +239,27 @@ static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitC
     return 0.0f;
 }
 
-#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE) \
-static float quantize_and_encode_band_cost_ ## NAME(                                    \
-                                struct AACEncContext *s,                                \
-                                PutBitContext *pb, const float *in,                     \
-                                const float *scaled, int size, int scale_idx,           \
-                                int cb, const float lambda, const float uplim,          \
-                                int *bits) {                                            \
-    return quantize_and_encode_band_cost_template(                                      \
-                                s, pb, in, scaled, size, scale_idx,                     \
-                                BT_ESC ? ESC_BT : cb, lambda, uplim, bits,              \
-                                BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE);       \
+#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO) \
+static float quantize_and_encode_band_cost_ ## NAME(                                         \
+                                struct AACEncContext *s,                                     \
+                                PutBitContext *pb, const float *in,                          \
+                                const float *scaled, int size, int scale_idx,                \
+                                int cb, const float lambda, const float uplim,               \
+                                int *bits) {                                                 \
+    return quantize_and_encode_band_cost_template(                                           \
+                                s, pb, in, scaled, size, scale_idx,                          \
+                                BT_ESC ? ESC_BT : cb, lambda, uplim, bits,                   \
+                                BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO); \
 }
 
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO,  1, 0, 0, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC,   0, 1, 1, 1, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO,  1, 0, 0, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC,   0, 1, 1, 1, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1)
 
 static float (*const quantize_and_encode_band_cost_arr[])(
                                 struct AACEncContext *s,
@@ -272,6 +281,8 @@ static float (*const quantize_and_encode_band_cost_arr[])(
     quantize_and_encode_band_cost_ESC,
     quantize_and_encode_band_cost_NONE,     /* CB 12 doesn't exist */
     quantize_and_encode_band_cost_NOISE,
+    quantize_and_encode_band_cost_STEREO,
+    quantize_and_encode_band_cost_STEREO,
 };
 
 #define quantize_and_encode_band_cost(                                  \
@@ -454,7 +465,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
 
     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
     start = win*128;
-    for (cb = 0; cb < CB_TOT; cb++) {
+    for (cb = 0; cb < CB_TOT_ALL; cb++) {
         path[0][cb].cost     = run_bits+4;
         path[0][cb].prev_idx = -1;
         path[0][cb].run      = 0;
@@ -478,7 +489,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
             }
             next_minbits = path[swb+1][0].cost;
             next_mincb = 0;
-            for (cb = 1; cb < CB_TOT; cb++) {
+            for (cb = 1; cb < CB_TOT_ALL; cb++) {
                 path[swb+1][cb].cost = 61450;
                 path[swb+1][cb].prev_idx = -1;
                 path[swb+1][cb].run = 0;
@@ -495,10 +506,10 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
                 path[swb+1][cb].prev_idx = -1;
                 path[swb+1][cb].run = 0;
             }
-            for (cb = startcb; cb < CB_TOT; cb++) {
+            for (cb = startcb; cb < CB_TOT_ALL; cb++) {
                 float cost_stay_here, cost_get_here;
                 float bits = 0.0f;
-                if (cb == 12 && sce->band_type[win*16+swb] != NOISE_BT) {
+                if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) {
                     path[swb+1][cb].cost = 61450;
                     path[swb+1][cb].prev_idx = -1;
                     path[swb+1][cb].run = 0;
@@ -537,7 +548,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
     //convert resulting path from backward-linked list
     stack_len = 0;
     idx       = 0;
-    for (cb = 1; cb < CB_TOT; cb++)
+    for (cb = 1; cb < CB_TOT_ALL; cb++)
         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
             idx = cb;
     ppos = max_sfb;
-- 
2.1.4



More information about the ffmpeg-devel mailing list