[FFmpeg-cvslog] aacenc_pred: rework the way prediction is done

Rostislav Pehlivanov git at videolan.org
Sat Aug 29 08:48:36 CEST 2015


ffmpeg | branch: master | Rostislav Pehlivanov <atomnuker at gmail.com> | Sat Aug 29 06:34:08 2015 +0100| [44ddee945a2e8cfc1b7074de6e35595ed41da4e4] | committer: Rostislav Pehlivanov

aacenc_pred: rework the way prediction is done

This commit completely alters the algorithm of prediction.
The original commit which introduced prediction was completely
incorrect: it did not even remotely care about what the actual coefficients
contain or whether any options were enabled. Not my actual fault.

This commit treats prediction the way the decoder does and expects it
to be done: like lossy encryption. Everything related to prediction now
happens at the very end but just before quantization and encoding
of coefficients. On the decoder side, prediction happens before
anything has had a chance to even access the coefficients.

Also the original implementation had problems because it actually
touched the band_type of special bands which already had their
scalefactor indices marked and it's a wonder the assertion wasn't
triggered when transmitting those.

Overall, this now drastically increases audio quality and you should
think about enabling it if you don't plan on playing anything encoded
on really old low-power ultra-embedded devices since they might not
support decoding of prediction or AAC-Main. Though the specifications
were written ages ago and as times change so do the FLOPS.

Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=44ddee945a2e8cfc1b7074de6e35595ed41da4e4
---

 libavcodec/aac.h         |    2 +-
 libavcodec/aaccoder.c    |    4 -
 libavcodec/aacenc.c      |   41 +++---
 libavcodec/aacenc.h      |    1 -
 libavcodec/aacenc_pred.c |  361 ++++++++++++++++++++++------------------------
 libavcodec/aacenc_pred.h |    6 +-
 6 files changed, 190 insertions(+), 225 deletions(-)

diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index dc6b439..aa4b53b 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -247,7 +247,7 @@ typedef struct SingleChannelElement {
     TemporalNoiseShaping tns;
     Pulse pulse;
     enum BandType band_type[128];                   ///< band types
-    enum BandType orig_band_type[128];              ///< band type backups for undoing prediction
+    enum BandType band_alt[128];                    ///< alternative band type (used by encoder)
     int band_type_run_end[120];                     ///< band type run end points
     INTFLOAT sf[120];                               ///< scalefactors
     int sf_idx[128];                                ///< scalefactor indices (used by encoder)
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index c273c54..8256f96 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -964,7 +964,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         ff_aac_encode_main_pred,
         ff_aac_adjust_common_prediction,
         ff_aac_apply_main_pred,
-        ff_aac_update_main_pred,
         set_special_band_scalefactors,
         search_for_pns,
         ff_aac_search_for_tns,
@@ -980,7 +979,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         ff_aac_encode_main_pred,
         ff_aac_adjust_common_prediction,
         ff_aac_apply_main_pred,
-        ff_aac_update_main_pred,
         set_special_band_scalefactors,
         search_for_pns,
         ff_aac_search_for_tns,
@@ -996,7 +994,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         ff_aac_encode_main_pred,
         ff_aac_adjust_common_prediction,
         ff_aac_apply_main_pred,
-        ff_aac_update_main_pred,
         set_special_band_scalefactors,
         search_for_pns,
         ff_aac_search_for_tns,
@@ -1012,7 +1009,6 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
         ff_aac_encode_main_pred,
         ff_aac_adjust_common_prediction,
         ff_aac_apply_main_pred,
-        ff_aac_update_main_pred,
         set_special_band_scalefactors,
         search_for_pns,
         ff_aac_search_for_tns,
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index ece2328..a7c43c7 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -354,15 +354,15 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
                 start += sce->ics.swb_sizes[i];
                 continue;
             }
-            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
+            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
                 s->coder->quantize_and_encode_band(s, &s->pb,
                                                    &sce->coeffs[start + w2*128],
-                                                   &sce->pqcoeffs[start + w2*128],
-                                                   sce->ics.swb_sizes[i],
+                                                   NULL, sce->ics.swb_sizes[i],
                                                    sce->sf_idx[w*16 + i],
                                                    sce->band_type[w*16 + i],
                                                    s->lambda,
                                                    sce->ics.window_clipping[w]);
+            }
             start += sce->ics.swb_sizes[i];
         }
     }
@@ -609,12 +609,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                     s->coder->search_for_pns(s, avctx, sce);
                 if (s->options.tns && s->coder->search_for_tns)
                     s->coder->search_for_tns(s, sce);
-                if (s->options.pred && s->coder->search_for_pred)
-                    s->coder->search_for_pred(s, sce);
                 if (sce->tns.present)
                     tns_mode = 1;
-                if (sce->ics.predictor_present)
-                    pred_mode = 1;
             }
             s->cur_channel = start_ch;
             if (s->options.stereo_mode && cpe->common_window) {
@@ -631,15 +627,26 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                 s->coder->search_for_is(s, avctx, cpe);
                 if (cpe->is_mode) is_mode = 1;
             }
-            if (s->options.pred && s->coder->adjust_common_prediction)
-                s->coder->adjust_common_prediction(s, cpe);
             if (s->coder->set_special_band_scalefactors)
                 for (ch = 0; ch < chans; ch++)
                     s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
-            if (s->options.pred && s->coder->apply_main_pred)
-                for (ch = 0; ch < chans; ch++)
-                    s->coder->apply_main_pred(s, &cpe->ch[ch]);
             adjust_frame_information(cpe, chans);
+            for (ch = 0; ch < chans; ch++) {
+                sce = &cpe->ch[ch];
+                s->cur_channel = start_ch + ch;
+                if (s->options.pred && s->coder->search_for_pred)
+                    s->coder->search_for_pred(s, sce);
+                if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
+            }
+            if (s->options.pred && s->coder->adjust_common_prediction)
+                s->coder->adjust_common_prediction(s, cpe);
+            for (ch = 0; ch < chans; ch++) {
+                sce = &cpe->ch[ch];
+                s->cur_channel = start_ch + ch;
+                if (s->options.pred && s->coder->apply_main_pred)
+                    s->coder->apply_main_pred(s, sce);
+            }
+            s->cur_channel = start_ch;
             if (chans == 2) {
                 put_bits(&s->pb, 1, cpe->common_window);
                 if (cpe->common_window) {
@@ -676,16 +683,6 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 
     } while (1);
 
-    // update predictor state
-    if (s->options.pred && s->coder->update_main_pred) {
-        for (i = 0; i < s->chan_map[0]; i++) {
-            cpe = &s->cpe[i];
-            for (ch = 0; ch < chans; ch++)
-                s->coder->update_main_pred(s, &cpe->ch[ch],
-                                           (cpe->common_window && !ch) ? cpe : NULL);
-        }
-    }
-
     put_bits(&s->pb, 3, TYPE_END);
     flush_put_bits(&s->pb);
     avctx->frame_bits = put_bits_count(&s->pb);
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 9ee854f..69a8c01 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -63,7 +63,6 @@ typedef struct AACCoefficientsEncoder {
     void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
     void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe);
     void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
-    void (*update_main_pred)(struct AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe);
     void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
     void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
     void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
diff --git a/libavcodec/aacenc_pred.c b/libavcodec/aacenc_pred.c
index c638b70..fafe002 100644
--- a/libavcodec/aacenc_pred.c
+++ b/libavcodec/aacenc_pred.c
@@ -21,15 +21,22 @@
 
 /**
  * @file
- * AAC encoder main prediction
+ * AAC encoder Intensity Stereo
  * @author Rostislav Pehlivanov ( atomnuker gmail com )
  */
 
 #include "aactab.h"
 #include "aacenc_pred.h"
 #include "aacenc_utils.h"
+#include "aacenc_is.h"            /* <- Needed for common window distortions */
 #include "aacenc_quantization.h"
 
+#define RESTORE_PRED(sce, sfb) \
+        if (sce->ics.prediction_used[sfb]) {\
+            sce->ics.prediction_used[sfb] = 0;\
+            sce->band_type[sfb] = sce->band_alt[sfb];\
+        }
+
 static inline float flt16_round(float pf)
 {
     union av_intfloat32 tmp;
@@ -54,73 +61,57 @@ static inline float flt16_trunc(float pf)
     return pun.f;
 }
 
-static inline void predict(PredictorState *ps, float *coef, float *rcoef,
-                           int output_enable)
+static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set)
 {
-    const float a     = 0.953125; // 61.0 / 64
     float k2;
-    float   r0 = ps->r0,     r1 = ps->r1;
-    float cor0 = ps->cor0, cor1 = ps->cor1;
-    float var0 = ps->var0, var1 = ps->var1;
-
-    ps->k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
-        k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
-
-    ps->x_est = flt16_round(ps->k1*r0 + k2*r1);
-
-    if (output_enable)
-        *coef -= ps->x_est;
-    else
-        *rcoef = *coef - ps->x_est;
-}
-
-static inline void update_predictor(PredictorState *ps, float qcoef)
-{
-    const float alpha = 0.90625;  // 29.0 / 32
     const float a     = 0.953125; // 61.0 / 64
-    float k1 = ps->k1;
-    float r0 = ps->r0;
-    float r1 = ps->r1;
-    float e0 = qcoef + ps->x_est;
-    float e1 = e0 - k1 * r0;
-    float cor0 = ps->cor0, cor1 = ps->cor1;
-    float var0 = ps->var0, var1 = ps->var1;
+    const float alpha = 0.90625;  // 29.0 / 32
+    const float   k1 = ps->k1;
+    const float   r0 = ps->r0,     r1 = ps->r1;
+    const float cor0 = ps->cor0, cor1 = ps->cor1;
+    const float var0 = ps->var0, var1 = ps->var1;
+    const float e0 = *coef - ps->x_est;
+    const float e1 = e0 - k1 * r0;
+
+    if (set)
+        *coef = e0;
 
     ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
     ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
     ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
     ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
+    ps->r1   = flt16_trunc(a * (r0 - k1 * e0));
+    ps->r0   = flt16_trunc(a * e0);
 
-    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
-    ps->r0 = flt16_trunc(a * e0);
+    /* Prediction for next frame */
+    ps->k1   = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
+    k2       = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
+    *rcoef   = ps->x_est = flt16_round(ps->k1*ps->r0 + k2*ps->r1);
 }
 
 static inline void reset_predict_state(PredictorState *ps)
 {
-    ps->r0   = 0.0f;
-    ps->r1   = 0.0f;
-    ps->cor0 = 0.0f;
-    ps->cor1 = 0.0f;
-    ps->var0 = 1.0f;
-    ps->var1 = 1.0f;
-    ps->k1   = 0.0f;
-    ps->x_est= 0.0f;
+    ps->r0    = 0.0f;
+    ps->r1    = 0.0f;
+    ps->k1    = 0.0f;
+    ps->cor0  = 0.0f;
+    ps->cor1  = 0.0f;
+    ps->var0  = 1.0f;
+    ps->var1  = 1.0f;
+    ps->x_est = 0.0f;
 }
 
-static inline void reset_all_predictors(SingleChannelElement *sce)
+static inline void reset_all_predictors(PredictorState *ps)
 {
     int i;
     for (i = 0; i < MAX_PREDICTORS; i++)
-        reset_predict_state(&sce->predictor_state[i]);
-    for (i = 1; i < 31; i++)
-        sce->ics.predictor_reset_count[i] = 0;
+        reset_predict_state(&ps[i]);
 }
 
 static inline void reset_predictor_group(SingleChannelElement *sce, int group_num)
 {
     int i;
     PredictorState *ps = sce->predictor_state;
-    sce->ics.predictor_reset_count[group_num] = 0;
     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
         reset_predict_state(&ps[i]);
 }
@@ -128,136 +119,89 @@ static inline void reset_predictor_group(SingleChannelElement *sce, int group_nu
 void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce)
 {
     int sfb, k;
+    const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
 
     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
-        for (sfb = 0; sfb < ff_aac_pred_sfb_max[s->samplerate_index]; sfb++) {
-            for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++)
+        for (sfb = 0; sfb < pmax; sfb++) {
+            for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
                 predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k],
-                        (sce->ics.predictor_present && sce->ics.prediction_used[sfb]));
-        }
-    }
-}
-
-static void decode_joint_stereo(ChannelElement *cpe)
-{
-    int i, w, w2, g;
-    SingleChannelElement *sce0 = &cpe->ch[0];
-    SingleChannelElement *sce1 = &cpe->ch[1];
-    IndividualChannelStream *ics;
-
-    for (i = 0; i < MAX_PREDICTORS; i++)
-        sce0->prcoeffs[i] = sce0->predictor_state[i].x_est;
-
-    ics = &sce0->ics;
-    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
-        for (w2 =  0; w2 < ics->group_len[w]; w2++) {
-            int start = (w+w2) * 128;
-            for (g = 0; g < ics->num_swb; g++) {
-                int sfb = w*16 + g;
-                //apply Intensity stereo coeffs transformation
-                if (cpe->is_mask[sfb]) {
-                    int p = -1 + 2 * (sce1->band_type[sfb] - 14);
-                    float rscale = ff_aac_pow2sf_tab[-sce1->sf_idx[sfb] + POW_SF2_ZERO];
-                    p *= 1 - 2 * cpe->ms_mask[sfb];
-                    for (i = 0; i < ics->swb_sizes[g]; i++) {
-                        sce0->pqcoeffs[start+i] = (sce0->prcoeffs[start+i] + p*sce0->pqcoeffs[start+i]) * rscale;
-                    }
-                } else if (cpe->ms_mask[sfb] &&
-                           sce0->band_type[sfb] < NOISE_BT &&
-                           sce1->band_type[sfb] < NOISE_BT) {
-                    for (i = 0; i < ics->swb_sizes[g]; i++) {
-                        float L = sce0->pqcoeffs[start+i] + sce1->pqcoeffs[start+i];
-                        float R = sce0->pqcoeffs[start+i] - sce1->pqcoeffs[start+i];
-                        sce0->pqcoeffs[start+i] = L;
-                        sce1->pqcoeffs[start+i] = R;
-                    }
-                }
-                start += ics->swb_sizes[g];
+                        sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
             }
         }
+        if (sce->ics.predictor_reset_group) {
+            reset_predictor_group(sce, sce->ics.predictor_reset_group);
+        }
+    } else {
+        reset_all_predictors(sce->predictor_state);
     }
 }
 
-static inline void prepare_predictors(SingleChannelElement *sce)
-{
-    int k;
-    for (k = 0; k < MAX_PREDICTORS; k++)
-        predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k], 0);
-}
-
-void ff_aac_update_main_pred(AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe)
-{
-    int k;
-
-    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
-        return;
-
-    if (cpe && cpe->common_window)
-        decode_joint_stereo(cpe);
-
-    for (k = 0; k < MAX_PREDICTORS; k++)
-        update_predictor(&sce->predictor_state[k], sce->pqcoeffs[k]);
-
-    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-        reset_all_predictors(sce);
-    }
-
-    if (sce->ics.predictor_reset_group)
-        reset_predictor_group(sce, sce->ics.predictor_reset_group);
-}
-
-/* If inc == 0 check if it returns 0 to see if you can reset freely */
+/* If inc = 0 you can check if this returns 0 to see if you can reset freely */
 static inline int update_counters(IndividualChannelStream *ics, int inc)
 {
-    int i, rg = 0;
+    int i;
     for (i = 1; i < 31; i++) {
         ics->predictor_reset_count[i] += inc;
-        if (!rg && ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN)
-            rg = i; /* Reset this immediately */
+        if (ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN)
+            return i; /* Reset this immediately */
     }
-    return rg;
+    return 0;
 }
 
 void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe)
 {
-    int start, w, g, count = 0;
+    int start, w, w2, g, i, count = 0;
     SingleChannelElement *sce0 = &cpe->ch[0];
     SingleChannelElement *sce1 = &cpe->ch[1];
+    const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
+    const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
+    const int pmax  = FFMIN(pmax0, pmax1);
 
-    if (!cpe->common_window || sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
+    if (!cpe->common_window ||
+        sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE ||
+        sce1->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
         return;
 
-    /* Predict if IS or MS is on and at least one channel is marked or when both are */
     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
         start = 0;
         for (g = 0; g < sce0->ics.num_swb; g++) {
             int sfb = w*16+g;
-            if (sfb < PRED_SFB_START || sfb > ff_aac_pred_sfb_max[s->samplerate_index]) {
-                ;
-            } else if ((cpe->is_mask[sfb] || cpe->ms_mask[sfb]) &&
-                (sce0->ics.prediction_used[sfb] || sce1->ics.prediction_used[sfb])) {
-                sce0->ics.prediction_used[sfb] = sce1->ics.prediction_used[sfb] = 1;
-                count++;
-            } else if (sce0->ics.prediction_used[sfb] && sce1->ics.prediction_used[sfb]) {
+            int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb];
+            float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
+            struct AACISError ph_err1, ph_err2, *erf;
+            if (sfb < PRED_SFB_START || sfb > pmax || sum != 2) {
+                RESTORE_PRED(sce0, sfb);
+                RESTORE_PRED(sce1, sfb);
+                start += sce0->ics.swb_sizes[g];
+                continue;
+            }
+            for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+                for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+                    float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
+                    float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
+                    ener0 += coef0*coef0;
+                    ener1 += coef1*coef1;
+                    ener01 += (coef0 + coef1)*(coef0 + coef1);
+                }
+            }
+            ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
+                                             ener0, ener1, ener01, -1);
+            ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
+                                             ener0, ener1, ener01, +1);
+            erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
+            if (erf->pass) {
+                sce0->ics.prediction_used[sfb] = 1;
+                sce1->ics.prediction_used[sfb] = 1;
                 count++;
             } else {
-                /* Restore band types, if changed - prediction never sets > RESERVED_BT */
-                if (sce0->ics.prediction_used[sfb] && sce0->band_type[sfb] < RESERVED_BT)
-                    sce0->band_type[sfb] = sce0->orig_band_type[sfb];
-                if (sce1->ics.prediction_used[sfb] && sce1->band_type[sfb] < RESERVED_BT)
-                    sce1->band_type[sfb] = sce1->orig_band_type[sfb];
-                sce0->ics.prediction_used[sfb] = sce1->ics.prediction_used[sfb] = 0;
+                RESTORE_PRED(sce0, sfb);
+                RESTORE_PRED(sce1, sfb);
             }
             start += sce0->ics.swb_sizes[g];
         }
     }
 
     sce1->ics.predictor_present = sce0->ics.predictor_present = !!count;
-
-    if (!count)
-        return;
-
-    sce1->ics.predictor_reset_group = sce0->ics.predictor_reset_group;
 }
 
 static void update_pred_resets(SingleChannelElement *sce)
@@ -266,14 +210,12 @@ static void update_pred_resets(SingleChannelElement *sce)
     float avg_frame = 0.0f;
     IndividualChannelStream *ics = &sce->ics;
 
-    /* Some other code probably chose the reset group */
-    if (ics->predictor_reset_group)
-        return;
-
+    /* Update the counters and immediately update any frame behind schedule */
     if ((ics->predictor_reset_group = update_counters(&sce->ics, 1)))
         return;
 
     for (i = 1; i < 31; i++) {
+        /* Count-based */
         if (ics->predictor_reset_count[i] > max_frame) {
             max_group_id_c = i;
             max_frame = ics->predictor_reset_count[i];
@@ -281,8 +223,7 @@ static void update_pred_resets(SingleChannelElement *sce)
         avg_frame = (ics->predictor_reset_count[i] + avg_frame)/2;
     }
 
-    if (avg_frame*2 > max_frame && max_frame > PRED_RESET_MIN ||
-        max_frame > (2*PRED_RESET_MIN)/3) {
+    if (max_frame > PRED_RESET_MIN) {
         ics->predictor_reset_group = max_group_id_c;
     } else {
         ics->predictor_reset_group = 0;
@@ -291,56 +232,91 @@ static void update_pred_resets(SingleChannelElement *sce)
 
 void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
 {
-    int sfb, i, count = 0;
-    float *O34  = &s->scoefs[256*0], *P34  = &s->scoefs[256*1];
-    int cost_coeffs = PRICE_OFFSET;
-    int cost_pred = 1+(sce->ics.predictor_reset_group ? 5 : 0) +
-                  FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
+    int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0;
+    const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
+    float *O34  = &s->scoefs[128*0], *P34 = &s->scoefs[128*1];
+    float *SENT = &s->scoefs[128*2], *S34 = &s->scoefs[128*3];
+    float *QERR = &s->scoefs[128*4];
 
-    memcpy(sce->orig_band_type, sce->band_type, 128*sizeof(enum BandType));
+    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        sce->ics.predictor_present = 0;
+        return;
+    }
 
-    if (!sce->ics.predictor_initialized ||
-        sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-        reset_all_predictors(sce);
+    if (!sce->ics.predictor_initialized) {
+        reset_all_predictors(sce->predictor_state);
+        sce->ics.predictor_initialized = 1;
+        memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float));
         for (i = 1; i < 31; i++)
             sce->ics.predictor_reset_count[i] = i;
-        sce->ics.predictor_initialized = 1;
     }
 
     update_pred_resets(sce);
-    prepare_predictors(sce);
-    sce->ics.predictor_reset_group = 0;
-
-    for (sfb = PRED_SFB_START; sfb < ff_aac_pred_sfb_max[s->samplerate_index]; sfb++) {
-        float dist1 = 0.0f, dist2 = 0.0f;
-        int swb_start = sce->ics.swb_offset[sfb];
-        int swb_len = sce->ics.swb_offset[sfb + 1] - swb_start;
-        int cb1 = sce->band_type[sfb], cb2, bits1 = 0, bits2 = 0;
-        FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
-        abs_pow34_v(O34, &sce->coeffs[swb_start], swb_len);
-        abs_pow34_v(P34, &sce->prcoeffs[swb_start], swb_len);
-        cb2 = find_min_book(find_max_val(1, swb_len, P34), sce->sf_idx[sfb]);
-        if (cb2 <= cb1) {
-            dist1 += quantize_band_cost(s, &sce->coeffs[swb_start],   O34, swb_len,
-                                        sce->sf_idx[sfb], cb1, s->lambda / band->threshold,
-                                        INFINITY, &bits1, 0);
-            dist2 += quantize_band_cost(s, &sce->prcoeffs[swb_start], P34, swb_len,
-                                        sce->sf_idx[sfb], cb2, s->lambda / band->threshold,
-                                        INFINITY, &bits2, 0);
-            if (dist2 <= dist1) {
-                sce->ics.prediction_used[sfb] = 1;
-                sce->band_type[sfb] = cb2;
-                count++;
-            }
-            cost_coeffs += bits1;
-            cost_pred   += bits2;
+    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
+
+    for (sfb = PRED_SFB_START; sfb < pmax; sfb++) {
+        int cost1, cost2, cb_p;
+        float dist1, dist2, dist_spec_err = 0.0f;
+        const int cb_n = sce->band_type[sfb];
+        const int start_coef = sce->ics.swb_offset[sfb];
+        const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef;
+        const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
+
+        if (start_coef + num_coeffs > MAX_PREDICTORS)
+            continue;
+
+        /* Normal coefficients */
+        abs_pow34_v(O34, &sce->coeffs[start_coef], num_coeffs);
+        dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
+                                              O34, num_coeffs, sce->sf_idx[sfb],
+                                              cb_n, s->lambda / band->threshold, INFINITY, &cost1, 0);
+        cost_coeffs += cost1;
+
+        /* Encoded coefficients - needed for #bits, band type and quant. error */
+        for (i = 0; i < num_coeffs; i++)
+            SENT[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i];
+        abs_pow34_v(S34, SENT, num_coeffs);
+        if (cb_n < RESERVED_BT)
+            cb_p = find_min_book(find_max_val(1, num_coeffs, S34), sce->sf_idx[sfb]);
+        else
+            cb_p = cb_n;
+        quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs,
+                                      sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
+                                      &cost2, 0);
+
+        /* Reconstructed coefficients - needed for distortion measurements */
+        for (i = 0; i < num_coeffs; i++)
+            sce->prcoeffs[start_coef + i] += QERR[i] != 0.0f ? (sce->prcoeffs[start_coef + i] - QERR[i]) : 0.0f;
+        abs_pow34_v(P34, &sce->prcoeffs[start_coef], num_coeffs);
+        if (cb_n < RESERVED_BT)
+            cb_p = find_min_book(find_max_val(1, num_coeffs, P34), sce->sf_idx[sfb]);
+        else
+            cb_p = cb_n;
+        dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
+                                              P34, num_coeffs, sce->sf_idx[sfb],
+                                              cb_p, s->lambda / band->threshold, INFINITY, NULL, 0);
+        for (i = 0; i < num_coeffs; i++)
+            dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]);
+        dist_spec_err *= s->lambda / band->threshold;
+        dist2 += dist_spec_err;
+
+        if (dist2 <= dist1 && cb_p <= cb_n) {
+            cost_pred += cost2;
+            sce->ics.prediction_used[sfb] = 1;
+            sce->band_alt[sfb]  = cb_n;
+            sce->band_type[sfb] = cb_p;
+            count++;
+        } else {
+            cost_pred += cost1;
+            sce->band_alt[sfb] = cb_p;
         }
     }
 
-    if (count && cost_pred > cost_coeffs) {
-        memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
-        memcpy(sce->band_type, sce->orig_band_type, sizeof(sce->band_type));
+    if (count && cost_coeffs < cost_pred) {
         count = 0;
+        for (sfb = PRED_SFB_START; sfb < pmax; sfb++)
+            RESTORE_PRED(sce, sfb);
+        memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
     }
 
     sce->ics.predictor_present = !!count;
@@ -352,14 +328,15 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
 void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce)
 {
     int sfb;
+    IndividualChannelStream *ics = &sce->ics;
+    const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
 
-    if (!sce->ics.predictor_present ||
-        sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
+    if (!ics->predictor_present)
         return;
 
-    put_bits(&s->pb, 1, !!sce->ics.predictor_reset_group);
-    if (sce->ics.predictor_reset_group)
-        put_bits(&s->pb, 5, sce->ics.predictor_reset_group);
-    for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); sfb++)
-        put_bits(&s->pb, 1, sce->ics.prediction_used[sfb]);
+    put_bits(&s->pb, 1, !!ics->predictor_reset_group);
+    if (ics->predictor_reset_group)
+        put_bits(&s->pb, 5, ics->predictor_reset_group);
+    for (sfb = 0; sfb < pmax; sfb++)
+        put_bits(&s->pb, 1, ics->prediction_used[sfb]);
 }
diff --git a/libavcodec/aacenc_pred.h b/libavcodec/aacenc_pred.h
index 0694c8a..999af86 100644
--- a/libavcodec/aacenc_pred.h
+++ b/libavcodec/aacenc_pred.h
@@ -34,16 +34,12 @@
 #define PRED_RESET_FRAME_MIN 240
 
 /* Any frame with less than this amount of frames since last reset is ok */
-#define PRED_RESET_MIN 128
+#define PRED_RESET_MIN 64
 
 /* Raise to filter any low frequency artifacts due to prediction */
 #define PRED_SFB_START 10
 
-/* Offset for the number of bits to encode normal coefficients */
-#define PRICE_OFFSET 440
-
 void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce);
-void ff_aac_update_main_pred(AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe);
 void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe);
 void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce);
 void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce);



More information about the ffmpeg-cvslog mailing list