[FFmpeg-cvslog] aacenc: reorder coding tools

Rostislav Pehlivanov git at videolan.org
Wed Sep 2 07:41:46 CEST 2015


ffmpeg | branch: master | Rostislav Pehlivanov <atomnuker at gmail.com> | Wed Sep  2 06:26:45 2015 +0100| [20dc527139b44ede4e0e35d6473f4c3993a71844] | committer: Rostislav Pehlivanov

aacenc: reorder coding tools

This commit reorders the coding tools such that they're doing what
the decoder does in reverse order. The very first thing the decoder
does is to decode M/S stereo if that's signalled, then prediction,
IS, and finally TNS and PNS in another function.
adjust_frame_information()'s application of IS and M/S was taken
out into two separate functions since prediction doesn't expect
to get the raw coefficients but rather the coefficients at that
part of the encoding process.

The results show a much better PSNR when any combination of
Intensity Stereo, Mid/Side stereo and Prediction is used, which
is a sign of an increased encoder efficiency as well as the fact
that the decoder gets what it expects.

Otherwise, with only IS, PNS or prediction there are neither
regressions nor improvements except in the case of IS, which
now by itself (or with PNS) is less prone to artifacts. Enabling
M/S (using stereo_mode) as well will also reduce stereo artifacts
induced by IS, so in the very near future M/S may be enabled
by default.

Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=20dc527139b44ede4e0e35d6473f4c3993a71844
---

 libavcodec/aacenc.c |  151 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 88 insertions(+), 63 deletions(-)

diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 444ca0e..9cce1a2 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -196,37 +196,6 @@ static void adjust_frame_information(ChannelElement *cpe, int chans)
 {
     int i, w, w2, g, ch;
     int maxsfb, cmaxsfb;
-    IndividualChannelStream *ics;
-
-    if (cpe->common_window) {
-        ics = &cpe->ch[0].ics;
-        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
-            for (w2 =  0; w2 < ics->group_len[w]; w2++) {
-                int start = (w+w2) * 128;
-                for (g = 0; g < ics->num_swb; g++) {
-                    //apply Intensity stereo coeffs transformation
-                    if (cpe->is_mask[w*16 + g]) {
-                        int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
-                        float scale = cpe->ch[0].is_ener[w*16+g];
-                        for (i = 0; i < ics->swb_sizes[g]; i++) {
-                            cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i]) * scale;
-                            cpe->ch[1].coeffs[start+i] = 0.0f;
-                        }
-                    } else if (cpe->ms_mask[w*16 + g] &&
-                               cpe->ch[0].band_type[w*16 + g] < NOISE_BT &&
-                               cpe->ch[1].band_type[w*16 + g] < NOISE_BT) {
-                        for (i = 0; i < ics->swb_sizes[g]; i++) {
-                            float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
-                            float R = L - cpe->ch[1].coeffs[start+i];
-                            cpe->ch[0].coeffs[start+i] = L;
-                            cpe->ch[1].coeffs[start+i] = R;
-                        }
-                    }
-                    start += ics->swb_sizes[g];
-                }
-            }
-        }
-    }
 
     for (ch = 0; ch < chans; ch++) {
         IndividualChannelStream *ics = &cpe->ch[ch].ics;
@@ -273,6 +242,59 @@ static void adjust_frame_information(ChannelElement *cpe, int chans)
     }
 }
 
+static void apply_intensity_stereo(ChannelElement *cpe)
+{
+    int w, w2, g, i;
+    IndividualChannelStream *ics = &cpe->ch[0].ics;
+    if (!cpe->common_window)
+        return;
+    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
+        for (w2 =  0; w2 < ics->group_len[w]; w2++) {
+            int start = (w+w2) * 128;
+            for (g = 0; g < ics->num_swb; g++) {
+                int p  = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
+                float scale = cpe->ch[0].is_ener[w*16+g];
+                if (!cpe->is_mask[w*16 + g]) {
+                    start += ics->swb_sizes[g];
+                    continue;
+                }
+                for (i = 0; i < ics->swb_sizes[g]; i++) {
+                    float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
+                    cpe->ch[0].coeffs[start+i] = sum;
+                    cpe->ch[1].coeffs[start+i] = 0.0f;
+                }
+                start += ics->swb_sizes[g];
+            }
+        }
+    }
+}
+
+static void apply_mid_side_stereo(ChannelElement *cpe)
+{
+    int w, w2, g, i;
+    IndividualChannelStream *ics = &cpe->ch[0].ics;
+    if (!cpe->common_window)
+        return;
+    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
+        for (w2 =  0; w2 < ics->group_len[w]; w2++) {
+            int start = (w+w2) * 128;
+            for (g = 0; g < ics->num_swb; g++) {
+                if (!cpe->ms_mask[w*16 + g]) {
+                    start += ics->swb_sizes[g];
+                    continue;
+                }
+                for (i = 0; i < ics->swb_sizes[g]; i++) {
+                    float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
+                    float R = L - cpe->ch[1].coeffs[start+i];
+                    cpe->ch[0].coeffs[start+i] = L;
+                    cpe->ch[1].coeffs[start+i] = R;
+                }
+                start += ics->swb_sizes[g];
+            }
+        }
+    }
+}
+
 /**
  * Encode scalefactor band coding type.
  */
@@ -280,6 +302,9 @@ static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
 {
     int w;
 
+    if (s->coder->set_special_band_scalefactors)
+        s->coder->set_special_band_scalefactors(s, sce);
+
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
 }
@@ -464,7 +489,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     float **samples = s->planar_samples, *samples2, *la, *overlap;
     ChannelElement *cpe;
     SingleChannelElement *sce;
-    int i, ch, w, g, chans, tag, start_ch, ret;
+    int i, ch, w, chans, tag, start_ch, ret;
     int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
     int chan_el_counter[4];
     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
@@ -603,7 +628,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                     }
                 }
             }
-            for (ch = 0; ch < chans; ch++) {
+            for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
                 sce = &cpe->ch[ch];
                 s->cur_channel = start_ch + ch;
                 if (s->options.pns && s->coder->search_for_pns)
@@ -616,40 +641,40 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                     tns_mode = 1;
             }
             s->cur_channel = start_ch;
-            if (s->options.stereo_mode && cpe->common_window) {
-                if (s->options.stereo_mode > 0) {
-                    IndividualChannelStream *ics = &cpe->ch[0].ics;
-                    for (w = 0; w < ics->num_windows; w += ics->group_len[w])
-                        for (g = 0;  g < ics->num_swb; g++)
-                            cpe->ms_mask[w*16+g] = 1;
-                } else if (s->coder->search_for_ms) {
-                    s->coder->search_for_ms(s, cpe);
-                }
-            }
-            if (s->options.intensity_stereo && s->coder->search_for_is) {
-                s->coder->search_for_is(s, avctx, cpe);
+            if (s->options.intensity_stereo) { /* Intensity Stereo */
+                if (s->coder->search_for_is)
+                    s->coder->search_for_is(s, avctx, cpe);
                 if (cpe->is_mode) is_mode = 1;
+                apply_intensity_stereo(cpe);
             }
-            if (s->coder->set_special_band_scalefactors)
-                for (ch = 0; ch < chans; ch++)
-                    s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
-            adjust_frame_information(cpe, chans);
-            for (ch = 0; ch < chans; ch++) {
-                sce = &cpe->ch[ch];
-                s->cur_channel = start_ch + ch;
-                if (s->options.pred && s->coder->search_for_pred)
-                    s->coder->search_for_pred(s, sce);
-                if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
+            if (s->options.pred) { /* Prediction */
+                for (ch = 0; ch < chans; ch++) {
+                    sce = &cpe->ch[ch];
+                    s->cur_channel = start_ch + ch;
+                    if (s->options.pred && s->coder->search_for_pred)
+                        s->coder->search_for_pred(s, sce);
+                    if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
+                }
+                if (s->coder->adjust_common_prediction)
+                    s->coder->adjust_common_prediction(s, cpe);
+                for (ch = 0; ch < chans; ch++) {
+                    sce = &cpe->ch[ch];
+                    s->cur_channel = start_ch + ch;
+                    if (s->options.pred && s->coder->apply_main_pred)
+                        s->coder->apply_main_pred(s, sce);
+                }
+                s->cur_channel = start_ch;
             }
-            if (s->options.pred && s->coder->adjust_common_prediction)
-                s->coder->adjust_common_prediction(s, cpe);
-            for (ch = 0; ch < chans; ch++) {
-                sce = &cpe->ch[ch];
-                s->cur_channel = start_ch + ch;
-                if (s->options.pred && s->coder->apply_main_pred)
-                    s->coder->apply_main_pred(s, sce);
+            if (s->options.stereo_mode) { /* Mid/Side stereo */
+                if (s->options.stereo_mode == -1 && s->coder->search_for_ms)
+                    s->coder->search_for_ms(s, cpe);
+                else if (cpe->common_window)
+                    memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
+                for (w = 0; w < 128; w++)
+                    cpe->ms_mask[w] = cpe->is_mask[w] ? 0 : cpe->ms_mask[w];
+                apply_mid_side_stereo(cpe);
             }
-            s->cur_channel = start_ch;
+            adjust_frame_information(cpe, chans);
             if (chans == 2) {
                 put_bits(&s->pb, 1, cpe->common_window);
                 if (cpe->common_window) {



More information about the ffmpeg-cvslog mailing list