[FFmpeg-devel] [PATCH 2/3] [GSoC] [AAC] aaccoder: Implement Perceptual Noise Substitution
Rostislav Pehlivanov
atomnuker at gmail.com
Mon Apr 13 03:59:55 CEST 2015
Here's an objective comparison of the difference the patch makes:
Original spectrum:
https://0x0.st/T7.png
Encoded without the patchset:
https://0x0.st/Th.png
Encoded with the patchset:
https://0x0.st/TF.png
Difference:
https://0x0.st/TR.png <https://0x0.st/T5.png>
Made by: "$ composite Encoded_clean.png Encoded_noise.png -compose
difference Difference.png"
On 12 April 2015 at 05:50, Rostislav Pehlivanov <atomnuker at gmail.com> wrote:
> This commit enables the use of the pseudo-codebook NOISE_BT for encoding
> noise values for the twoloop coder. It uses the energy values from the
> psychoacoustic model to determine whether it's acceptable to use noise for
> encoding and if so, determine the energy of the noise. The cost system was
> modified to accept the 13th codebook (skipping the nonexistent 12). The
> system was extended such that in the future it should be easy to add
> support for intensity stereo coding, hence the use of arrays for the maps.
>
> The parameters used (such as the factor by which uplims is multiplied when
> comparing and the cost returned by the BT_NOISE case) and the way energy
> values are converted to scalefactor indices have not been extensively
> tested, so safe values which should not break anything were used. They are
> to be tweaked in the future to optimize audio quality if needed.
> ---
> libavcodec/aaccoder.c | 128
> +++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 86 insertions(+), 42 deletions(-)
>
> diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
> index 64eee32..f7662fd 100644
> --- a/libavcodec/aaccoder.c
> +++ b/libavcodec/aaccoder.c
> @@ -40,6 +40,9 @@
> #include "aacenc.h"
> #include "aactab.h"
>
> +/** Total number of usable codebooks **/
> +#define CB_TOT 13
> +
> /** bits needed to code codebook run value for long windows */
> static const uint8_t run_value_bits_long[64] = {
> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
> @@ -57,6 +60,10 @@ static const uint8_t * const run_value_bits[2] = {
> run_value_bits_long, run_value_bits_short
> };
>
> +/** Map to convert values from BandCodingPath index to a codebook index
> **/
> +static const uint8_t aac_cb_out_map[CB_TOT] =
> {0,1,2,3,4,5,6,7,8,9,10,11,13};
> +/** Inverse map to convert from codebooks to BandCodingPath indices **/
> +static const uint8_t aac_cb_in_map[CB_TOT+1] =
> {0,1,2,3,4,5,6,7,8,9,10,11,0,12};
>
> /**
> * Quantize one coefficient.
> @@ -108,7 +115,7 @@ static av_always_inline float
> quantize_and_encode_band_cost_template(
> const float *scaled, int size, int
> scale_idx,
> int cb, const float lambda, const float
> uplim,
> int *bits, int BT_ZERO, int BT_UNSIGNED,
> - int BT_PAIR, int BT_ESC)
> + int BT_PAIR, int BT_ESC, int BT_NOISE)
> {
> const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS -
> SCALE_DIV_512;
> const float Q = ff_aac_pow2sf_tab [q_idx];
> @@ -119,8 +126,6 @@ static av_always_inline float
> quantize_and_encode_band_cost_template(
> float cost = 0;
> const int dim = BT_PAIR ? 2 : 4;
> int resbits = 0;
> - const int range = aac_cb_range[cb];
> - const int maxval = aac_cb_maxval[cb];
> int off;
>
> if (BT_ZERO) {
> @@ -130,15 +135,22 @@ static av_always_inline float
> quantize_and_encode_band_cost_template(
> *bits = 0;
> return cost * lambda;
> }
> + if (BT_NOISE) {
> + for (i = 0; i < size; i++)
> + cost += in[i]*in[i];
> + if (bits)
> + *bits = 0;
> + return cost * lambda;
> + }
> if (!scaled) {
> abs_pow34_v(s->scoefs, in, size);
> scaled = s->scoefs;
> }
> - quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED,
> maxval);
> + quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED,
> aac_cb_maxval[cb]);
> if (BT_UNSIGNED) {
> off = 0;
> } else {
> - off = maxval;
> + off = aac_cb_maxval[cb];
> }
> for (i = 0; i < size; i += dim) {
> const float *vec;
> @@ -147,7 +159,7 @@ static av_always_inline float
> quantize_and_encode_band_cost_template(
> int curbits;
> float rd = 0.0f;
> for (j = 0; j < dim; j++) {
> - curidx *= range;
> + curidx *= aac_cb_range[cb];
> curidx += quants[j] + off;
> }
> curbits = ff_aac_spectral_bits[cb-1][curidx];
> @@ -207,8 +219,8 @@ static av_always_inline float
> quantize_and_encode_band_cost_template(
> return cost;
> }
>
> -#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED,
> BT_PAIR, BT_ESC) \
> -static float quantize_and_encode_band_cost_ ## NAME(
> \
> +#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED,
> BT_PAIR, BT_ESC, BT_NOISE) \
> +static float quantize_and_encode_band_cost_ ## NAME(
> \
> struct AACEncContext *s,
> \
> PutBitContext *pb, const float *in,
> \
> const float *scaled, int size, int
> scale_idx, \
> @@ -217,15 +229,16 @@ static float quantize_and_encode_band_cost_ ## NAME(
> return quantize_and_encode_band_cost_template(
> \
> s, pb, in, scaled, size, scale_idx,
> \
> BT_ESC ? ESC_BT : cb, lambda, uplim,
> bits, \
> - BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC);
> \
> + BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC,
> BT_NOISE); \
> }
>
> -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0)
> -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0)
> -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0)
> -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0)
> -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0)
> -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1)
> +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0, 0)
> +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0)
> +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0)
> +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0)
> +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0)
> +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1, 0)
> +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1)
>
> static float (*const quantize_and_encode_band_cost_arr[])(
> struct AACEncContext *s,
> @@ -245,6 +258,8 @@ static float (*const
> quantize_and_encode_band_cost_arr[])(
> quantize_and_encode_band_cost_UPAIR,
> quantize_and_encode_band_cost_UPAIR,
> quantize_and_encode_band_cost_ESC,
> + NULL,
> + quantize_and_encode_band_cost_NOISE,
> };
>
> #define quantize_and_encode_band_cost( \
> @@ -312,7 +327,7 @@ typedef struct BandCodingPath {
> static void encode_window_bands_info(AACEncContext *s,
> SingleChannelElement *sce,
> int win, int group_len, const float
> lambda)
> {
> - BandCodingPath path[120][12];
> + BandCodingPath path[120][CB_TOT];
> int w, swb, cb, start, size;
> int i, j;
> const int max_sfb = sce->ics.max_sfb;
> @@ -325,7 +340,7 @@ static void encode_window_bands_info(AACEncContext *s,
> SingleChannelElement *sce
>
> abs_pow34_v(s->scoefs, sce->coeffs, 1024);
> start = win*128;
> - for (cb = 0; cb < 12; cb++) {
> + for (cb = 0; cb < CB_TOT; cb++) {
> path[0][cb].cost = 0.0f;
> path[0][cb].prev_idx = -1;
> path[0][cb].run = 0;
> @@ -333,7 +348,7 @@ static void encode_window_bands_info(AACEncContext *s,
> SingleChannelElement *sce
> for (swb = 0; swb < max_sfb; swb++) {
> size = sce->ics.swb_sizes[swb];
> if (sce->zeroes[win*16 + swb]) {
> - for (cb = 0; cb < 12; cb++) {
> + for (cb = 0; cb < CB_TOT; cb++) {
> path[swb+1][cb].prev_idx = cb;
> path[swb+1][cb].cost = path[swb][cb].cost;
> path[swb+1][cb].run = path[swb][cb].run + 1;
> @@ -343,14 +358,14 @@ static void encode_window_bands_info(AACEncContext
> *s, SingleChannelElement *sce
> int mincb = next_mincb;
> next_minrd = INFINITY;
> next_mincb = 0;
> - for (cb = 0; cb < 12; cb++) {
> + for (cb = 0; cb < CB_TOT; cb++) {
> float cost_stay_here, cost_get_here;
> float rd = 0.0f;
> for (w = 0; w < group_len; w++) {
> FFPsyBand *band = &s->psy.ch
> [s->cur_channel].psy_bands[(win+w)*16+swb];
> rd += quantize_band_cost(s, sce->coeffs + start +
> w*128,
> s->scoefs + start + w*128,
> size,
> - sce->sf_idx[(win+w)*16+swb],
> cb,
> + sce->sf_idx[(win+w)*16+swb],
> aac_cb_out_map[cb],
> lambda / band->threshold,
> INFINITY, NULL);
> }
> cost_stay_here = path[swb][cb].cost + rd;
> @@ -379,7 +394,7 @@ static void encode_window_bands_info(AACEncContext *s,
> SingleChannelElement *sce
> //convert resulting path from backward-linked list
> stack_len = 0;
> idx = 0;
> - for (cb = 1; cb < 12; cb++)
> + for (cb = 1; cb < CB_TOT; cb++)
> if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
> idx = cb;
> ppos = max_sfb;
> @@ -394,12 +409,13 @@ static void encode_window_bands_info(AACEncContext
> *s, SingleChannelElement *sce
> //perform actual band info encoding
> start = 0;
> for (i = stack_len - 1; i >= 0; i--) {
> - put_bits(&s->pb, 4, stackcb[i]);
> + cb = aac_cb_out_map[stackcb[i]];
> + put_bits(&s->pb, 4, cb);
> count = stackrun[i];
> - memset(sce->zeroes + win*16 + start, !stackcb[i], count);
> + memset(sce->zeroes + win*16 + start, !cb, count);
> //XXX: memset when band_type is also uint8_t
> for (j = 0; j < count; j++) {
> - sce->band_type[win*16 + start] = stackcb[i];
> + sce->band_type[win*16 + start] = cb;
> start++;
> }
> while (count >= run_esc) {
> @@ -413,7 +429,7 @@ static void encode_window_bands_info(AACEncContext *s,
> SingleChannelElement *sce
> static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement
> *sce,
> int win, int group_len, const float
> lambda)
> {
> - BandCodingPath path[120][12];
> + BandCodingPath path[120][CB_TOT];
> int w, swb, cb, start, size;
> int i, j;
> const int max_sfb = sce->ics.max_sfb;
> @@ -426,7 +442,7 @@ static void codebook_trellis_rate(AACEncContext *s,
> SingleChannelElement *sce,
>
> abs_pow34_v(s->scoefs, sce->coeffs, 1024);
> start = win*128;
> - for (cb = 0; cb < 12; cb++) {
> + for (cb = 0; cb < CB_TOT; cb++) {
> path[0][cb].cost = run_bits+4;
> path[0][cb].prev_idx = -1;
> path[0][cb].run = 0;
> @@ -450,7 +466,7 @@ static void codebook_trellis_rate(AACEncContext *s,
> SingleChannelElement *sce,
> }
> next_minbits = path[swb+1][0].cost;
> next_mincb = 0;
> - for (cb = 1; cb < 12; cb++) {
> + for (cb = 1; cb < CB_TOT; cb++) {
> path[swb+1][cb].cost = 61450;
> path[swb+1][cb].prev_idx = -1;
> path[swb+1][cb].run = 0;
> @@ -459,6 +475,7 @@ static void codebook_trellis_rate(AACEncContext *s,
> SingleChannelElement *sce,
> float minbits = next_minbits;
> int mincb = next_mincb;
> int startcb = sce->band_type[win*16+swb];
> + startcb = aac_cb_in_map[startcb];
> next_minbits = INFINITY;
> next_mincb = 0;
> for (cb = 0; cb < startcb; cb++) {
> @@ -466,13 +483,20 @@ static void codebook_trellis_rate(AACEncContext *s,
> SingleChannelElement *sce,
> path[swb+1][cb].prev_idx = -1;
> path[swb+1][cb].run = 0;
> }
> - for (cb = startcb; cb < 12; cb++) {
> + for (cb = startcb; cb < CB_TOT; cb++) {
> float cost_stay_here, cost_get_here;
> float bits = 0.0f;
> + if (cb == 12 && sce->band_type[win*16+swb] != NOISE_BT) {
> + path[swb+1][cb].cost = 61450;
> + path[swb+1][cb].prev_idx = -1;
> + path[swb+1][cb].run = 0;
> + continue;
> + }
> for (w = 0; w < group_len; w++) {
> bits += quantize_band_cost(s, sce->coeffs + start +
> w*128,
> s->scoefs + start + w*128,
> size,
> -
> sce->sf_idx[(win+w)*16+swb], cb,
> +
> sce->sf_idx[(win+w)*16+swb],
> + aac_cb_out_map[cb],
> 0, INFINITY, NULL);
> }
> cost_stay_here = path[swb][cb].cost + bits;
> @@ -501,7 +525,7 @@ static void codebook_trellis_rate(AACEncContext *s,
> SingleChannelElement *sce,
> //convert resulting path from backward-linked list
> stack_len = 0;
> idx = 0;
> - for (cb = 1; cb < 12; cb++)
> + for (cb = 1; cb < CB_TOT; cb++)
> if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
> idx = cb;
> ppos = max_sfb;
> @@ -517,12 +541,13 @@ static void codebook_trellis_rate(AACEncContext *s,
> SingleChannelElement *sce,
> //perform actual band info encoding
> start = 0;
> for (i = stack_len - 1; i >= 0; i--) {
> - put_bits(&s->pb, 4, stackcb[i]);
> + cb = aac_cb_out_map[stackcb[i]];
> + put_bits(&s->pb, 4, cb);
> count = stackrun[i];
> - memset(sce->zeroes + win*16 + start, !stackcb[i], count);
> + memset(sce->zeroes + win*16 + start, !cb, count);
> //XXX: memset when band_type is also uint8_t
> for (j = 0; j < count; j++) {
> - sce->band_type[win*16 + start] = stackcb[i];
> + sce->band_type[win*16 + start] = cb;
> start++;
> }
> while (count >= run_esc) {
> @@ -711,8 +736,9 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
> {
> int start = 0, i, w, w2, g;
> int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate /
> avctx->channels * (lambda / 120.f);
> - float dists[128] = { 0 }, uplims[128];
> + float dists[128] = { 0 }, uplims[128] = { 0 }, energies[128] = { 0 };
> float maxvals[128];
> + float energy_avg = 0;
> int fflag, minscaler;
> int its = 0;
> int allz = 0;
> @@ -724,32 +750,47 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
> //XXX: some heuristic to determine initial quantizers will reduce
> search time
> //determine zero bands and upper limits
> for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
> + start = 0;
> for (g = 0; g < sce->ics.num_swb; g++) {
> int nz = 0;
> - float uplim = 0.0f;
> + float uplim = 0.0f, energy = 0.0f;
> + float freq =
> (w*16+g)*(avctx->sample_rate/(1024/sce->ics.num_windows)/2);
> for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
> FFPsyBand *band = &s->psy.ch
> [s->cur_channel].psy_bands[(w+w2)*16+g];
> uplim += band->threshold;
> - if (band->energy <= band->threshold || band->threshold ==
> 0.0f) {
> + energy += band->energy;
> + if (band->threshold == 0.0f || band->energy <
> band->threshold) {
> sce->zeroes[(w+w2)*16+g] = 1;
> continue;
> }
> nz = 1;
> }
> uplims[w*16+g] = uplim *512;
> + energies[w*16+g] = log2f(2*(energy*energy));
> + energy_avg = (energies[w*16+g] + energy_avg)/2;
> + if (freq > 4000.0f && energy <= uplim * 1.52f) {
> + sce->band_type[w*16+g] = NOISE_BT;
> + nz = 1;
> + } else { /* Will be determined in the two-loop search */
> + sce->band_type[w*16+g] = 0;
> + }
> sce->zeroes[w*16+g] = !nz;
> if (nz)
> minthr = FFMIN(minthr, uplim);
> allz |= nz;
> + start += sce->ics.swb_sizes[g];
> }
> }
> for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
> for (g = 0; g < sce->ics.num_swb; g++) {
> if (sce->zeroes[w*16+g]) {
> sce->sf_idx[w*16+g] = SCALE_ONE_POS;
> - continue;
> + } else if (sce->band_type[w*16+g] == NOISE_BT) {
> + float energy_norm = (energies[w*16+g]/energy_avg);
> + sce->sf_idx[w*16+g] = av_clip((energy_norm*256) - 70,
> -100, 155);
> + } else {
> + sce->sf_idx[w*16+g] = SCALE_ONE_POS +
> FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
> }
> - sce->sf_idx[w*16+g] = SCALE_ONE_POS +
> FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
> }
> }
>
> @@ -785,7 +826,8 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
> int cb;
> float dist = 0.0f;
>
> - if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >=
> 218) {
> + if (sce->zeroes[w*16+g] || sce->band_type[w*16+g] >=
> NOISE_BT ||
> + sce->sf_idx[w*16+g] >= 218) {
> start += sce->ics.swb_sizes[g];
> continue;
> }
> @@ -814,11 +856,11 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
> }
> if (tbits > destbits) {
> for (i = 0; i < 128; i++)
> - if (sce->sf_idx[i] < 218 - qstep)
> + if (sce->sf_idx[i] < 218 - qstep && sce->band_type[i]
> < NOISE_BT)
> sce->sf_idx[i] += qstep;
> } else {
> for (i = 0; i < 128; i++)
> - if (sce->sf_idx[i] > 60 - qstep)
> + if (sce->sf_idx[i] > 60 - qstep && sce->band_type[i]
> < NOISE_BT)
> sce->sf_idx[i] -= qstep;
> }
> qstep >>= 1;
> @@ -831,7 +873,7 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
> for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
> {
> for (g = 0; g < sce->ics.num_swb; g++) {
> int prevsc = sce->sf_idx[w*16+g];
> - if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g]
> > 60) {
> + if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g]
> > 60 && sce->band_type[w*16+g] < NOISE_BT) {
> if (find_min_book(maxvals[w*16+g],
> sce->sf_idx[w*16+g]-1))
> sce->sf_idx[w*16+g]--;
> else //Try to make sure there is some energy in every
> band
> @@ -839,6 +881,8 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
> }
> sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g],
> minscaler, minscaler + SCALE_MAX_DIFF);
> sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
> + if (sce->band_type[w*16+g] >= NOISE_BT)
> + continue;
> if (sce->sf_idx[w*16+g] != prevsc)
> fflag = 1;
> sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g],
> sce->sf_idx[w*16+g]);
> --
> 2.1.4
>
>
More information about the ffmpeg-devel
mailing list